blob: 1538d6fa81652023eb774c9c603ec791e87a3de3 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0

/* net/sched/sch_etf.c  Earliest TxTime First queueing discipline.
 *
 * Authors:	Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
 *		Vinicius Costa Gomes <vinicius.gomes@intel.com>
 */
8
9#include <linux/module.h>
10#include <linux/types.h>
11#include <linux/kernel.h>
12#include <linux/string.h>
13#include <linux/errno.h>
Jesus Sanchez-Palencia4b15c702018-07-03 15:43:00 -070014#include <linux/errqueue.h>
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -070015#include <linux/rbtree.h>
16#include <linux/skbuff.h>
17#include <linux/posix-timers.h>
18#include <net/netlink.h>
19#include <net/sch_generic.h>
20#include <net/pkt_sched.h>
21#include <net/sock.h>
22
/* Flag tests for anything exposing a ->flags member (used here on
 * struct tc_etf_qopt). Each evaluates to the raw masked bit, i.e. non-zero
 * when the corresponding TC_ETF_* flag is set.
 */
#define DEADLINE_MODE_IS_ON(qopt) ((qopt)->flags & TC_ETF_DEADLINE_MODE_ON)
#define OFFLOAD_IS_ON(qopt) ((qopt)->flags & TC_ETF_OFFLOAD_ON)
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -070025
26struct etf_sched_data {
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -070027 bool offload;
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -070028 bool deadline_mode;
29 int clockid;
30 int queue;
31 s32 delta; /* in ns */
32 ktime_t last; /* The txtime of the last skb sent to the netdevice. */
33 struct rb_root head;
34 struct qdisc_watchdog watchdog;
35 ktime_t (*get_time)(void);
36};
37
38static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
39 [TCA_ETF_PARMS] = { .len = sizeof(struct tc_etf_qopt) },
40};
41
42static inline int validate_input_params(struct tc_etf_qopt *qopt,
43 struct netlink_ext_ack *extack)
44{
45 /* Check if params comply to the following rules:
46 * * Clockid and delta must be valid.
47 *
48 * * Dynamic clockids are not supported.
49 *
50 * * Delta must be a positive integer.
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -070051 *
52 * Also note that for the HW offload case, we must
53 * expect that system clocks have been synchronized to PHC.
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -070054 */
55 if (qopt->clockid < 0) {
56 NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
57 return -ENOTSUPP;
58 }
59
60 if (qopt->clockid != CLOCK_TAI) {
61 NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
62 return -EINVAL;
63 }
64
65 if (qopt->delta < 0) {
66 NL_SET_ERR_MSG(extack, "Delta must be positive");
67 return -EINVAL;
68 }
69
70 return 0;
71}
72
73static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
74{
75 struct etf_sched_data *q = qdisc_priv(sch);
76 ktime_t txtime = nskb->tstamp;
77 struct sock *sk = nskb->sk;
78 ktime_t now;
79
80 if (!sk)
81 return false;
82
83 if (!sock_flag(sk, SOCK_TXTIME))
84 return false;
85
86 /* We don't perform crosstimestamping.
87 * Drop if packet's clockid differs from qdisc's.
88 */
89 if (sk->sk_clockid != q->clockid)
90 return false;
91
92 if (sk->sk_txtime_deadline_mode != q->deadline_mode)
93 return false;
94
95 now = q->get_time();
96 if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
97 return false;
98
99 return true;
100}
101
102static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
103{
104 struct etf_sched_data *q = qdisc_priv(sch);
105 struct rb_node *p;
106
107 p = rb_first(&q->head);
108 if (!p)
109 return NULL;
110
111 return rb_to_skb(p);
112}
113
114static void reset_watchdog(struct Qdisc *sch)
115{
116 struct etf_sched_data *q = qdisc_priv(sch);
117 struct sk_buff *skb = etf_peek_timesortedlist(sch);
118 ktime_t next;
119
120 if (!skb)
121 return;
122
123 next = ktime_sub_ns(skb->tstamp, q->delta);
124 qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
125}
126
Jesus Sanchez-Palencia4b15c702018-07-03 15:43:00 -0700127static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
128{
129 struct sock_exterr_skb *serr;
130 struct sk_buff *clone;
131 ktime_t txtime = skb->tstamp;
132
133 if (!skb->sk || !(skb->sk->sk_txtime_report_errors))
134 return;
135
136 clone = skb_clone(skb, GFP_ATOMIC);
137 if (!clone)
138 return;
139
140 serr = SKB_EXT_ERR(clone);
141 serr->ee.ee_errno = err;
142 serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
143 serr->ee.ee_type = 0;
144 serr->ee.ee_code = code;
145 serr->ee.ee_pad = 0;
146 serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
147 serr->ee.ee_info = txtime; /* low part of tstamp */
148
149 if (sock_queue_err_skb(skb->sk, clone))
150 kfree_skb(clone);
151}
152
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700153static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
154 struct sk_buff **to_free)
155{
156 struct etf_sched_data *q = qdisc_priv(sch);
157 struct rb_node **p = &q->head.rb_node, *parent = NULL;
158 ktime_t txtime = nskb->tstamp;
159
Jesus Sanchez-Palencia4b15c702018-07-03 15:43:00 -0700160 if (!is_packet_valid(sch, nskb)) {
161 report_sock_error(nskb, EINVAL,
162 SO_EE_CODE_TXTIME_INVALID_PARAM);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700163 return qdisc_drop(nskb, sch, to_free);
Jesus Sanchez-Palencia4b15c702018-07-03 15:43:00 -0700164 }
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700165
166 while (*p) {
167 struct sk_buff *skb;
168
169 parent = *p;
170 skb = rb_to_skb(parent);
171 if (ktime_after(txtime, skb->tstamp))
172 p = &parent->rb_right;
173 else
174 p = &parent->rb_left;
175 }
176 rb_link_node(&nskb->rbnode, parent, p);
177 rb_insert_color(&nskb->rbnode, &q->head);
178
179 qdisc_qstats_backlog_inc(sch, nskb);
180 sch->q.qlen++;
181
182 /* Now we may need to re-arm the qdisc watchdog for the next packet. */
183 reset_watchdog(sch);
184
185 return NET_XMIT_SUCCESS;
186}
187
188static void timesortedlist_erase(struct Qdisc *sch, struct sk_buff *skb,
189 bool drop)
190{
191 struct etf_sched_data *q = qdisc_priv(sch);
192
193 rb_erase(&skb->rbnode, &q->head);
194
195 /* The rbnode field in the skb re-uses these fields, now that
196 * we are done with the rbnode, reset them.
197 */
198 skb->next = NULL;
199 skb->prev = NULL;
200 skb->dev = qdisc_dev(sch);
201
202 qdisc_qstats_backlog_dec(sch, skb);
203
204 if (drop) {
205 struct sk_buff *to_free = NULL;
206
Jesus Sanchez-Palencia4b15c702018-07-03 15:43:00 -0700207 report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);
208
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700209 qdisc_drop(skb, sch, &to_free);
210 kfree_skb_list(to_free);
211 qdisc_qstats_overlimit(sch);
212 } else {
213 qdisc_bstats_update(sch, skb);
214
215 q->last = skb->tstamp;
216 }
217
218 sch->q.qlen--;
219}
220
221static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
222{
223 struct etf_sched_data *q = qdisc_priv(sch);
224 struct sk_buff *skb;
225 ktime_t now, next;
226
227 skb = etf_peek_timesortedlist(sch);
228 if (!skb)
229 return NULL;
230
231 now = q->get_time();
232
233 /* Drop if packet has expired while in queue. */
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700234 if (ktime_before(skb->tstamp, now)) {
235 timesortedlist_erase(sch, skb, true);
236 skb = NULL;
237 goto out;
238 }
239
240 /* When in deadline mode, dequeue as soon as possible and change the
241 * txtime from deadline to (now + delta).
242 */
243 if (q->deadline_mode) {
244 timesortedlist_erase(sch, skb, false);
245 skb->tstamp = now;
246 goto out;
247 }
248
249 next = ktime_sub_ns(skb->tstamp, q->delta);
250
251 /* Dequeue only if now is within the [txtime - delta, txtime] range. */
252 if (ktime_after(now, next))
253 timesortedlist_erase(sch, skb, false);
254 else
255 skb = NULL;
256
257out:
258 /* Now we may need to re-arm the qdisc watchdog for the next packet. */
259 reset_watchdog(sch);
260
261 return skb;
262}
263
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -0700264static void etf_disable_offload(struct net_device *dev,
265 struct etf_sched_data *q)
266{
267 struct tc_etf_qopt_offload etf = { };
268 const struct net_device_ops *ops;
269 int err;
270
271 if (!q->offload)
272 return;
273
274 ops = dev->netdev_ops;
275 if (!ops->ndo_setup_tc)
276 return;
277
278 etf.queue = q->queue;
279 etf.enable = 0;
280
281 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
282 if (err < 0)
283 pr_warn("Couldn't disable ETF offload for queue %d\n",
284 etf.queue);
285}
286
287static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
288 struct netlink_ext_ack *extack)
289{
290 const struct net_device_ops *ops = dev->netdev_ops;
291 struct tc_etf_qopt_offload etf = { };
292 int err;
293
294 if (q->offload)
295 return 0;
296
297 if (!ops->ndo_setup_tc) {
298 NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
299 return -EOPNOTSUPP;
300 }
301
302 etf.queue = q->queue;
303 etf.enable = 1;
304
305 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
306 if (err < 0) {
307 NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
308 return err;
309 }
310
311 return 0;
312}
313
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700314static int etf_init(struct Qdisc *sch, struct nlattr *opt,
315 struct netlink_ext_ack *extack)
316{
317 struct etf_sched_data *q = qdisc_priv(sch);
318 struct net_device *dev = qdisc_dev(sch);
319 struct nlattr *tb[TCA_ETF_MAX + 1];
320 struct tc_etf_qopt *qopt;
321 int err;
322
323 if (!opt) {
324 NL_SET_ERR_MSG(extack,
325 "Missing ETF qdisc options which are mandatory");
326 return -EINVAL;
327 }
328
329 err = nla_parse_nested(tb, TCA_ETF_MAX, opt, etf_policy, extack);
330 if (err < 0)
331 return err;
332
333 if (!tb[TCA_ETF_PARMS]) {
334 NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
335 return -EINVAL;
336 }
337
338 qopt = nla_data(tb[TCA_ETF_PARMS]);
339
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -0700340 pr_debug("delta %d clockid %d offload %s deadline %s\n",
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700341 qopt->delta, qopt->clockid,
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -0700342 OFFLOAD_IS_ON(qopt) ? "on" : "off",
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700343 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");
344
345 err = validate_input_params(qopt, extack);
346 if (err < 0)
347 return err;
348
349 q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
350
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -0700351 if (OFFLOAD_IS_ON(qopt)) {
352 err = etf_enable_offload(dev, q, extack);
353 if (err < 0)
354 return err;
355 }
356
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700357 /* Everything went OK, save the parameters used. */
358 q->delta = qopt->delta;
359 q->clockid = qopt->clockid;
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -0700360 q->offload = OFFLOAD_IS_ON(qopt);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700361 q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
362
363 switch (q->clockid) {
364 case CLOCK_REALTIME:
365 q->get_time = ktime_get_real;
366 break;
367 case CLOCK_MONOTONIC:
368 q->get_time = ktime_get;
369 break;
370 case CLOCK_BOOTTIME:
371 q->get_time = ktime_get_boottime;
372 break;
373 case CLOCK_TAI:
374 q->get_time = ktime_get_clocktai;
375 break;
376 default:
377 NL_SET_ERR_MSG(extack, "Clockid is not supported");
378 return -ENOTSUPP;
379 }
380
381 qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);
382
383 return 0;
384}
385
386static void timesortedlist_clear(struct Qdisc *sch)
387{
388 struct etf_sched_data *q = qdisc_priv(sch);
389 struct rb_node *p = rb_first(&q->head);
390
391 while (p) {
392 struct sk_buff *skb = rb_to_skb(p);
393
394 p = rb_next(p);
395
396 rb_erase(&skb->rbnode, &q->head);
397 rtnl_kfree_skbs(skb, skb);
398 sch->q.qlen--;
399 }
400}
401
402static void etf_reset(struct Qdisc *sch)
403{
404 struct etf_sched_data *q = qdisc_priv(sch);
405
406 /* Only cancel watchdog if it's been initialized. */
407 if (q->watchdog.qdisc == sch)
408 qdisc_watchdog_cancel(&q->watchdog);
409
410 /* No matter which mode we are on, it's safe to clear both lists. */
411 timesortedlist_clear(sch);
412 __qdisc_reset_queue(&sch->q);
413
414 sch->qstats.backlog = 0;
415 sch->q.qlen = 0;
416
417 q->last = 0;
418}
419
420static void etf_destroy(struct Qdisc *sch)
421{
422 struct etf_sched_data *q = qdisc_priv(sch);
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -0700423 struct net_device *dev = qdisc_dev(sch);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700424
425 /* Only cancel watchdog if it's been initialized. */
426 if (q->watchdog.qdisc == sch)
427 qdisc_watchdog_cancel(&q->watchdog);
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -0700428
429 etf_disable_offload(dev, q);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700430}
431
432static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
433{
434 struct etf_sched_data *q = qdisc_priv(sch);
435 struct tc_etf_qopt opt = { };
436 struct nlattr *nest;
437
438 nest = nla_nest_start(skb, TCA_OPTIONS);
439 if (!nest)
440 goto nla_put_failure;
441
442 opt.delta = q->delta;
443 opt.clockid = q->clockid;
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -0700444 if (q->offload)
445 opt.flags |= TC_ETF_OFFLOAD_ON;
446
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700447 if (q->deadline_mode)
448 opt.flags |= TC_ETF_DEADLINE_MODE_ON;
449
450 if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
451 goto nla_put_failure;
452
453 return nla_nest_end(skb, nest);
454
455nla_put_failure:
456 nla_nest_cancel(skb, nest);
457 return -1;
458}
459
460static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
461 .id = "etf",
462 .priv_size = sizeof(struct etf_sched_data),
463 .enqueue = etf_enqueue_timesortedlist,
464 .dequeue = etf_dequeue_timesortedlist,
465 .peek = etf_peek_timesortedlist,
466 .init = etf_init,
467 .reset = etf_reset,
468 .destroy = etf_destroy,
469 .dump = etf_dump,
470 .owner = THIS_MODULE,
471};
472
473static int __init etf_module_init(void)
474{
475 return register_qdisc(&etf_qdisc_ops);
476}
477
478static void __exit etf_module_exit(void)
479{
480 unregister_qdisc(&etf_qdisc_ops);
481}
482module_init(etf_module_init)
483module_exit(etf_module_exit)
484MODULE_LICENSE("GPL");