// SPDX-License-Identifier: GPL-2.0

/* net/sched/sch_taprio.c	Time Aware Priority Scheduler
 *
 * Authors:	Vinicius Costa Gomes <vinicius.gomes@intel.com>
 *
 */
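
/* Illustrative configuration (a sketch only; the authoritative iproute2
 * syntax is documented in tc-taprio(8) and may differ between versions):
 *
 *   tc qdisc replace dev eth0 parent root handle 100 taprio \
 *       num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 \
 *       queues 1@0 1@1 2@2 \
 *       base-time 1528743495910289987 \
 *       sched-entry S 01 300000 \
 *       sched-entry S 02 300000 \
 *       sched-entry S 04 400000 \
 *       clockid CLOCK_TAI
 *
 * Each "sched-entry S <gate mask> <interval>" becomes one struct
 * sched_entry below: the gates named in the mask stay open for
 * <interval> nanoseconds, then the next entry takes over.
 */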

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/list.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/math64.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/sch_generic.h>

static LIST_HEAD(taprio_list);
static DEFINE_SPINLOCK(taprio_list_lock);

#define TAPRIO_ALL_GATES_OPEN -1

struct sched_entry {
	struct list_head list;

	/* The instant that this entry "closes" and the next one
	 * should open, the qdisc will make some effort so that no
	 * packet leaves after this time.
	 */
	ktime_t close_time;
	atomic_t budget;
	int index;
	u32 gate_mask;
	u32 interval;
	u8 command;
};

struct taprio_sched {
	struct Qdisc **qdiscs;
	struct Qdisc *root;
	s64 base_time;
	int clockid;
	atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
				    * speeds it's sub-nanoseconds per byte
				    */
	size_t num_entries;

	/* Protects the update side of the RCU protected current_entry */
	spinlock_t current_entry_lock;
	struct sched_entry __rcu *current_entry;
	struct list_head entries;
	ktime_t (*get_time)(void);
	struct hrtimer advance_timer;
	struct list_head taprio_list;
};

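/* Enqueue: map the skb to its tx queue's child qdisc and account for it in
 * the root qdisc's backlog and qlen. Gate state is only checked at dequeue
 * time.
 */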
static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			  struct sk_buff **to_free)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct Qdisc *child;
	int queue;

	queue = skb_get_queue_mapping(skb);

	child = q->qdiscs[queue];
	if (unlikely(!child))
		return qdisc_drop(skb, sch, to_free);

	qdisc_qstats_backlog_inc(sch, skb);
	sch->q.qlen++;

	return qdisc_enqueue(skb, child, to_free);
}

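/* Peek: report the first skb found across the tx queues, but only if the
 * gate for its traffic class is open in the current entry; a gate_mask of
 * zero (all gates closed) means nothing is eligible right now.
 */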
static struct sk_buff *taprio_peek(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct sched_entry *entry;
	struct sk_buff *skb;
	u32 gate_mask;
	int i;

	rcu_read_lock();
	entry = rcu_dereference(q->current_entry);
	gate_mask = entry ? entry->gate_mask : -1;
	rcu_read_unlock();

	if (!gate_mask)
		return NULL;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct Qdisc *child = q->qdiscs[i];
		int prio;
		u8 tc;

		if (unlikely(!child))
			continue;

		skb = child->ops->peek(child);
		if (!skb)
			continue;

		prio = skb->priority;
		tc = netdev_get_prio_tc_map(dev, prio);

		if (!(gate_mask & BIT(tc)))
			return NULL;

		return skb;
	}

	return NULL;
}

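/* length_to_duration(): transmission time, in nanoseconds, of 'len' bytes
 * at the link speed cached in picos_per_byte. taprio_set_budget() does the
 * inverse job for an entry: how many bytes fit in its interval.
 */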
static inline int length_to_duration(struct taprio_sched *q, int len)
{
	return div_u64(len * atomic64_read(&q->picos_per_byte), 1000);
}

static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
{
	atomic_set(&entry->budget,
		   div64_u64((u64)entry->interval * 1000,
			     atomic64_read(&q->picos_per_byte)));
}

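/* Dequeue: hand out one skb whose traffic class has an open gate, as long
 * as its transmission both fits inside the entry's remaining byte budget
 * and finishes before the entry's close_time (the guard band). Before the
 * schedule starts, all gates are treated as open.
 */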
static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct sched_entry *entry;
	struct sk_buff *skb;
	u32 gate_mask;
	int i;

	if (atomic64_read(&q->picos_per_byte) == -1) {
		WARN_ONCE(1, "taprio: dequeue() called with unknown picos per byte.");
		return NULL;
	}

	rcu_read_lock();
	entry = rcu_dereference(q->current_entry);
	/* if there's no entry, it means that the schedule didn't
	 * start yet, so force all gates to be open; this is in
	 * accordance with IEEE 802.1Qbv-2015 Section 8.6.9.4.5
	 * "AdminGateStates"
	 */
	gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
	rcu_read_unlock();

	if (!gate_mask)
		return NULL;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct Qdisc *child = q->qdiscs[i];
		ktime_t guard;
		int prio;
		int len;
		u8 tc;

		if (unlikely(!child))
			continue;

		skb = child->ops->peek(child);
		if (!skb)
			continue;

		prio = skb->priority;
		tc = netdev_get_prio_tc_map(dev, prio);

		if (!(gate_mask & BIT(tc)))
			continue;

		len = qdisc_pkt_len(skb);
		guard = ktime_add_ns(q->get_time(),
				     length_to_duration(q, len));

		/* In the case that there's no gate entry, there's no
		 * guard band ...
		 */
		if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
		    ktime_after(guard, entry->close_time))
			return NULL;

		/* ... and no budget. */
		if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
		    atomic_sub_return(len, &entry->budget) < 0)
			return NULL;

		skb = child->ops->dequeue(child);
		if (unlikely(!skb))
			return NULL;

		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;

		return skb;
	}

	return NULL;
}

static bool should_restart_cycle(const struct taprio_sched *q,
				 const struct sched_entry *entry)
{
	WARN_ON(!entry);

	return list_is_last(&entry->list, &q->entries);
}

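/* hrtimer callback that advances the schedule: pick the next entry
 * (wrapping around at the end of the cycle), compute its close_time and
 * byte budget, publish it via RCU, re-arm the timer for that close_time
 * and kick the qdisc so eligible packets get dequeued. The very first
 * entry was pre-computed when the schedule was started.
 */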
static enum hrtimer_restart advance_sched(struct hrtimer *timer)
{
	struct taprio_sched *q = container_of(timer, struct taprio_sched,
					      advance_timer);
	struct sched_entry *entry, *next;
	struct Qdisc *sch = q->root;
	ktime_t close_time;

	spin_lock(&q->current_entry_lock);
	entry = rcu_dereference_protected(q->current_entry,
					  lockdep_is_held(&q->current_entry_lock));

	/* This only happens the first time the schedule runs, so it
	 * occurs once per schedule. The first entry is pre-calculated
	 * during the schedule initialization.
	 */
	if (unlikely(!entry)) {
		next = list_first_entry(&q->entries, struct sched_entry,
					list);
		close_time = next->close_time;
		goto first_run;
	}

	if (should_restart_cycle(q, entry))
		next = list_first_entry(&q->entries, struct sched_entry,
					list);
	else
		next = list_next_entry(entry, list);

	close_time = ktime_add_ns(entry->close_time, next->interval);

	next->close_time = close_time;
	taprio_set_budget(q, next);

first_run:
	rcu_assign_pointer(q->current_entry, next);
	spin_unlock(&q->current_entry_lock);

	hrtimer_set_expires(&q->advance_timer, close_time);

	rcu_read_lock();
	__netif_schedule(sch);
	rcu_read_unlock();

	return HRTIMER_RESTART;
}

static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
	[TCA_TAPRIO_SCHED_ENTRY_INDEX]	   = { .type = NLA_U32 },
	[TCA_TAPRIO_SCHED_ENTRY_CMD]	   = { .type = NLA_U8 },
	[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 },
	[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]  = { .type = NLA_U32 },
};

static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = {
	[TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED },
};

static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
	[TCA_TAPRIO_ATTR_PRIOMAP] = {
		.len = sizeof(struct tc_mqprio_qopt)
	},
	[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]   = { .type = NLA_NESTED },
	[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]    = { .type = NLA_S64 },
	[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED },
	[TCA_TAPRIO_ATTR_SCHED_CLOCKID]      = { .type = NLA_S32 },
};

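/* Copy the netlink attributes of one schedule entry into 'entry'.
 * An interval of zero is rejected as invalid.
 */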
static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
			    struct netlink_ext_ack *extack)
{
	u32 interval = 0;

	if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
		entry->command = nla_get_u8(
			tb[TCA_TAPRIO_SCHED_ENTRY_CMD]);

	if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK])
		entry->gate_mask = nla_get_u32(
			tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]);

	if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL])
		interval = nla_get_u32(
			tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);

	if (interval == 0) {
		NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
		return -EINVAL;
	}

	entry->interval = interval;

	return 0;
}

static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry,
			     int index, struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
	int err;

	err = nla_parse_nested(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n,
			       entry_policy, NULL);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Could not parse nested entry");
		return -EINVAL;
	}

	entry->index = index;

	return fill_sched_entry(tb, entry, extack);
}

/* Returns the number of entries in case of success */
static int parse_sched_single_entry(struct nlattr *n,
				    struct taprio_sched *q,
				    struct netlink_ext_ack *extack)
{
	struct nlattr *tb_entry[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
	struct nlattr *tb_list[TCA_TAPRIO_SCHED_MAX + 1] = { };
	struct sched_entry *entry;
	bool found = false;
	u32 index;
	int err;

	err = nla_parse_nested(tb_list, TCA_TAPRIO_SCHED_MAX,
			       n, entry_list_policy, NULL);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Could not parse nested entry");
		return -EINVAL;
	}

	if (!tb_list[TCA_TAPRIO_SCHED_ENTRY]) {
		NL_SET_ERR_MSG(extack, "Single-entry must include an entry");
		return -EINVAL;
	}

	err = nla_parse_nested(tb_entry, TCA_TAPRIO_SCHED_ENTRY_MAX,
			       tb_list[TCA_TAPRIO_SCHED_ENTRY],
			       entry_policy, NULL);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Could not parse nested entry");
		return -EINVAL;
	}

	if (!tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]) {
		NL_SET_ERR_MSG(extack, "Entry must specify an index\n");
		return -EINVAL;
	}

	index = nla_get_u32(tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]);
	if (index >= q->num_entries) {
		NL_SET_ERR_MSG(extack, "Index for single entry exceeds number of entries in schedule");
		return -EINVAL;
	}

	list_for_each_entry(entry, &q->entries, list) {
		if (entry->index == index) {
			found = true;
			break;
		}
	}

	if (!found) {
		NL_SET_ERR_MSG(extack, "Could not find entry");
		return -ENOENT;
	}

	err = fill_sched_entry(tb_entry, entry, extack);
	if (err < 0)
		return err;

	return q->num_entries;
}

static int parse_sched_list(struct nlattr *list,
			    struct taprio_sched *q,
			    struct netlink_ext_ack *extack)
{
	struct nlattr *n;
	int err, rem;
	int i = 0;

	if (!list)
		return -EINVAL;

	nla_for_each_nested(n, list, rem) {
		struct sched_entry *entry;

		if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) {
			NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'");
			continue;
		}

		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
		if (!entry) {
			NL_SET_ERR_MSG(extack, "Not enough memory for entry");
			return -ENOMEM;
		}

		err = parse_sched_entry(n, entry, i, extack);
		if (err < 0) {
			kfree(entry);
			return err;
		}

		list_add_tail(&entry->list, &q->entries);
		i++;
	}

	q->num_entries = i;

	return i;
}

/* Returns the number of entries in case of success */
static int parse_taprio_opt(struct nlattr **tb, struct taprio_sched *q,
			    struct netlink_ext_ack *extack)
{
	int err = 0;
	int clockid;

	if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] &&
	    tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY])
		return -EINVAL;

	if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] && q->num_entries == 0)
		return -EINVAL;

	if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID])
		return -EINVAL;

	if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME])
		q->base_time = nla_get_s64(
			tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);

	if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
		clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);

		/* We only support static clockids, and we don't allow
		 * the clockid to be changed after the first init.
		 */
		if (clockid < 0 || (q->clockid != -1 && q->clockid != clockid))
			return -EINVAL;

		q->clockid = clockid;
	}

	if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
		err = parse_sched_list(
			tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], q, extack);
	else if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY])
		err = parse_sched_single_entry(
			tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY], q, extack);

	/* parse_sched_* return the number of entries in the schedule;
	 * a schedule with zero entries is an error.
	 */
	if (err == 0) {
		NL_SET_ERR_MSG(extack, "The schedule should contain at least one entry");
		return -EINVAL;
	}

	return err;
}

static int taprio_parse_mqprio_opt(struct net_device *dev,
				   struct tc_mqprio_qopt *qopt,
				   struct netlink_ext_ack *extack)
{
	int i, j;

	if (!qopt) {
		NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
		return -EINVAL;
	}

	/* Verify num_tc is not out of max range */
	if (qopt->num_tc > TC_MAX_QUEUE) {
		NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range");
		return -EINVAL;
	}

	/* taprio imposes that traffic classes map 1:n to tx queues */
	if (qopt->num_tc > dev->num_tx_queues) {
		NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues");
		return -EINVAL;
	}

	/* Verify priority mapping uses valid tcs */
	for (i = 0; i < TC_BITMASK + 1; i++) {
		if (qopt->prio_tc_map[i] >= qopt->num_tc) {
			NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping");
			return -EINVAL;
		}
	}

	for (i = 0; i < qopt->num_tc; i++) {
		unsigned int last = qopt->offset[i] + qopt->count[i];

		/* Verify the queue count is within the tx range; 'last'
		 * being equal to real_num_tx_queues indicates the last
		 * queue is in use.
		 */
		if (qopt->offset[i] >= dev->num_tx_queues ||
		    !qopt->count[i] ||
		    last > dev->real_num_tx_queues) {
			NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping");
			return -EINVAL;
		}

		/* Verify that the offset and counts do not overlap */
		for (j = i + 1; j < qopt->num_tc; j++) {
			if (last > qopt->offset[j]) {
				NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping");
				return -EINVAL;
			}
		}
	}

	return 0;
}

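/* Pick the instant at which the schedule starts running: base_time if it
 * is still in the future, otherwise the start of the next full cycle
 * after 'now' (a cycle being the sum of all entry intervals).
 */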
static ktime_t taprio_get_start_time(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct sched_entry *entry;
	ktime_t now, base, cycle;
	s64 n;

	base = ns_to_ktime(q->base_time);
	cycle = 0;

	/* Calculate the cycle_time by summing all the intervals. */
	list_for_each_entry(entry, &q->entries, list)
		cycle = ktime_add_ns(cycle, entry->interval);

	if (!cycle)
		return base;

	now = q->get_time();

	if (ktime_after(base, now))
		return base;

	/* Schedule the start time for the beginning of the next
	 * cycle.
	 */
	n = div64_s64(ktime_sub_ns(now, base), cycle);

	return ktime_add_ns(base, (n + 1) * cycle);
}

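/* Arm the advance_timer so that the schedule begins at 'start': the first
 * entry's close_time and budget are pre-computed here, and current_entry
 * is cleared so that all gates stay open until the timer first fires.
 */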
static void taprio_start_sched(struct Qdisc *sch, ktime_t start)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct sched_entry *first;
	unsigned long flags;

	spin_lock_irqsave(&q->current_entry_lock, flags);

	first = list_first_entry(&q->entries, struct sched_entry,
				 list);

	first->close_time = ktime_add_ns(start, first->interval);
	taprio_set_budget(q, first);
	rcu_assign_pointer(q->current_entry, NULL);

	spin_unlock_irqrestore(&q->current_entry_lock, flags);

	hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
}

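/* Cache the link speed, expressed as picoseconds per byte, so the hot
 * dequeue path can compute transmission times without querying ethtool.
 * -1 is stored when the speed is unknown.
 */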
static void taprio_set_picos_per_byte(struct net_device *dev,
				      struct taprio_sched *q)
{
	struct ethtool_link_ksettings ecmd;
	int picos_per_byte = -1;

	if (!__ethtool_get_link_ksettings(dev, &ecmd) &&
	    ecmd.base.speed != SPEED_UNKNOWN)
		picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8,
					   ecmd.base.speed * 1000 * 1000);

	atomic64_set(&q->picos_per_byte, picos_per_byte);
	netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
		   dev->name, (long long)atomic64_read(&q->picos_per_byte),
		   ecmd.base.speed);
}

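/* netdevice notifier: when a link comes up or changes, refresh the cached
 * picos_per_byte of any taprio instance attached to that device.
 */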
static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
			       void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net_device *qdev;
	struct taprio_sched *q;
	bool found = false;

	ASSERT_RTNL();

	if (event != NETDEV_UP && event != NETDEV_CHANGE)
		return NOTIFY_DONE;

	spin_lock(&taprio_list_lock);
	list_for_each_entry(q, &taprio_list, taprio_list) {
		qdev = qdisc_dev(q->root);
		if (qdev == dev) {
			found = true;
			break;
		}
	}
	spin_unlock(&taprio_list_lock);

	if (found)
		taprio_set_picos_per_byte(dev, q);

	return NOTIFY_DONE;
}

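/* Parse and apply a configuration: validate the mqprio-style priority map
 * and the schedule, set up the clock source, create one pfifo child qdisc
 * per tx queue, program the device's tc/queue mappings and, if the start
 * time can be computed, kick off the schedule.
 */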
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
			 struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_mqprio_qopt *mqprio = NULL;
	int i, err, size;
	ktime_t start;

	err = nla_parse_nested(tb, TCA_TAPRIO_ATTR_MAX, opt,
			       taprio_policy, extack);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
		mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);

	err = taprio_parse_mqprio_opt(dev, mqprio, extack);
	if (err < 0)
		return err;

	/* A schedule with less than one entry is an error */
	size = parse_taprio_opt(tb, q, extack);
	if (size < 0)
		return size;

	hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
	q->advance_timer.function = advance_sched;

	switch (q->clockid) {
	case CLOCK_REALTIME:
		q->get_time = ktime_get_real;
		break;
	case CLOCK_MONOTONIC:
		q->get_time = ktime_get;
		break;
	case CLOCK_BOOTTIME:
		q->get_time = ktime_get_boottime;
		break;
	case CLOCK_TAI:
		q->get_time = ktime_get_clocktai;
		break;
	default:
		return -ENOTSUPP;
	}

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		struct Qdisc *qdisc;

		dev_queue = netdev_get_tx_queue(dev, i);
		qdisc = qdisc_create_dflt(dev_queue,
					  &pfifo_qdisc_ops,
					  TC_H_MAKE(TC_H_MAJ(sch->handle),
						    TC_H_MIN(i + 1)),
					  extack);
		if (!qdisc)
			return -ENOMEM;

		if (i < dev->real_num_tx_queues)
			qdisc_hash_add(qdisc, false);

		q->qdiscs[i] = qdisc;
	}

	if (mqprio) {
		netdev_set_num_tc(dev, mqprio->num_tc);
		for (i = 0; i < mqprio->num_tc; i++)
			netdev_set_tc_queue(dev, i,
					    mqprio->count[i],
					    mqprio->offset[i]);

		/* Always use supplied priority mappings */
		for (i = 0; i < TC_BITMASK + 1; i++)
			netdev_set_prio_tc_map(dev, i,
					       mqprio->prio_tc_map[i]);
	}

	taprio_set_picos_per_byte(dev, q);
	start = taprio_get_start_time(sch);
	if (!start)
		return 0;

	taprio_start_sched(sch, start);

	return 0;
}

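/* Tear down: remove this instance from the global taprio_list, cancel the
 * advance timer, put the per-queue child qdiscs and free all schedule
 * entries.
 */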
static void taprio_destroy(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct sched_entry *entry, *n;
	unsigned int i;

	spin_lock(&taprio_list_lock);
	list_del(&q->taprio_list);
	spin_unlock(&taprio_list_lock);

	hrtimer_cancel(&q->advance_timer);

	if (q->qdiscs) {
		for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++)
			qdisc_put(q->qdiscs[i]);

		kfree(q->qdiscs);
	}
	q->qdiscs = NULL;

	netdev_set_num_tc(dev, 0);

	list_for_each_entry_safe(entry, n, &q->entries, list) {
		list_del(&entry->list);
		kfree(entry);
	}
}

static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
		       struct netlink_ext_ack *extack)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);

	INIT_LIST_HEAD(&q->entries);
	spin_lock_init(&q->current_entry_lock);

	/* We may overwrite the configuration later */
	hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);

	q->root = sch;

	/* We only support static clockids. Use an invalid value as default
	 * and get the valid one on taprio_change().
	 */
	q->clockid = -1;

	if (sch->parent != TC_H_ROOT)
		return -EOPNOTSUPP;

	if (!netif_is_multiqueue(dev))
		return -EOPNOTSUPP;

	/* pre-allocate qdisc, attachment can't fail */
	q->qdiscs = kcalloc(dev->num_tx_queues,
			    sizeof(q->qdiscs[0]),
			    GFP_KERNEL);

	if (!q->qdiscs)
		return -ENOMEM;

	if (!opt)
		return -EINVAL;

	spin_lock(&taprio_list_lock);
	list_add(&q->taprio_list, &taprio_list);
	spin_unlock(&taprio_list_lock);

	return taprio_change(sch, opt, extack);
}

static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
					     unsigned long cl)
{
	struct net_device *dev = qdisc_dev(sch);
	unsigned long ntx = cl - 1;

	if (ntx >= dev->num_tx_queues)
		return NULL;

	return netdev_get_tx_queue(dev, ntx);
}

static int taprio_graft(struct Qdisc *sch, unsigned long cl,
			struct Qdisc *new, struct Qdisc **old,
			struct netlink_ext_ack *extack)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

	if (!dev_queue)
		return -EINVAL;

	if (dev->flags & IFF_UP)
		dev_deactivate(dev);

	*old = q->qdiscs[cl - 1];
	q->qdiscs[cl - 1] = new;

	if (new)
		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;

	if (dev->flags & IFF_UP)
		dev_activate(dev);

	return 0;
}

static int dump_entry(struct sk_buff *msg,
		      const struct sched_entry *entry)
{
	struct nlattr *item;

	item = nla_nest_start(msg, TCA_TAPRIO_SCHED_ENTRY);
	if (!item)
		return -ENOSPC;

	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index))
		goto nla_put_failure;

	if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command))
		goto nla_put_failure;

	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK,
			entry->gate_mask))
		goto nla_put_failure;

	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL,
			entry->interval))
		goto nla_put_failure;

	return nla_nest_end(msg, item);

nla_put_failure:
	nla_nest_cancel(msg, item);
	return -1;
}

static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_mqprio_qopt opt = { 0 };
	struct nlattr *nest, *entry_list;
	struct sched_entry *entry;
	unsigned int i;

	opt.num_tc = netdev_get_num_tc(dev);
	memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));

	for (i = 0; i < netdev_get_num_tc(dev); i++) {
		opt.count[i] = dev->tc_to_txq[i].count;
		opt.offset[i] = dev->tc_to_txq[i].offset;
	}

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (!nest)
		return -ENOSPC;

	if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
		goto options_error;

	if (nla_put_s64(skb, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
			q->base_time, TCA_TAPRIO_PAD))
		goto options_error;

	if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
		goto options_error;

	entry_list = nla_nest_start(skb, TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
	if (!entry_list)
		goto options_error;

	list_for_each_entry(entry, &q->entries, list) {
		if (dump_entry(skb, entry) < 0)
			goto options_error;
	}

	nla_nest_end(skb, entry_list);

	return nla_nest_end(skb, nest);

options_error:
	nla_nest_cancel(skb, nest);
	return -1;
}

static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
{
	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

	if (!dev_queue)
		return NULL;

	return dev_queue->qdisc_sleeping;
}

static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
{
	unsigned int ntx = TC_H_MIN(classid);

	if (!taprio_queue_get(sch, ntx))
		return 0;
	return ntx;
}

static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
			     struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

	tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle |= TC_H_MIN(cl);
	tcm->tcm_info = dev_queue->qdisc_sleeping->handle;

	return 0;
}

static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
				   struct gnet_dump *d)
	__releases(d->lock)
	__acquires(d->lock)
{
	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

	sch = dev_queue->qdisc_sleeping;
	if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
	    qdisc_qstats_copy(d, sch) < 0)
		return -1;
	return 0;
}

static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct net_device *dev = qdisc_dev(sch);
	unsigned long ntx;

	if (arg->stop)
		return;

	arg->count = arg->skip;
	for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
		if (arg->fn(sch, ntx + 1, arg) < 0) {
			arg->stop = 1;
			break;
		}
		arg->count++;
	}
}

static struct netdev_queue *taprio_select_queue(struct Qdisc *sch,
						struct tcmsg *tcm)
{
	return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
}

static const struct Qdisc_class_ops taprio_class_ops = {
	.graft		= taprio_graft,
	.leaf		= taprio_leaf,
	.find		= taprio_find,
	.walk		= taprio_walk,
	.dump		= taprio_dump_class,
	.dump_stats	= taprio_dump_class_stats,
	.select_queue	= taprio_select_queue,
};

static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
	.cl_ops		= &taprio_class_ops,
	.id		= "taprio",
	.priv_size	= sizeof(struct taprio_sched),
	.init		= taprio_init,
	.destroy	= taprio_destroy,
	.peek		= taprio_peek,
	.dequeue	= taprio_dequeue,
	.enqueue	= taprio_enqueue,
	.dump		= taprio_dump,
	.owner		= THIS_MODULE,
};

static struct notifier_block taprio_device_notifier = {
	.notifier_call = taprio_dev_notifier,
};

static int __init taprio_module_init(void)
{
	int err = register_netdevice_notifier(&taprio_device_notifier);

	if (err)
		return err;

	return register_qdisc(&taprio_qdisc_ops);
}

static void __exit taprio_module_exit(void)
{
	unregister_qdisc(&taprio_qdisc_ops);
	unregister_netdevice_notifier(&taprio_device_notifier);
}

module_init(taprio_module_init);
module_exit(taprio_module_exit);
MODULE_LICENSE("GPL");