// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_tbf.c	Token Bucket Filter queue.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
 *						 original idea by Martin Devera
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>


/*	Simple Token Bucket Filter.
	=======================================

	SOURCE.
	-------

	None.

	Description.
	------------

	A data flow obeys TBF with rate R and depth B, if for any
	time interval t_i...t_f the number of transmitted bits
	does not exceed B + R*(t_f-t_i).

	Packetized version of this definition:
	The sequence of packets of sizes s_i served at moments t_i
	obeys TBF, if for any i<=k:

	s_i+....+s_k <= B + R*(t_k - t_i)

	Algorithm.
	----------

	Let N(t_i) be B/R initially and N(t) grow continuously with time as:

	N(t+delta) = min{B/R, N(t) + delta}

	If the first packet in queue has length S, it may be
	transmitted only at the time t_* when S/R <= N(t_*),
	and in this case N(t) jumps:

	N(t_* + 0) = N(t_* - 0) - S/R.



	Actually, QoS requires two TBFs to be applied to a data stream.
	One of them controls the steady-state burst size, the other,
	with rate P (peak rate) and depth M (equal to the link MTU),
	limits bursts at a smaller time scale.

	It is easy to see that P>R, and B>M. If P is infinity, this double
	TBF is equivalent to a single one.

	When TBF works in reshaping mode, latency is estimated as:

	lat = max ((L-B)/R, (L-M)/P)


	NOTES.
	------

	If TBF throttles, it starts a watchdog timer, which will wake it up
	when it is ready to transmit.
	Note that the minimal timer resolution is 1/HZ.
	If no new packets arrive during this period,
	or if the device is not awakened by EOI for some previous packet,
	TBF can stop its activity for 1/HZ.


	This means that with depth B, the maximal rate is

	R_crit = B*HZ

	F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.

	Note that the peak rate TBF is much stricter: with MTU 1500
	P_crit = 150Kbytes/sec. So, if you need greater peak
	rates, use alpha with HZ=1000 :-)

	With classful TBF, limit is kept only for backwards compatibility.
	It is passed to the default bfifo qdisc - if the inner qdisc is
	changed, the limit is no longer effective.
*/
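
/* A worked example of the arithmetic above (illustrative only, not part of
 * the original description; the figures are assumed):
 *
 *	R = 10 Mbit/s = 1 250 000 bytes/s and HZ = 100 give
 *	B >= R/HZ = 12 500 bytes, i.e. roughly the ~10 Kbyte bucket quoted
 *	above. With B = 12 500 bytes, no peak rate and a backlog of
 *	L = 100 000 bytes, the reshaping latency is about
 *	lat = (L - B)/R = 87 500 / 1 250 000 s ~= 70 ms.
 *
 * A userspace configuration matching these numbers might look like the
 * following sketch (interface name assumed):
 *
 *	tc qdisc add dev eth0 root tbf rate 10mbit burst 12500 latency 70ms
 */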

struct tbf_sched_data {
/* Parameters */
	u32		limit;		/* Maximal length of backlog: bytes */
	u32		max_size;
	s64		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
	s64		mtu;
	struct psched_ratecfg rate;
	struct psched_ratecfg peak;

/* Variables */
	s64	tokens;			/* Current number of B tokens */
	s64	ptokens;		/* Current number of P tokens */
	s64	t_c;			/* Time check-point */
	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
	struct qdisc_watchdog watchdog;	/* Watchdog timer */
};


/* Time to Length, convert time in ns to length in bytes
 * to determine how many bytes can be sent in a given time.
 */
static u64 psched_ns_t2l(const struct psched_ratecfg *r,
			 u64 time_in_ns)
{
	/* The formula is:
	 * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC
	 */
	u64 len = time_in_ns * r->rate_bytes_ps;

	do_div(len, NSEC_PER_SEC);

	if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) {
		do_div(len, 53);
		len = len * 48;
	}

	if (len > r->overhead)
		len -= r->overhead;
	else
		len = 0;

	return len;
}

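/* Illustration (a sketch with assumed values, not taken from the code above):
 * for rate_bytes_ps = 1 250 000 (10 Mbit/s), zero overhead and an Ethernet
 * linklayer, psched_ns_t2l(r, NSEC_PER_MSEC) yields 1250, i.e. the number of
 * bytes that can be sent in 1 ms at the configured rate. With
 * TC_LINKLAYER_ATM the intermediate result is scaled by 48/53 to account for
 * ATM cell framing before the overhead is subtracted.
 */
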
static void tbf_offload_change(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_tbf_qopt_offload qopt;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	qopt.command = TC_TBF_REPLACE;
	qopt.handle = sch->handle;
	qopt.parent = sch->parent;
	qopt.replace_params.rate = q->rate;
	qopt.replace_params.max_size = q->max_size;
	qopt.replace_params.qstats = &sch->qstats;

	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
}

static void tbf_offload_destroy(struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_tbf_qopt_offload qopt;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	qopt.command = TC_TBF_DESTROY;
	qopt.handle = sch->handle;
	qopt.parent = sch->parent;
	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
}

static int tbf_offload_dump(struct Qdisc *sch)
{
	struct tc_tbf_qopt_offload qopt;

	qopt.command = TC_TBF_STATS;
	qopt.handle = sch->handle;
	qopt.parent = sch->parent;
	qopt.stats.bstats = &sch->bstats;
	qopt.stats.qstats = &sch->qstats;

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt);
}

static void tbf_offload_graft(struct Qdisc *sch, struct Qdisc *new,
			      struct Qdisc *old, struct netlink_ext_ack *extack)
{
	struct tc_tbf_qopt_offload graft_offload = {
		.handle		= sch->handle,
		.parent		= sch->parent,
		.child_handle	= new->handle,
		.command	= TC_TBF_GRAFT,
	};

	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
				   TC_SETUP_QDISC_TBF, &graft_offload, extack);
}

/* GSO packet is too big, segment it so that tbf can transmit
 * each segment in time
 */
static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *segs, *nskb;
	netdev_features_t features = netif_skb_features(skb);
	unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
	int ret, nb;

	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);

	if (IS_ERR_OR_NULL(segs))
		return qdisc_drop(skb, sch, to_free);

	nb = 0;
	skb_list_walk_safe(segs, segs, nskb) {
		skb_mark_not_on_list(segs);
		qdisc_skb_cb(segs)->pkt_len = segs->len;
		len += segs->len;
		ret = qdisc_enqueue(segs, q->qdisc, to_free);
		if (ret != NET_XMIT_SUCCESS) {
			if (net_xmit_drop_count(ret))
				qdisc_qstats_drop(sch);
		} else {
			nb++;
		}
	}
	sch->q.qlen += nb;
	if (nb > 1)
		qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
	consume_skb(skb);
	return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}

static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	unsigned int len = qdisc_pkt_len(skb);
	int ret;

	if (qdisc_pkt_len(skb) > q->max_size) {
		if (skb_is_gso(skb) &&
		    skb_gso_validate_mac_len(skb, q->max_size))
			return tbf_segment(skb, sch, to_free);
		return qdisc_drop(skb, sch, to_free);
	}
	ret = qdisc_enqueue(skb, q->qdisc, to_free);
	if (ret != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret))
			qdisc_qstats_drop(sch);
		return ret;
	}

	sch->qstats.backlog += len;
	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}

static bool tbf_peak_present(const struct tbf_sched_data *q)
{
	return q->peak.rate_bytes_ps;
}

static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = q->qdisc->ops->peek(q->qdisc);

	if (skb) {
		s64 now;
		s64 toks;
		s64 ptoks = 0;
		unsigned int len = qdisc_pkt_len(skb);

		now = ktime_get_ns();
		toks = min_t(s64, now - q->t_c, q->buffer);

		if (tbf_peak_present(q)) {
			ptoks = toks + q->ptokens;
			if (ptoks > q->mtu)
				ptoks = q->mtu;
			ptoks -= (s64) psched_l2t_ns(&q->peak, len);
		}
		toks += q->tokens;
		if (toks > q->buffer)
			toks = q->buffer;
		toks -= (s64) psched_l2t_ns(&q->rate, len);

		if ((toks|ptoks) >= 0) {
			skb = qdisc_dequeue_peeked(q->qdisc);
			if (unlikely(!skb))
				return NULL;

			q->t_c = now;
			q->tokens = toks;
			q->ptokens = ptoks;
			qdisc_qstats_backlog_dec(sch, skb);
			sch->q.qlen--;
			qdisc_bstats_update(sch, skb);
			return skb;
		}

		qdisc_watchdog_schedule_ns(&q->watchdog,
					   now + max_t(long, -toks, -ptoks));

		/* Maybe we have a shorter packet in the queue,
		   which could be sent now. It sounds tempting,
		   but it is wrong in principle:
		   we MUST NOT reorder packets under these circumstances.

		   Really, if we split the flow into independent
		   subflows, it would be a very good solution.
		   This is the main idea of all FQ algorithms
		   (cf. CSZ, HPFQ, HFSC)
		 */

		qdisc_qstats_overlimit(sch);
	}
	return NULL;
}

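/* Worked example of the token arithmetic above (a sketch, figures assumed):
 * with rate = 1 250 000 bytes/s and buffer = 10 000 000 ns (10 ms of tokens),
 * a 1500-byte head-of-line packet costs psched_l2t_ns(&q->rate, 1500), about
 * 1 200 000 ns. If the qdisc has been idle for at least that long (assuming
 * no tokens left over from earlier), toks stays >= 0 and the packet is
 * released immediately; otherwise the watchdog is armed to fire -toks
 * nanoseconds in the future and dequeue is retried then.
 */
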
static void tbf_reset(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	q->t_c = ktime_get_ns();
	q->tokens = q->buffer;
	q->ptokens = q->mtu;
	qdisc_watchdog_cancel(&q->watchdog);
}

static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
	[TCA_TBF_PARMS]	= { .len = sizeof(struct tc_tbf_qopt) },
	[TCA_TBF_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_TBF_RATE64]	= { .type = NLA_U64 },
	[TCA_TBF_PRATE64]	= { .type = NLA_U64 },
	[TCA_TBF_BURST] = { .type = NLA_U32 },
	[TCA_TBF_PBURST] = { .type = NLA_U32 },
};

static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	int err;
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_TBF_MAX + 1];
	struct tc_tbf_qopt *qopt;
	struct Qdisc *child = NULL;
	struct psched_ratecfg rate;
	struct psched_ratecfg peak;
	u64 max_size;
	s64 buffer, mtu;
	u64 rate64 = 0, prate64 = 0;

	err = nla_parse_nested_deprecated(tb, TCA_TBF_MAX, opt, tbf_policy,
					  NULL);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (tb[TCA_TBF_PARMS] == NULL)
		goto done;

	qopt = nla_data(tb[TCA_TBF_PARMS]);
	if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
		qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
					      tb[TCA_TBF_RTAB],
					      NULL));

	if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
		qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
					      tb[TCA_TBF_PTAB],
					      NULL));

	buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
	mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);

	if (tb[TCA_TBF_RATE64])
		rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
	psched_ratecfg_precompute(&rate, &qopt->rate, rate64);

	if (tb[TCA_TBF_BURST]) {
		max_size = nla_get_u32(tb[TCA_TBF_BURST]);
		buffer = psched_l2t_ns(&rate, max_size);
	} else {
		max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
	}

	if (qopt->peakrate.rate) {
		if (tb[TCA_TBF_PRATE64])
			prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
		psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
		if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
			pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equal to rate %llu !\n",
					    peak.rate_bytes_ps, rate.rate_bytes_ps);
			err = -EINVAL;
			goto done;
		}

		if (tb[TCA_TBF_PBURST]) {
			u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]);
			max_size = min_t(u32, max_size, pburst);
			mtu = psched_l2t_ns(&peak, pburst);
		} else {
			max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
		}
	} else {
		memset(&peak, 0, sizeof(peak));
	}

	if (max_size < psched_mtu(qdisc_dev(sch)))
		pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n",
				    max_size, qdisc_dev(sch)->name,
				    psched_mtu(qdisc_dev(sch)));

	if (!max_size) {
		err = -EINVAL;
		goto done;
	}

	if (q->qdisc != &noop_qdisc) {
		err = fifo_set_limit(q->qdisc, qopt->limit);
		if (err)
			goto done;
	} else if (qopt->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit,
					 extack);
		if (IS_ERR(child)) {
			err = PTR_ERR(child);
			goto done;
		}

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);
	if (child) {
		qdisc_tree_flush_backlog(q->qdisc);
		qdisc_put(q->qdisc);
		q->qdisc = child;
	}
	q->limit = qopt->limit;
	if (tb[TCA_TBF_PBURST])
		q->mtu = mtu;
	else
		q->mtu = PSCHED_TICKS2NS(qopt->mtu);
	q->max_size = max_size;
	if (tb[TCA_TBF_BURST])
		q->buffer = buffer;
	else
		q->buffer = PSCHED_TICKS2NS(qopt->buffer);
	q->tokens = q->buffer;
	q->ptokens = q->mtu;

	memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
	memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));

	sch_tree_unlock(sch);
	err = 0;

	tbf_offload_change(sch);
done:
	return err;
}

static int tbf_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_init(&q->watchdog, sch);
	q->qdisc = &noop_qdisc;

	if (!opt)
		return -EINVAL;

	q->t_c = ktime_get_ns();

	return tbf_change(sch, opt, extack);
}

static void tbf_destroy(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	tbf_offload_destroy(sch);
	qdisc_put(q->qdisc);
}

static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_tbf_qopt opt;
	int err;

	err = tbf_offload_dump(sch);
	if (err)
		return err;

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	opt.limit = q->limit;
	psched_ratecfg_getrate(&opt.rate, &q->rate);
	if (tbf_peak_present(q))
		psched_ratecfg_getrate(&opt.peakrate, &q->peak);
	else
		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
	opt.mtu = PSCHED_NS2TICKS(q->mtu);
	opt.buffer = PSCHED_NS2TICKS(q->buffer);
	if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;
	if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
	    nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps,
			      TCA_TBF_PAD))
		goto nla_put_failure;
	if (tbf_peak_present(q) &&
	    q->peak.rate_bytes_ps >= (1ULL << 32) &&
	    nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps,
			      TCA_TBF_PAD))
		goto nla_put_failure;

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);

	tbf_offload_graft(sch, new, *old, extack);
	return 0;
}

static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long tbf_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops tbf_class_ops = {
	.graft		=	tbf_graft,
	.leaf		=	tbf_leaf,
	.find		=	tbf_find,
	.walk		=	tbf_walk,
	.dump		=	tbf_dump_class,
};

static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
	.next		=	NULL,
	.cl_ops		=	&tbf_class_ops,
	.id		=	"tbf",
	.priv_size	=	sizeof(struct tbf_sched_data),
	.enqueue	=	tbf_enqueue,
	.dequeue	=	tbf_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.init		=	tbf_init,
	.reset		=	tbf_reset,
	.destroy	=	tbf_destroy,
	.change		=	tbf_change,
	.dump		=	tbf_dump,
	.owner		=	THIS_MODULE,
};

static int __init tbf_module_init(void)
{
	return register_qdisc(&tbf_qdisc_ops);
}

static void __exit tbf_module_exit(void)
{
	unregister_qdisc(&tbf_qdisc_ops);
}
module_init(tbf_module_init)
module_exit(tbf_module_exit)
MODULE_LICENSE("GPL");