// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_tbf.c	Token Bucket Filter queue.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
 *						 original idea by Martin Devera
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>

/*	Simple Token Bucket Filter.
	=======================================

	SOURCE.
	-------

	None.

	Description.
	------------

	A data flow obeys TBF with rate R and depth B, if for any
	time interval t_i...t_f the number of transmitted bits
	does not exceed B + R*(t_f-t_i).

	Packetized version of this definition:
	The sequence of packets of sizes s_i served at moments t_i
	obeys TBF, if for any i<=k:

	s_i+....+s_k <= B + R*(t_k - t_i)

	Algorithm.
	----------

	Let N(t_i) be B/R initially and N(t) grow continuously with time as:

	N(t+delta) = min{B/R, N(t) + delta}

	If the first packet in queue has length S, it may be
	transmitted only at the time t_* when S/R <= N(t_*),
	and in this case N(t) jumps:

	N(t_* + 0) = N(t_* - 0) - S/R.

	Actually, QoS requires two TBFs to be applied to a data stream.
	One of them controls the steady-state burst size, while the other,
	with rate P (peak rate) and depth M (equal to the link MTU),
	limits bursts at a smaller time scale.

	It is easy to see that P>R and B>M. If P is infinity, this double
	TBF is equivalent to a single one.

	When TBF works in reshaping mode, latency is estimated as:

	lat = max ((L-B)/R, (L-M)/P)

	NOTES.
	------

	If TBF throttles, it starts a watchdog timer, which will wake it up
	when it is ready to transmit.
	Note that the minimal timer resolution is 1/HZ.
	If no new packets arrive during this period,
	or if the device is not awakened by EOI for some previous packet,
	TBF can stop its activity for 1/HZ.


	This means that, with depth B, the maximal rate is

	R_crit = B*HZ

	E.g. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.

	Note that the peak rate TBF is much tougher: with MTU 1500
	P_crit = 150Kbytes/sec. So, if you need greater peak
	rates, use alpha with HZ=1000 :-)

	With classful TBF, limit is kept only for backwards compatibility.
	It is passed to the default bfifo qdisc - if the inner qdisc is
	changed, the limit is no longer effective.
*/
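
/* An illustrative calculation for the formulas above (the numbers are
 * arbitrary examples, not defaults): with rate R = 125000 bytes/sec
 * (1 Mbit/s), bucket depth B = 10000 bytes, limit L = 60000 bytes and no
 * peak rate configured, the reshaping latency bound reduces to
 *
 *	lat = (L-B)/R = (60000 - 10000)/125000 = 0.4 sec
 *
 * while an idle bucket still admits a burst of up to B = 10000 bytes at
 * link speed before packets are delayed down to the steady rate R.
 */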

struct tbf_sched_data {
/* Parameters */
	u32		limit;		/* Maximal length of backlog: bytes */
	u32		max_size;
	s64		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
	s64		mtu;
	struct psched_ratecfg rate;
	struct psched_ratecfg peak;

/* Variables */
	s64	tokens;			/* Current number of B tokens */
	s64	ptokens;		/* Current number of P tokens */
	s64	t_c;			/* Time check-point */
	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
	struct qdisc_watchdog watchdog;	/* Watchdog timer */
};

/* Time to Length: convert a time in ns to a length in bytes,
 * i.e. determine how many bytes can be sent in the given time.
 */
static u64 psched_ns_t2l(const struct psched_ratecfg *r,
			 u64 time_in_ns)
{
	/* The formula is:
	 * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC
	 */
	u64 len = time_in_ns * r->rate_bytes_ps;

	do_div(len, NSEC_PER_SEC);

	if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) {
		/* 48 payload bytes per 53-byte ATM cell */
		do_div(len, 53);
		len = len * 48;
	}

	if (len > r->overhead)
		len -= r->overhead;
	else
		len = 0;

	return len;
}

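/* Push the current TBF parameters (rate, max_size) to the device through
 * ndo_setup_tc(TC_SETUP_QDISC_TBF), if the driver supports offloading.
 */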
static void tbf_offload_change(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_tbf_qopt_offload qopt;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	qopt.command = TC_TBF_REPLACE;
	qopt.handle = sch->handle;
	qopt.parent = sch->parent;
	qopt.replace_params.rate = q->rate;
	qopt.replace_params.max_size = q->max_size;
	qopt.replace_params.qstats = &sch->qstats;

	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
}

static void tbf_offload_destroy(struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_tbf_qopt_offload qopt;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	qopt.command = TC_TBF_DESTROY;
	qopt.handle = sch->handle;
	qopt.parent = sch->parent;
	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
}

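/* Let an offloading driver fold its hardware-maintained byte/packet and
 * queue statistics into this qdisc's counters before they are dumped.
 */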
static int tbf_offload_dump(struct Qdisc *sch)
{
	struct tc_tbf_qopt_offload qopt;

	qopt.command = TC_TBF_STATS;
	qopt.handle = sch->handle;
	qopt.parent = sch->parent;
	qopt.stats.bstats = &sch->bstats;
	qopt.stats.qstats = &sch->qstats;

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt);
}

/* GSO packet is too big, segment it so that tbf can transmit
 * each segment in time
 */
static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *segs, *nskb;
	netdev_features_t features = netif_skb_features(skb);
	unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
	int ret, nb;

	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);

	if (IS_ERR_OR_NULL(segs))
		return qdisc_drop(skb, sch, to_free);

	nb = 0;
	skb_list_walk_safe(segs, segs, nskb) {
		skb_mark_not_on_list(segs);
		qdisc_skb_cb(segs)->pkt_len = segs->len;
		len += segs->len;
		ret = qdisc_enqueue(segs, q->qdisc, to_free);
		if (ret != NET_XMIT_SUCCESS) {
			if (net_xmit_drop_count(ret))
				qdisc_qstats_drop(sch);
		} else {
			nb++;
		}
	}
	sch->q.qlen += nb;
	if (nb > 1)
		qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
	consume_skb(skb);
	return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}

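/* Enqueue one packet into the inner qdisc.  Packets larger than max_size
 * are software-segmented if they are GSO packets, and dropped otherwise,
 * since they could never accumulate enough tokens to be sent whole.
 */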
static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	unsigned int len = qdisc_pkt_len(skb);
	int ret;

	if (qdisc_pkt_len(skb) > q->max_size) {
		if (skb_is_gso(skb) &&
		    skb_gso_validate_mac_len(skb, q->max_size))
			return tbf_segment(skb, sch, to_free);
		return qdisc_drop(skb, sch, to_free);
	}
	ret = qdisc_enqueue(skb, q->qdisc, to_free);
	if (ret != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret))
			qdisc_qstats_drop(sch);
		return ret;
	}

	sch->qstats.backlog += len;
	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}

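/* A peak rate bucket is in use iff its byte rate is non-zero. */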
static bool tbf_peak_present(const struct tbf_sched_data *q)
{
	return q->peak.rate_bytes_ps;
}

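/* Dequeue the head packet if enough tokens have accumulated in both the
 * rate bucket and the optional peak-rate bucket; otherwise leave it queued
 * and arm the watchdog for the time when the deficit will be covered.
 */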
static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = q->qdisc->ops->peek(q->qdisc);

	if (skb) {
		s64 now;
		s64 toks;
		s64 ptoks = 0;
		unsigned int len = qdisc_pkt_len(skb);

		now = ktime_get_ns();
		toks = min_t(s64, now - q->t_c, q->buffer);

		if (tbf_peak_present(q)) {
			ptoks = toks + q->ptokens;
			if (ptoks > q->mtu)
				ptoks = q->mtu;
			ptoks -= (s64) psched_l2t_ns(&q->peak, len);
		}
		toks += q->tokens;
		if (toks > q->buffer)
			toks = q->buffer;
		toks -= (s64) psched_l2t_ns(&q->rate, len);

		if ((toks|ptoks) >= 0) {
			skb = qdisc_dequeue_peeked(q->qdisc);
			if (unlikely(!skb))
				return NULL;

			q->t_c = now;
			q->tokens = toks;
			q->ptokens = ptoks;
			qdisc_qstats_backlog_dec(sch, skb);
			sch->q.qlen--;
			qdisc_bstats_update(sch, skb);
			return skb;
		}

		qdisc_watchdog_schedule_ns(&q->watchdog,
					   now + max_t(long, -toks, -ptoks));

		/* Maybe we have a shorter packet in the queue,
		   which can be sent now. It sounds cool,
		   but it is wrong in principle.
		   We MUST NOT reorder packets under these circumstances.

		   Really, if we split the flow into independent
		   subflows, it would be a very good solution.
		   This is the main idea of all FQ algorithms
		   (cf. CSZ, HPFQ, HFSC)
		 */

		qdisc_qstats_overlimit(sch);
	}
	return NULL;
}

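/* Return the qdisc to its freshly-configured state: flush the inner qdisc,
 * refill both token buckets and cancel a pending watchdog.
 */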
static void tbf_reset(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	q->t_c = ktime_get_ns();
	q->tokens = q->buffer;
	q->ptokens = q->mtu;
	qdisc_watchdog_cancel(&q->watchdog);
}

static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
	[TCA_TBF_PARMS]		= { .len = sizeof(struct tc_tbf_qopt) },
	[TCA_TBF_RTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_TBF_PTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_TBF_RATE64]	= { .type = NLA_U64 },
	[TCA_TBF_PRATE64]	= { .type = NLA_U64 },
	[TCA_TBF_BURST]		= { .type = NLA_U32 },
	[TCA_TBF_PBURST]	= { .type = NLA_U32 },
};

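/* Parse a new configuration from netlink, precompute the rate/peak tables
 * and the resulting max_size, optionally create the default bfifo child,
 * then commit everything under the tree lock and notify offload drivers.
 */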
static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	int err;
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_TBF_MAX + 1];
	struct tc_tbf_qopt *qopt;
	struct Qdisc *child = NULL;
	struct psched_ratecfg rate;
	struct psched_ratecfg peak;
	u64 max_size;
	s64 buffer, mtu;
	u64 rate64 = 0, prate64 = 0;

	err = nla_parse_nested_deprecated(tb, TCA_TBF_MAX, opt, tbf_policy,
					  NULL);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (tb[TCA_TBF_PARMS] == NULL)
		goto done;

	qopt = nla_data(tb[TCA_TBF_PARMS]);
	if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
		qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
					      tb[TCA_TBF_RTAB],
					      NULL));

	if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
		qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
					      tb[TCA_TBF_PTAB],
					      NULL));

	buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
	mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);

	if (tb[TCA_TBF_RATE64])
		rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
	psched_ratecfg_precompute(&rate, &qopt->rate, rate64);

	if (tb[TCA_TBF_BURST]) {
		max_size = nla_get_u32(tb[TCA_TBF_BURST]);
		buffer = psched_l2t_ns(&rate, max_size);
	} else {
		max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
	}

	if (qopt->peakrate.rate) {
		if (tb[TCA_TBF_PRATE64])
			prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
		psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
		if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
			pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equal to rate %llu !\n",
					    peak.rate_bytes_ps, rate.rate_bytes_ps);
			err = -EINVAL;
			goto done;
		}

		if (tb[TCA_TBF_PBURST]) {
			u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]);
			max_size = min_t(u32, max_size, pburst);
			mtu = psched_l2t_ns(&peak, pburst);
		} else {
			max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
		}
	} else {
		memset(&peak, 0, sizeof(peak));
	}

	if (max_size < psched_mtu(qdisc_dev(sch)))
		pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n",
				    max_size, qdisc_dev(sch)->name,
				    psched_mtu(qdisc_dev(sch)));

	if (!max_size) {
		err = -EINVAL;
		goto done;
	}

	if (q->qdisc != &noop_qdisc) {
		err = fifo_set_limit(q->qdisc, qopt->limit);
		if (err)
			goto done;
	} else if (qopt->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit,
					 extack);
		if (IS_ERR(child)) {
			err = PTR_ERR(child);
			goto done;
		}

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);
	if (child) {
		qdisc_tree_flush_backlog(q->qdisc);
		qdisc_put(q->qdisc);
		q->qdisc = child;
	}
	q->limit = qopt->limit;
	if (tb[TCA_TBF_PBURST])
		q->mtu = mtu;
	else
		q->mtu = PSCHED_TICKS2NS(qopt->mtu);
	q->max_size = max_size;
	if (tb[TCA_TBF_BURST])
		q->buffer = buffer;
	else
		q->buffer = PSCHED_TICKS2NS(qopt->buffer);
	q->tokens = q->buffer;
	q->ptokens = q->mtu;

	memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
	memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));

	sch_tree_unlock(sch);
	err = 0;

	tbf_offload_change(sch);
done:
	return err;
}

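/* Initialize the watchdog and a noop inner qdisc, record the current time
 * as the token checkpoint, then apply the initial parameters via tbf_change().
 */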
static int tbf_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_init(&q->watchdog, sch);
	q->qdisc = &noop_qdisc;

	if (!opt)
		return -EINVAL;

	q->t_c = ktime_get_ns();

	return tbf_change(sch, opt, extack);
}

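/* Tear down the qdisc: stop the watchdog, remove any offloaded state and
 * release the inner qdisc.
 */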
static void tbf_destroy(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	tbf_offload_destroy(sch);
	qdisc_put(q->qdisc);
}

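/* Dump the current configuration to userspace; rates that do not fit in
 * 32 bits are additionally reported through the 64-bit attributes.
 * Offloaded statistics are pulled in first via tbf_offload_dump().
 */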
static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_tbf_qopt opt;
	int err;

	err = tbf_offload_dump(sch);
	if (err)
		return err;

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	opt.limit = q->limit;
	psched_ratecfg_getrate(&opt.rate, &q->rate);
	if (tbf_peak_present(q))
		psched_ratecfg_getrate(&opt.peakrate, &q->peak);
	else
		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
	opt.mtu = PSCHED_NS2TICKS(q->mtu);
	opt.buffer = PSCHED_NS2TICKS(q->buffer);
	if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;
	if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
	    nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps,
			      TCA_TBF_PAD))
		goto nla_put_failure;
	if (tbf_peak_present(q) &&
	    q->peak.rate_bytes_ps >= (1ULL << 32) &&
	    nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps,
			      TCA_TBF_PAD))
		goto nla_put_failure;

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

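/* Replace the inner qdisc with @new (noop if NULL) and hand the old one
 * back to the caller for disposal.
 */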
static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

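/* TBF exposes a single pseudo-class with minor id 1 whose leaf is the
 * inner qdisc; tbf_find() and tbf_walk() below deal with that class only.
 */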
static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long tbf_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops tbf_class_ops = {
	.graft		= tbf_graft,
	.leaf		= tbf_leaf,
	.find		= tbf_find,
	.walk		= tbf_walk,
	.dump		= tbf_dump_class,
};

static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
	.next		= NULL,
	.cl_ops		= &tbf_class_ops,
	.id		= "tbf",
	.priv_size	= sizeof(struct tbf_sched_data),
	.enqueue	= tbf_enqueue,
	.dequeue	= tbf_dequeue,
	.peek		= qdisc_peek_dequeued,
	.init		= tbf_init,
	.reset		= tbf_reset,
	.destroy	= tbf_destroy,
	.change		= tbf_change,
	.dump		= tbf_dump,
	.owner		= THIS_MODULE,
};

static int __init tbf_module_init(void)
{
	return register_qdisc(&tbf_qdisc_ops);
}

static void __exit tbf_module_exit(void)
{
	unregister_qdisc(&tbf_qdisc_ops);
}
module_init(tbf_module_init)
module_exit(tbf_module_exit)
MODULE_LICENSE("GPL");