blob: 5d571aa04a76c92db8901a13384bb608f9fa9f54 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_netem.c Network emulator
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
Stephen Hemminger798b6b12006-10-22 20:16:57 -07007 * 2 of the License.
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 *
9 * Many of the algorithms and ideas for this came from
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090010 * NIST Net which is not copyrighted.
Linus Torvalds1da177e2005-04-16 15:20:36 -070011 *
12 * Authors: Stephen Hemminger <shemminger@osdl.org>
13 * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
14 */
15
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/module.h>
17#include <linux/bitops.h>
18#include <linux/types.h>
19#include <linux/kernel.h>
20#include <linux/errno.h>
21#include <linux/netdevice.h>
22#include <linux/skbuff.h>
23#include <linux/rtnetlink.h>
24
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070025#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070026#include <net/pkt_sched.h>
27
Stephen Hemmingerc865e5d2005-12-21 19:03:44 -080028#define VERSION "1.2"
Stephen Hemmingereb229c42005-11-03 13:49:01 -080029
Linus Torvalds1da177e2005-04-16 15:20:36 -070030/* Network Emulation Queuing algorithm.
31 ====================================
32
33 Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
34 Network Emulation Tool"
35 [2] Luigi Rizzo, DummyNet for FreeBSD
36
37 ----------------------------------------------------------------
38
39 This started out as a simple way to delay outgoing packets to
40 test TCP but has grown to include most of the functionality
41 of a full blown network emulator like NISTnet. It can delay
42 packets and add random jitter (and correlation). The random
43 distribution can be loaded from a table as well to provide
44 normal, Pareto, or experimental curves. Packet loss,
45 duplication, and reordering can also be emulated.
46
47 This qdisc does not do classification; that can be handled by
48 layering other disciplines. It does not need to do bandwidth
49 control either, since that can be handled by using a token
50 bucket or other rate control.
51
52 The simulator is limited by the Linux timer resolution
53 and will create packet bursts on the HZ boundary (1ms).
54*/
55
56struct netem_sched_data {
57 struct Qdisc *qdisc;
Patrick McHardy59cb5c62007-03-16 01:20:31 -070058 struct qdisc_watchdog watchdog;
Linus Torvalds1da177e2005-04-16 15:20:36 -070059
Stephen Hemmingerb4076212007-03-22 12:16:21 -070060 psched_tdiff_t latency;
61 psched_tdiff_t jitter;
62
Linus Torvalds1da177e2005-04-16 15:20:36 -070063 u32 loss;
64 u32 limit;
65 u32 counter;
66 u32 gap;
Linus Torvalds1da177e2005-04-16 15:20:36 -070067 u32 duplicate;
Stephen Hemminger0dca51d2005-05-26 12:55:48 -070068 u32 reorder;
Stephen Hemmingerc865e5d2005-12-21 19:03:44 -080069 u32 corrupt;
Linus Torvalds1da177e2005-04-16 15:20:36 -070070
71 struct crndstate {
Stephen Hemmingerb4076212007-03-22 12:16:21 -070072 u32 last;
73 u32 rho;
Stephen Hemmingerc865e5d2005-12-21 19:03:44 -080074 } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
Linus Torvalds1da177e2005-04-16 15:20:36 -070075
76 struct disttable {
77 u32 size;
78 s16 table[0];
79 } *delay_dist;
80};
81
82/* Time stamp put into socket buffer control block */
83struct netem_skb_cb {
84 psched_time_t time_to_send;
85};
86
87/* init_crandom - initialize correlated random number generator
88 * Use entropy source for initial seed.
89 */
90static void init_crandom(struct crndstate *state, unsigned long rho)
91{
92 state->rho = rho;
93 state->last = net_random();
94}
95
96/* get_crandom - correlated random number generator
97 * Next number depends on last value.
98 * rho is scaled to avoid floating point.
99 */
Stephen Hemmingerb4076212007-03-22 12:16:21 -0700100static u32 get_crandom(struct crndstate *state)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101{
102 u64 value, rho;
103 unsigned long answer;
104
Stephen Hemmingerbb2f8cc2007-03-23 00:12:09 -0700105 if (state->rho == 0) /* no correlation */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106 return net_random();
107
108 value = net_random();
109 rho = (u64)state->rho + 1;
110 answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
111 state->last = answer;
112 return answer;
113}
114
115/* tabledist - return a pseudo-randomly distributed value with mean mu and
116 * std deviation sigma. Uses table lookup to approximate the desired
117 * distribution, and a uniformly-distributed pseudo-random source.
118 */
Stephen Hemmingerb4076212007-03-22 12:16:21 -0700119static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
120 struct crndstate *state,
121 const struct disttable *dist)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122{
Stephen Hemmingerb4076212007-03-22 12:16:21 -0700123 psched_tdiff_t x;
124 long t;
125 u32 rnd;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700126
127 if (sigma == 0)
128 return mu;
129
130 rnd = get_crandom(state);
131
132 /* default uniform distribution */
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900133 if (dist == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134 return (rnd % (2*sigma)) - sigma + mu;
135
136 t = dist->table[rnd % dist->size];
137 x = (sigma % NETEM_DIST_SCALE) * t;
138 if (x >= 0)
139 x += NETEM_DIST_SCALE/2;
140 else
141 x -= NETEM_DIST_SCALE/2;
142
143 return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
144}
145
Stephen Hemminger0afb51e2005-05-26 12:53:49 -0700146/*
147 * Insert one skb into qdisc.
148 * Note: parent depends on return value to account for queue length.
149 * NET_XMIT_DROP: queue length didn't change.
150 * NET_XMIT_SUCCESS: one skb was queued.
151 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
153{
154 struct netem_sched_data *q = qdisc_priv(sch);
Guillaume Chazarain89e1df72006-07-21 14:45:25 -0700155 /* We don't fill cb now as skb_unshare() may invalidate it */
156 struct netem_skb_cb *cb;
Stephen Hemminger0afb51e2005-05-26 12:53:49 -0700157 struct sk_buff *skb2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158 int ret;
Stephen Hemminger0afb51e2005-05-26 12:53:49 -0700159 int count = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160
Stephen Hemminger771018e2005-05-03 16:24:32 -0700161 pr_debug("netem_enqueue skb=%p\n", skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162
Stephen Hemminger0afb51e2005-05-26 12:53:49 -0700163 /* Random duplication */
164 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
165 ++count;
166
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 /* Random packet drop 0 => none, ~0 => all */
Stephen Hemminger0afb51e2005-05-26 12:53:49 -0700168 if (q->loss && q->loss >= get_crandom(&q->loss_cor))
169 --count;
170
171 if (count == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172 sch->qstats.drops++;
173 kfree_skb(skb);
Stephen Hemminger89bbb0a32006-04-28 12:11:36 -0700174 return NET_XMIT_BYPASS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175 }
176
David S. Miller4e8a5202006-10-22 21:00:33 -0700177 skb_orphan(skb);
178
Stephen Hemminger0afb51e2005-05-26 12:53:49 -0700179 /*
180 * If we need to duplicate packet, then re-insert at top of the
181 * qdisc tree, since parent queuer expects that only one
182 * skb will be queued.
183 */
184 if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
185 struct Qdisc *rootq = sch->dev->qdisc;
186 u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
187 q->duplicate = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700188
Stephen Hemminger0afb51e2005-05-26 12:53:49 -0700189 rootq->enqueue(skb2, rootq);
190 q->duplicate = dupsave;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191 }
192
Stephen Hemmingerc865e5d2005-12-21 19:03:44 -0800193 /*
194 * Randomized packet corruption.
195 * Make copy if needed since we are modifying
196 * If packet is going to be hardware checksummed, then
197 * do it now in software before we mangle it.
198 */
199 if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
200 if (!(skb = skb_unshare(skb, GFP_ATOMIC))
Patrick McHardy84fa7932006-08-29 16:44:56 -0700201 || (skb->ip_summed == CHECKSUM_PARTIAL
202 && skb_checksum_help(skb))) {
Stephen Hemmingerc865e5d2005-12-21 19:03:44 -0800203 sch->qstats.drops++;
204 return NET_XMIT_DROP;
205 }
206
207 skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
208 }
209
Guillaume Chazarain89e1df72006-07-21 14:45:25 -0700210 cb = (struct netem_skb_cb *)skb->cb;
Stephen Hemminger0dca51d2005-05-26 12:55:48 -0700211 if (q->gap == 0 /* not doing reordering */
212 || q->counter < q->gap /* inside last reordering gap */
213 || q->reorder < get_crandom(&q->reorder_cor)) {
Stephen Hemminger0f9f32a2005-05-26 12:55:01 -0700214 psched_time_t now;
Stephen Hemminger07aaa112005-11-03 13:43:07 -0800215 psched_tdiff_t delay;
216
217 delay = tabledist(q->latency, q->jitter,
218 &q->delay_cor, q->delay_dist);
219
Stephen Hemminger0f9f32a2005-05-26 12:55:01 -0700220 PSCHED_GET_TIME(now);
Patrick McHardy7c59e252007-03-23 11:27:45 -0700221 cb->time_to_send = now + delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222 ++q->counter;
223 ret = q->qdisc->enqueue(skb, q->qdisc);
224 } else {
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900225 /*
Stephen Hemminger0dca51d2005-05-26 12:55:48 -0700226 * Do re-ordering by putting one out of N packets at the front
227 * of the queue.
228 */
Stephen Hemminger0f9f32a2005-05-26 12:55:01 -0700229 PSCHED_GET_TIME(cb->time_to_send);
Stephen Hemminger0dca51d2005-05-26 12:55:48 -0700230 q->counter = 0;
Stephen Hemminger0f9f32a2005-05-26 12:55:01 -0700231 ret = q->qdisc->ops->requeue(skb, q->qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700232 }
233
234 if (likely(ret == NET_XMIT_SUCCESS)) {
235 sch->q.qlen++;
236 sch->bstats.bytes += skb->len;
237 sch->bstats.packets++;
238 } else
239 sch->qstats.drops++;
240
Stephen Hemmingerd5d75cd2005-05-03 16:24:57 -0700241 pr_debug("netem: enqueue ret %d\n", ret);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242 return ret;
243}
244
245/* Requeue packets but don't change time stamp */
246static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
247{
248 struct netem_sched_data *q = qdisc_priv(sch);
249 int ret;
250
251 if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
252 sch->q.qlen++;
253 sch->qstats.requeues++;
254 }
255
256 return ret;
257}
258
259static unsigned int netem_drop(struct Qdisc* sch)
260{
261 struct netem_sched_data *q = qdisc_priv(sch);
Patrick McHardy6d037a22006-03-20 19:00:49 -0800262 unsigned int len = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263
Patrick McHardy6d037a22006-03-20 19:00:49 -0800264 if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265 sch->q.qlen--;
266 sch->qstats.drops++;
267 }
268 return len;
269}
270
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271static struct sk_buff *netem_dequeue(struct Qdisc *sch)
272{
273 struct netem_sched_data *q = qdisc_priv(sch);
274 struct sk_buff *skb;
275
Stephen Hemminger11274e52007-03-22 12:17:42 -0700276 smp_mb();
277 if (sch->flags & TCQ_F_THROTTLED)
278 return NULL;
279
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280 skb = q->qdisc->dequeue(q->qdisc);
Stephen Hemminger771018e2005-05-03 16:24:32 -0700281 if (skb) {
Stephen Hemminger0f9f32a2005-05-26 12:55:01 -0700282 const struct netem_skb_cb *cb
283 = (const struct netem_skb_cb *)skb->cb;
284 psched_time_t now;
Stephen Hemminger771018e2005-05-03 16:24:32 -0700285
Stephen Hemminger0f9f32a2005-05-26 12:55:01 -0700286 /* if more time remaining? */
287 PSCHED_GET_TIME(now);
Stephen Hemminger07aaa112005-11-03 13:43:07 -0800288
Patrick McHardy104e0872007-03-23 11:28:07 -0700289 if (cb->time_to_send <= now) {
Stephen Hemminger0f9f32a2005-05-26 12:55:01 -0700290 pr_debug("netem_dequeue: return skb=%p\n", skb);
291 sch->q.qlen--;
Stephen Hemminger0f9f32a2005-05-26 12:55:01 -0700292 return skb;
293 }
Stephen Hemminger11274e52007-03-22 12:17:42 -0700294
295 if (unlikely(q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS)) {
296 qdisc_tree_decrease_qlen(q->qdisc, 1);
297 sch->qstats.drops++;
298 printk(KERN_ERR "netem: %s could not requeue\n",
299 q->qdisc->ops->id);
300 }
301
302 qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
Stephen Hemminger0f9f32a2005-05-26 12:55:01 -0700303 }
304
305 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306}
307
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308static void netem_reset(struct Qdisc *sch)
309{
310 struct netem_sched_data *q = qdisc_priv(sch);
311
312 qdisc_reset(q->qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 sch->q.qlen = 0;
Patrick McHardy59cb5c62007-03-16 01:20:31 -0700314 qdisc_watchdog_cancel(&q->watchdog);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315}
316
Stephen Hemminger300ce172005-10-30 13:47:34 -0800317/* Pass size change message down to embedded FIFO */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318static int set_fifo_limit(struct Qdisc *q, int limit)
319{
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900320 struct rtattr *rta;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321 int ret = -ENOMEM;
322
Stephen Hemminger300ce172005-10-30 13:47:34 -0800323 /* Hack to avoid sending change message to non-FIFO */
324 if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
325 return 0;
326
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327 rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
328 if (rta) {
329 rta->rta_type = RTM_NEWQDISC;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900330 rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331 ((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900332
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333 ret = q->ops->change(q, rta);
334 kfree(rta);
335 }
336 return ret;
337}
338
339/*
340 * Distribution data is a variable size payload containing
341 * signed 16 bit values.
342 */
343static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
344{
345 struct netem_sched_data *q = qdisc_priv(sch);
346 unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16);
347 const __s16 *data = RTA_DATA(attr);
348 struct disttable *d;
349 int i;
350
351 if (n > 65536)
352 return -EINVAL;
353
354 d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
355 if (!d)
356 return -ENOMEM;
357
358 d->size = n;
359 for (i = 0; i < n; i++)
360 d->table[i] = data[i];
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900361
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362 spin_lock_bh(&sch->dev->queue_lock);
363 d = xchg(&q->delay_dist, d);
364 spin_unlock_bh(&sch->dev->queue_lock);
365
366 kfree(d);
367 return 0;
368}
369
370static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
371{
372 struct netem_sched_data *q = qdisc_priv(sch);
373 const struct tc_netem_corr *c = RTA_DATA(attr);
374
375 if (RTA_PAYLOAD(attr) != sizeof(*c))
376 return -EINVAL;
377
378 init_crandom(&q->delay_cor, c->delay_corr);
379 init_crandom(&q->loss_cor, c->loss_corr);
380 init_crandom(&q->dup_cor, c->dup_corr);
381 return 0;
382}
383
Stephen Hemminger0dca51d2005-05-26 12:55:48 -0700384static int get_reorder(struct Qdisc *sch, const struct rtattr *attr)
385{
386 struct netem_sched_data *q = qdisc_priv(sch);
387 const struct tc_netem_reorder *r = RTA_DATA(attr);
388
389 if (RTA_PAYLOAD(attr) != sizeof(*r))
390 return -EINVAL;
391
392 q->reorder = r->probability;
393 init_crandom(&q->reorder_cor, r->correlation);
394 return 0;
395}
396
Stephen Hemmingerc865e5d2005-12-21 19:03:44 -0800397static int get_corrupt(struct Qdisc *sch, const struct rtattr *attr)
398{
399 struct netem_sched_data *q = qdisc_priv(sch);
400 const struct tc_netem_corrupt *r = RTA_DATA(attr);
401
402 if (RTA_PAYLOAD(attr) != sizeof(*r))
403 return -EINVAL;
404
405 q->corrupt = r->probability;
406 init_crandom(&q->corrupt_cor, r->correlation);
407 return 0;
408}
409
410/* Parse netlink message to set options */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700411static int netem_change(struct Qdisc *sch, struct rtattr *opt)
412{
413 struct netem_sched_data *q = qdisc_priv(sch);
414 struct tc_netem_qopt *qopt;
415 int ret;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900416
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417 if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
418 return -EINVAL;
419
420 qopt = RTA_DATA(opt);
421 ret = set_fifo_limit(q->qdisc, qopt->limit);
422 if (ret) {
423 pr_debug("netem: can't set fifo limit\n");
424 return ret;
425 }
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900426
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427 q->latency = qopt->latency;
428 q->jitter = qopt->jitter;
429 q->limit = qopt->limit;
430 q->gap = qopt->gap;
Stephen Hemminger0dca51d2005-05-26 12:55:48 -0700431 q->counter = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432 q->loss = qopt->loss;
433 q->duplicate = qopt->duplicate;
434
Stephen Hemmingerbb2f8cc2007-03-23 00:12:09 -0700435 /* for compatibility with earlier versions.
436 * if gap is set, need to assume 100% probability
Stephen Hemminger0dca51d2005-05-26 12:55:48 -0700437 */
Stephen Hemmingera362e0a2007-03-22 12:15:45 -0700438 if (q->gap)
439 q->reorder = ~0;
Stephen Hemminger0dca51d2005-05-26 12:55:48 -0700440
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441 /* Handle nested options after initial queue options.
442 * Should have put all options in nested format but too late now.
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900443 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444 if (RTA_PAYLOAD(opt) > sizeof(*qopt)) {
445 struct rtattr *tb[TCA_NETEM_MAX];
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900446 if (rtattr_parse(tb, TCA_NETEM_MAX,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447 RTA_DATA(opt) + sizeof(*qopt),
448 RTA_PAYLOAD(opt) - sizeof(*qopt)))
449 return -EINVAL;
450
451 if (tb[TCA_NETEM_CORR-1]) {
452 ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
453 if (ret)
454 return ret;
455 }
456
457 if (tb[TCA_NETEM_DELAY_DIST-1]) {
458 ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]);
459 if (ret)
460 return ret;
461 }
Stephen Hemmingerc865e5d2005-12-21 19:03:44 -0800462
Stephen Hemminger0dca51d2005-05-26 12:55:48 -0700463 if (tb[TCA_NETEM_REORDER-1]) {
464 ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]);
465 if (ret)
466 return ret;
467 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468
Stephen Hemmingerc865e5d2005-12-21 19:03:44 -0800469 if (tb[TCA_NETEM_CORRUPT-1]) {
470 ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT-1]);
471 if (ret)
472 return ret;
473 }
474 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700475
476 return 0;
477}
478
Stephen Hemminger300ce172005-10-30 13:47:34 -0800479/*
480 * Special case version of FIFO queue for use by netem.
481 * It queues in order based on timestamps in skb's
482 */
483struct fifo_sched_data {
484 u32 limit;
Stephen Hemminger075aa572007-03-22 12:17:05 -0700485 psched_time_t oldest;
Stephen Hemminger300ce172005-10-30 13:47:34 -0800486};
487
488static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
489{
490 struct fifo_sched_data *q = qdisc_priv(sch);
491 struct sk_buff_head *list = &sch->q;
Stephen Hemminger075aa572007-03-22 12:17:05 -0700492 psched_time_t tnext = ((struct netem_skb_cb *)nskb->cb)->time_to_send;
Stephen Hemminger300ce172005-10-30 13:47:34 -0800493 struct sk_buff *skb;
494
495 if (likely(skb_queue_len(list) < q->limit)) {
Stephen Hemminger075aa572007-03-22 12:17:05 -0700496 /* Optimize for add at tail */
Patrick McHardy104e0872007-03-23 11:28:07 -0700497 if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
Stephen Hemminger075aa572007-03-22 12:17:05 -0700498 q->oldest = tnext;
499 return qdisc_enqueue_tail(nskb, sch);
500 }
501
Stephen Hemminger300ce172005-10-30 13:47:34 -0800502 skb_queue_reverse_walk(list, skb) {
503 const struct netem_skb_cb *cb
504 = (const struct netem_skb_cb *)skb->cb;
505
Patrick McHardy104e0872007-03-23 11:28:07 -0700506 if (tnext >= cb->time_to_send)
Stephen Hemminger300ce172005-10-30 13:47:34 -0800507 break;
508 }
509
510 __skb_queue_after(list, skb, nskb);
511
512 sch->qstats.backlog += nskb->len;
513 sch->bstats.bytes += nskb->len;
514 sch->bstats.packets++;
515
516 return NET_XMIT_SUCCESS;
517 }
518
Stephen Hemminger075aa572007-03-22 12:17:05 -0700519 return qdisc_reshape_fail(nskb, sch);
Stephen Hemminger300ce172005-10-30 13:47:34 -0800520}
521
522static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
523{
524 struct fifo_sched_data *q = qdisc_priv(sch);
525
526 if (opt) {
527 struct tc_fifo_qopt *ctl = RTA_DATA(opt);
528 if (RTA_PAYLOAD(opt) < sizeof(*ctl))
529 return -EINVAL;
530
531 q->limit = ctl->limit;
532 } else
533 q->limit = max_t(u32, sch->dev->tx_queue_len, 1);
534
Stephen Hemminger075aa572007-03-22 12:17:05 -0700535 PSCHED_SET_PASTPERFECT(q->oldest);
Stephen Hemminger300ce172005-10-30 13:47:34 -0800536 return 0;
537}
538
539static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
540{
541 struct fifo_sched_data *q = qdisc_priv(sch);
542 struct tc_fifo_qopt opt = { .limit = q->limit };
543
544 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
545 return skb->len;
546
547rtattr_failure:
548 return -1;
549}
550
551static struct Qdisc_ops tfifo_qdisc_ops = {
552 .id = "tfifo",
553 .priv_size = sizeof(struct fifo_sched_data),
554 .enqueue = tfifo_enqueue,
555 .dequeue = qdisc_dequeue_head,
556 .requeue = qdisc_requeue,
557 .drop = qdisc_queue_drop,
558 .init = tfifo_init,
559 .reset = qdisc_reset_queue,
560 .change = tfifo_init,
561 .dump = tfifo_dump,
562};
563
Linus Torvalds1da177e2005-04-16 15:20:36 -0700564static int netem_init(struct Qdisc *sch, struct rtattr *opt)
565{
566 struct netem_sched_data *q = qdisc_priv(sch);
567 int ret;
568
569 if (!opt)
570 return -EINVAL;
571
Patrick McHardy59cb5c62007-03-16 01:20:31 -0700572 qdisc_watchdog_init(&q->watchdog, sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700573
Patrick McHardy9f9afec2006-11-29 17:35:18 -0800574 q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops,
575 TC_H_MAKE(sch->handle, 1));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576 if (!q->qdisc) {
577 pr_debug("netem: qdisc create failed\n");
578 return -ENOMEM;
579 }
580
581 ret = netem_change(sch, opt);
582 if (ret) {
583 pr_debug("netem: change failed\n");
584 qdisc_destroy(q->qdisc);
585 }
586 return ret;
587}
588
589static void netem_destroy(struct Qdisc *sch)
590{
591 struct netem_sched_data *q = qdisc_priv(sch);
592
Patrick McHardy59cb5c62007-03-16 01:20:31 -0700593 qdisc_watchdog_cancel(&q->watchdog);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594 qdisc_destroy(q->qdisc);
595 kfree(q->delay_dist);
596}
597
598static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
599{
600 const struct netem_sched_data *q = qdisc_priv(sch);
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700601 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602 struct rtattr *rta = (struct rtattr *) b;
603 struct tc_netem_qopt qopt;
604 struct tc_netem_corr cor;
Stephen Hemminger0dca51d2005-05-26 12:55:48 -0700605 struct tc_netem_reorder reorder;
Stephen Hemmingerc865e5d2005-12-21 19:03:44 -0800606 struct tc_netem_corrupt corrupt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700607
608 qopt.latency = q->latency;
609 qopt.jitter = q->jitter;
610 qopt.limit = q->limit;
611 qopt.loss = q->loss;
612 qopt.gap = q->gap;
613 qopt.duplicate = q->duplicate;
614 RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
615
616 cor.delay_corr = q->delay_cor.rho;
617 cor.loss_corr = q->loss_cor.rho;
618 cor.dup_corr = q->dup_cor.rho;
619 RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
Stephen Hemminger0dca51d2005-05-26 12:55:48 -0700620
621 reorder.probability = q->reorder;
622 reorder.correlation = q->reorder_cor.rho;
623 RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
624
Stephen Hemmingerc865e5d2005-12-21 19:03:44 -0800625 corrupt.probability = q->corrupt;
626 corrupt.correlation = q->corrupt_cor.rho;
627 RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
628
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700629 rta->rta_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630
631 return skb->len;
632
633rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -0700634 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635 return -1;
636}
637
638static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
639 struct sk_buff *skb, struct tcmsg *tcm)
640{
641 struct netem_sched_data *q = qdisc_priv(sch);
642
643 if (cl != 1) /* only one class */
644 return -ENOENT;
645
646 tcm->tcm_handle |= TC_H_MIN(1);
647 tcm->tcm_info = q->qdisc->handle;
648
649 return 0;
650}
651
652static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
653 struct Qdisc **old)
654{
655 struct netem_sched_data *q = qdisc_priv(sch);
656
657 if (new == NULL)
658 new = &noop_qdisc;
659
660 sch_tree_lock(sch);
661 *old = xchg(&q->qdisc, new);
Patrick McHardy5e50da02006-11-29 17:36:20 -0800662 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700663 qdisc_reset(*old);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700664 sch_tree_unlock(sch);
665
666 return 0;
667}
668
669static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
670{
671 struct netem_sched_data *q = qdisc_priv(sch);
672 return q->qdisc;
673}
674
675static unsigned long netem_get(struct Qdisc *sch, u32 classid)
676{
677 return 1;
678}
679
/* Counterpart of netem_get(); there is nothing to release. */
static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}
683
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900684static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 struct rtattr **tca, unsigned long *arg)
686{
687 return -ENOSYS;
688}
689
690static int netem_delete(struct Qdisc *sch, unsigned long arg)
691{
692 return -ENOSYS;
693}
694
695static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
696{
697 if (!walker->stop) {
698 if (walker->count >= walker->skip)
699 if (walker->fn(sch, 1, walker) < 0) {
700 walker->stop = 1;
701 return;
702 }
703 walker->count++;
704 }
705}
706
707static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
708{
709 return NULL;
710}
711
712static struct Qdisc_class_ops netem_class_ops = {
713 .graft = netem_graft,
714 .leaf = netem_leaf,
715 .get = netem_get,
716 .put = netem_put,
717 .change = netem_change_class,
718 .delete = netem_delete,
719 .walk = netem_walk,
720 .tcf_chain = netem_find_tcf,
721 .dump = netem_dump_class,
722};
723
724static struct Qdisc_ops netem_qdisc_ops = {
725 .id = "netem",
726 .cl_ops = &netem_class_ops,
727 .priv_size = sizeof(struct netem_sched_data),
728 .enqueue = netem_enqueue,
729 .dequeue = netem_dequeue,
730 .requeue = netem_requeue,
731 .drop = netem_drop,
732 .init = netem_init,
733 .reset = netem_reset,
734 .destroy = netem_destroy,
735 .change = netem_change,
736 .dump = netem_dump,
737 .owner = THIS_MODULE,
738};
739
740
741static int __init netem_module_init(void)
742{
Stephen Hemmingereb229c42005-11-03 13:49:01 -0800743 pr_info("netem: version " VERSION "\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744 return register_qdisc(&netem_qdisc_ops);
745}
746static void __exit netem_module_exit(void)
747{
748 unregister_qdisc(&netem_qdisc_ops);
749}
750module_init(netem_module_init)
751module_exit(netem_module_exit)
752MODULE_LICENSE("GPL");