blob: ffe4c06073415bc04c62177ecc662168cf26ed7a [file] [log] [blame]
brakmo4ffd44c2019-03-01 12:38:50 -08001#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3#
4# Copyright (c) 2019 Facebook
5#
6# This program is free software; you can redistribute it and/or
7# modify it under the terms of version 2 of the GNU General Public
8# License as published by the Free Software Foundation.
9
# Print help text describing every supported option, then exit.
# Fixed here: several typos and unbalanced brackets in the help text
# ("bandwidht", "-c or -cc", "--server=<server]", "<id >", missing
# "[-r=<rate>|--rate=<rate>]" in the synopsis, missing "[" before -t,
# "between to hosts", "default 1s 1Gbps").
Usage() {
	echo "Script for testing HBM (Host Bandwidth Manager) framework."
	echo "It creates a cgroup to use for testing and loads a BPF program to limit"
	echo "egress or ingress bandwidth. It then uses iperf3 or netperf to create"
	echo "loads. The output is the goodput in Mbps (unless -D was used)."
	echo ""
	echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]"
	echo "             [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E] [--edt]"
	echo "             [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id>]"
	echo "             [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]"
	echo "             [-q=<qdisc>] [-r=<rate>|--rate=<rate>] [-R]"
	echo "             [-s=<server>|--server=<server>]"
	echo "             [-S|--stats] [-t=<time>|--time=<time>] [-w] [cubic|dctcp]"
	echo " Where:"
	echo "    out               egress (default)"
	echo "    -b or --bpf       BPF program filename to load and attach."
	echo "                      Default is hbm_out_kern.o for egress,"
	echo "    -c or --cc        TCP congestion control (cubic or dctcp)"
	echo "    --debug           print BPF trace buffer"
	echo "    -d or --delay     add a delay in ms using netem"
	echo "    -D                In addition to the goodput in Mbps, it also outputs"
	echo "                      other detailed information. This information is"
	echo "                      test dependent (i.e. iperf3 or netperf)."
	echo "    -E                enable ECN (not required for dctcp)"
	echo "    --edt             use fq's Earliest Departure Time (requires fq)"
	echo "    -f or --flows     number of concurrent flows (default=1)"
	echo "    -i or --id        cgroup id (an integer, default is 1)"
	echo "    -N                use netperf instead of iperf3"
	echo "    --no_cn           Do not return CN notifications"
	echo "    -l                do not limit flows using loopback"
	echo "    -h                Help"
	echo "    -p or --port      iperf3 port (default is 5201)"
	echo "    -P                use an iperf3 instance for each flow"
	echo "    -q                use the specified qdisc"
	echo "    -r or --rate      rate in Mbps (default is 1Gbps)"
	echo "    -R                Use TCP_RR for netperf. 1st flow has req"
	echo "                      size of 10KB, rest of 1MB. Reply in all"
	echo "                      cases is 1 byte."
	echo "                      More detailed output for each flow can be found"
	echo "                      in the files netperf.<cg>.<flow>, where <cg> is the"
	echo "                      cgroup id as specified with the -i flag, and <flow>"
	echo "                      is the flow id starting at 1 and increasing by 1 for"
	echo "                      flow (as specified by -f)."
	echo "    -s or --server    hostname of netperf server. Used to create netperf"
	echo "                      test traffic between two hosts (default is within host)"
	echo "                      netserver must be running on the host."
	echo "    -S or --stats     whether to update hbm stats (default is yes)."
	echo "    -t or --time      duration of iperf3 in seconds (default=5)"
	echo "    -w                Work conserving flag. cgroup can increase its"
	echo "                      bandwidth beyond the rate limit specified"
	echo "                      while there is available bandwidth. Current"
	echo "                      implementation assumes there is only one NIC"
	echo "                      (eth0), but can be extended to support multiple"
	echo "                      NICs."
	echo "    cubic or dctcp    specify which TCP CC to use"
	echo " "
	exit
}
67
#set -x

# --- Global configuration (overridden by processArgs) ---

# Invocation context.
args="$@"
name="$0"

# Test selection and direction ("-o" egress is the only supported mode).
dir="-o"
dir_name="out"
use_netperf=0
multi_iperf=0
rr=0

# Traffic parameters.
dur=5
flows=1
rate=1000
port=5201
cc=x
netem=0
ecn=0

# hbm program parameters.
id=1
prog=""
flags=""
do_stats=0

# Output / bookkeeping.
details=0
debug_flag=0
server=""
qdisc=""
flow_cnt=1
93
# Launch the hbm user-space rate limiter in the background.
# Reads globals: dir, id, rate, dur, flags, dbg, prog.
# Side effects: recreates hbm.out with the command line followed by the
# program's combined stdout/stderr.
# Outputs: the background process pid on stdout (captured by the caller).
function start_hbm () {
	local cmdline="./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog"

	rm -f hbm.out
	# Record the exact invocation first so failures can be reproduced.
	echo "$cmdline" > hbm.out
	echo " " >> hbm.out
	$cmdline >> hbm.out 2>&1 &
	echo $!
}
101
# Parse the script arguments (from the global $args) into the global
# configuration variables. Unknown arguments print an error and the help.
# Fixed here: Usage advertises -h, but there was no -h case, so "-h"
# printed "Unknown arg:-h" before the help text; it now goes straight
# to Usage.
processArgs () {
	for i in $args ; do
		case $i in
		# Support for upcomming ingress rate limiting
		#in)         # support for upcoming ingress rate limiting
		#	dir="-i"
		#	dir_name="in"
		#	;;
		out)
			dir="-o"
			dir_name="out"
			;;
		-b=*|--bpf=*)
			prog="${i#*=}"
			;;
		-c=*|--cc=*)
			cc="${i#*=}"
			;;
		--no_cn)
			flags="$flags --no_cn"
			;;
		--debug)
			flags="$flags -d"
			debug_flag=1
			;;
		-d=*|--delay=*)
			netem="${i#*=}"
			;;
		-D)
			details=1
			;;
		-E)
			ecn=1
			;;
		--edt)
			# EDT requires the fq qdisc to honor departure times.
			flags="$flags --edt"
			qdisc="fq"
			;;
		-f=*|--flows=*)
			flows="${i#*=}"
			;;
		-h)
			Usage
			;;
		-i=*|--id=*)
			id="${i#*=}"
			;;
		-l)
			flags="$flags -l"
			;;
		-N)
			use_netperf=1
			;;
		-p=*|--port=*)
			port="${i#*=}"
			;;
		-P)
			multi_iperf=1
			;;
		-q=*)
			qdisc="${i#*=}"
			;;
		-r=*|--rate=*)
			rate="${i#*=}"
			;;
		-R)
			rr=1
			;;
		-s=*|--server=*)
			server="${i#*=}"
			;;
		-S|--stats)
			flags="$flags -s"
			do_stats=1
			;;
		-t=*|--time=*)
			dur="${i#*=}"
			;;
		-w)
			flags="$flags -w"
			;;
		cubic)
			cc=cubic
			;;
		dctcp)
			cc=dctcp
			;;
		*)
			echo "Unknown arg:$i"
			Usage
			;;
		esac
	done
}
193
# Parse $args (saved from "$@" above) into the global configuration.
processArgs

# Start with a clean BPF trace log when --debug was requested.
if [ $debug_flag -eq 1 ] ; then
	rm -f hbm_out.log
fi

# Launch the hbm rate limiter in the background and give it 100ms to
# load the BPF program and create the test cgroup before we join it.
hbm_pid=$(start_hbm)
usleep 100000

host=`hostname`
cg_base_dir=/sys/fs/cgroup
# Cgroup created by the hbm program; $id distinguishes parallel runs.
cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"

# Move this shell (and all child load generators: ping, netperf,
# iperf3) into the rate-limited cgroup.
echo $$ >> $cg_dir/cgroup.procs

# Unlimited locked memory (BPF maps need it on older kernels).
ulimit -l unlimited

# Remove stale output from previous runs.
rm -f ss.out
rm -f hbm.[0-9]*.$dir_name
if [ $ecn -ne 0 ] ; then
	sysctl -w -q -n net.ipv4.tcp_ecn=1
fi

# Remember the current congestion control so it can be restored on exit.
# netperf selects CC per-connection via -K instead, so skip in that case.
if [ $use_netperf -eq 0 ] ; then
	cur_cc=`sysctl -n net.ipv4.tcp_congestion_control`
	if [ "$cc" != "x" ] ; then
		sysctl -w -q -n net.ipv4.tcp_congestion_control=$cc
	fi
fi

# A netem delay (-d) is installed on loopback and takes precedence over
# -q; otherwise install the requested qdisc on eth0.
if [ "$netem" -ne "0" ] ; then
	if [ "$qdisc" != "" ] ; then
		echo "WARNING: Ignoring -q options because -d option used"
	fi
	tc qdisc del dev lo root > /dev/null 2>&1
	# "$netem\ms" expands to e.g. "10ms" (backslash merely escapes 'm')
	tc qdisc add dev lo root netem delay $netem\ms > /dev/null 2>&1
elif [ "$qdisc" != "" ] ; then
	tc qdisc del dev eth0 root > /dev/null 2>&1
	tc qdisc add dev eth0 root $qdisc > /dev/null 2>&1
fi

n=0
# 5 pings per second (-i 0.2) for the whole test duration.
m=$[$dur * 5]
# Default target is local IPv6 loopback; use the remote server when
# netperf runs against another host.
hn="::1"
if [ $use_netperf -ne 0 ] ; then
	if [ "$server" != "" ] ; then
		hn=$server
	fi
fi

# Background ping; its average RTT is reported when -D is given.
( ping6 -i 0.2 -c $m $hn > ping.out 2>&1 ) &
245
# Run the load generators and report per-flow and aggregate goodput.
# Three modes: netperf (-N), a single multi-stream iperf3 (default), or
# one iperf3 instance per flow (-P).
# Fixed here: two netperf output-selector lists used "P50_lATENCY"
# (lower-case 'l') while the ingress path correctly used "P50_LATENCY";
# made them consistent.
if [ $use_netperf -ne 0 ] ; then
	# Only start a local netserver when none is running and no remote
	# server was specified; remember whether one pre-existed so the
	# teardown code does not kill a server it did not start.
	begNetserverPid=`ps ax | grep netserver | grep --invert-match "grep" | \
		awk '{ print $1 }'`
	if [ "$begNetserverPid" == "" ] ; then
		if [ "$server" == "" ] ; then
			( ./netserver > /dev/null 2>&1) &
			usleep 100000
		fi
	fi
	flow_cnt=1
	if [ "$server" == "" ] ; then
		np_server=$host
	else
		np_server=$server
	fi
	# netperf selects congestion control per connection via -K.
	if [ "$cc" == "x" ] ; then
		np_cc=""
	else
		np_cc="-K $cc,$cc"
	fi
	replySize=1
	# One background netperf per flow; per-flow results land in
	# netperf.<cgroup-id>.<flow>.
	while [ $flow_cnt -le $flows ] ; do
		if [ $rr -ne 0 ] ; then
			# TCP_RR: first flow 10KB requests, the rest 1MB;
			# ingress swaps request/reply sizes.
			reqSize=1M
			if [ $flow_cnt -eq 1 ] ; then
				reqSize=10K
			fi
			if [ "$dir" == "-i" ] ; then
				replySize=$reqSize
				reqSize=1
			fi
			( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r $reqSize,$replySize $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
		else
			if [ "$dir" == "-i" ] ; then
				( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r 1,10M $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
			else
				( ./netperf -H $np_server -l $dur -f m -j -t TCP_STREAM -- $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
			fi
		fi
		flow_cnt=$[flow_cnt+1]
	done

# sleep for duration of test (plus some buffer)
	n=$[dur+2]
	sleep $n

# force graceful termination of netperf
	pids=`pgrep netperf`
	for p in $pids ; do
		kill -SIGALRM $p
	done

	flow_cnt=1
	rate=0
	if [ $details -ne 0 ] ; then
		echo ""
		echo "Details for HBM in cgroup $id"
		if [ $do_stats -eq 1 ] ; then
			if [ -e hbm.$id.$dir_name ] ; then
				cat hbm.$id.$dir_name
			fi
		fi
	fi
	# Extract each flow's Mbps from the netperf key=value output and
	# accumulate the aggregate goodput.
	while [ $flow_cnt -le $flows ] ; do
		if [ "$dir" == "-i" ] ; then
			r=`cat netperf.$id.$flow_cnt | grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
		else
			r=`cat netperf.$id.$flow_cnt | grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
		fi
		echo "rate for flow $flow_cnt: $r"
		rate=$[rate+r]
		if [ $details -ne 0 ] ; then
			echo "-----"
			echo "Details for cgroup $id, flow $flow_cnt"
			cat netperf.$id.$flow_cnt
		fi
		flow_cnt=$[flow_cnt+1]
	done
	if [ $details -ne 0 ] ; then
		echo ""
		delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
		echo "PING AVG DELAY:$delay"
		echo "AGGREGATE_GOODPUT:$rate"
	else
		echo $rate
	fi
elif [ $multi_iperf -eq 0 ] ; then
	# Single iperf3 server/client pair carrying all flows (-P $flows);
	# the "receiver" summary line yields the aggregate rate.
	(iperf3 -s -p $port -1 > /dev/null 2>&1) &
	usleep 100000
	iperf3 -c $host -p $port -i 0 -P $flows -f m -t $dur > iperf.$id
	rates=`grep receiver iperf.$id | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*"`
	rate=`echo $rates | grep -o "[0-9]*$"`

	if [ $details -ne 0 ] ; then
		echo ""
		echo "Details for HBM in cgroup $id"
		if [ $do_stats -eq 1 ] ; then
			if [ -e hbm.$id.$dir_name ] ; then
				cat hbm.$id.$dir_name
			fi
		fi
		delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
		echo "PING AVG DELAY:$delay"
		echo "AGGREGATE_GOODPUT:$rate"
	else
		echo $rate
	fi
else
	# One iperf3 server/client pair per flow, each on its own port;
	# per-flow rates are written to iperf3.<cgroup-id>.<flow>.
	flow_cnt=1
	while [ $flow_cnt -le $flows ] ; do
		(iperf3 -s -p $port -1 > /dev/null 2>&1) &
		( iperf3 -c $host -p $port -i 0 -P 1 -f m -t $dur | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" | grep -o "[0-9]*$" > iperf3.$id.$flow_cnt ) &
		port=$[port+1]
		flow_cnt=$[flow_cnt+1]
	done
	n=$[dur+1]
	sleep $n
	flow_cnt=1
	rate=0
	if [ $details -ne 0 ] ; then
		echo ""
		echo "Details for HBM in cgroup $id"
		if [ $do_stats -eq 1 ] ; then
			if [ -e hbm.$id.$dir_name ] ; then
				cat hbm.$id.$dir_name
			fi
		fi
	fi

	# Sum the per-flow rates into the aggregate goodput.
	while [ $flow_cnt -le $flows ] ; do
		r=`cat iperf3.$id.$flow_cnt`
#		echo "rate for flow $flow_cnt: $r"
		if [ $details -ne 0 ] ; then
			echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
		fi
		rate=$[rate+r]
		flow_cnt=$[flow_cnt+1]
	done
	if [ $details -ne 0 ] ; then
		delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
		echo "PING AVG DELAY:$delay"
		echo "AGGREGATE_GOODPUT:$rate"
	else
		echo $rate
	fi
fi
392
# --- Teardown: restore every system setting the test changed ---

# Restore the congestion control saved before the test (iperf3 path only).
if [ $use_netperf -eq 0 ] ; then
	sysctl -w -q -n net.ipv4.tcp_congestion_control=$cur_cc
fi
if [ $ecn -ne 0 ] ; then
	sysctl -w -q -n net.ipv4.tcp_ecn=0
fi
# Remove whichever qdisc the setup installed (netem on lo, or -q on eth0).
if [ "$netem" -ne "0" ] ; then
	tc qdisc del dev lo root > /dev/null 2>&1
fi
if [ "$qdisc" != "" ] ; then
	tc qdisc del dev eth0 root > /dev/null 2>&1
fi
sleep 2

# Kill the hbm program only if the recorded pid still belongs to a
# running "hbm" process (avoid killing an unrelated reused pid).
hbmPid=`ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }'`
if [ "$hbmPid" == "$hbm_pid" ] ; then
	kill $hbm_pid
fi

sleep 1

# Detach any BPF programs that may have lingered.
# `bpftool cgroup tree | grep hbm` yields repeating word triples:
# cgroup path, program id, attach type; the small state machine below
# ($v: 2=idle, 0=saw path, 1=saw id) reassembles them and detaches each
# program found under the test cgroup directory.
ttx=`bpftool cgroup tree | grep hbm`
v=2
for x in $ttx ; do
	if [ "${x:0:36}" == "/sys/fs/cgroup/cgroup-test-work-dir/" ] ; then
		cg=$x ; v=0
	else
		if [ $v -eq 0 ] ; then
			id=$x ; v=1
		else
			if [ $v -eq 1 ] ; then
				type=$x ; bpftool cgroup detach $cg $type id $id
				v=0
			fi
		fi
	fi
done

# Kill the netserver only if this script started it (no pre-existing
# server, no remote server requested).
if [ $use_netperf -ne 0 ] ; then
	if [ "$server" == "" ] ; then
		if [ "$begNetserverPid" == "" ] ; then
			netserverPid=`ps ax | grep netserver | grep --invert-match "grep" | awk '{ print $1 }'`
			if [ "$netserverPid" != "" ] ; then
				kill $netserverPid
			fi
		fi
	fi
fi
exit