brakmo | 4ffd44c | 2019-03-01 12:38:50 -0800 | [diff] [blame] | 1 | #!/bin/bash |
| 2 | # SPDX-License-Identifier: GPL-2.0 |
| 3 | # |
| 4 | # Copyright (c) 2019 Facebook |
| 5 | # |
| 6 | # This program is free software; you can redistribute it and/or |
| 7 | # modify it under the terms of version 2 of the GNU General Public |
| 8 | # License as published by the Free Software Foundation. |
| 9 | |
# Usage: print the help text for this script to stdout and terminate the
# script.
# Globals:   name (read) - script name shown in the synopsis line
# Outputs:   help text on stdout
# Returns:   does not return; calls exit
Usage() {
  # Unquoted here-doc delimiter so that $name is expanded in the synopsis.
  cat <<EOF
Script for testing HBM (Host Bandwidth Manager) framework.
It creates a cgroup to use for testing and loads a BPF program to limit
egress or ingress bandwidth. It then uses iperf3 or netperf to create
loads. The output is the goodput in Mbps (unless -D was used).

USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]
             [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E] [--edt]
             [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id>]
             [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]
             [-q=<qdisc>] [-r=<rate>|--rate=<rate>] [-R]
             [-s=<server>|--server=<server>] [-S|--stats]
             [-t=<time>|--time=<time>] [-w] [cubic|dctcp]
  Where:
    out               egress (default)
    -b or --bpf       BPF program filename to load and attach.
                      Default is hbm_out_kern.o for egress.
    -c or --cc        TCP congestion control (cubic or dctcp)
    --debug           print BPF trace buffer
    -d or --delay     add a delay in ms using netem
    -D                In addition to the goodput in Mbps, it also outputs
                      other detailed information. This information is
                      test dependent (i.e. iperf3 or netperf).
    -E                enable ECN (not required for dctcp)
    --edt             use fq's Earliest Departure Time (requires fq)
    -f or --flows     number of concurrent flows (default=1)
    -i or --id        cgroup id (an integer, default is 1)
    -N                use netperf instead of iperf3
    --no_cn           Do not return CN notifications
    -l                do not limit flows using loopback
    -h                Help
    -p or --port      iperf3 port (default is 5201)
    -P                use an iperf3 instance for each flow
    -q                use the specified qdisc
    -r or --rate      rate in Mbps (default is 1Gbps)
    -R                Use TCP_RR for netperf. 1st flow has req
                      size of 10KB, rest of 1MB. Reply in all
                      cases is 1 byte.
                      More detailed output for each flow can be found
                      in the files netperf.<cg>.<flow>, where <cg> is the
                      cgroup id as specified with the -i flag, and <flow>
                      is the flow id starting at 1 and increasing by 1 for
                      each flow (as specified by -f).
    -s or --server    hostname of netperf server. Used to create netperf
                      test traffic between two hosts (default is within host)
                      netserver must be running on the host.
    -S or --stats     update hbm stats (default is no).
    -t or --time      duration of iperf3 in seconds (default=5)
    -w                Work conserving flag. cgroup can increase its
                      bandwidth beyond the rate limit specified
                      while there is available bandwidth. Current
                      implementation assumes there is only one NIC
                      (eth0), but can be extended to support multiple
                      NICs.
    cubic or dctcp    specify which TCP CC to use

EOF
  exit
}
| 67 | |
| 68 | #set -x |
| 69 | |
# ---- Default settings; processArgs overrides them from the command line ----

# Invocation context.
name="$0"
args="$*"            # all arguments flattened into one space-separated string

# Parameters handed to ./hbm by start_hbm.
dir="-o"             # direction option
dir_name="out"       # suffix of the hbm.<id>.<dir_name> stats file
id=1                 # cgroup id (-i)
rate=1000            # rate limit (-r)
dur=5                # test duration in seconds (-t)
prog=""              # BPF program file (-b)
flags=""             # extra hbm options accumulated while parsing
debug_flag=0         # set to 1 by --debug
do_stats=0           # set to 1 by -S/--stats

# Traffic generator selection and shape.
use_netperf=0        # set to 1 by -N
multi_iperf=0        # set to 1 by -P
rr=0                 # set to 1 by -R
flows=1              # number of flows (-f)
flow_cnt=1           # loop counter over flows
port=5201            # first iperf3 port (-p)
server=""            # netperf server (-s); empty means local host
details=0            # set to 1 by -D

# Network tuning.
cc=x                 # "x" means: leave the congestion control untouched
ecn=0                # set to 1 by -E
netem=0              # netem delay in ms (-d); 0 disables netem
qdisc=""             # qdisc to install (-q or --edt)
| 93 | |
# start_hbm: launch ./hbm in the background and report its PID.
# Globals:   dir, id, rate, dur, flags, dbg, prog (read)
# Outputs:   the PID of the background ./hbm process on stdout;
#            hbm.out is recreated with the command line, a separator line,
#            and then everything ./hbm writes to stdout/stderr.
start_hbm() {
  # Built once so that the logged command line and the executed one agree.
  local hbm_cmd="./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog"

  rm -f hbm.out
  {
    echo "$hbm_cmd"
    echo " "
  } > hbm.out

  # Deliberately unquoted: the string must be split back into words.
  $hbm_cmd >> hbm.out 2>&1 &
  echo $!
}
| 101 | |
# processArgs: parse the command line saved in $args and set the script's
# configuration variables accordingly.
# Globals:   args (read) - space-separated argument string; it is split on
#            whitespace on purpose, so option values cannot contain spaces.
#            Written: dir, dir_name, prog, cc, flags, debug_flag, netem,
#            details, ecn, qdisc, flows, id, use_netperf, port, multi_iperf,
#            rate, rr, server, do_stats, dur.
# Outputs:   "Unknown arg:<arg>" followed by the help text for an
#            unrecognized argument (Usage then ends the script).
processArgs () {
  local i
  for i in $args ; do
    case "$i" in
    # Support for upcoming ingress rate limiting
    #in)
    #  dir="-i"
    #  dir_name="in"
    #  ;;
    out)
      dir="-o"
      dir_name="out"
      ;;
    -b=*|--bpf=*)
      prog="${i#*=}"
      ;;
    -c=*|--cc=*)
      cc="${i#*=}"
      ;;
    --no_cn)
      flags="$flags --no_cn"
      ;;
    --debug)
      flags="$flags -d"
      debug_flag=1
      ;;
    -d=*|--delay=*)
      netem="${i#*=}"
      ;;
    -D)
      details=1
      ;;
    -E)
      ecn=1
      ;;
    --edt)
      flags="$flags --edt"
      qdisc="fq"
      ;;
    -f=*|--flows=*)
      flows="${i#*=}"
      ;;
    -h|--help)
      # -h is advertised in the help text; show it without the
      # "Unknown arg" complaint that the catch-all arm prints.
      Usage
      ;;
    -i=*|--id=*)
      id="${i#*=}"
      ;;
    -l)
      flags="$flags -l"
      ;;
    -N)
      use_netperf=1
      ;;
    -p=*|--port=*)
      port="${i#*=}"
      ;;
    -P)
      multi_iperf=1
      ;;
    -q=*)
      qdisc="${i#*=}"
      ;;
    -r=*|--rate=*)
      rate="${i#*=}"
      ;;
    -R)
      rr=1
      ;;
    -s=*|--server=*)
      server="${i#*=}"
      ;;
    -S|--stats)
      flags="$flags -s"
      do_stats=1
      ;;
    -t=*|--time=*)
      dur="${i#*=}"
      ;;
    -w)
      flags="$flags -w"
      ;;
    cubic)
      cc=cubic
      ;;
    dctcp)
      cc=dctcp
      ;;
    *)
      echo "Unknown arg:$i"
      Usage
      ;;
    esac
  done
}
| 193 | |
# ---- Setup: parse options, start hbm and prepare the test environment ----
processArgs

# Remove the trace log left over from an earlier --debug run.
if [ "$debug_flag" -eq 1 ] ; then
  rm -f hbm_out.log
fi

# start_hbm launches ./hbm in the background and prints its PID.
hbm_pid=$(start_hbm)
# Wait ~100 ms before using the cgroup below (presumably so that hbm has
# time to set it up - TODO confirm). usleep is not installed everywhere, so
# fall back to a fractional sleep instead of silently skipping the delay.
usleep 100000 2>/dev/null || sleep 0.1

host=$(hostname)
cg_base_dir=/sys/fs/cgroup
cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"

# Add this shell to the test cgroup's process list.
echo $$ >> "$cg_dir/cgroup.procs"

ulimit -l unlimited

# Drop result files from previous runs.
rm -f ss.out
rm -f hbm.[0-9]*."$dir_name"

if [ "$ecn" -ne 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_ecn=1
fi

# For iperf3 the congestion control is switched system-wide; remember the
# current setting so it can be restored at the end of the script.
if [ "$use_netperf" -eq 0 ] ; then
  cur_cc=$(sysctl -n net.ipv4.tcp_congestion_control)
  if [ "$cc" != "x" ] ; then
    sysctl -w -q -n net.ipv4.tcp_congestion_control="$cc"
  fi
fi

# A netem delay (on lo) takes precedence over a user-selected qdisc (on eth0).
if [ "$netem" -ne 0 ] ; then
  if [ "$qdisc" != "" ] ; then
    echo "WARNING: Ignoring -q options because -d option used"
  fi
  tc qdisc del dev lo root > /dev/null 2>&1
  tc qdisc add dev lo root netem delay "${netem}ms" > /dev/null 2>&1
elif [ "$qdisc" != "" ] ; then
  tc qdisc del dev eth0 root > /dev/null 2>&1
  tc qdisc add dev eth0 root $qdisc > /dev/null 2>&1
fi

n=0
# ping6 below sends one probe every 0.2 s, i.e. 5 per second of test time.
m=$(( dur * 5 ))
hn="::1"
if [ "$use_netperf" -ne 0 ] && [ "$server" != "" ] ; then
  hn=$server
fi

# Measure round-trip times in the background while the load is running.
( ping6 -i 0.2 -c "$m" "$hn" > ping.out 2>&1 ) &
| 245 | |
# ---- Traffic generation and result reporting ----

# Sleep for ~100 ms. usleep is preferred when installed; otherwise fall back
# to a fractional sleep so the delay is not silently skipped.
pause_100ms() {
  usleep 100000 2>/dev/null || sleep 0.1
}

# Print the "avg" field of the ping summary line found in ping.out.
ping_avg_delay() {
  grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"
}

# With -D: print the HBM header and, if -S was given and the stats file
# exists, the contents of hbm.<id>.<dir_name>. Prints nothing without -D.
print_hbm_details() {
  if [ "$details" -eq 0 ] ; then
    return
  fi
  echo ""
  echo "Details for HBM in cgroup $id"
  if [ "$do_stats" -eq 1 ] && [ -e "hbm.$id.$dir_name" ] ; then
    cat "hbm.$id.$dir_name"
  fi
}

# Print the final result: ping delay plus aggregate goodput with -D,
# otherwise just the aggregate goodput held in $rate.
report_goodput() {
  if [ "$details" -ne 0 ] ; then
    delay=$(ping_avg_delay)
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo "$rate"
  fi
}

if [ "$use_netperf" -ne 0 ] ; then
  # Remember whether a netserver was already running before the test.
  begNetserverPid=$(ps ax | grep netserver | grep --invert-match "grep" | \
                    awk '{ print $1 }')
  if [ "$begNetserverPid" == "" ] && [ "$server" == "" ] ; then
    ( ./netserver > /dev/null 2>&1 ) &
    pause_100ms
  fi

  if [ "$server" == "" ] ; then
    np_server=$host
  else
    np_server=$server
  fi
  # "x" means no congestion control was requested on the command line.
  if [ "$cc" == "x" ] ; then
    np_cc=""
  else
    np_cc="-K $cc,$cc"
  fi

  # Start one netperf per flow; each writes to netperf.<id>.<flow>.
  # $np_cc stays unquoted so that it expands to zero or two words.
  replySize=1
  flow_cnt=1
  while [ "$flow_cnt" -le "$flows" ] ; do
    if [ "$rr" -ne 0 ] ; then
      # First flow uses a 10K request, all others 1M.
      reqSize=1M
      if [ "$flow_cnt" -eq 1 ] ; then
        reqSize=10K
      fi
      if [ "$dir" == "-i" ] ; then
        replySize=$reqSize
        reqSize=1
      fi
      ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- \
          -r $reqSize,$replySize $np_cc \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > netperf.$id.$flow_cnt ) &
    elif [ "$dir" == "-i" ] ; then
      ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- \
          -r 1,10M $np_cc \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > netperf.$id.$flow_cnt ) &
    else
      ( ./netperf -H $np_server -l $dur -f m -j -t TCP_STREAM -- \
          $np_cc \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > netperf.$id.$flow_cnt ) &
    fi
    flow_cnt=$(( flow_cnt + 1 ))
  done

  # sleep for duration of test (plus some buffer)
  n=$(( dur + 2 ))
  sleep $n

  # force graceful termination of netperf
  pids=$(pgrep netperf)
  for p in $pids ; do
    kill -SIGALRM $p
  done

  print_hbm_details

  # Sum the per-flow throughput reported in each netperf output file.
  flow_cnt=1
  rate=0
  while [ "$flow_cnt" -le "$flows" ] ; do
    if [ "$dir" == "-i" ] ; then
      r=$(grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" netperf.$id.$flow_cnt | grep -o "[0-9]*")
    else
      r=$(grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" netperf.$id.$flow_cnt | grep -o "[0-9]*")
    fi
    echo "rate for flow $flow_cnt: $r"
    rate=$(( rate + r ))
    if [ "$details" -ne 0 ] ; then
      echo "-----"
      echo "Details for cgroup $id, flow $flow_cnt"
      cat netperf.$id.$flow_cnt
    fi
    flow_cnt=$(( flow_cnt + 1 ))
  done

  if [ "$details" -ne 0 ] ; then
    echo ""
  fi
  report_goodput
elif [ "$multi_iperf" -eq 0 ] ; then
  # One iperf3 server/client pair carrying all flows (-P $flows).
  ( iperf3 -s -p $port -1 > /dev/null 2>&1 ) &
  pause_100ms
  iperf3 -c $host -p $port -i 0 -P $flows -f m -t $dur > iperf.$id
  # Integer part of every "receiver" rate; the last one is used as the
  # aggregate.
  rates=$(grep receiver iperf.$id | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*")
  rate=$(echo $rates | grep -o "[0-9]*$")

  print_hbm_details
  report_goodput
else
  # One iperf3 server/client pair per flow, each on its own port.
  flow_cnt=1
  while [ "$flow_cnt" -le "$flows" ] ; do
    ( iperf3 -s -p $port -1 > /dev/null 2>&1 ) &
    ( iperf3 -c $host -p $port -i 0 -P 1 -f m -t $dur | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" | grep -o "[0-9]*$" > iperf3.$id.$flow_cnt ) &
    port=$(( port + 1 ))
    flow_cnt=$(( flow_cnt + 1 ))
  done
  n=$(( dur + 1 ))
  sleep $n

  print_hbm_details

  # Sum the per-flow rates written by the background clients.
  flow_cnt=1
  rate=0
  while [ "$flow_cnt" -le "$flows" ] ; do
    r=$(cat iperf3.$id.$flow_cnt)
    if [ "$details" -ne 0 ] ; then
      echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
    fi
    rate=$(( rate + r ))
    flow_cnt=$(( flow_cnt + 1 ))
  done

  report_goodput
fi
| 392 | |
# ---- Teardown: undo system changes and stop helper processes ----

# Restore the congestion control that was saved before the iperf3 run.
if [ "$use_netperf" -eq 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_congestion_control="$cur_cc"
fi
# NOTE(review): ECN is reset to 0, not to the value it had before the test;
# the earlier part of the script does not save the previous setting.
if [ "$ecn" -ne 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_ecn=0
fi
# Remove only the qdisc that setup installed: netem on lo when a delay was
# requested, otherwise the user-selected qdisc on eth0. Setup does not touch
# eth0 when netem is in use, so its root qdisc must be left alone then.
if [ "$netem" -ne 0 ] ; then
  tc qdisc del dev lo root > /dev/null 2>&1
elif [ "$qdisc" != "" ] ; then
  tc qdisc del dev eth0 root > /dev/null 2>&1
fi
sleep 2

# Kill our hbm instance if it is still running. Other hbm processes may be
# running too, so look for our PID in the list rather than comparing it
# against the whole list.
hbmPid=$(ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }')
for p in $hbmPid ; do
  if [ "$p" == "$hbm_pid" ] ; then
    kill $hbm_pid
    break
  fi
done

sleep 1

# Detach any BPF programs that may have lingered.
# The filtered "bpftool cgroup tree" output is walked word by word:
#   v=2  no cgroup path seen yet, words are skipped
#   v=0  next word is taken as a program id
#   v=1  next word is taken as the attach type, then the program is detached
ttx=$(bpftool cgroup tree | grep hbm)
v=2
for x in $ttx ; do
  if [ "${x:0:36}" == "/sys/fs/cgroup/cgroup-test-work-dir/" ] ; then
    cg=$x
    v=0
  elif [ "$v" -eq 0 ] ; then
    prog_id=$x
    v=1
  elif [ "$v" -eq 1 ] ; then
    attach_type=$x
    bpftool cgroup detach $cg $attach_type id $prog_id
    v=0
  fi
done

# Stop the local netserver only if this script started it, i.e. none was
# running beforehand and no remote server was given.
if [ "$use_netperf" -ne 0 ] && [ "$server" == "" ] &&
   [ "$begNetserverPid" == "" ] ; then
  netserverPid=$(ps ax | grep netserver | grep --invert-match "grep" | awk '{ print $1 }')
  if [ "$netserverPid" != "" ] ; then
    kill $netserverPid
  fi
fi
exit