selftests: mlxsw: Add test for trust-DSCP

Add a test that exercises the new trust-DSCP code. Send DSCP-tagged
packets and observe how they are prioritized in the switch, and how the
DSCP is rewritten again on egress.

Signed-off-by: Petr Machata <>
Signed-off-by: Ido Schimmel <>
Signed-off-by: David S. Miller <>
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ b/tools/testing/selftests/drivers/net/mlxsw/
new file mode 100755
index 0000000..418319f
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/
@@ -0,0 +1,248 @@
+# SPDX-License-Identifier: GPL-2.0
+# Test for DSCP prioritization and rewrite. Packets ingress $swp1 with a DSCP
+# tag and are prioritized according to the map at $swp1. They egress $swp2 and
+# the DSCP value is updated to match the map at that interface. The updated DSCP
+# tag is verified at $h2.
+# ICMP responses are produced with the same DSCP tag that arrived at $h2. They
+# go through prioritization at $swp2 and DSCP retagging at $swp1. The tag is
+# verified at $h1--it should match the original tag.
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1             |                             |            $h2 +     |
+# |    |                 |                             |                |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                                                |
+# +----|----------------------------------------------------------------|-----+
+# | SW |                                                                |     |
+# |  +-|----------------------------------------------------------------|-+   |
+# |  | + $swp1                       BR                           $swp2 + |   |
+# |  |   APP=0,5,10 .. 7,5,17                      APP=0,5,20 .. 7,5,27   |   |
+# |  +--------------------------------------------------------------------+   |
+# +---------------------------------------------------------------------------+
+	ping_ipv4
+	test_dscp
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/
+	local add_del=$1; shift
+	local dev=$1; shift
+	local base=$1; shift
+	local dscp;
+	for prio in {0..7}; do
+		dscp=$((base + prio))
+		__icmp_capture_add_del $add_del $dscp "" $dev \
+				       "ip_tos $((dscp << 2))"
+	done
+	local dev=$1; shift
+	local base=$1; shift
+	__dscp_capture_add_del add $dev $base
+	local dev=$1; shift
+	local base=$1; shift
+	__dscp_capture_add_del del $dev $base
+	local dscp;
+	simple_if_init $h1
+	tc qdisc add dev $h1 clsact
+	dscp_capture_install $h1 10
+	dscp_capture_uninstall $h1 10
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1
+	simple_if_init $h2
+	tc qdisc add dev $h2 clsact
+	dscp_capture_install $h2 20
+	dscp_capture_uninstall $h2 20
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2
+	local base=$1; shift
+	for prio in {0..7}; do
+		echo app=$prio,5,$((base + prio))
+	done
+	local dev=$1; shift
+	while lldptool -t -i $dev -V APP -c app | grep -q pending; do
+	    echo "$dev: waiting for lldpad to push pending APP updates"
+	    sleep 5
+	done
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+	lldptool -T -i $swp1 -V APP $(dscp_map 10) >/dev/null
+	lldptool -T -i $swp2 -V APP $(dscp_map 20) >/dev/null
+	lldpad_wait $swp1
+	lldpad_wait $swp2
+	lldptool -T -i $swp2 -V APP -d $(dscp_map 20) >/dev/null
+	lldptool -T -i $swp1 -V APP -d $(dscp_map 10) >/dev/null
+	# Give lldpad a chance to push down the changes. If the device is downed
+	# too soon, the updates will be left pending, but will already have been
+	# struck off lldpad's DB, and we won't be able to tell. Then, on the
+	# next test iteration, this would cause weirdness as newly-added APP
+	# rules conflict with the old ones, sometimes getting stuck in an
+	# "unknown" state.
+	sleep 5
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 nomaster
+	ip link del dev br1
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+	vrf_prepare
+	h1_create
+	h2_create
+	switch_create
+	pre_cleanup
+	switch_destroy
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+	local dev=$1; shift
+	local base=$1; shift
+	for prio in {0..7}; do
+		local dscp=$((base + prio))
+		local t=$(tc_rule_stats_get $dev $dscp)
+		echo "[$dscp]=$t "
+	done
+	ping_test $h1
+	local vrf_name=$1; shift
+	local sip=$1; shift
+	local dip=$1; shift
+	local prio=$1; shift
+	local dev_10=$1; shift
+	local dev_20=$1; shift
+	local dscp_10=$(((prio + 10) << 2))
+	local dscp_20=$(((prio + 20) << 2))
+	RET=0
+	local -A t0s
+	eval "t0s=($(dscp_fetch_stats $dev_10 10)
+		   $(dscp_fetch_stats $dev_20 20))"
+	ip vrf exec $vrf_name \
+	   ${PING} -Q $dscp_10 ${sip:+-I $sip} $dip \
+		   -c 10 -i 0.1 -w 2 &> /dev/null
+	local -A t1s
+	eval "t1s=($(dscp_fetch_stats $dev_10 10)
+		   $(dscp_fetch_stats $dev_20 20))"
+	for key in ${!t0s[@]}; do
+		local expect
+		if ((key == dscp_10 || key == dscp_20)); then
+			expect=10
+		else
+			expect=0
+		fi
+		local delta=$((t1s[key] - t0s[key]))
+		((expect == delta))
+		check_err $? "DSCP $key: Expected to capture $expect packets, got $delta."
+	done
+	log_test "DSCP rewrite: $dscp_10-(prio $prio)-$dscp_20"
+	for prio in {0..7}; do
+		dscp_ping_test v$h1 $prio $h1 $h2
+	done
+trap cleanup EXIT