Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
We got slightly different patches removing a double word
in a comment in net/ipv4/raw.c - picked the version from net.
Simple conflict in drivers/net/ethernet/ibm/ibmvnic.c. Use cached
values instead of VNIC login response buffer (following what
commit 507ebe6444a4 ("ibmvnic: Fix use-after-free of VNIC login
response buffer") did).
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a106874..8af893e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1338,6 +1338,11 @@
Format: <interval>,<probability>,<space>,<times>
See also Documentation/fault-injection/.
+ fb_tunnels= [NET]
+ Format: { initns | none }
+ See Documentation/admin-guide/sysctl/net.rst for
+ fb_tunnels_only_for_init_ns
+
floppy= [HW]
See Documentation/admin-guide/blockdev/floppy.rst.
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 42cd04b..57fd6ce 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -300,7 +300,6 @@
0: 0 1 2 3 4 5 6 7
RSS hash key:
84:50:f4:00:a8:15:d1:a7:e9:7f:1d:60:35:c7:47:25:42:97:74:ca:56:bb:b6:a1:d8:43:e3:c9:0c:fd:17:55:c2:3a:4d:69:ed:f1:42:89
-
netdev_tstamp_prequeue
----------------------
@@ -321,11 +320,20 @@
----------------------------
Controls if fallback tunnels (like tunl0, gre0, gretap0, erspan0,
-sit0, ip6tnl0, ip6gre0) are automatically created when a new
-network namespace is created, if corresponding tunnel is present
-in initial network namespace.
-If set to 1, these devices are not automatically created, and
-user space is responsible for creating them if needed.
+sit0, ip6tnl0, ip6gre0) are automatically created. There are 3 possibilities
+(a) value = 0; respective fallback tunnels are created when module is
+loaded in every net namespaces (backward compatible behavior).
+(b) value = 1; [kcmd value: initns] respective fallback tunnels are
+created only in init net namespace and every other net namespace will
+not have them.
+(c) value = 2; [kcmd value: none] fallback tunnels are not created
+when a module is loaded in any of the net namespace. Setting value to
+"2" is pointless after boot if these modules are built-in, so there is
+a kernel command-line option that can change this default. Please refer to
+Documentation/admin-guide/kernel-parameters.txt for additional details.
+
+Not creating fallback tunnels gives control to userspace to create
+whatever is needed only and avoid creating devices which are redundant.
Default : 0 (for compatibility reasons)
diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index a26aa1b..75a0dca 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -149,7 +149,7 @@
again in a second or later revision, it is also required to add a
version number (``v2``, ``v3``, ...) into the subject prefix::
- git format-patch --subject-prefix='PATCH net-next v2' start..finish
+ git format-patch --subject-prefix='PATCH bpf-next v2' start..finish
When changes have been requested to the patch series, always send the
whole patch series again with the feedback incorporated (never send
@@ -479,17 +479,18 @@
$ llc --version
LLVM (http://llvm.org/):
- LLVM version 6.0.0svn
+ LLVM version 10.0.0
Optimized build.
Default target: x86_64-unknown-linux-gnu
Host CPU: skylake
Registered Targets:
- bpf - BPF (host endian)
- bpfeb - BPF (big endian)
- bpfel - BPF (little endian)
- x86 - 32-bit X86: Pentium-Pro and above
- x86-64 - 64-bit X86: EM64T and AMD64
+ aarch64 - AArch64 (little endian)
+ bpf - BPF (host endian)
+ bpfeb - BPF (big endian)
+ bpfel - BPF (little endian)
+ x86 - 32-bit X86: Pentium-Pro and above
+ x86-64 - 64-bit X86: EM64T and AMD64
For developers in order to utilize the latest features added to LLVM's
BPF back end, it is advisable to run the latest LLVM releases. Support
@@ -517,6 +518,10 @@
The built binaries can then be found in the build/bin/ directory, where
you can point the PATH variable to.
+Set ``-DLLVM_TARGETS_TO_BUILD`` equal to the target you wish to build, you
+will find a full list of targets within the llvm-project/llvm/lib/Target
+directory.
+
Q: Reporting LLVM BPF issues
----------------------------
Q: Should I notify BPF kernel maintainers about issues in LLVM's BPF code
diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
index b5361b8..44dc789 100644
--- a/Documentation/bpf/btf.rst
+++ b/Documentation/bpf/btf.rst
@@ -724,6 +724,31 @@
BTF_ID_UNUSED
BTF_ID(struct, task_struct)
+The ``BTF_SET_START/END`` macros pair defines sorted list of BTF ID values
+and their count, with following syntax::
+
+ BTF_SET_START(set)
+ BTF_ID(type1, name1)
+ BTF_ID(type2, name2)
+ BTF_SET_END(set)
+
+resulting in following layout in .BTF_ids section::
+
+ __BTF_ID__set__set:
+ .zero 4
+ __BTF_ID__type1__name1__3:
+ .zero 4
+ __BTF_ID__type2__name2__4:
+ .zero 4
+
+The ``struct btf_id_set set;`` variable is defined to access the list.
+
+The ``typeX`` name can be one of following::
+
+ struct, union, typedef, func
+
+and is used as a filter when resolving the BTF ID value.
+
All the BTF ID lists and sets are compiled in the .BTF_ids section and
resolved during the linking phase of kernel build by ``resolve_btfids`` tool.
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index 7df2465..4f2874b 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -52,6 +52,7 @@
prog_cgroup_sysctl
prog_flow_dissector
bpf_lsm
+ prog_sk_lookup
Map types
diff --git a/Documentation/bpf/prog_sk_lookup.rst b/Documentation/bpf/prog_sk_lookup.rst
new file mode 100644
index 0000000..85a305c
--- /dev/null
+++ b/Documentation/bpf/prog_sk_lookup.rst
@@ -0,0 +1,98 @@
+.. SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+
+=====================
+BPF sk_lookup program
+=====================
+
+BPF sk_lookup program type (``BPF_PROG_TYPE_SK_LOOKUP``) introduces programmability
+into the socket lookup performed by the transport layer when a packet is to be
+delivered locally.
+
+When invoked BPF sk_lookup program can select a socket that will receive the
+incoming packet by calling the ``bpf_sk_assign()`` BPF helper function.
+
+Hooks for a common attach point (``BPF_SK_LOOKUP``) exist for both TCP and UDP.
+
+Motivation
+==========
+
+BPF sk_lookup program type was introduced to address setup scenarios where
+binding sockets to an address with ``bind()`` socket call is impractical, such
+as:
+
+1. receiving connections on a range of IP addresses, e.g. 192.0.2.0/24, when
+ binding to a wildcard address ``INADRR_ANY`` is not possible due to a port
+ conflict,
+2. receiving connections on all or a wide range of ports, i.e. an L7 proxy use
+ case.
+
+Such setups would require creating and ``bind()``'ing one socket to each of the
+IP address/port in the range, leading to resource consumption and potential
+latency spikes during socket lookup.
+
+Attachment
+==========
+
+BPF sk_lookup program can be attached to a network namespace with
+``bpf(BPF_LINK_CREATE, ...)`` syscall using the ``BPF_SK_LOOKUP`` attach type and a
+netns FD as attachment ``target_fd``.
+
+Multiple programs can be attached to one network namespace. Programs will be
+invoked in the same order as they were attached.
+
+Hooks
+=====
+
+The attached BPF sk_lookup programs run whenever the transport layer needs to
+find a listening (TCP) or an unconnected (UDP) socket for an incoming packet.
+
+Incoming traffic to established (TCP) and connected (UDP) sockets is delivered
+as usual without triggering the BPF sk_lookup hook.
+
+The attached BPF programs must return with either ``SK_PASS`` or ``SK_DROP``
+verdict code. As for other BPF program types that are network filters,
+``SK_PASS`` signifies that the socket lookup should continue on to regular
+hashtable-based lookup, while ``SK_DROP`` causes the transport layer to drop the
+packet.
+
+A BPF sk_lookup program can also select a socket to receive the packet by
+calling ``bpf_sk_assign()`` BPF helper. Typically, the program looks up a socket
+in a map holding sockets, such as ``SOCKMAP`` or ``SOCKHASH``, and passes a
+``struct bpf_sock *`` to ``bpf_sk_assign()`` helper to record the
+selection. Selecting a socket only takes effect if the program has terminated
+with ``SK_PASS`` code.
+
+When multiple programs are attached, the end result is determined from return
+codes of all the programs according to the following rules:
+
+1. If any program returned ``SK_PASS`` and selected a valid socket, the socket
+ is used as the result of the socket lookup.
+2. If more than one program returned ``SK_PASS`` and selected a socket, the last
+ selection takes effect.
+3. If any program returned ``SK_DROP``, and no program returned ``SK_PASS`` and
+ selected a socket, socket lookup fails.
+4. If all programs returned ``SK_PASS`` and none of them selected a socket,
+ socket lookup continues on.
+
+API
+===
+
+In its context, an instance of ``struct bpf_sk_lookup``, BPF sk_lookup program
+receives information about the packet that triggered the socket lookup. Namely:
+
+* IP version (``AF_INET`` or ``AF_INET6``),
+* L4 protocol identifier (``IPPROTO_TCP`` or ``IPPROTO_UDP``),
+* source and destination IP address,
+* source and destination L4 port,
+* the socket that has been selected with ``bpf_sk_assign()``.
+
+Refer to ``struct bpf_sk_lookup`` declaration in ``linux/bpf.h`` user API
+header, and `bpf-helpers(7)
+<https://man7.org/linux/man-pages/man7/bpf-helpers.7.html>`_ man-page section
+for ``bpf_sk_assign()`` for details.
+
+Example
+=======
+
+See ``tools/testing/selftests/bpf/prog_tests/sk_lookup.c`` for the reference
+implementation.
diff --git a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
index 88b57b0..97ca62b 100644
--- a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
+++ b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
@@ -50,6 +50,13 @@
- reset-names: If the "reset" property is specified, this property should have
the value "switch" to denote the switch reset line.
+- clocks: when provided, the first phandle is to the switch's main clock and
+ is valid for both BCM7445 and BCM7278. The second phandle is only applicable
+ to BCM7445 and is to support dividing the switch core clock.
+
+- clock-names: when provided, the first phandle must be "sw_switch", and the
+ second must be named "sw_switch_mdiv".
+
Port subnodes:
Optional properties:
diff --git a/Documentation/devicetree/bindings/net/brcm,systemport.txt b/Documentation/devicetree/bindings/net/brcm,systemport.txt
index 83f29e0..7573673 100644
--- a/Documentation/devicetree/bindings/net/brcm,systemport.txt
+++ b/Documentation/devicetree/bindings/net/brcm,systemport.txt
@@ -20,6 +20,11 @@
- systemport,num-tier1-arb: number of tier 1 arbiters, an integer
- systemport,num-txq: number of HW transmit queues, an integer
- systemport,num-rxq: number of HW receive queues, an integer
+- clocks: When provided, must be two phandles to the functional clocks nodes of
+ the SYSTEMPORT block. The first phandle is the main SYSTEMPORT clock used
+ during normal operation, while the second phandle is the Wake-on-LAN clock.
+- clock-names: When provided, names of the functional clock phandles, first
+ name should be "sw_sysport" and second should be "sw_sysportwol".
Example:
ethernet@f04a0000 {
diff --git a/Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml b/Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml
new file mode 100644
index 0000000..fa3ebba
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml
@@ -0,0 +1,130 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/intel,dwmac-plat.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel DWMAC glue layer Device Tree Bindings
+
+maintainers:
+ - Vineetha G. Jaya Kumaran <vineetha.g.jaya.kumaran@intel.com>
+
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - intel,keembay-dwmac
+ required:
+ - compatible
+
+allOf:
+ - $ref: "snps,dwmac.yaml#"
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - intel,keembay-dwmac
+ - const: snps,dwmac-4.10a
+
+ clocks:
+ items:
+ - description: GMAC main clock
+ - description: PTP reference clock
+ - description: Tx clock
+
+ clock-names:
+ items:
+ - const: stmmaceth
+ - const: ptp_ref
+ - const: tx_clk
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+
+examples:
+# FIXME: Remove defines and include the correct header file
+# once it is available in mainline.
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #define MOVISOC_KMB_PSS_GBE
+ #define MOVISOC_KMB_PSS_AUX_GBE_PTP
+ #define MOVISOC_KMB_PSS_AUX_GBE_TX
+
+ stmmac_axi_setup: stmmac-axi-config {
+ snps,lpi_en;
+ snps,wr_osr_lmt = <0x0>;
+ snps,rd_osr_lmt = <0x2>;
+ snps,blen = <0 0 0 0 16 8 4>;
+ };
+
+ mtl_rx_setup: rx-queues-config {
+ snps,rx-queues-to-use = <2>;
+ snps,rx-sched-sp;
+ queue0 {
+ snps,dcb-algorithm;
+ snps,map-to-dma-channel = <0x0>;
+ snps,priority = <0x0>;
+ };
+
+ queue1 {
+ snps,dcb-algorithm;
+ snps,map-to-dma-channel = <0x1>;
+ snps,priority = <0x1>;
+ };
+ };
+
+ mtl_tx_setup: tx-queues-config {
+ snps,tx-queues-to-use = <2>;
+ snps,tx-sched-wrr;
+ queue0 {
+ snps,weight = <0x10>;
+ snps,dcb-algorithm;
+ snps,priority = <0x0>;
+ };
+
+ queue1 {
+ snps,weight = <0x10>;
+ snps,dcb-algorithm;
+ snps,priority = <0x1>;
+ };
+ };
+
+ gmac0: ethernet@3a000000 {
+ compatible = "intel,keembay-dwmac", "snps,dwmac-4.10a";
+ interrupts = <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq";
+ reg = <0x3a000000 0x8000>;
+ snps,perfect-filter-entries = <128>;
+ phy-handle = <ð_phy0>;
+ phy-mode = "rgmii";
+ rx-fifo-depth = <4096>;
+ tx-fifo-depth = <4096>;
+ clock-names = "stmmaceth", "ptp_ref", "tx_clk";
+ clocks = <&scmi_clk MOVISOC_KMB_PSS_GBE>,
+ <&scmi_clk MOVISOC_KMB_PSS_AUX_GBE_PTP>,
+ <&scmi_clk MOVISOC_KMB_PSS_AUX_GBE_TX>;
+ snps,pbl = <0x4>;
+ snps,axi-config = <&stmmac_axi_setup>;
+ snps,mtl-rx-config = <&mtl_rx_setup>;
+ snps,mtl-tx-config = <&mtl_tx_setup>;
+ snps,tso;
+ status = "okay";
+
+ mdio0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "snps,dwmac-mdio";
+
+ ethernet-phy@0 {
+ reg = <0>;
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/net/ti,dp83822.yaml b/Documentation/devicetree/bindings/net/ti,dp83822.yaml
new file mode 100644
index 0000000..5591353
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ti,dp83822.yaml
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR BSD-2-Clause)
+# Copyright (C) 2020 Texas Instruments Incorporated
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/net/ti,dp83822.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: TI DP83822 ethernet PHY
+
+maintainers:
+ - Dan Murphy <dmurphy@ti.com>
+
+description: |
+ The DP83822 is a low-power, single-port, 10/100 Mbps Ethernet PHY. It
+ provides all of the physical layer functions needed to transmit and receive
+ data over standard, twisted-pair cables or to connect to an external,
+ fiber-optic transceiver. Additionally, the DP83822 provides flexibility to
+ connect to a MAC through a standard MII, RMII, or RGMII interface
+
+ Specifications about the Ethernet PHY can be found at:
+ http://www.ti.com/lit/ds/symlink/dp83822i.pdf
+
+allOf:
+ - $ref: "ethernet-phy.yaml#"
+
+properties:
+ reg:
+ maxItems: 1
+
+ ti,link-loss-low:
+ type: boolean
+ description: |
+ DP83822 PHY in Fiber mode only.
+ Sets the DP83822 to detect a link drop condition when the signal goes
+ high. If not set then link drop will occur when the signal goes low.
+ This property is only applicable if the fiber mode support is strapped
+ to on.
+
+ ti,fiber-mode:
+ type: boolean
+ description: |
+ DP83822 PHY only.
+ If present the DP83822 PHY is configured to operate in fiber mode
+ Fiber mode support can also be strapped. If the strap pin is not set
+ correctly or not set at all then this boolean can be used to enable it.
+ If the fiber mode is not strapped then signal detection for the PHY
+ is disabled.
+ In fiber mode, auto-negotiation is disabled and the PHY can only work in
+ 100base-fx (full and half duplex) modes.
+
+ rx-internal-delay-ps:
+ description: |
+ DP83822 PHY only.
+ Setting this property to a non-zero number sets the RX internal delay
+ for the PHY. The internal delay for the PHY is fixed to 3.5ns relative
+ to receive data.
+
+ tx-internal-delay-ps:
+ description: |
+ DP83822 PHY only.
+ Setting this property to a non-zero number sets the TX internal delay
+ for the PHY. The internal delay for the PHY is fixed to 3.5ns relative
+ to transmit data.
+
+required:
+ - reg
+
+examples:
+ - |
+ mdio0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ ethphy0: ethernet-phy@0 {
+ reg = <0>;
+ rx-internal-delay-ps = <1>;
+ tx-internal-delay-ps = <1>;
+ };
+ };
+
+...
diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst
index 5bc55a4..2ccc564 100644
--- a/Documentation/networking/af_xdp.rst
+++ b/Documentation/networking/af_xdp.rst
@@ -258,14 +258,21 @@
XDP_SHARED_UMEM bind flag
-------------------------
-This flag enables you to bind multiple sockets to the same UMEM, but
-only if they share the same queue id. In this mode, each socket has
-their own RX and TX rings, but the UMEM (tied to the fist socket
-created) only has a single FILL ring and a single COMPLETION
-ring. To use this mode, create the first socket and bind it in the normal
-way. Create a second socket and create an RX and a TX ring, or at
-least one of them, but no FILL or COMPLETION rings as the ones from
-the first socket will be used. In the bind call, set he
+This flag enables you to bind multiple sockets to the same UMEM. It
+works on the same queue id, between queue ids and between
+netdevs/devices. In this mode, each socket has their own RX and TX
+rings as usual, but you are going to have one or more FILL and
+COMPLETION ring pairs. You have to create one of these pairs per
+unique netdev and queue id tuple that you bind to.
+
+Starting with the case were we would like to share a UMEM between
+sockets bound to the same netdev and queue id. The UMEM (tied to the
+fist socket created) will only have a single FILL ring and a single
+COMPLETION ring as there is only on unique netdev,queue_id tuple that
+we have bound to. To use this mode, create the first socket and bind
+it in the normal way. Create a second socket and create an RX and a TX
+ring, or at least one of them, but no FILL or COMPLETION rings as the
+ones from the first socket will be used. In the bind call, set he
XDP_SHARED_UMEM option and provide the initial socket's fd in the
sxdp_shared_umem_fd field. You can attach an arbitrary number of extra
sockets this way.
@@ -305,11 +312,41 @@
libbpf code that protects multiple users at this point in time.
Libbpf uses this mode if you create more than one socket tied to the
-same umem. However, note that you need to supply the
+same UMEM. However, note that you need to supply the
XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD libbpf_flag with the
xsk_socket__create calls and load your own XDP program as there is no
built in one in libbpf that will route the traffic for you.
+The second case is when you share a UMEM between sockets that are
+bound to different queue ids and/or netdevs. In this case you have to
+create one FILL ring and one COMPLETION ring for each unique
+netdev,queue_id pair. Let us say you want to create two sockets bound
+to two different queue ids on the same netdev. Create the first socket
+and bind it in the normal way. Create a second socket and create an RX
+and a TX ring, or at least one of them, and then one FILL and
+COMPLETION ring for this socket. Then in the bind call, set he
+XDP_SHARED_UMEM option and provide the initial socket's fd in the
+sxdp_shared_umem_fd field as you registered the UMEM on that
+socket. These two sockets will now share one and the same UMEM.
+
+There is no need to supply an XDP program like the one in the previous
+case where sockets were bound to the same queue id and
+device. Instead, use the NIC's packet steering capabilities to steer
+the packets to the right queue. In the previous example, there is only
+one queue shared among sockets, so the NIC cannot do this steering. It
+can only steer between queues.
+
+In libbpf, you need to use the xsk_socket__create_shared() API as it
+takes a reference to a FILL ring and a COMPLETION ring that will be
+created for you and bound to the shared UMEM. You can use this
+function for all the sockets you create, or you can use it for the
+second and following ones and use xsk_socket__create() for the first
+one. Both methods yield the same result.
+
+Note that a UMEM can be shared between sockets on the same queue id
+and device, as well as between queues on the same device and between
+devices at the same time.
+
XDP_USE_NEED_WAKEUP bind flag
-----------------------------
@@ -364,7 +401,7 @@
COMPLETION ring are mandatory as you need to have a UMEM tied to your
socket. But if the XDP_SHARED_UMEM flag is used, any socket after the
first one does not have a UMEM and should in that case not have any
-FILL or COMPLETION rings created as the ones from the shared umem will
+FILL or COMPLETION rings created as the ones from the shared UMEM will
be used. Note, that the rings are single-producer single-consumer, so
do not try to access them from multiple processes at the same
time. See the XDP_SHARED_UMEM section.
@@ -567,6 +604,17 @@
switch, or other distribution mechanism, in your NIC to direct
traffic to the correct queue id and socket.
+Q: My packets are sometimes corrupted. What is wrong?
+
+A: Care has to be taken not to feed the same buffer in the UMEM into
+ more than one ring at the same time. If you for example feed the
+ same buffer into the FILL ring and the TX ring at the same time, the
+ NIC might receive data into the buffer at the same time it is
+ sending it. This will cause some packets to become corrupted. Same
+ thing goes for feeding the same buffer into the FILL rings
+ belonging to different queue ids or netdevs bound with the
+ XDP_SHARED_UMEM flag.
+
Credits
=======
diff --git a/Documentation/networking/l2tp.rst b/Documentation/networking/l2tp.rst
index a48238a..498b382 100644
--- a/Documentation/networking/l2tp.rst
+++ b/Documentation/networking/l2tp.rst
@@ -4,124 +4,364 @@
L2TP
====
-This document describes how to use the kernel's L2TP drivers to
-provide L2TP functionality. L2TP is a protocol that tunnels one or
-more sessions over an IP tunnel. It is commonly used for VPNs
-(L2TP/IPSec) and by ISPs to tunnel subscriber PPP sessions over an IP
-network infrastructure. With L2TPv3, it is also useful as a Layer-2
-tunneling infrastructure.
+Layer 2 Tunneling Protocol (L2TP) allows L2 frames to be tunneled over
+an IP network.
-Features
+This document covers the kernel's L2TP subsystem. It documents kernel
+APIs for application developers who want to use the L2TP subsystem and
+it provides some technical details about the internal implementation
+which may be useful to kernel developers and maintainers.
+
+Overview
========
-L2TPv2 (PPP over L2TP (UDP tunnels)).
-L2TPv3 ethernet pseudowires.
-L2TPv3 PPP pseudowires.
-L2TPv3 IP encapsulation.
-Netlink sockets for L2TPv3 configuration management.
+The kernel's L2TP subsystem implements the datapath for L2TPv2 and
+L2TPv3. L2TPv2 is carried over UDP. L2TPv3 is carried over UDP or
+directly over IP (protocol 115).
-History
-=======
+The L2TP RFCs define two basic kinds of L2TP packets: control packets
+(the "control plane"), and data packets (the "data plane"). The kernel
+deals only with data packets. The more complex control packets are
+handled by user space.
-The original pppol2tp driver was introduced in 2.6.23 and provided
-L2TPv2 functionality (rfc2661). L2TPv2 is used to tunnel one or more PPP
-sessions over a UDP tunnel.
+An L2TP tunnel carries one or more L2TP sessions. Each tunnel is
+associated with a socket. Each session is associated with a virtual
+netdevice, e.g. ``pppN``, ``l2tpethN``, through which data frames pass
+to/from L2TP. Fields in the L2TP header identify the tunnel or session
+and whether it is a control or data packet. When tunnels and sessions
+are set up using the Linux kernel API, we're just setting up the L2TP
+data path. All aspects of the control protocol are to be handled by
+user space.
-L2TPv3 (rfc3931) changes the protocol to allow different frame types
-to be passed over an L2TP tunnel by moving the PPP-specific parts of
-the protocol out of the core L2TP packet headers. Each frame type is
-known as a pseudowire type. Ethernet, PPP, HDLC, Frame Relay and ATM
-pseudowires for L2TP are defined in separate RFC standards. Another
-change for L2TPv3 is that it can be carried directly over IP with no
-UDP header (UDP is optional). It is also possible to create static
-unmanaged L2TPv3 tunnels manually without a control protocol
-(userspace daemon) to manage them.
+This split in responsibilities leads to a natural sequence of
+operations when establishing tunnels and sessions. The procedure looks
+like this:
-To support L2TPv3, the original pppol2tp driver was split up to
-separate the L2TP and PPP functionality. Existing L2TPv2 userspace
-apps should be unaffected as the original pppol2tp sockets API is
-retained. L2TPv3, however, uses netlink to manage L2TPv3 tunnels and
-sessions.
+ 1) Create a tunnel socket. Exchange L2TP control protocol messages
+ with the peer over that socket in order to establish a tunnel.
-Design
-======
+ 2) Create a tunnel context in the kernel, using information
+ obtained from the peer using the control protocol messages.
-The L2TP protocol separates control and data frames. The L2TP kernel
-drivers handle only L2TP data frames; control frames are always
-handled by userspace. L2TP control frames carry messages between L2TP
-clients/servers and are used to setup / teardown tunnels and
-sessions. An L2TP client or server is implemented in userspace.
+ 3) Exchange L2TP control protocol messages with the peer over the
+ tunnel socket in order to establish a session.
-Each L2TP tunnel is implemented using a UDP or L2TPIP socket; L2TPIP
-provides L2TPv3 IP encapsulation (no UDP) and is implemented using a
-new l2tpip socket family. The tunnel socket is typically created by
-userspace, though for unmanaged L2TPv3 tunnels, the socket can also be
-created by the kernel. Each L2TP session (pseudowire) gets a network
-interface instance. In the case of PPP, these interfaces are created
-indirectly by pppd using a pppol2tp socket. In the case of ethernet,
-the netdevice is created upon a netlink request to create an L2TPv3
-ethernet pseudowire.
+ 4) Create a session context in the kernel using information
+ obtained from the peer using the control protocol messages.
-For PPP, the PPPoL2TP driver, net/l2tp/l2tp_ppp.c, provides a
-mechanism by which PPP frames carried through an L2TP session are
-passed through the kernel's PPP subsystem. The standard PPP daemon,
-pppd, handles all PPP interaction with the peer. PPP network
-interfaces are created for each local PPP endpoint. The kernel's PPP
-subsystem arranges for PPP control frames to be delivered to pppd,
-while data frames are forwarded as usual.
+L2TP APIs
+=========
-For ethernet, the L2TPETH driver, net/l2tp/l2tp_eth.c, implements a
-netdevice driver, managing virtual ethernet devices, one per
-pseudowire. These interfaces can be managed using standard Linux tools
-such as "ip" and "ifconfig". If only IP frames are passed over the
-tunnel, the interface can be given an IP addresses of itself and its
-peer. If non-IP frames are to be passed over the tunnel, the interface
-can be added to a bridge using brctl. All L2TP datapath protocol
-functions are handled by the L2TP core driver.
+This section documents each userspace API of the L2TP subsystem.
-Each tunnel and session within a tunnel is assigned a unique tunnel_id
-and session_id. These ids are carried in the L2TP header of every
-control and data packet. (Actually, in L2TPv3, the tunnel_id isn't
-present in data frames - it is inferred from the IP connection on
-which the packet was received.) The L2TP driver uses the ids to lookup
-internal tunnel and/or session contexts to determine how to handle the
-packet. Zero tunnel / session ids are treated specially - zero ids are
-never assigned to tunnels or sessions in the network. In the driver,
-the tunnel context keeps a reference to the tunnel UDP or L2TPIP
-socket. The session context holds data that lets the driver interface
-to the kernel's network frame type subsystems, i.e. PPP, ethernet.
+Tunnel Sockets
+--------------
-Userspace Programming
-=====================
+L2TPv2 always uses UDP. L2TPv3 may use UDP or IP encapsulation.
-For L2TPv2, there are a number of requirements on the userspace L2TP
-daemon in order to use the pppol2tp driver.
+To create a tunnel socket for use by L2TP, the standard POSIX
+socket API is used.
-1. Use a UDP socket per tunnel.
+For example, for a tunnel using IPv4 addresses and UDP encapsulation::
-2. Create a single PPPoL2TP socket per tunnel bound to a special null
- session id. This is used only for communicating with the driver but
- must remain open while the tunnel is active. Opening this tunnel
- management socket causes the driver to mark the tunnel socket as an
- L2TP UDP encapsulation socket and flags it for use by the
- referenced tunnel id. This hooks up the UDP receive path via
- udp_encap_rcv() in net/ipv4/udp.c. PPP data frames are never passed
- in this special PPPoX socket.
+ int sockfd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
-3. Create a PPPoL2TP socket per L2TP session. This is typically done
- by starting pppd with the pppol2tp plugin and appropriate
- arguments. A PPPoL2TP tunnel management socket (Step 2) must be
- created before the first PPPoL2TP session socket is created.
+Or for a tunnel using IPv6 addresses and IP encapsulation::
+
+ int sockfd = socket(AF_INET6, SOCK_DGRAM, IPPROTO_L2TP);
+
+UDP socket programming doesn't need to be covered here.
+
+IPPROTO_L2TP is an IP protocol type implemented by the kernel's L2TP
+subsystem. The L2TPIP socket address is defined in struct
+sockaddr_l2tpip and struct sockaddr_l2tpip6 at
+`include/uapi/linux/l2tp.h`_. The address includes the L2TP tunnel
+(connection) id. To use L2TP IP encapsulation, an L2TPv3 application
+should bind the L2TPIP socket using the locally assigned
+tunnel id. When the peer's tunnel id and IP address is known, a
+connect must be done.
+
+If the L2TP application needs to handle L2TPv3 tunnel setup requests
+from peers using L2TPIP, it must open a dedicated L2TPIP
+socket to listen for those requests and bind the socket using tunnel
+id 0 since tunnel setup requests are addressed to tunnel id 0.
+
+An L2TP tunnel and all of its sessions are automatically closed when
+its tunnel socket is closed.
+
+Netlink API
+-----------
+
+L2TP applications use netlink to manage L2TP tunnel and session
+instances in the kernel. The L2TP netlink API is defined in
+`include/uapi/linux/l2tp.h`_.
+
+L2TP uses `Generic Netlink`_ (GENL). Several commands are defined:
+Create, Delete, Modify and Get for tunnel and session
+instances, e.g. ``L2TP_CMD_TUNNEL_CREATE``. The API header lists the
+netlink attribute types that can be used with each command.
+
+Tunnel and session instances are identified by a locally unique
+32-bit id. L2TP tunnel ids are given by ``L2TP_ATTR_CONN_ID`` and
+``L2TP_ATTR_PEER_CONN_ID`` attributes and L2TP session ids are given
+by ``L2TP_ATTR_SESSION_ID`` and ``L2TP_ATTR_PEER_SESSION_ID``
+attributes. If netlink is used to manage L2TPv2 tunnel and session
+instances, the L2TPv2 16-bit tunnel/session id is cast to a 32-bit
+value in these attributes.
+
+In the ``L2TP_CMD_TUNNEL_CREATE`` command, ``L2TP_ATTR_FD`` tells the
+kernel the tunnel socket fd being used. If not specified, the kernel
+creates a kernel socket for the tunnel, using IP parameters set in
+``L2TP_ATTR_IP[6]_SADDR``, ``L2TP_ATTR_IP[6]_DADDR``,
+``L2TP_ATTR_UDP_SPORT``, ``L2TP_ATTR_UDP_DPORT`` attributes. Kernel
+sockets are used to implement unmanaged L2TPv3 tunnels (iproute2's "ip
+l2tp" commands). If ``L2TP_ATTR_FD`` is given, it must be a socket fd
+that is already bound and connected. There is more information about
+unmanaged tunnels later in this document.
+
+``L2TP_CMD_TUNNEL_CREATE`` attributes:-
+
+================== ======== ===
+Attribute Required Use
+================== ======== ===
+CONN_ID Y Sets the tunnel (connection) id.
+PEER_CONN_ID Y Sets the peer tunnel (connection) id.
+PROTO_VERSION Y Protocol version. 2 or 3.
+ENCAP_TYPE Y Encapsulation type: UDP or IP.
+FD N Tunnel socket file descriptor.
+UDP_CSUM N Enable IPv4 UDP checksums. Used only if FD is
+ not set.
+UDP_ZERO_CSUM6_TX N Zero IPv6 UDP checksum on transmit. Used only
+ if FD is not set.
+UDP_ZERO_CSUM6_RX N Zero IPv6 UDP checksum on receive. Used only if
+ FD is not set.
+IP_SADDR N IPv4 source address. Used only if FD is not
+ set.
+IP_DADDR N IPv4 destination address. Used only if FD is
+ not set.
+UDP_SPORT N UDP source port. Used only if FD is not set.
+UDP_DPORT N UDP destination port. Used only if FD is not
+ set.
+IP6_SADDR N IPv6 source address. Used only if FD is not
+ set.
+IP6_DADDR N IPv6 destination address. Used only if FD is
+ not set.
+DEBUG N Debug flags.
+================== ======== ===
+
+``L2TP_CMD_TUNNEL_DESTROY`` attributes:-
+
+================== ======== ===
+Attribute Required Use
+================== ======== ===
+CONN_ID Y Identifies the tunnel id to be destroyed.
+================== ======== ===
+
+``L2TP_CMD_TUNNEL_MODIFY`` attributes:-
+
+================== ======== ===
+Attribute Required Use
+================== ======== ===
+CONN_ID Y Identifies the tunnel id to be modified.
+DEBUG N Debug flags.
+================== ======== ===
+
+``L2TP_CMD_TUNNEL_GET`` attributes:-
+
+================== ======== ===
+Attribute Required Use
+================== ======== ===
+CONN_ID N Identifies the tunnel id to be queried.
+ Ignored in DUMP requests.
+================== ======== ===
+
+``L2TP_CMD_SESSION_CREATE`` attributes:-
+
+================== ======== ===
+Attribute Required Use
+================== ======== ===
+CONN_ID Y The parent tunnel id.
+SESSION_ID Y Sets the session id.
+PEER_SESSION_ID Y Sets the parent session id.
+PW_TYPE Y Sets the pseudowire type.
+DEBUG N Debug flags.
+RECV_SEQ N Enable rx data sequence numbers.
+SEND_SEQ N Enable tx data sequence numbers.
+LNS_MODE N Enable LNS mode (auto-enable data sequence
+ numbers).
+RECV_TIMEOUT N Timeout to wait when reordering received
+ packets.
+L2SPEC_TYPE N Sets layer2-specific-sublayer type (L2TPv3
+ only).
+COOKIE N Sets optional cookie (L2TPv3 only).
+PEER_COOKIE N Sets optional peer cookie (L2TPv3 only).
+IFNAME N Sets interface name (L2TPv3 only).
+================== ======== ===
+
+For Ethernet session types, this will create an l2tpeth virtual
+interface which can then be configured as required. For PPP session
+types, a PPPoL2TP socket must also be opened and connected, mapping it
+onto the new session. This is covered in "PPPoL2TP Sockets" later.
+
+``L2TP_CMD_SESSION_DESTROY`` attributes:-
+
+================== ======== ===
+Attribute Required Use
+================== ======== ===
+CONN_ID Y Identifies the parent tunnel id of the session
+ to be destroyed.
+SESSION_ID Y Identifies the session id to be destroyed.
+IFNAME N Identifies the session by interface name. If
+ set, this overrides any CONN_ID and SESSION_ID
+ attributes. Currently supported for L2TPv3
+ Ethernet sessions only.
+================== ======== ===
+
+``L2TP_CMD_SESSION_MODIFY`` attributes:-
+
+================== ======== ===
+Attribute Required Use
+================== ======== ===
+CONN_ID Y Identifies the parent tunnel id of the session
+ to be modified.
+SESSION_ID Y Identifies the session id to be modified.
+IFNAME N Identifies the session by interface name. If
+ set, this overrides any CONN_ID and SESSION_ID
+ attributes. Currently supported for L2TPv3
+ Ethernet sessions only.
+DEBUG N Debug flags.
+RECV_SEQ N Enable rx data sequence numbers.
+SEND_SEQ N Enable tx data sequence numbers.
+LNS_MODE N Enable LNS mode (auto-enable data sequence
+ numbers).
+RECV_TIMEOUT N Timeout to wait when reordering received
+ packets.
+================== ======== ===
+
+``L2TP_CMD_SESSION_GET`` attributes:-
+
+================== ======== ===
+Attribute Required Use
+================== ======== ===
+CONN_ID N Identifies the tunnel id to be queried.
+ Ignored for DUMP requests.
+SESSION_ID N Identifies the session id to be queried.
+ Ignored for DUMP requests.
+IFNAME N Identifies the session by interface name.
+ If set, this overrides any CONN_ID and
+ SESSION_ID attributes. Ignored for DUMP
+ requests. Currently supported for L2TPv3
+ Ethernet sessions only.
+================== ======== ===
+
+Application developers should refer to `include/uapi/linux/l2tp.h`_ for
+netlink command and attribute definitions.
+
+Sample userspace code using libmnl_:
+
+ - Open L2TP netlink socket::
+
+ struct nl_sock *nl_sock;
+ int l2tp_nl_family_id;
+
+ nl_sock = nl_socket_alloc();
+ genl_connect(nl_sock);
+ genl_id = genl_ctrl_resolve(nl_sock, L2TP_GENL_NAME);
+
+ - Create a tunnel::
+
+ struct nlmsghdr *nlh;
+ struct genlmsghdr *gnlh;
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = genl_id; /* assigned to genl socket */
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ nlh->nlmsg_seq = seq;
+
+ gnlh = mnl_nlmsg_put_extra_header(nlh, sizeof(*gnlh));
+ gnlh->cmd = L2TP_CMD_TUNNEL_CREATE;
+ gnlh->version = L2TP_GENL_VERSION;
+ gnlh->reserved = 0;
+
+ mnl_attr_put_u32(nlh, L2TP_ATTR_FD, tunl_sock_fd);
+ mnl_attr_put_u32(nlh, L2TP_ATTR_CONN_ID, tid);
+ mnl_attr_put_u32(nlh, L2TP_ATTR_PEER_CONN_ID, peer_tid);
+ mnl_attr_put_u8(nlh, L2TP_ATTR_PROTO_VERSION, protocol_version);
+ mnl_attr_put_u16(nlh, L2TP_ATTR_ENCAP_TYPE, encap);
+
+ - Create a session::
+
+ struct nlmsghdr *nlh;
+ struct genlmsghdr *gnlh;
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = genl_id; /* assigned to genl socket */
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ nlh->nlmsg_seq = seq;
+
+ gnlh = mnl_nlmsg_put_extra_header(nlh, sizeof(*gnlh));
+ gnlh->cmd = L2TP_CMD_SESSION_CREATE;
+ gnlh->version = L2TP_GENL_VERSION;
+ gnlh->reserved = 0;
+
+ mnl_attr_put_u32(nlh, L2TP_ATTR_CONN_ID, tid);
+ mnl_attr_put_u32(nlh, L2TP_ATTR_PEER_CONN_ID, peer_tid);
+ mnl_attr_put_u32(nlh, L2TP_ATTR_SESSION_ID, sid);
+ mnl_attr_put_u32(nlh, L2TP_ATTR_PEER_SESSION_ID, peer_sid);
+ mnl_attr_put_u16(nlh, L2TP_ATTR_PW_TYPE, pwtype);
+ /* there are other session options which can be set using netlink
+ * attributes during session creation -- see l2tp.h
+ */
+
+ - Delete a session::
+
+ struct nlmsghdr *nlh;
+ struct genlmsghdr *gnlh;
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = genl_id; /* assigned to genl socket */
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ nlh->nlmsg_seq = seq;
+
+ gnlh = mnl_nlmsg_put_extra_header(nlh, sizeof(*gnlh));
+ gnlh->cmd = L2TP_CMD_SESSION_DELETE;
+ gnlh->version = L2TP_GENL_VERSION;
+ gnlh->reserved = 0;
+
+ mnl_attr_put_u32(nlh, L2TP_ATTR_CONN_ID, tid);
+ mnl_attr_put_u32(nlh, L2TP_ATTR_SESSION_ID, sid);
+
+ - Delete a tunnel and all of its sessions (if any)::
+
+ struct nlmsghdr *nlh;
+ struct genlmsghdr *gnlh;
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = genl_id; /* assigned to genl socket */
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ nlh->nlmsg_seq = seq;
+
+ gnlh = mnl_nlmsg_put_extra_header(nlh, sizeof(*gnlh));
+ gnlh->cmd = L2TP_CMD_TUNNEL_DELETE;
+ gnlh->version = L2TP_GENL_VERSION;
+ gnlh->reserved = 0;
+
+ mnl_attr_put_u32(nlh, L2TP_ATTR_CONN_ID, tid);
+
+PPPoL2TP Session Socket API
+---------------------------
+
+For PPP session types, a PPPoL2TP socket must be opened and connected
+to the L2TP session.
When creating PPPoL2TP sockets, the application provides information
-to the driver about the socket in a socket connect() call. Source and
-destination tunnel and session ids are provided, as well as the file
-descriptor of a UDP socket. See struct pppol2tp_addr in
-include/linux/if_pppol2tp.h. Note that zero tunnel / session ids are
-treated specially. When creating the per-tunnel PPPoL2TP management
-socket in Step 2 above, zero source and destination session ids are
-specified, which tells the driver to prepare the supplied UDP file
-descriptor for use as an L2TP tunnel socket.
+to the kernel about the tunnel and session in a socket connect()
+call. Source and destination tunnel and session ids are provided, as
+well as the file descriptor of a UDP or L2TPIP socket. See struct
+pppol2tp_addr in `include/linux/if_pppol2tp.h`_. For historical reasons,
+there are unfortunately slightly different address structures for
+L2TPv2/L2TPv3 IPv4/IPv6 tunnels and userspace must use the appropriate
+structure that matches the tunnel socket type.
Userspace may control behavior of the tunnel or session using
setsockopt and ioctl on the PPPoX socket. The following socket
@@ -130,229 +370,308 @@
========= ===========================================================
DEBUG bitmask of debug message categories. See below.
SENDSEQ - 0 => don't send packets with sequence numbers
- - 1 => send packets with sequence numbers
+ - 1 => send packets with sequence numbers
RECVSEQ - 0 => receive packet sequence numbers are optional
- - 1 => drop receive packets without sequence numbers
+ - 1 => drop receive packets without sequence numbers
LNSMODE - 0 => act as LAC.
- - 1 => act as LNS.
+ - 1 => act as LNS.
REORDERTO reorder timeout (in millisecs). If 0, don't try to reorder.
========= ===========================================================
-Only the DEBUG option is supported by the special tunnel management
-PPPoX socket.
-
In addition to the standard PPP ioctls, a PPPIOCGL2TPSTATS is provided
to retrieve tunnel and session statistics from the kernel using the
PPPoX socket of the appropriate tunnel or session.
-For L2TPv3, userspace must use the netlink API defined in
-include/linux/l2tp.h to manage tunnel and session contexts. The
-general procedure to create a new L2TP tunnel with one session is:-
+Sample userspace code:
-1. Open a GENL socket using L2TP_GENL_NAME for configuring the kernel
- using netlink.
+ - Create session PPPoX data socket::
-2. Create a UDP or L2TPIP socket for the tunnel.
+ struct sockaddr_pppol2tp sax;
+ int fd;
-3. Create a new L2TP tunnel using a L2TP_CMD_TUNNEL_CREATE
- request. Set attributes according to desired tunnel parameters,
- referencing the UDP or L2TPIP socket created in the previous step.
+ /* Note, the tunnel socket must be bound already, else it
+ * will not be ready
+ */
+ sax.sa_family = AF_PPPOX;
+ sax.sa_protocol = PX_PROTO_OL2TP;
+ sax.pppol2tp.fd = tunnel_fd;
+ sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr;
+ sax.pppol2tp.addr.sin_port = addr->sin_port;
+ sax.pppol2tp.addr.sin_family = AF_INET;
+ sax.pppol2tp.s_tunnel = tunnel_id;
+ sax.pppol2tp.s_session = session_id;
+ sax.pppol2tp.d_tunnel = peer_tunnel_id;
+ sax.pppol2tp.d_session = peer_session_id;
-4. Create a new L2TP session in the tunnel using a
- L2TP_CMD_SESSION_CREATE request.
+ /* session_fd is the fd of the session's PPPoL2TP socket.
+ * tunnel_fd is the fd of the tunnel UDP / L2TPIP socket.
+ */
+ fd = connect(session_fd, (struct sockaddr *)&sax, sizeof(sax));
+ if (fd < 0 ) {
+ return -errno;
+ }
+ return 0;
-The tunnel and all of its sessions are closed when the tunnel socket
-is closed. The netlink API may also be used to delete sessions and
-tunnels. Configuration and status info may be set or read using netlink.
+Old L2TPv2-only API
+-------------------
-The L2TP driver also supports static (unmanaged) L2TPv3 tunnels. These
-are where there is no L2TP control message exchange with the peer to
-setup the tunnel; the tunnel is configured manually at each end of the
-tunnel. There is no need for an L2TP userspace application in this
-case -- the tunnel socket is created by the kernel and configured
-using parameters sent in the L2TP_CMD_TUNNEL_CREATE netlink
-request. The "ip" utility of iproute2 has commands for managing static
-L2TPv3 tunnels; do "ip l2tp help" for more information.
+When L2TP was first added to the Linux kernel in 2.6.23, it
+implemented only L2TPv2 and did not include a netlink API. Instead,
+tunnel and session instances in the kernel were managed directly using
+only PPPoL2TP sockets. The PPPoL2TP socket is used as described in
+section "PPPoL2TP Session Socket API" but tunnel and session instances
+are automatically created on a connect() of the socket instead of
+being created by a separate netlink request:
+
+ - Tunnels are managed using a tunnel management socket which is a
+ dedicated PPPoL2TP socket, connected to (invalid) session
+ id 0. The L2TP tunnel instance is created when the PPPoL2TP
+ tunnel management socket is connected and is destroyed when the
+ socket is closed.
+
+ - Session instances are created in the kernel when a PPPoL2TP
+ socket is connected to a non-zero session id. Session parameters
+ are set using setsockopt. The L2TP session instance is destroyed
+ when the socket is closed.
+
+This API is still supported but its use is discouraged. Instead, new
+L2TPv2 applications should use netlink to first create the tunnel and
+session, then create a PPPoL2TP socket for the session.
+
+Unmanaged L2TPv3 tunnels
+------------------------
+
+The kernel L2TP subsystem also supports static (unmanaged) L2TPv3
+tunnels. Unmanaged tunnels have no userspace tunnel socket, and
+exchange no control messages with the peer to set up the tunnel; the
+tunnel is configured manually at each end of the tunnel. All
+configuration is done using netlink. There is no need for an L2TP
+userspace application in this case -- the tunnel socket is created by
+the kernel and configured using parameters sent in the
+``L2TP_CMD_TUNNEL_CREATE`` netlink request. The ``ip`` utility of
+``iproute2`` has commands for managing static L2TPv3 tunnels; do ``ip
+l2tp help`` for more information.
Debugging
-=========
+---------
-The driver supports a flexible debug scheme where kernel trace
-messages may be optionally enabled per tunnel and per session. Care is
-needed when debugging a live system since the messages are not
-rate-limited and a busy system could be swamped. Userspace uses
-setsockopt on the PPPoX socket to set a debug mask.
+The L2TP subsystem offers a range of debugging interfaces through the
+debugfs filesystem.
-The following debug mask bits are available:
+To access these interfaces, the debugfs filesystem must first be mounted::
-================ ==============================
-L2TP_MSG_DEBUG verbose debug (if compiled in)
-L2TP_MSG_CONTROL userspace - kernel interface
-L2TP_MSG_SEQ sequence numbers handling
-L2TP_MSG_DATA data packets
-================ ==============================
+ # mount -t debugfs debugfs /debug
-If enabled, files under a l2tp debugfs directory can be used to dump
-kernel state about L2TP tunnels and sessions. To access it, the
-debugfs filesystem must first be mounted::
+Files under the l2tp directory can then be accessed, providing a summary
+of the current population of tunnel and session contexts existing in the
+kernel::
- # mount -t debugfs debugfs /debug
-
-Files under the l2tp directory can then be accessed::
-
- # cat /debug/l2tp/tunnels
+ # cat /debug/l2tp/tunnels
The debugfs files should not be used by applications to obtain L2TP
state information because the file format is subject to change. It is
implemented to provide extra debug information to help diagnose
-problems.) Users should use the netlink API.
+problems. Applications should instead use the netlink API.
-/proc/net/pppol2tp is also provided for backwards compatibility with
-the original pppol2tp driver. It lists information about L2TPv2
+In addition the L2TP subsystem implements tracepoints using the standard
+kernel event tracing API. The available L2TP events can be reviewed as
+follows::
+
+ # find /debug/tracing/events/l2tp
+
+Finally, /proc/net/pppol2tp is also provided for backwards compatibility
+with the original pppol2tp code. It lists information about L2TPv2
tunnels and sessions only. Its use is discouraged.
-Unmanaged L2TPv3 Tunnels
-========================
-
-Some commercial L2TP products support unmanaged L2TPv3 ethernet
-tunnels, where there is no L2TP control protocol; tunnels are
-configured at each side manually. New commands are available in
-iproute2's ip utility to support this.
-
-To create an L2TPv3 ethernet pseudowire between local host 192.168.1.1
-and peer 192.168.1.2, using IP addresses 10.5.1.1 and 10.5.1.2 for the
-tunnel endpoints::
-
- # ip l2tp add tunnel tunnel_id 1 peer_tunnel_id 1 udp_sport 5000 \
- udp_dport 5000 encap udp local 192.168.1.1 remote 192.168.1.2
- # ip l2tp add session tunnel_id 1 session_id 1 peer_session_id 1
- # ip -s -d show dev l2tpeth0
- # ip addr add 10.5.1.2/32 peer 10.5.1.1/32 dev l2tpeth0
- # ip li set dev l2tpeth0 up
-
-Choose IP addresses to be the address of a local IP interface and that
-of the remote system. The IP addresses of the l2tpeth0 interface can be
-anything suitable.
-
-Repeat the above at the peer, with ports, tunnel/session ids and IP
-addresses reversed. The tunnel and session IDs can be any non-zero
-32-bit number, but the values must be reversed at the peer.
-
-======================== ===================
-Host 1 Host2
-======================== ===================
-udp_sport=5000 udp_sport=5001
-udp_dport=5001 udp_dport=5000
-tunnel_id=42 tunnel_id=45
-peer_tunnel_id=45 peer_tunnel_id=42
-session_id=128 session_id=5196755
-peer_session_id=5196755 peer_session_id=128
-======================== ===================
-
-When done at both ends of the tunnel, it should be possible to send
-data over the network. e.g.::
-
- # ping 10.5.1.1
-
-
-Sample Userspace Code
-=====================
-
-1. Create tunnel management PPPoX socket::
-
- kernel_fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
- if (kernel_fd >= 0) {
- struct sockaddr_pppol2tp sax;
- struct sockaddr_in const *peer_addr;
-
- peer_addr = l2tp_tunnel_get_peer_addr(tunnel);
- memset(&sax, 0, sizeof(sax));
- sax.sa_family = AF_PPPOX;
- sax.sa_protocol = PX_PROTO_OL2TP;
- sax.pppol2tp.fd = udp_fd; /* fd of tunnel UDP socket */
- sax.pppol2tp.addr.sin_addr.s_addr = peer_addr->sin_addr.s_addr;
- sax.pppol2tp.addr.sin_port = peer_addr->sin_port;
- sax.pppol2tp.addr.sin_family = AF_INET;
- sax.pppol2tp.s_tunnel = tunnel_id;
- sax.pppol2tp.s_session = 0; /* special case: mgmt socket */
- sax.pppol2tp.d_tunnel = 0;
- sax.pppol2tp.d_session = 0; /* special case: mgmt socket */
-
- if(connect(kernel_fd, (struct sockaddr *)&sax, sizeof(sax) ) < 0 ) {
- perror("connect failed");
- result = -errno;
- goto err;
- }
- }
-
-2. Create session PPPoX data socket::
-
- struct sockaddr_pppol2tp sax;
- int fd;
-
- /* Note, the target socket must be bound already, else it will not be ready */
- sax.sa_family = AF_PPPOX;
- sax.sa_protocol = PX_PROTO_OL2TP;
- sax.pppol2tp.fd = tunnel_fd;
- sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr;
- sax.pppol2tp.addr.sin_port = addr->sin_port;
- sax.pppol2tp.addr.sin_family = AF_INET;
- sax.pppol2tp.s_tunnel = tunnel_id;
- sax.pppol2tp.s_session = session_id;
- sax.pppol2tp.d_tunnel = peer_tunnel_id;
- sax.pppol2tp.d_session = peer_session_id;
-
- /* session_fd is the fd of the session's PPPoL2TP socket.
- * tunnel_fd is the fd of the tunnel UDP socket.
- */
- fd = connect(session_fd, (struct sockaddr *)&sax, sizeof(sax));
- if (fd < 0 ) {
- return -errno;
- }
- return 0;
-
Internal Implementation
=======================
-The driver keeps a struct l2tp_tunnel context per L2TP tunnel and a
-struct l2tp_session context for each session. The l2tp_tunnel is
-always associated with a UDP or L2TP/IP socket and keeps a list of
-sessions in the tunnel. The l2tp_session context keeps kernel state
-about the session. It has private data which is used for data specific
-to the session type. With L2TPv2, the session always carried PPP
-traffic. With L2TPv3, the session can also carry ethernet frames
-(ethernet pseudowire) or other data types such as ATM, HDLC or Frame
-Relay.
+This section is for kernel developers and maintainers.
-When a tunnel is first opened, the reference count on the socket is
-increased using sock_hold(). This ensures that the kernel socket
-cannot be removed while L2TP's data structures reference it.
+Sockets
+-------
-Some L2TP sessions also have a socket (PPP pseudowires) while others
-do not (ethernet pseudowires). We can't use the socket reference count
-as the reference count for session contexts. The L2TP implementation
-therefore has its own internal reference counts on the session
-contexts.
+UDP sockets are implemented by the networking core. When an L2TP
+tunnel is created using a UDP socket, the socket is set up as an
+encapsulated UDP socket by setting encap_rcv and encap_destroy
+callbacks on the UDP socket. l2tp_udp_encap_recv is called when
+packets are received on the socket. l2tp_udp_encap_destroy is called
+when userspace closes the socket.
-To Do
-=====
+L2TPIP sockets are implemented in `net/l2tp/l2tp_ip.c`_ and
+`net/l2tp/l2tp_ip6.c`_.
-Add L2TP tunnel switching support. This would route tunneled traffic
-from one L2TP tunnel into another. Specified in
-http://tools.ietf.org/html/draft-ietf-l2tpext-tunnel-switching-08
+Tunnels
+-------
-Add L2TPv3 VLAN pseudowire support.
+The kernel keeps a struct l2tp_tunnel context per L2TP tunnel. The
+l2tp_tunnel is always associated with a UDP or L2TP/IP socket and
+keeps a list of sessions in the tunnel. When a tunnel is first
+registered with L2TP core, the reference count on the socket is
+increased. This ensures that the socket cannot be removed while L2TP's
+data structures reference it.
-Add L2TPv3 IP pseudowire support.
+Tunnels are identified by a unique tunnel id. The id is 16-bit for
+L2TPv2 and 32-bit for L2TPv3. Internally, the id is stored as a 32-bit
+value.
-Add L2TPv3 ATM pseudowire support.
+Tunnels are kept in a per-net list, indexed by tunnel id. The tunnel
+id namespace is shared by L2TPv2 and L2TPv3. The tunnel context can be
+derived from the socket's sk_user_data.
+
+Handling tunnel socket close is perhaps the most tricky part of the
+L2TP implementation. If userspace closes a tunnel socket, the L2TP
+tunnel and all of its sessions must be closed and destroyed. Since the
+tunnel context holds a ref on the tunnel socket, the socket's
+sk_destruct won't be called until the tunnel sock_put's its
+socket. For UDP sockets, when userspace closes the tunnel socket, the
+socket's encap_destroy handler is invoked, which L2TP uses to initiate
+its tunnel close actions. For L2TPIP sockets, the socket's close
+handler initiates the same tunnel close actions. All sessions are
+first closed. Each session drops its tunnel ref. When the tunnel ref
+reaches zero, the tunnel puts its socket ref. When the socket is
+eventually destroyed, it's sk_destruct finally frees the L2TP tunnel
+context.
+
+Sessions
+--------
+
+The kernel keeps a struct l2tp_session context for each session. Each
+session has private data which is used for data specific to the
+session type. With L2TPv2, the session always carries PPP
+traffic. With L2TPv3, the session can carry Ethernet frames (Ethernet
+pseudowire) or other data types such as PPP, ATM, HDLC or Frame
+Relay. Linux currently implements only Ethernet and PPP session types.
+
+Some L2TP session types also have a socket (PPP pseudowires) while
+others do not (Ethernet pseudowires). We can't therefore use the
+socket reference count as the reference count for session
+contexts. The L2TP implementation therefore has its own internal
+reference counts on the session contexts.
+
+Like tunnels, L2TP sessions are identified by a unique
+session id. Just as with tunnel ids, the session id is 16-bit for
+L2TPv2 and 32-bit for L2TPv3. Internally, the id is stored as a 32-bit
+value.
+
+Sessions hold a ref on their parent tunnel to ensure that the tunnel
+stays extant while one or more sessions references it.
+
+Sessions are kept in a per-tunnel list, indexed by session id. L2TPv3
+sessions are also kept in a per-net list indexed by session id,
+because L2TPv3 session ids are unique across all tunnels and L2TPv3
+data packets do not contain a tunnel id in the header. This list is
+therefore needed to find the session context associated with a
+received data packet when the tunnel context cannot be derived from
+the tunnel socket.
+
+Although the L2TPv3 RFC specifies that L2TPv3 session ids are not
+scoped by the tunnel, the kernel does not police this for L2TPv3 UDP
+tunnels and does not add sessions of L2TPv3 UDP tunnels into the
+per-net session list. In the UDP receive code, we must trust that the
+tunnel can be identified using the tunnel socket's sk_user_data and
+lookup the session in the tunnel's session list instead of the per-net
+session list.
+
+PPP
+---
+
+`net/l2tp/l2tp_ppp.c`_ implements the PPPoL2TP socket family. Each PPP
+session has a PPPoL2TP socket.
+
+The PPPoL2TP socket's sk_user_data references the l2tp_session.
+
+Userspace sends and receives PPP packets over L2TP using a PPPoL2TP
+socket. Only PPP control frames pass over this socket: PPP data
+packets are handled entirely by the kernel, passing between the L2TP
+session and its associated ``pppN`` netdev through the PPP channel
+interface of the kernel PPP subsystem.
+
+The L2TP PPP implementation handles the closing of a PPPoL2TP socket
+by closing its corresponding L2TP session. This is complicated because
+it must consider racing with netlink session create/destroy requests
+and pppol2tp_connect trying to reconnect with a session that is in the
+process of being closed. Unlike tunnels, PPP sessions do not hold a
+ref on their associated socket, so code must be careful to sock_hold
+the socket where necessary. For all the details, see commit
+3d609342cc04129ff7568e19316ce3d7451a27e8.
+
+Ethernet
+--------
+
+`net/l2tp/l2tp_eth.c`_ implements L2TPv3 Ethernet pseudowires. It
+manages a netdev for each session.
+
+L2TP Ethernet sessions are created and destroyed by netlink request,
+or are destroyed when the tunnel is destroyed. Unlike PPP sessions,
+Ethernet sessions do not have an associated socket.
Miscellaneous
=============
-The L2TP drivers were developed as part of the OpenL2TP project by
-Katalix Systems Ltd. OpenL2TP is a full-featured L2TP client / server,
-designed from the ground up to have the L2TP datapath in the
-kernel. The project also implemented the pppol2tp plugin for pppd
-which allows pppd to use the kernel driver. Details can be found at
-http://www.openl2tp.org.
+RFCs
+----
+
+The kernel code implements the datapath features specified in the
+following RFCs:
+
+======= =============== ===================================
+RFC2661 L2TPv2 https://tools.ietf.org/html/rfc2661
+RFC3931 L2TPv3 https://tools.ietf.org/html/rfc3931
+RFC4719 L2TPv3 Ethernet https://tools.ietf.org/html/rfc4719
+======= =============== ===================================
+
+Implementations
+---------------
+
+A number of open source applications use the L2TP kernel subsystem:
+
+============ ==============================================
+iproute2 https://github.com/shemminger/iproute2
+go-l2tp https://github.com/katalix/go-l2tp
+tunneldigger https://github.com/wlanslovenija/tunneldigger
+xl2tpd https://github.com/xelerance/xl2tpd
+============ ==============================================
+
+Limitations
+-----------
+
+The current implementation has a number of limitations:
+
+ 1) Multiple UDP sockets with the same 5-tuple address cannot be
+ used. The kernel's tunnel context is identified using private
+ data associated with the socket so it is important that each
+ socket is uniquely identified by its address.
+
+ 2) Interfacing with openvswitch is not yet implemented. It may be
+ useful to map OVS Ethernet and VLAN ports into L2TPv3 tunnels.
+
+ 3) VLAN pseudowires are implemented using an ``l2tpethN`` interface
+ configured with a VLAN sub-interface. Since L2TPv3 VLAN
+ pseudowires carry one and only one VLAN, it may be better to use
+ a single netdevice rather than an ``l2tpethN`` and ``l2tpethN``:M
+ pair per VLAN session. The netlink attribute
+ ``L2TP_ATTR_VLAN_ID`` was added for this, but it was never
+ implemented.
+
+Testing
+-------
+
+Unmanaged L2TPv3 Ethernet features are tested by the kernel's built-in
+selftests. See `tools/testing/selftests/net/l2tp.sh`_.
+
+Another test suite, l2tp-ktest_, covers all
+of the L2TP APIs and tunnel/session types. This may be integrated into
+the kernel's built-in L2TP selftests in the future.
+
+.. Links
+.. _Generic Netlink: generic_netlink.html
+.. _libmnl: https://www.netfilter.org/projects/libmnl
+.. _include/uapi/linux/l2tp.h: ../../../include/uapi/linux/l2tp.h
+.. _include/linux/if_pppol2tp.h: ../../../include/linux/if_pppol2tp.h
+.. _net/l2tp/l2tp_ip.c: ../../../net/l2tp/l2tp_ip.c
+.. _net/l2tp/l2tp_ip6.c: ../../../net/l2tp/l2tp_ip6.c
+.. _net/l2tp/l2tp_ppp.c: ../../../net/l2tp/l2tp_ppp.c
+.. _net/l2tp/l2tp_eth.c: ../../../net/l2tp/l2tp_eth.c
+.. _tools/testing/selftests/net/l2tp.sh: ../../../tools/testing/selftests/net/l2tp.sh
+.. _l2tp-ktest: https://github.com/katalix/l2tp-ktest
diff --git a/MAINTAINERS b/MAINTAINERS
index dca9bfd..cd4ce79 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1286,7 +1286,7 @@
F: Documentation/devicetree/bindings/net/apm-xgene-enet.txt
F: Documentation/devicetree/bindings/net/apm-xgene-mdio.txt
F: drivers/net/ethernet/apm/xgene/
-F: drivers/net/phy/mdio-xgene.c
+F: drivers/net/mdio/mdio-xgene.c
APPLIED MICRO (APM) X-GENE SOC PMU
M: Khuong Dinh <khuong@os.amperecomputing.com>
@@ -4709,6 +4709,15 @@
W: http://www.chelsio.com
F: drivers/crypto/chelsio
+CXGB4 INLINE CRYPTO DRIVER
+M: Ayush Sawal <ayush.sawal@chelsio.com>
+M: Vinay Kumar Yadav <vinay.yadav@chelsio.com>
+M: Rohit Maheshwari <rohitm@chelsio.com>
+L: netdev@vger.kernel.org
+S: Supported
+W: http://www.chelsio.com
+F: drivers/net/ethernet/chelsio/inline_crypto/
+
CXGB4 ETHERNET DRIVER (CXGB4)
M: Vishal Kulkarni <vishal@chelsio.com>
L: netdev@vger.kernel.org
@@ -6521,11 +6530,14 @@
F: Documentation/devicetree/bindings/net/mdio*
F: Documentation/devicetree/bindings/net/qca,ar803x.yaml
F: Documentation/networking/phy.rst
+F: drivers/net/mdio/
+F: drivers/net/pcs/
F: drivers/net/phy/
F: drivers/of/of_mdio.c
F: drivers/of/of_net.c
F: include/dt-bindings/net/qca-ar803x.h
F: include/linux/*mdio*.h
+F: include/linux/mdio/*.h
F: include/linux/of_net.h
F: include/linux/phy.h
F: include/linux/phy_fixed.h
@@ -10298,6 +10310,13 @@
W: http://linux-test-project.github.io/
T: git git://github.com/linux-test-project/ltp.git
+LYNX PCS MODULE
+M: Ioana Ciornei <ioana.ciornei@nxp.com>
+L: netdev@vger.kernel.org
+S: Supported
+F: drivers/net/phy/pcs-lynx.c
+F: include/linux/pcs-lynx.h
+
M68K ARCHITECTURE
M: Geert Uytterhoeven <geert@linux-m68k.org>
L: linux-m68k@lists.linux-m68k.org
@@ -10505,7 +10524,7 @@
L: netdev@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/net/marvell,mvusb.yaml
-F: drivers/net/phy/mdio-mvusb.c
+F: drivers/net/mdio/mdio-mvusb.c
MARVELL XENON MMC/SD/SDIO HOST CONTROLLER DRIVER
M: Hu Ziji <huziji@marvell.com>
@@ -15660,6 +15679,7 @@
S: Maintained
F: drivers/net/phy/phylink.c
F: drivers/net/phy/sfp*
+F: include/linux/mdio/mdio-i2c.h
F: include/linux/phylink.h
F: include/linux/sfp.h
K: phylink\.h|struct\s+phylink|\.phylink|>phylink_|phylink_(autoneg|clear|connect|create|destroy|disconnect|ethtool|helper|mac|mii|of|set|start|stop|test|validate)
@@ -16744,8 +16764,8 @@
M: Jose Abreu <Jose.Abreu@synopsys.com>
L: netdev@vger.kernel.org
S: Supported
-F: drivers/net/phy/mdio-xpcs.c
-F: include/linux/mdio-xpcs.h
+F: drivers/net/pcs/pcs-xpcs.c
+F: include/linux/pcs/pcs-xpcs.h
SYNOPSYS DESIGNWARE I2C DRIVER
M: Jarkko Nikula <jarkko.nikula@linux.intel.com>
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 42b6709..7d9ea7b 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1379,10 +1379,15 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
u8 *prog = *pprog;
int cnt = 0;
- if (emit_call(&prog, __bpf_prog_enter, prog))
- return -EINVAL;
- /* remember prog start time returned by __bpf_prog_enter */
- emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
+ if (p->aux->sleepable) {
+ if (emit_call(&prog, __bpf_prog_enter_sleepable, prog))
+ return -EINVAL;
+ } else {
+ if (emit_call(&prog, __bpf_prog_enter, prog))
+ return -EINVAL;
+ /* remember prog start time returned by __bpf_prog_enter */
+ emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
+ }
/* arg1: lea rdi, [rbp - stack_size] */
EMIT4(0x48, 0x8D, 0x7D, -stack_size);
@@ -1402,13 +1407,18 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
if (mod_ret)
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
- /* arg1: mov rdi, progs[i] */
- emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
- (u32) (long) p);
- /* arg2: mov rsi, rbx <- start time in nsec */
- emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
- if (emit_call(&prog, __bpf_prog_exit, prog))
- return -EINVAL;
+ if (p->aux->sleepable) {
+ if (emit_call(&prog, __bpf_prog_exit_sleepable, prog))
+ return -EINVAL;
+ } else {
+ /* arg1: mov rdi, progs[i] */
+ emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
+ (u32) (long) p);
+ /* arg2: mov rsi, rbx <- start time in nsec */
+ emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+ if (emit_call(&prog, __bpf_prog_exit, prog))
+ return -EINVAL;
+ }
*pprog = prog;
return 0;
diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig
index 2984fdf..af161da 100644
--- a/drivers/crypto/chelsio/Kconfig
+++ b/drivers/crypto/chelsio/Kconfig
@@ -22,27 +22,6 @@
To compile this driver as a module, choose M here: the module
will be called chcr.
-config CHELSIO_IPSEC_INLINE
- bool "Chelsio IPSec XFRM Tx crypto offload"
- depends on CHELSIO_T4
- depends on CRYPTO_DEV_CHELSIO
- depends on XFRM_OFFLOAD
- depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
- default n
- help
- Enable support for IPSec Tx Inline.
-
-config CRYPTO_DEV_CHELSIO_TLS
- tristate "Chelsio Crypto Inline TLS Driver"
- depends on CHELSIO_T4
- depends on TLS_TOE
- select CRYPTO_DEV_CHELSIO
- help
- Support Chelsio Inline TLS with Chelsio crypto accelerator.
-
- To compile this driver as a module, choose M here: the module
- will be called chtls.
-
config CHELSIO_TLS_DEVICE
bool "Chelsio Inline KTLS Offload"
depends on CHELSIO_T4
diff --git a/drivers/crypto/chelsio/Makefile b/drivers/crypto/chelsio/Makefile
index 0e9d0359..f2e8e2f 100644
--- a/drivers/crypto/chelsio/Makefile
+++ b/drivers/crypto/chelsio/Makefile
@@ -6,5 +6,3 @@
#ifdef CONFIG_CHELSIO_TLS_DEVICE
chcr-objs += chcr_ktls.o
#endif
-chcr-$(CONFIG_CHELSIO_IPSEC_INLINE) += chcr_ipsec.o
-obj-$(CONFIG_CRYPTO_DEV_CHELSIO_TLS) += chtls/
diff --git a/drivers/crypto/chelsio/chcr_algo.h b/drivers/crypto/chelsio/chcr_algo.h
index d4f6e01..507aafe 100644
--- a/drivers/crypto/chelsio/chcr_algo.h
+++ b/drivers/crypto/chelsio/chcr_algo.h
@@ -86,39 +86,6 @@
KEY_CONTEXT_OPAD_PRESENT_M)
#define KEY_CONTEXT_OPAD_PRESENT_F KEY_CONTEXT_OPAD_PRESENT_V(1U)
-#define TLS_KEYCTX_RXFLIT_CNT_S 24
-#define TLS_KEYCTX_RXFLIT_CNT_V(x) ((x) << TLS_KEYCTX_RXFLIT_CNT_S)
-
-#define TLS_KEYCTX_RXPROT_VER_S 20
-#define TLS_KEYCTX_RXPROT_VER_M 0xf
-#define TLS_KEYCTX_RXPROT_VER_V(x) ((x) << TLS_KEYCTX_RXPROT_VER_S)
-
-#define TLS_KEYCTX_RXCIPH_MODE_S 16
-#define TLS_KEYCTX_RXCIPH_MODE_M 0xf
-#define TLS_KEYCTX_RXCIPH_MODE_V(x) ((x) << TLS_KEYCTX_RXCIPH_MODE_S)
-
-#define TLS_KEYCTX_RXAUTH_MODE_S 12
-#define TLS_KEYCTX_RXAUTH_MODE_M 0xf
-#define TLS_KEYCTX_RXAUTH_MODE_V(x) ((x) << TLS_KEYCTX_RXAUTH_MODE_S)
-
-#define TLS_KEYCTX_RXCIAU_CTRL_S 11
-#define TLS_KEYCTX_RXCIAU_CTRL_V(x) ((x) << TLS_KEYCTX_RXCIAU_CTRL_S)
-
-#define TLS_KEYCTX_RX_SEQCTR_S 9
-#define TLS_KEYCTX_RX_SEQCTR_M 0x3
-#define TLS_KEYCTX_RX_SEQCTR_V(x) ((x) << TLS_KEYCTX_RX_SEQCTR_S)
-
-#define TLS_KEYCTX_RX_VALID_S 8
-#define TLS_KEYCTX_RX_VALID_V(x) ((x) << TLS_KEYCTX_RX_VALID_S)
-
-#define TLS_KEYCTX_RXCK_SIZE_S 3
-#define TLS_KEYCTX_RXCK_SIZE_M 0x7
-#define TLS_KEYCTX_RXCK_SIZE_V(x) ((x) << TLS_KEYCTX_RXCK_SIZE_S)
-
-#define TLS_KEYCTX_RXMK_SIZE_S 0
-#define TLS_KEYCTX_RXMK_SIZE_M 0x7
-#define TLS_KEYCTX_RXMK_SIZE_V(x) ((x) << TLS_KEYCTX_RXMK_SIZE_S)
-
#define CHCR_HASH_MAX_DIGEST_SIZE 64
#define CHCR_MAX_SHA_DIGEST_SIZE 64
diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c
index bd8dac8..b3570b4 100644
--- a/drivers/crypto/chelsio/chcr_core.c
+++ b/drivers/crypto/chelsio/chcr_core.c
@@ -40,10 +40,6 @@ static const struct tlsdev_ops chcr_ktls_ops = {
};
#endif
-#ifdef CONFIG_CHELSIO_IPSEC_INLINE
-static void update_netdev_features(void);
-#endif /* CONFIG_CHELSIO_IPSEC_INLINE */
-
static chcr_handler_func work_handlers[NUM_CPL_CMDS] = {
[CPL_FW6_PLD] = cpl_fw6_pld_handler,
#ifdef CONFIG_CHELSIO_TLS_DEVICE
@@ -60,10 +56,8 @@ static struct cxgb4_uld_info chcr_uld_info = {
.add = chcr_uld_add,
.state_change = chcr_uld_state_change,
.rx_handler = chcr_uld_rx_handler,
-#if defined(CONFIG_CHELSIO_IPSEC_INLINE) || defined(CONFIG_CHELSIO_TLS_DEVICE)
- .tx_handler = chcr_uld_tx_handler,
-#endif /* CONFIG_CHELSIO_IPSEC_INLINE || CONFIG_CHELSIO_TLS_DEVICE */
#if defined(CONFIG_CHELSIO_TLS_DEVICE)
+ .tx_handler = chcr_uld_tx_handler,
.tlsdev_ops = &chcr_ktls_ops,
#endif
};
@@ -241,19 +235,11 @@ int chcr_uld_rx_handler(void *handle, const __be64 *rsp,
return 0;
}
-#if defined(CONFIG_CHELSIO_IPSEC_INLINE) || defined(CONFIG_CHELSIO_TLS_DEVICE)
+#if defined(CONFIG_CHELSIO_TLS_DEVICE)
int chcr_uld_tx_handler(struct sk_buff *skb, struct net_device *dev)
{
- /* In case if skb's decrypted bit is set, it's nic tls packet, else it's
- * ipsec packet.
- */
-#ifdef CONFIG_CHELSIO_TLS_DEVICE
if (skb->decrypted)
return chcr_ktls_xmit(skb, dev);
-#endif
-#ifdef CONFIG_CHELSIO_IPSEC_INLINE
- return chcr_ipsec_xmit(skb, dev);
-#endif
return 0;
}
#endif /* CONFIG_CHELSIO_IPSEC_INLINE || CONFIG_CHELSIO_TLS_DEVICE */
@@ -305,24 +291,6 @@ static int chcr_uld_state_change(void *handle, enum cxgb4_state state)
return ret;
}
-#ifdef CONFIG_CHELSIO_IPSEC_INLINE
-static void update_netdev_features(void)
-{
- struct uld_ctx *u_ctx, *tmp;
-
- mutex_lock(&drv_data.drv_mutex);
- list_for_each_entry_safe(u_ctx, tmp, &drv_data.inact_dev, entry) {
- if (u_ctx->lldi.crypto & ULP_CRYPTO_IPSEC_INLINE)
- chcr_add_xfrmops(&u_ctx->lldi);
- }
- list_for_each_entry_safe(u_ctx, tmp, &drv_data.act_dev, entry) {
- if (u_ctx->lldi.crypto & ULP_CRYPTO_IPSEC_INLINE)
- chcr_add_xfrmops(&u_ctx->lldi);
- }
- mutex_unlock(&drv_data.drv_mutex);
-}
-#endif /* CONFIG_CHELSIO_IPSEC_INLINE */
-
static int __init chcr_crypto_init(void)
{
INIT_LIST_HEAD(&drv_data.act_dev);
@@ -332,12 +300,6 @@ static int __init chcr_crypto_init(void)
drv_data.last_dev = NULL;
cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info);
- #ifdef CONFIG_CHELSIO_IPSEC_INLINE
- rtnl_lock();
- update_netdev_features();
- rtnl_unlock();
- #endif /* CONFIG_CHELSIO_IPSEC_INLINE */
-
return 0;
}
diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h
index 67d77ab..81f6e61 100644
--- a/drivers/crypto/chelsio/chcr_core.h
+++ b/drivers/crypto/chelsio/chcr_core.h
@@ -72,54 +72,6 @@ struct _key_ctx {
unsigned char key[];
};
-#define KEYCTX_TX_WR_IV_S 55
-#define KEYCTX_TX_WR_IV_M 0x1ffULL
-#define KEYCTX_TX_WR_IV_V(x) ((x) << KEYCTX_TX_WR_IV_S)
-#define KEYCTX_TX_WR_IV_G(x) \
- (((x) >> KEYCTX_TX_WR_IV_S) & KEYCTX_TX_WR_IV_M)
-
-#define KEYCTX_TX_WR_AAD_S 47
-#define KEYCTX_TX_WR_AAD_M 0xffULL
-#define KEYCTX_TX_WR_AAD_V(x) ((x) << KEYCTX_TX_WR_AAD_S)
-#define KEYCTX_TX_WR_AAD_G(x) (((x) >> KEYCTX_TX_WR_AAD_S) & \
- KEYCTX_TX_WR_AAD_M)
-
-#define KEYCTX_TX_WR_AADST_S 39
-#define KEYCTX_TX_WR_AADST_M 0xffULL
-#define KEYCTX_TX_WR_AADST_V(x) ((x) << KEYCTX_TX_WR_AADST_S)
-#define KEYCTX_TX_WR_AADST_G(x) \
- (((x) >> KEYCTX_TX_WR_AADST_S) & KEYCTX_TX_WR_AADST_M)
-
-#define KEYCTX_TX_WR_CIPHER_S 30
-#define KEYCTX_TX_WR_CIPHER_M 0x1ffULL
-#define KEYCTX_TX_WR_CIPHER_V(x) ((x) << KEYCTX_TX_WR_CIPHER_S)
-#define KEYCTX_TX_WR_CIPHER_G(x) \
- (((x) >> KEYCTX_TX_WR_CIPHER_S) & KEYCTX_TX_WR_CIPHER_M)
-
-#define KEYCTX_TX_WR_CIPHERST_S 23
-#define KEYCTX_TX_WR_CIPHERST_M 0x7f
-#define KEYCTX_TX_WR_CIPHERST_V(x) ((x) << KEYCTX_TX_WR_CIPHERST_S)
-#define KEYCTX_TX_WR_CIPHERST_G(x) \
- (((x) >> KEYCTX_TX_WR_CIPHERST_S) & KEYCTX_TX_WR_CIPHERST_M)
-
-#define KEYCTX_TX_WR_AUTH_S 14
-#define KEYCTX_TX_WR_AUTH_M 0x1ff
-#define KEYCTX_TX_WR_AUTH_V(x) ((x) << KEYCTX_TX_WR_AUTH_S)
-#define KEYCTX_TX_WR_AUTH_G(x) \
- (((x) >> KEYCTX_TX_WR_AUTH_S) & KEYCTX_TX_WR_AUTH_M)
-
-#define KEYCTX_TX_WR_AUTHST_S 7
-#define KEYCTX_TX_WR_AUTHST_M 0x7f
-#define KEYCTX_TX_WR_AUTHST_V(x) ((x) << KEYCTX_TX_WR_AUTHST_S)
-#define KEYCTX_TX_WR_AUTHST_G(x) \
- (((x) >> KEYCTX_TX_WR_AUTHST_S) & KEYCTX_TX_WR_AUTHST_M)
-
-#define KEYCTX_TX_WR_AUTHIN_S 0
-#define KEYCTX_TX_WR_AUTHIN_M 0x7f
-#define KEYCTX_TX_WR_AUTHIN_V(x) ((x) << KEYCTX_TX_WR_AUTHIN_S)
-#define KEYCTX_TX_WR_AUTHIN_G(x) \
- (((x) >> KEYCTX_TX_WR_AUTHIN_S) & KEYCTX_TX_WR_AUTHIN_M)
-
#define WQ_RETRY 5
struct chcr_driver_data {
struct list_head act_dev;
@@ -157,42 +109,6 @@ struct uld_ctx {
struct chcr_dev dev;
};
-struct sge_opaque_hdr {
- void *dev;
- dma_addr_t addr[MAX_SKB_FRAGS + 1];
-};
-
-struct chcr_ipsec_req {
- struct ulp_txpkt ulptx;
- struct ulptx_idata sc_imm;
- struct cpl_tx_sec_pdu sec_cpl;
- struct _key_ctx key_ctx;
-};
-
-struct chcr_ipsec_wr {
- struct fw_ulptx_wr wreq;
- struct chcr_ipsec_req req;
-};
-
-#define ESN_IV_INSERT_OFFSET 12
-struct chcr_ipsec_aadiv {
- __be32 spi;
- u8 seq_no[8];
- u8 iv[8];
-};
-
-struct ipsec_sa_entry {
- int hmac_ctrl;
- u16 esn;
- u16 resv;
- unsigned int enckey_len;
- unsigned int kctx_len;
- unsigned int authsize;
- __be32 key_ctx_hdr;
- char salt[MAX_SALT];
- char key[2 * AES_MAX_KEY_SIZE];
-};
-
/*
* sgl_len - calculates the size of an SGL of the given capacity
* @n: the number of SGL entries
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 1368d1d..c3dbe64 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -473,6 +473,10 @@
source "drivers/net/phy/Kconfig"
+source "drivers/net/mdio/Kconfig"
+
+source "drivers/net/pcs/Kconfig"
+
source "drivers/net/plip/Kconfig"
source "drivers/net/ppp/Kconfig"
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 94b6080..72e18d5 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -21,6 +21,8 @@
obj-$(CONFIG_NET) += Space.o loopback.o
obj-$(CONFIG_NETCONSOLE) += netconsole.o
obj-y += phy/
+obj-y += mdio/
+obj-y += pcs/
obj-$(CONFIG_RIONET) += rionet.o
obj-$(CONFIG_NET_TEAM) += team/
obj-$(CONFIG_TUN) += tun.o
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index e731db9..26fcff8 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -17,8 +17,6 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/gpio.h>
@@ -767,8 +765,11 @@ static int b53_switch_reset(struct b53_device *dev)
usleep_range(1000, 2000);
} while (timeout-- > 0);
- if (timeout == 0)
+ if (timeout == 0) {
+ dev_err(dev->dev,
+ "Timeout waiting for SW_RST to clear!\n");
return -ETIMEDOUT;
+ }
}
b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, &mgmt);
@@ -2620,8 +2621,9 @@ int b53_switch_detect(struct b53_device *dev)
dev->chip_id = id32;
break;
default:
- pr_err("unsupported switch detected (BCM53%02x/BCM%x)\n",
- id8, id32);
+ dev_err(dev->dev,
+ "unsupported switch detected (BCM53%02x/BCM%x)\n",
+ id8, id32);
return -ENODEV;
}
}
@@ -2651,7 +2653,8 @@ int b53_switch_register(struct b53_device *dev)
if (ret)
return ret;
- pr_info("found switch: %s, rev %i\n", dev->name, dev->core_rev);
+ dev_info(dev->dev, "found switch: %s, rev %i\n",
+ dev->name, dev->core_rev);
return dsa_register_switch(dev->ds);
}
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 5ebff98..7a74e4d 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -14,6 +14,7 @@
#include <linux/phy_fixed.h>
#include <linux/phylink.h>
#include <linux/mii.h>
+#include <linux/clk.h>
#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/of_address.h>
@@ -31,6 +32,49 @@
#include "b53/b53_priv.h"
#include "b53/b53_regs.h"
+/* Return the number of active ports, not counting the IMP (CPU) port */
+static unsigned int bcm_sf2_num_active_ports(struct dsa_switch *ds)
+{
+ struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
+ unsigned int port, count = 0;
+
+ for (port = 0; port < ARRAY_SIZE(priv->port_sts); port++) {
+ if (dsa_is_cpu_port(ds, port))
+ continue;
+ if (priv->port_sts[port].enabled)
+ count++;
+ }
+
+ return count;
+}
+
+static void bcm_sf2_recalc_clock(struct dsa_switch *ds)
+{
+ struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
+ unsigned long new_rate;
+ unsigned int ports_active;
+ /* Frequenty in Mhz */
+ const unsigned long rate_table[] = {
+ 59220000,
+ 60820000,
+ 62500000,
+ 62500000,
+ };
+
+ ports_active = bcm_sf2_num_active_ports(ds);
+ if (ports_active == 0 || !priv->clk_mdiv)
+ return;
+
+ /* If we overflow our table, just use the recommended operational
+ * frequency
+ */
+ if (ports_active > ARRAY_SIZE(rate_table))
+ new_rate = 90000000;
+ else
+ new_rate = rate_table[ports_active - 1];
+ clk_set_rate(priv->clk_mdiv, new_rate);
+}
+
static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
{
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
@@ -82,6 +126,8 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
reg &= ~(RX_DIS | TX_DIS);
core_writel(priv, reg, CORE_G_PCTL_PORT(port));
}
+
+ priv->port_sts[port].enabled = true;
}
static void bcm_sf2_gphy_enable_set(struct dsa_switch *ds, bool enable)
@@ -167,6 +213,10 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port,
if (!dsa_is_user_port(ds, port))
return 0;
+ priv->port_sts[port].enabled = true;
+
+ bcm_sf2_recalc_clock(ds);
+
/* Clear the memory power down */
reg = core_readl(priv, CORE_MEM_PSM_VDD_CTRL);
reg &= ~P_TXQ_PSM_VDD(port);
@@ -260,6 +310,10 @@ static void bcm_sf2_port_disable(struct dsa_switch *ds, int port)
reg = core_readl(priv, CORE_MEM_PSM_VDD_CTRL);
reg |= P_TXQ_PSM_VDD(port);
core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL);
+
+ priv->port_sts[port].enabled = false;
+
+ bcm_sf2_recalc_clock(ds);
}
@@ -750,6 +804,9 @@ static int bcm_sf2_sw_suspend(struct dsa_switch *ds)
bcm_sf2_port_disable(ds, port);
}
+ if (!priv->wol_ports_mask)
+ clk_disable_unprepare(priv->clk);
+
return 0;
}
@@ -758,6 +815,9 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds)
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
int ret;
+ if (!priv->wol_ports_mask)
+ clk_prepare_enable(priv->clk);
+
ret = bcm_sf2_sw_rst(priv);
if (ret) {
pr_err("%s: failed to software reset switch\n", __func__);
@@ -1189,10 +1249,24 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
base++;
}
+ priv->clk = devm_clk_get_optional(&pdev->dev, "sw_switch");
+ if (IS_ERR(priv->clk))
+ return PTR_ERR(priv->clk);
+
+ clk_prepare_enable(priv->clk);
+
+ priv->clk_mdiv = devm_clk_get_optional(&pdev->dev, "sw_switch_mdiv");
+ if (IS_ERR(priv->clk_mdiv)) {
+ ret = PTR_ERR(priv->clk_mdiv);
+ goto out_clk;
+ }
+
+ clk_prepare_enable(priv->clk_mdiv);
+
ret = bcm_sf2_sw_rst(priv);
if (ret) {
pr_err("unable to software reset switch: %d\n", ret);
- return ret;
+ goto out_clk_mdiv;
}
bcm_sf2_gphy_enable_set(priv->dev->ds, true);
@@ -1200,7 +1274,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
ret = bcm_sf2_mdio_register(ds);
if (ret) {
pr_err("failed to register MDIO bus\n");
- return ret;
+ goto out_clk_mdiv;
}
bcm_sf2_gphy_enable_set(priv->dev->ds, false);
@@ -1267,6 +1341,10 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
out_mdio:
bcm_sf2_mdio_unregister(priv);
+out_clk_mdiv:
+ clk_disable_unprepare(priv->clk_mdiv);
+out_clk:
+ clk_disable_unprepare(priv->clk);
return ret;
}
@@ -1280,6 +1358,8 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev)
dsa_unregister_switch(priv->dev->ds);
bcm_sf2_cfp_exit(priv->dev->ds);
bcm_sf2_mdio_unregister(priv);
+ clk_disable_unprepare(priv->clk_mdiv);
+ clk_disable_unprepare(priv->clk);
if (priv->type == BCM7278_DEVICE_ID && !IS_ERR(priv->rcdev))
reset_control_assert(priv->rcdev);
diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h
index de386dd..1ed901a 100644
--- a/drivers/net/dsa/bcm_sf2.h
+++ b/drivers/net/dsa/bcm_sf2.h
@@ -45,6 +45,7 @@ struct bcm_sf2_hw_params {
struct bcm_sf2_port_status {
unsigned int link;
+ bool enabled;
};
struct bcm_sf2_cfp_priv {
@@ -93,6 +94,9 @@ struct bcm_sf2_priv {
/* Mask of ports enabled for Wake-on-LAN */
u32 wol_ports_mask;
+ struct clk *clk;
+ struct clk *clk_mdiv;
+
/* MoCA port location */
int moca_port;
diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c
index eb600b3..b588614 100644
--- a/drivers/net/dsa/dsa_loop.c
+++ b/drivers/net/dsa/dsa_loop.c
@@ -28,6 +28,53 @@ static struct dsa_loop_mib_entry dsa_loop_mibs[] = {
static struct phy_device *phydevs[PHY_MAX_ADDR];
+enum dsa_loop_devlink_resource_id {
+ DSA_LOOP_DEVLINK_PARAM_ID_VTU,
+};
+
+static u64 dsa_loop_devlink_vtu_get(void *priv)
+{
+ struct dsa_loop_priv *ps = priv;
+ unsigned int i, count = 0;
+ struct dsa_loop_vlan *vl;
+
+ for (i = 0; i < ARRAY_SIZE(ps->vlans); i++) {
+ vl = &ps->vlans[i];
+ if (vl->members)
+ count++;
+ }
+
+ return count;
+}
+
+static int dsa_loop_setup_devlink_resources(struct dsa_switch *ds)
+{
+ struct devlink_resource_size_params size_params;
+ struct dsa_loop_priv *ps = ds->priv;
+ int err;
+
+ devlink_resource_size_params_init(&size_params, ARRAY_SIZE(ps->vlans),
+ ARRAY_SIZE(ps->vlans),
+ 1, DEVLINK_RESOURCE_UNIT_ENTRY);
+
+ err = dsa_devlink_resource_register(ds, "VTU", ARRAY_SIZE(ps->vlans),
+ DSA_LOOP_DEVLINK_PARAM_ID_VTU,
+ DEVLINK_RESOURCE_ID_PARENT_TOP,
+ &size_params);
+ if (err)
+ goto out;
+
+ dsa_devlink_resource_occ_get_register(ds,
+ DSA_LOOP_DEVLINK_PARAM_ID_VTU,
+ dsa_loop_devlink_vtu_get, ps);
+
+ return 0;
+
+out:
+ dsa_devlink_resources_unregister(ds);
+ return err;
+}
+
static enum dsa_tag_protocol dsa_loop_get_protocol(struct dsa_switch *ds,
int port,
enum dsa_tag_protocol mp)
@@ -48,7 +95,12 @@ static int dsa_loop_setup(struct dsa_switch *ds)
dev_dbg(ds->dev, "%s\n", __func__);
- return 0;
+ return dsa_loop_setup_devlink_resources(ds);
+}
+
+static void dsa_loop_teardown(struct dsa_switch *ds)
+{
+ dsa_devlink_resources_unregister(ds);
}
static int dsa_loop_get_sset_count(struct dsa_switch *ds, int port, int sset)
@@ -243,6 +295,7 @@ static int dsa_loop_port_max_mtu(struct dsa_switch *ds, int port)
static const struct dsa_switch_ops dsa_loop_driver = {
.get_tag_protocol = dsa_loop_get_protocol,
.setup = dsa_loop_setup,
+ .teardown = dsa_loop_teardown,
.get_strings = dsa_loop_get_strings,
.get_ethtool_stats = dsa_loop_get_ethtool_stats,
.get_sset_count = dsa_loop_get_sset_count,
@@ -290,6 +343,7 @@ static int dsa_loop_drv_probe(struct mdio_device *mdiodev)
ds->dev = &mdiodev->dev;
ds->ops = &dsa_loop_driver;
ds->priv = ps;
+ ds->configure_vlan_while_not_filtering = true;
ps->bus = mdiodev->bus;
dev_set_drvdata(&mdiodev->dev, ds);
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index f0dbc05..15b97a4 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3329,12 +3329,6 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
return 0;
}
-static const struct of_device_id mv88e6xxx_mdio_external_match[] = {
- { .compatible = "marvell,mv88e6xxx-mdio-external",
- .data = (void *)true },
- { },
-};
-
static void mv88e6xxx_mdios_unregister(struct mv88e6xxx_chip *chip)
{
@@ -3354,7 +3348,6 @@ static void mv88e6xxx_mdios_unregister(struct mv88e6xxx_chip *chip)
static int mv88e6xxx_mdios_register(struct mv88e6xxx_chip *chip,
struct device_node *np)
{
- const struct of_device_id *match;
struct device_node *child;
int err;
@@ -3372,8 +3365,8 @@ static int mv88e6xxx_mdios_register(struct mv88e6xxx_chip *chip,
* bus.
*/
for_each_available_child_of_node(np, child) {
- match = of_match_node(mv88e6xxx_mdio_external_match, child);
- if (match) {
+ if (of_device_is_compatible(
+ child, "marvell,mv88e6xxx-mdio-external")) {
err = mv88e6xxx_mdio_register(chip, child, true);
if (err) {
mv88e6xxx_mdios_unregister(chip);
diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
index a4c488b..094d17a 100644
--- a/drivers/net/dsa/mv88e6xxx/hwtstamp.c
+++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
@@ -211,49 +211,20 @@ int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port,
-EFAULT : 0;
}
-/* Get the start of the PTP header in this skb */
-static u8 *parse_ptp_header(struct sk_buff *skb, unsigned int type)
-{
- u8 *data = skb_mac_header(skb);
- unsigned int offset = 0;
-
- if (type & PTP_CLASS_VLAN)
- offset += VLAN_HLEN;
-
- switch (type & PTP_CLASS_PMASK) {
- case PTP_CLASS_IPV4:
- offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
- break;
- case PTP_CLASS_IPV6:
- offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
- break;
- case PTP_CLASS_L2:
- offset += ETH_HLEN;
- break;
- default:
- return NULL;
- }
-
- /* Ensure that the entire header is present in this packet. */
- if (skb->len + ETH_HLEN < offset + 34)
- return NULL;
-
- return data + offset;
-}
-
/* Returns a pointer to the PTP header if the caller should time stamp,
* or NULL if the caller should not.
*/
-static u8 *mv88e6xxx_should_tstamp(struct mv88e6xxx_chip *chip, int port,
- struct sk_buff *skb, unsigned int type)
+static struct ptp_header *mv88e6xxx_should_tstamp(struct mv88e6xxx_chip *chip,
+ int port, struct sk_buff *skb,
+ unsigned int type)
{
struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
- u8 *hdr;
+ struct ptp_header *hdr;
if (!chip->info->ptp_support)
return NULL;
- hdr = parse_ptp_header(skb, type);
+ hdr = ptp_parse_header(skb, type);
if (!hdr)
return NULL;
@@ -275,12 +246,11 @@ static int mv88e6xxx_ts_valid(u16 status)
static int seq_match(struct sk_buff *skb, u16 ts_seqid)
{
unsigned int type = SKB_PTP_TYPE(skb);
- u8 *hdr = parse_ptp_header(skb, type);
- __be16 *seqid;
+ struct ptp_header *hdr;
- seqid = (__be16 *)(hdr + OFF_PTP_SEQUENCE_ID);
+ hdr = ptp_parse_header(skb, type);
- return ts_seqid == ntohs(*seqid);
+ return ts_seqid == ntohs(hdr->sequence_id);
}
static void mv88e6xxx_get_rxts(struct mv88e6xxx_chip *chip,
@@ -357,9 +327,9 @@ static void mv88e6xxx_rxtstamp_work(struct mv88e6xxx_chip *chip,
&ps->rx_queue2);
}
-static int is_pdelay_resp(u8 *msgtype)
+static int is_pdelay_resp(const struct ptp_header *hdr)
{
- return (*msgtype & 0xf) == 3;
+ return (hdr->tsmt & 0xf) == 3;
}
bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
@@ -367,7 +337,7 @@ bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
{
struct mv88e6xxx_port_hwtstamp *ps;
struct mv88e6xxx_chip *chip;
- u8 *hdr;
+ struct ptp_header *hdr;
chip = ds->priv;
ps = &chip->port_hwtstamp[port];
@@ -503,8 +473,7 @@ bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
{
struct mv88e6xxx_chip *chip = ds->priv;
struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
- __be16 *seq_ptr;
- u8 *hdr;
+ struct ptp_header *hdr;
if (!(skb_shinfo(clone)->tx_flags & SKBTX_HW_TSTAMP))
return false;
@@ -513,15 +482,13 @@ bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
if (!hdr)
return false;
- seq_ptr = (__be16 *)(hdr + OFF_PTP_SEQUENCE_ID);
-
if (test_and_set_bit_lock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS,
&ps->state))
return false;
ps->tx_skb = clone;
ps->tx_tstamp_start = jiffies;
- ps->tx_seq_id = be16_to_cpup(seq_ptr);
+ ps->tx_seq_id = be16_to_cpu(hdr->sequence_id);
ptp_schedule_worker(chip->ptp_clock, 0);
return true;
diff --git a/drivers/net/dsa/ocelot/Kconfig b/drivers/net/dsa/ocelot/Kconfig
index 2d23cce..e19718d 100644
--- a/drivers/net/dsa/ocelot/Kconfig
+++ b/drivers/net/dsa/ocelot/Kconfig
@@ -8,6 +8,7 @@
select MSCC_OCELOT_SWITCH_LIB
select NET_DSA_TAG_OCELOT
select FSL_ENETC_MDIO
+ select PCS_LYNX
help
This driver supports network switches from the Vitesse /
Microsemi / Microchip Ocelot family of switching cores that are
diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 04bfa6e..a1e1d38 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -19,6 +19,7 @@
#include <linux/of_net.h>
#include <linux/pci.h>
#include <linux/of.h>
+#include <linux/pcs-lynx.h>
#include <net/pkt_sched.h>
#include <net/dsa.h>
#include "felix.h"
@@ -196,27 +197,16 @@ static void felix_phylink_validate(struct dsa_switch *ds, int port,
felix->info->phylink_validate(ocelot, port, supported, state);
}
-static int felix_phylink_mac_pcs_get_state(struct dsa_switch *ds, int port,
- struct phylink_link_state *state)
-{
- struct ocelot *ocelot = ds->priv;
- struct felix *felix = ocelot_to_felix(ocelot);
-
- if (felix->info->pcs_link_state)
- felix->info->pcs_link_state(ocelot, port, state);
-
- return 0;
-}
-
static void felix_phylink_mac_config(struct dsa_switch *ds, int port,
unsigned int link_an_mode,
const struct phylink_link_state *state)
{
struct ocelot *ocelot = ds->priv;
struct felix *felix = ocelot_to_felix(ocelot);
+ struct dsa_port *dp = dsa_to_port(ds, port);
- if (felix->info->pcs_config)
- felix->info->pcs_config(ocelot, port, link_an_mode, state);
+ if (felix->pcs[port])
+ phylink_set_pcs(dp->pl, &felix->pcs[port]->pcs);
}
static void felix_phylink_mac_link_down(struct dsa_switch *ds, int port,
@@ -306,10 +296,6 @@ static void felix_phylink_mac_link_up(struct dsa_switch *ds, int port,
ocelot_fields_write(ocelot, port,
QSYS_SWITCH_PORT_MODE_PORT_ENA, 1);
- if (felix->info->pcs_link_up)
- felix->info->pcs_link_up(ocelot, port, link_an_mode, interface,
- speed, duplex);
-
if (felix->info->port_sched_speed_set)
felix->info->port_sched_speed_set(ocelot, port, speed);
}
@@ -627,11 +613,6 @@ static int felix_setup(struct dsa_switch *ds)
ds->mtu_enforcement_ingress = true;
ds->configure_vlan_while_not_filtering = true;
- /* It looks like the MAC/PCS interrupt register - PM0_IEVENT (0x8040)
- * isn't instantiated for the Felix PF.
- * In-band AN may take a few ms to complete, so we need to poll.
- */
- ds->pcs_poll = true;
return 0;
}
@@ -787,7 +768,6 @@ const struct dsa_switch_ops felix_switch_ops = {
.get_sset_count = felix_get_sset_count,
.get_ts_info = felix_get_ts_info,
.phylink_validate = felix_phylink_validate,
- .phylink_mac_link_state = felix_phylink_mac_pcs_get_state,
.phylink_mac_config = felix_phylink_mac_config,
.phylink_mac_link_down = felix_phylink_mac_link_down,
.phylink_mac_link_up = felix_phylink_mac_link_up,
diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h
index 98f1462..9bceb99 100644
--- a/drivers/net/dsa/ocelot/felix.h
+++ b/drivers/net/dsa/ocelot/felix.h
@@ -28,15 +28,6 @@ struct felix_info {
int imdio_pci_bar;
int (*mdio_bus_alloc)(struct ocelot *ocelot);
void (*mdio_bus_free)(struct ocelot *ocelot);
- void (*pcs_config)(struct ocelot *ocelot, int port,
- unsigned int link_an_mode,
- const struct phylink_link_state *state);
- void (*pcs_link_up)(struct ocelot *ocelot, int port,
- unsigned int link_an_mode,
- phy_interface_t interface,
- int speed, int duplex);
- void (*pcs_link_state)(struct ocelot *ocelot, int port,
- struct phylink_link_state *state);
void (*phylink_validate)(struct ocelot *ocelot, int port,
unsigned long *supported,
struct phylink_link_state *state);
@@ -59,20 +50,11 @@ struct felix {
const struct felix_info *info;
struct ocelot ocelot;
struct mii_bus *imdio;
- struct phy_device **pcs;
+ struct lynx_pcs **pcs;
resource_size_t switch_base;
resource_size_t imdio_base;
};
-void vsc9959_pcs_link_state(struct ocelot *ocelot, int port,
- struct phylink_link_state *state);
-void vsc9959_pcs_config(struct ocelot *ocelot, int port,
- unsigned int link_an_mode,
- const struct phylink_link_state *state);
-void vsc9959_pcs_link_up(struct ocelot *ocelot, int port,
- unsigned int link_an_mode,
- phy_interface_t interface,
- int speed, int duplex);
void vsc9959_mdio_bus_free(struct ocelot *ocelot);
#endif
diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index 9b720c8..126a53a 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -9,6 +9,7 @@
#include <soc/mscc/ocelot_sys.h>
#include <soc/mscc/ocelot.h>
#include <linux/packing.h>
+#include <linux/pcs-lynx.h>
#include <net/pkt_sched.h>
#include <linux/iopoll.h>
#include <linux/mdio.h>
@@ -766,347 +767,6 @@ static int vsc9959_reset(struct ocelot *ocelot)
return 0;
}
-/* We enable SGMII AN only when the PHY has managed = "in-band-status" in the
- * device tree. If we are in MLO_AN_PHY mode, we program directly state->speed
- * into the PCS, which is retrieved out-of-band over MDIO. This also has the
- * benefit of working with SGMII fixed-links, like downstream switches, where
- * both link partners attempt to operate as AN slaves and therefore AN never
- * completes. But it also has the disadvantage that some PHY chips don't pass
- * traffic if SGMII AN is enabled but not completed (acknowledged by us), so
- * setting MLO_AN_INBAND is actually required for those.
- */
-static void vsc9959_pcs_config_sgmii(struct phy_device *pcs,
- unsigned int link_an_mode,
- const struct phylink_link_state *state)
-{
- int bmsr, bmcr;
-
- /* Some PHYs like VSC8234 don't like it when AN restarts on
- * their system side and they restart line side AN too, going
- * into an endless link up/down loop. Don't restart PCS AN if
- * link is up already.
- * We do check that AN is enabled just in case this is the 1st
- * call, PCS detects a carrier but AN is disabled from power on
- * or by boot loader.
- */
- bmcr = phy_read(pcs, MII_BMCR);
- if (bmcr < 0)
- return;
-
- bmsr = phy_read(pcs, MII_BMSR);
- if (bmsr < 0)
- return;
-
- if ((bmcr & BMCR_ANENABLE) && (bmsr & BMSR_LSTATUS))
- return;
-
- /* SGMII spec requires tx_config_Reg[15:0] to be exactly 0x4001
- * for the MAC PCS in order to acknowledge the AN.
- */
- phy_write(pcs, MII_ADVERTISE, ADVERTISE_SGMII |
- ADVERTISE_LPACK);
-
- phy_write(pcs, ENETC_PCS_IF_MODE,
- ENETC_PCS_IF_MODE_SGMII_EN |
- ENETC_PCS_IF_MODE_USE_SGMII_AN);
-
- /* Adjust link timer for SGMII */
- phy_write(pcs, ENETC_PCS_LINK_TIMER1,
- ENETC_PCS_LINK_TIMER1_VAL);
- phy_write(pcs, ENETC_PCS_LINK_TIMER2,
- ENETC_PCS_LINK_TIMER2_VAL);
-
- phy_set_bits(pcs, MII_BMCR, BMCR_ANENABLE);
-}
-
-static void vsc9959_pcs_config_usxgmii(struct phy_device *pcs,
- unsigned int link_an_mode,
- const struct phylink_link_state *state)
-{
- /* Configure device ability for the USXGMII Replicator */
- phy_write_mmd(pcs, MDIO_MMD_VEND2, MII_ADVERTISE,
- MDIO_USXGMII_2500FULL |
- MDIO_USXGMII_LINK |
- ADVERTISE_SGMII |
- ADVERTISE_LPACK);
-}
-
-void vsc9959_pcs_config(struct ocelot *ocelot, int port,
- unsigned int link_an_mode,
- const struct phylink_link_state *state)
-{
- struct felix *felix = ocelot_to_felix(ocelot);
- struct phy_device *pcs = felix->pcs[port];
-
- if (!pcs)
- return;
-
- /* The PCS does not implement the BMSR register fully, so capability
- * detection via genphy_read_abilities does not work. Since we can get
- * the PHY config word from the LPA register though, there is still
- * value in using the generic phy_resolve_aneg_linkmode function. So
- * populate the supported and advertising link modes manually here.
- */
- linkmode_set_bit_array(phy_basic_ports_array,
- ARRAY_SIZE(phy_basic_ports_array),
- pcs->supported);
- linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, pcs->supported);
- linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, pcs->supported);
- linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, pcs->supported);
- linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, pcs->supported);
- linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, pcs->supported);
- linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, pcs->supported);
- if (pcs->interface == PHY_INTERFACE_MODE_2500BASEX ||
- pcs->interface == PHY_INTERFACE_MODE_USXGMII)
- linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseX_Full_BIT,
- pcs->supported);
- if (pcs->interface != PHY_INTERFACE_MODE_2500BASEX)
- linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
- pcs->supported);
- phy_advertise_supported(pcs);
-
- if (!phylink_autoneg_inband(link_an_mode))
- return;
-
- switch (pcs->interface) {
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_QSGMII:
- vsc9959_pcs_config_sgmii(pcs, link_an_mode, state);
- break;
- case PHY_INTERFACE_MODE_2500BASEX:
- phydev_err(pcs, "AN not supported on 3.125GHz SerDes lane\n");
- break;
- case PHY_INTERFACE_MODE_USXGMII:
- vsc9959_pcs_config_usxgmii(pcs, link_an_mode, state);
- break;
- default:
- dev_err(ocelot->dev, "Unsupported link mode %s\n",
- phy_modes(pcs->interface));
- }
-}
-
-static void vsc9959_pcs_link_up_sgmii(struct phy_device *pcs,
- unsigned int link_an_mode,
- int speed, int duplex)
-{
- u16 if_mode = ENETC_PCS_IF_MODE_SGMII_EN;
-
- switch (speed) {
- case SPEED_1000:
- if_mode |= ENETC_PCS_IF_MODE_SGMII_SPEED(ENETC_PCS_SPEED_1000);
- break;
- case SPEED_100:
- if_mode |= ENETC_PCS_IF_MODE_SGMII_SPEED(ENETC_PCS_SPEED_100);
- break;
- case SPEED_10:
- if_mode |= ENETC_PCS_IF_MODE_SGMII_SPEED(ENETC_PCS_SPEED_10);
- break;
- default:
- phydev_err(pcs, "Invalid PCS speed %d\n", speed);
- return;
- }
-
- if (duplex == DUPLEX_HALF)
- if_mode |= ENETC_PCS_IF_MODE_DUPLEX_HALF;
-
- phy_write(pcs, ENETC_PCS_IF_MODE, if_mode);
- phy_clear_bits(pcs, MII_BMCR, BMCR_ANENABLE);
-}
-
-/* 2500Base-X is SerDes protocol 7 on Felix and 6 on ENETC. It is a SerDes lane
- * clocked at 3.125 GHz which encodes symbols with 8b/10b and does not have
- * auto-negotiation of any link parameters. Electrically it is compatible with
- * a single lane of XAUI.
- * The hardware reference manual wants to call this mode SGMII, but it isn't
- * really, since the fundamental features of SGMII:
- * - Downgrading the link speed by duplicating symbols
- * - Auto-negotiation
- * are not there.
- * The speed is configured at 1000 in the IF_MODE and BMCR MDIO registers
- * because the clock frequency is actually given by a PLL configured in the
- * Reset Configuration Word (RCW).
- * Since there is no difference between fixed speed SGMII w/o AN and 802.3z w/o
- * AN, we call this PHY interface type 2500Base-X. In case a PHY negotiates a
- * lower link speed on line side, the system-side interface remains fixed at
- * 2500 Mbps and we do rate adaptation through pause frames.
- */
-static void vsc9959_pcs_link_up_2500basex(struct phy_device *pcs,
- unsigned int link_an_mode,
- int speed, int duplex)
-{
- u16 if_mode = ENETC_PCS_IF_MODE_SGMII_SPEED(ENETC_PCS_SPEED_2500) |
- ENETC_PCS_IF_MODE_SGMII_EN;
-
- if (duplex == DUPLEX_HALF)
- if_mode |= ENETC_PCS_IF_MODE_DUPLEX_HALF;
-
- phy_write(pcs, ENETC_PCS_IF_MODE, if_mode);
- phy_clear_bits(pcs, MII_BMCR, BMCR_ANENABLE);
-}
-
-void vsc9959_pcs_link_up(struct ocelot *ocelot, int port,
- unsigned int link_an_mode,
- phy_interface_t interface,
- int speed, int duplex)
-{
- struct felix *felix = ocelot_to_felix(ocelot);
- struct phy_device *pcs = felix->pcs[port];
-
- if (!pcs)
- return;
-
- if (phylink_autoneg_inband(link_an_mode))
- return;
-
- switch (interface) {
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_QSGMII:
- vsc9959_pcs_link_up_sgmii(pcs, link_an_mode, speed, duplex);
- break;
- case PHY_INTERFACE_MODE_2500BASEX:
- vsc9959_pcs_link_up_2500basex(pcs, link_an_mode, speed,
- duplex);
- break;
- case PHY_INTERFACE_MODE_USXGMII:
- phydev_err(pcs, "USXGMII only supports in-band AN for now\n");
- break;
- default:
- dev_err(ocelot->dev, "Unsupported link mode %s\n",
- phy_modes(pcs->interface));
- }
-}
-
-static void vsc9959_pcs_link_state_resolve(struct phy_device *pcs,
- struct phylink_link_state *state)
-{
- state->an_complete = pcs->autoneg_complete;
- state->an_enabled = pcs->autoneg;
- state->link = pcs->link;
- state->duplex = pcs->duplex;
- state->speed = pcs->speed;
- /* SGMII AN does not negotiate flow control, but that's ok,
- * since phylink already knows that, and does:
- * link_state.pause |= pl->phy_state.pause;
- */
- state->pause = MLO_PAUSE_NONE;
-
- phydev_dbg(pcs,
- "mode=%s/%s/%s adv=%*pb lpa=%*pb link=%u an_enabled=%u an_complete=%u\n",
- phy_modes(pcs->interface),
- phy_speed_to_str(pcs->speed),
- phy_duplex_to_str(pcs->duplex),
- __ETHTOOL_LINK_MODE_MASK_NBITS, pcs->advertising,
- __ETHTOOL_LINK_MODE_MASK_NBITS, pcs->lp_advertising,
- pcs->link, pcs->autoneg, pcs->autoneg_complete);
-}
-
-static void vsc9959_pcs_link_state_sgmii(struct phy_device *pcs,
- struct phylink_link_state *state)
-{
- int err;
-
- err = genphy_update_link(pcs);
- if (err < 0)
- return;
-
- if (pcs->autoneg_complete) {
- u16 lpa = phy_read(pcs, MII_LPA);
-
- mii_lpa_to_linkmode_lpa_sgmii(pcs->lp_advertising, lpa);
-
- phy_resolve_aneg_linkmode(pcs);
- }
-}
-
-static void vsc9959_pcs_link_state_2500basex(struct phy_device *pcs,
- struct phylink_link_state *state)
-{
- int err;
-
- err = genphy_update_link(pcs);
- if (err < 0)
- return;
-
- pcs->speed = SPEED_2500;
- pcs->asym_pause = true;
- pcs->pause = true;
-}
-
-static void vsc9959_pcs_link_state_usxgmii(struct phy_device *pcs,
- struct phylink_link_state *state)
-{
- int status, lpa;
-
- status = phy_read_mmd(pcs, MDIO_MMD_VEND2, MII_BMSR);
- if (status < 0)
- return;
-
- pcs->autoneg = true;
- pcs->autoneg_complete = !!(status & BMSR_ANEGCOMPLETE);
- pcs->link = !!(status & BMSR_LSTATUS);
-
- if (!pcs->link || !pcs->autoneg_complete)
- return;
-
- lpa = phy_read_mmd(pcs, MDIO_MMD_VEND2, MII_LPA);
- if (lpa < 0)
- return;
-
- switch (lpa & MDIO_USXGMII_SPD_MASK) {
- case MDIO_USXGMII_10:
- pcs->speed = SPEED_10;
- break;
- case MDIO_USXGMII_100:
- pcs->speed = SPEED_100;
- break;
- case MDIO_USXGMII_1000:
- pcs->speed = SPEED_1000;
- break;
- case MDIO_USXGMII_2500:
- pcs->speed = SPEED_2500;
- break;
- default:
- break;
- }
-
- if (lpa & MDIO_USXGMII_FULL_DUPLEX)
- pcs->duplex = DUPLEX_FULL;
- else
- pcs->duplex = DUPLEX_HALF;
-}
-
-void vsc9959_pcs_link_state(struct ocelot *ocelot, int port,
- struct phylink_link_state *state)
-{
- struct felix *felix = ocelot_to_felix(ocelot);
- struct phy_device *pcs = felix->pcs[port];
-
- if (!pcs)
- return;
-
- pcs->speed = SPEED_UNKNOWN;
- pcs->duplex = DUPLEX_UNKNOWN;
- pcs->pause = 0;
- pcs->asym_pause = 0;
-
- switch (pcs->interface) {
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_QSGMII:
- vsc9959_pcs_link_state_sgmii(pcs, state);
- break;
- case PHY_INTERFACE_MODE_2500BASEX:
- vsc9959_pcs_link_state_2500basex(pcs, state);
- break;
- case PHY_INTERFACE_MODE_USXGMII:
- vsc9959_pcs_link_state_usxgmii(pcs, state);
- break;
- default:
- return;
- }
-
- vsc9959_pcs_link_state_resolve(pcs, state);
-}
-
static void vsc9959_phylink_validate(struct ocelot *ocelot, int port,
unsigned long *supported,
struct phylink_link_state *state)
@@ -1195,7 +855,7 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot)
int rc;
felix->pcs = devm_kcalloc(dev, felix->info->num_ports,
- sizeof(struct phy_device *),
+ sizeof(struct lynx_pcs *),
GFP_KERNEL);
if (!felix->pcs) {
dev_err(dev, "failed to allocate array for PCS PHYs\n");
@@ -1246,18 +906,26 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot)
for (port = 0; port < felix->info->num_ports; port++) {
struct ocelot_port *ocelot_port = ocelot->ports[port];
- struct phy_device *pcs;
- bool is_c45 = false;
+ struct mdio_device *pcs;
+ struct lynx_pcs *lynx;
- if (ocelot_port->phy_mode == PHY_INTERFACE_MODE_USXGMII)
- is_c45 = true;
+ if (dsa_is_unused_port(felix->ds, port))
+ continue;
- pcs = get_phy_device(felix->imdio, port, is_c45);
+ if (ocelot_port->phy_mode == PHY_INTERFACE_MODE_INTERNAL)
+ continue;
+
+ pcs = mdio_device_create(felix->imdio, port);
if (IS_ERR(pcs))
continue;
- pcs->interface = ocelot_port->phy_mode;
- felix->pcs[port] = pcs;
+ lynx = lynx_pcs_create(pcs);
+ if (!lynx) {
+ mdio_device_free(pcs);
+ continue;
+ }
+
+ felix->pcs[port] = lynx;
dev_info(dev, "Found PCS at internal MDIO address %d\n", port);
}
@@ -1271,12 +939,13 @@ void vsc9959_mdio_bus_free(struct ocelot *ocelot)
int port;
for (port = 0; port < ocelot->num_phys_ports; port++) {
- struct phy_device *pcs = felix->pcs[port];
+ struct lynx_pcs *pcs = felix->pcs[port];
if (!pcs)
continue;
- put_device(&pcs->mdio.dev);
+ mdio_device_free(pcs->mdio);
+ lynx_pcs_destroy(pcs);
}
mdiobus_unregister(felix->imdio);
}
@@ -1499,9 +1168,6 @@ static const struct felix_info felix_info_vsc9959 = {
.imdio_pci_bar = 0,
.mdio_bus_alloc = vsc9959_mdio_bus_alloc,
.mdio_bus_free = vsc9959_mdio_bus_free,
- .pcs_config = vsc9959_pcs_config,
- .pcs_link_up = vsc9959_pcs_link_up,
- .pcs_link_state = vsc9959_pcs_link_state,
.phylink_validate = vsc9959_phylink_validate,
.prevalidate_phy_mode = vsc9959_prevalidate_phy_mode,
.port_setup_tc = vsc9959_port_setup_tc,
diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c
index 625b189..2d6a5f5 100644
--- a/drivers/net/dsa/ocelot/seville_vsc9953.c
+++ b/drivers/net/dsa/ocelot/seville_vsc9953.c
@@ -7,6 +7,7 @@
#include <soc/mscc/ocelot_sys.h>
#include <soc/mscc/ocelot.h>
#include <linux/of_platform.h>
+#include <linux/pcs-lynx.h>
#include <linux/packing.h>
#include <linux/iopoll.h>
#include "felix.h"
@@ -960,18 +961,27 @@ static int vsc9953_mdio_bus_alloc(struct ocelot *ocelot)
for (port = 0; port < felix->info->num_ports; port++) {
struct ocelot_port *ocelot_port = ocelot->ports[port];
- struct phy_device *pcs;
int addr = port + 4;
+ struct mdio_device *pcs;
+ struct lynx_pcs *lynx;
+
+ if (dsa_is_unused_port(felix->ds, port))
+ continue;
if (ocelot_port->phy_mode == PHY_INTERFACE_MODE_INTERNAL)
continue;
- pcs = get_phy_device(felix->imdio, addr, false);
+ pcs = mdio_device_create(felix->imdio, addr);
if (IS_ERR(pcs))
continue;
- pcs->interface = ocelot_port->phy_mode;
- felix->pcs[port] = pcs;
+ lynx = lynx_pcs_create(pcs);
+ if (!lynx) {
+ mdio_device_free(pcs);
+ continue;
+ }
+
+ felix->pcs[port] = lynx;
dev_info(dev, "Found PCS at internal MDIO address %d\n", addr);
}
@@ -1013,9 +1023,6 @@ static const struct felix_info seville_info_vsc9953 = {
.num_ports = 10,
.mdio_bus_alloc = vsc9953_mdio_bus_alloc,
.mdio_bus_free = vsc9959_mdio_bus_free,
- .pcs_config = vsc9959_pcs_config,
- .pcs_link_up = vsc9959_pcs_link_up,
- .pcs_link_state = vsc9959_pcs_link_state,
.phylink_validate = vsc9953_phylink_validate,
.prevalidate_phy_mode = vsc9953_prevalidate_phy_mode,
.xmit_template_populate = vsc9953_xmit_template_populate,
diff --git a/drivers/net/dsa/realtek-smi-core.h b/drivers/net/dsa/realtek-smi-core.h
index 9a63b51..6f2dab7 100644
--- a/drivers/net/dsa/realtek-smi-core.h
+++ b/drivers/net/dsa/realtek-smi-core.h
@@ -25,6 +25,9 @@ struct rtl8366_mib_counter {
const char *name;
};
+/**
+ * struct rtl8366_vlan_mc - Virtual LAN member configuration
+ */
struct rtl8366_vlan_mc {
u16 vid;
u16 untag;
@@ -119,7 +122,6 @@ int realtek_smi_setup_mdio(struct realtek_smi *smi);
int rtl8366_mc_is_used(struct realtek_smi *smi, int mc_index, int *used);
int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member,
u32 untag, u32 fid);
-int rtl8366_get_pvid(struct realtek_smi *smi, int port, int *val);
int rtl8366_set_pvid(struct realtek_smi *smi, unsigned int port,
unsigned int vid);
int rtl8366_enable_vlan4k(struct realtek_smi *smi, bool enable);
diff --git a/drivers/net/dsa/rtl8366.c b/drivers/net/dsa/rtl8366.c
index 8f40fbf..2dcde7a 100644
--- a/drivers/net/dsa/rtl8366.c
+++ b/drivers/net/dsa/rtl8366.c
@@ -36,13 +36,114 @@ int rtl8366_mc_is_used(struct realtek_smi *smi, int mc_index, int *used)
}
EXPORT_SYMBOL_GPL(rtl8366_mc_is_used);
-int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member,
- u32 untag, u32 fid)
+/**
+ * rtl8366_obtain_mc() - retrieve or allocate a VLAN member configuration
+ * @smi: the Realtek SMI device instance
+ * @vid: the VLAN ID to look up or allocate
+ * @vlanmc: the pointer will be assigned to a pointer to a valid member config
+ * if successful
+ * @return: index of a new member config or negative error number
+ */
+static int rtl8366_obtain_mc(struct realtek_smi *smi, int vid,
+ struct rtl8366_vlan_mc *vlanmc)
{
struct rtl8366_vlan_4k vlan4k;
int ret;
int i;
+ /* Try to find an existing member config entry for this VID */
+ for (i = 0; i < smi->num_vlan_mc; i++) {
+ ret = smi->ops->get_vlan_mc(smi, i, vlanmc);
+ if (ret) {
+ dev_err(smi->dev, "error searching for VLAN MC %d for VID %d\n",
+ i, vid);
+ return ret;
+ }
+
+ if (vid == vlanmc->vid)
+ return i;
+ }
+
+ /* We have no MC entry for this VID, try to find an empty one */
+ for (i = 0; i < smi->num_vlan_mc; i++) {
+ ret = smi->ops->get_vlan_mc(smi, i, vlanmc);
+ if (ret) {
+ dev_err(smi->dev, "error searching for VLAN MC %d for VID %d\n",
+ i, vid);
+ return ret;
+ }
+
+ if (vlanmc->vid == 0 && vlanmc->member == 0) {
+ /* Update the entry from the 4K table */
+ ret = smi->ops->get_vlan_4k(smi, vid, &vlan4k);
+ if (ret) {
+ dev_err(smi->dev, "error looking for 4K VLAN MC %d for VID %d\n",
+ i, vid);
+ return ret;
+ }
+
+ vlanmc->vid = vid;
+ vlanmc->member = vlan4k.member;
+ vlanmc->untag = vlan4k.untag;
+ vlanmc->fid = vlan4k.fid;
+ ret = smi->ops->set_vlan_mc(smi, i, vlanmc);
+ if (ret) {
+ dev_err(smi->dev, "unable to set/update VLAN MC %d for VID %d\n",
+ i, vid);
+ return ret;
+ }
+
+ dev_dbg(smi->dev, "created new MC at index %d for VID %d\n",
+ i, vid);
+ return i;
+ }
+ }
+
+ /* MC table is full, try to find an unused entry and replace it */
+ for (i = 0; i < smi->num_vlan_mc; i++) {
+ int used;
+
+ ret = rtl8366_mc_is_used(smi, i, &used);
+ if (ret)
+ return ret;
+
+ if (!used) {
+ /* Update the entry from the 4K table */
+ ret = smi->ops->get_vlan_4k(smi, vid, &vlan4k);
+ if (ret)
+ return ret;
+
+ vlanmc->vid = vid;
+ vlanmc->member = vlan4k.member;
+ vlanmc->untag = vlan4k.untag;
+ vlanmc->fid = vlan4k.fid;
+ ret = smi->ops->set_vlan_mc(smi, i, vlanmc);
+ if (ret) {
+ dev_err(smi->dev, "unable to set/update VLAN MC %d for VID %d\n",
+ i, vid);
+ return ret;
+ }
+ dev_dbg(smi->dev, "recycled MC at index %i for VID %d\n",
+ i, vid);
+ return i;
+ }
+ }
+
+ dev_err(smi->dev, "all VLAN member configurations are in use\n");
+ return -ENOSPC;
+}
+
+int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member,
+ u32 untag, u32 fid)
+{
+ struct rtl8366_vlan_mc vlanmc;
+ struct rtl8366_vlan_4k vlan4k;
+ int mc;
+ int ret;
+
+ if (!smi->ops->is_vlan_valid(smi, vid))
+ return -EINVAL;
+
dev_dbg(smi->dev,
"setting VLAN%d 4k members: 0x%02x, untagged: 0x%02x\n",
vid, member, untag);
@@ -63,133 +164,58 @@ int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member,
"resulting VLAN%d 4k members: 0x%02x, untagged: 0x%02x\n",
vid, vlan4k.member, vlan4k.untag);
- /* Try to find an existing MC entry for this VID */
- for (i = 0; i < smi->num_vlan_mc; i++) {
- struct rtl8366_vlan_mc vlanmc;
+ /* Find or allocate a member config for this VID */
+ ret = rtl8366_obtain_mc(smi, vid, &vlanmc);
+ if (ret < 0)
+ return ret;
+ mc = ret;
- ret = smi->ops->get_vlan_mc(smi, i, &vlanmc);
- if (ret)
- return ret;
+ /* Update the MC entry */
+ vlanmc.member |= member;
+ vlanmc.untag |= untag;
+ vlanmc.fid = fid;
- if (vid == vlanmc.vid) {
- /* update the MC entry */
- vlanmc.member |= member;
- vlanmc.untag |= untag;
- vlanmc.fid = fid;
-
- ret = smi->ops->set_vlan_mc(smi, i, &vlanmc);
-
- dev_dbg(smi->dev,
- "resulting VLAN%d MC members: 0x%02x, untagged: 0x%02x\n",
- vid, vlanmc.member, vlanmc.untag);
-
- break;
- }
- }
+ /* Commit updates to the MC entry */
+ ret = smi->ops->set_vlan_mc(smi, mc, &vlanmc);
+ if (ret)
+ dev_err(smi->dev, "failed to commit changes to VLAN MC index %d for VID %d\n",
+ mc, vid);
+ else
+ dev_dbg(smi->dev,
+ "resulting VLAN%d MC members: 0x%02x, untagged: 0x%02x\n",
+ vid, vlanmc.member, vlanmc.untag);
return ret;
}
EXPORT_SYMBOL_GPL(rtl8366_set_vlan);
-int rtl8366_get_pvid(struct realtek_smi *smi, int port, int *val)
-{
- struct rtl8366_vlan_mc vlanmc;
- int ret;
- int index;
-
- ret = smi->ops->get_mc_index(smi, port, &index);
- if (ret)
- return ret;
-
- ret = smi->ops->get_vlan_mc(smi, index, &vlanmc);
- if (ret)
- return ret;
-
- *val = vlanmc.vid;
- return 0;
-}
-EXPORT_SYMBOL_GPL(rtl8366_get_pvid);
-
int rtl8366_set_pvid(struct realtek_smi *smi, unsigned int port,
unsigned int vid)
{
struct rtl8366_vlan_mc vlanmc;
- struct rtl8366_vlan_4k vlan4k;
+ int mc;
int ret;
- int i;
- /* Try to find an existing MC entry for this VID */
- for (i = 0; i < smi->num_vlan_mc; i++) {
- ret = smi->ops->get_vlan_mc(smi, i, &vlanmc);
- if (ret)
- return ret;
+ if (!smi->ops->is_vlan_valid(smi, vid))
+ return -EINVAL;
- if (vid == vlanmc.vid) {
- ret = smi->ops->set_vlan_mc(smi, i, &vlanmc);
- if (ret)
- return ret;
+ /* Find or allocate a member config for this VID */
+ ret = rtl8366_obtain_mc(smi, vid, &vlanmc);
+ if (ret < 0)
+ return ret;
+ mc = ret;
- ret = smi->ops->set_mc_index(smi, port, i);
- return ret;
- }
+ ret = smi->ops->set_mc_index(smi, port, mc);
+ if (ret) {
+ dev_err(smi->dev, "set PVID: failed to set MC index %d for port %d\n",
+ mc, port);
+ return ret;
}
- /* We have no MC entry for this VID, try to find an empty one */
- for (i = 0; i < smi->num_vlan_mc; i++) {
- ret = smi->ops->get_vlan_mc(smi, i, &vlanmc);
- if (ret)
- return ret;
+ dev_dbg(smi->dev, "set PVID: the PVID for port %d set to %d using existing MC index %d\n",
+ port, vid, mc);
- if (vlanmc.vid == 0 && vlanmc.member == 0) {
- /* Update the entry from the 4K table */
- ret = smi->ops->get_vlan_4k(smi, vid, &vlan4k);
- if (ret)
- return ret;
-
- vlanmc.vid = vid;
- vlanmc.member = vlan4k.member;
- vlanmc.untag = vlan4k.untag;
- vlanmc.fid = vlan4k.fid;
- ret = smi->ops->set_vlan_mc(smi, i, &vlanmc);
- if (ret)
- return ret;
-
- ret = smi->ops->set_mc_index(smi, port, i);
- return ret;
- }
- }
-
- /* MC table is full, try to find an unused entry and replace it */
- for (i = 0; i < smi->num_vlan_mc; i++) {
- int used;
-
- ret = rtl8366_mc_is_used(smi, i, &used);
- if (ret)
- return ret;
-
- if (!used) {
- /* Update the entry from the 4K table */
- ret = smi->ops->get_vlan_4k(smi, vid, &vlan4k);
- if (ret)
- return ret;
-
- vlanmc.vid = vid;
- vlanmc.member = vlan4k.member;
- vlanmc.untag = vlan4k.untag;
- vlanmc.fid = vlan4k.fid;
- ret = smi->ops->set_vlan_mc(smi, i, &vlanmc);
- if (ret)
- return ret;
-
- ret = smi->ops->set_mc_index(smi, port, i);
- return ret;
- }
- }
-
- dev_err(smi->dev,
- "all VLAN member configurations are in use\n");
-
- return -ENOSPC;
+ return 0;
}
EXPORT_SYMBOL_GPL(rtl8366_set_pvid);
@@ -389,7 +415,8 @@ void rtl8366_vlan_add(struct dsa_switch *ds, int port,
if (!smi->ops->is_vlan_valid(smi, vid))
return;
- dev_info(smi->dev, "add VLAN on port %d, %s, %s\n",
+ dev_info(smi->dev, "add VLAN %d on port %d, %s, %s\n",
+ vlan->vid_begin,
port,
untagged ? "untagged" : "tagged",
pvid ? " PVID" : "no PVID");
@@ -398,34 +425,26 @@ void rtl8366_vlan_add(struct dsa_switch *ds, int port,
dev_err(smi->dev, "port is DSA or CPU port\n");
for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
- int pvid_val = 0;
-
- dev_info(smi->dev, "add VLAN %04x\n", vid);
member |= BIT(port);
if (untagged)
untag |= BIT(port);
- /* To ensure that we have a valid MC entry for this VLAN,
- * initialize the port VLAN ID here.
- */
- ret = rtl8366_get_pvid(smi, port, &pvid_val);
- if (ret < 0) {
- dev_err(smi->dev, "could not lookup PVID for port %d\n",
- port);
- return;
- }
- if (pvid_val == 0) {
- ret = rtl8366_set_pvid(smi, port, vid);
- if (ret < 0)
- return;
- }
-
ret = rtl8366_set_vlan(smi, vid, member, untag, 0);
if (ret)
dev_err(smi->dev,
"failed to set up VLAN %04x",
vid);
+
+ ret = rtl8366_set_pvid(smi, port, vid);
+ if (ret)
+ dev_err(smi->dev,
+ "failed to set PVID on port %d to VLAN %04x",
+ port, vid);
+
+ if (!ret)
+ dev_dbg(smi->dev, "VLAN add: added VLAN %d with PVID on port %d\n",
+ vid, port);
}
}
EXPORT_SYMBOL_GPL(rtl8366_vlan_add);
diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c
index d3b30ba..f11474c 100644
--- a/drivers/net/ethernet/3com/typhoon.c
+++ b/drivers/net/ethernet/3com/typhoon.c
@@ -789,8 +789,8 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev)
* it with zeros to ETH_ZLEN for us.
*/
if (skb_shinfo(skb)->nr_frags == 0) {
- skb_dma = pci_map_single(tp->tx_pdev, skb->data, skb->len,
- PCI_DMA_TODEVICE);
+ skb_dma = dma_map_single(&tp->tx_pdev->dev, skb->data,
+ skb->len, DMA_TO_DEVICE);
txd->flags = TYPHOON_FRAG_DESC | TYPHOON_DESC_VALID;
txd->len = cpu_to_le16(skb->len);
txd->frag.addr = cpu_to_le32(skb_dma);
@@ -800,8 +800,8 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev)
int i, len;
len = skb_headlen(skb);
- skb_dma = pci_map_single(tp->tx_pdev, skb->data, len,
- PCI_DMA_TODEVICE);
+ skb_dma = dma_map_single(&tp->tx_pdev->dev, skb->data, len,
+ DMA_TO_DEVICE);
txd->flags = TYPHOON_FRAG_DESC | TYPHOON_DESC_VALID;
txd->len = cpu_to_le16(len);
txd->frag.addr = cpu_to_le32(skb_dma);
@@ -818,8 +818,8 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev)
len = skb_frag_size(frag);
frag_addr = skb_frag_address(frag);
- skb_dma = pci_map_single(tp->tx_pdev, frag_addr, len,
- PCI_DMA_TODEVICE);
+ skb_dma = dma_map_single(&tp->tx_pdev->dev, frag_addr,
+ len, DMA_TO_DEVICE);
txd->flags = TYPHOON_FRAG_DESC | TYPHOON_DESC_VALID;
txd->len = cpu_to_le16(len);
txd->frag.addr = cpu_to_le32(skb_dma);
@@ -1349,12 +1349,12 @@ typhoon_download_firmware(struct typhoon *tp)
image_data = typhoon_fw->data;
fHdr = (struct typhoon_file_header *) image_data;
- /* Cannot just map the firmware image using pci_map_single() as
+ /* Cannot just map the firmware image using dma_map_single() as
* the firmware is vmalloc()'d and may not be physically contiguous,
- * so we allocate some consistent memory to copy the sections into.
+ * so we allocate some coherent memory to copy the sections into.
*/
err = -ENOMEM;
- dpage = pci_alloc_consistent(pdev, PAGE_SIZE, &dpage_dma);
+ dpage = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &dpage_dma, GFP_ATOMIC);
if (!dpage) {
netdev_err(tp->dev, "no DMA mem for firmware\n");
goto err_out;
@@ -1460,7 +1460,7 @@ typhoon_download_firmware(struct typhoon *tp)
iowrite32(irqMasked, ioaddr + TYPHOON_REG_INTR_MASK);
iowrite32(irqEnabled, ioaddr + TYPHOON_REG_INTR_ENABLE);
- pci_free_consistent(pdev, PAGE_SIZE, dpage, dpage_dma);
+ dma_free_coherent(&pdev->dev, PAGE_SIZE, dpage, dpage_dma);
err_out:
return err;
@@ -1527,8 +1527,8 @@ typhoon_clean_tx(struct typhoon *tp, struct transmit_ring *txRing,
*/
skb_dma = (dma_addr_t) le32_to_cpu(tx->frag.addr);
dma_len = le16_to_cpu(tx->len);
- pci_unmap_single(tp->pdev, skb_dma, dma_len,
- PCI_DMA_TODEVICE);
+ dma_unmap_single(&tp->pdev->dev, skb_dma, dma_len,
+ DMA_TO_DEVICE);
}
tx->flags = 0;
@@ -1609,8 +1609,8 @@ typhoon_alloc_rx_skb(struct typhoon *tp, u32 idx)
skb_reserve(skb, 2);
#endif
- dma_addr = pci_map_single(tp->pdev, skb->data,
- PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+ dma_addr = dma_map_single(&tp->pdev->dev, skb->data, PKT_BUF_SZ,
+ DMA_FROM_DEVICE);
/* Since no card does 64 bit DAC, the high bits will never
* change from zero.
@@ -1665,20 +1665,19 @@ typhoon_rx(struct typhoon *tp, struct basic_ring *rxRing, volatile __le32 * read
if (pkt_len < rx_copybreak &&
(new_skb = netdev_alloc_skb(tp->dev, pkt_len + 2)) != NULL) {
skb_reserve(new_skb, 2);
- pci_dma_sync_single_for_cpu(tp->pdev, dma_addr,
- PKT_BUF_SZ,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_cpu(&tp->pdev->dev, dma_addr,
+ PKT_BUF_SZ, DMA_FROM_DEVICE);
skb_copy_to_linear_data(new_skb, skb->data, pkt_len);
- pci_dma_sync_single_for_device(tp->pdev, dma_addr,
- PKT_BUF_SZ,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_device(&tp->pdev->dev, dma_addr,
+ PKT_BUF_SZ,
+ DMA_FROM_DEVICE);
skb_put(new_skb, pkt_len);
typhoon_recycle_rx_skb(tp, idx);
} else {
new_skb = skb;
skb_put(new_skb, pkt_len);
- pci_unmap_single(tp->pdev, dma_addr, PKT_BUF_SZ,
- PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&tp->pdev->dev, dma_addr, PKT_BUF_SZ,
+ DMA_FROM_DEVICE);
typhoon_alloc_rx_skb(tp, idx);
}
new_skb->protocol = eth_type_trans(new_skb, tp->dev);
@@ -1792,8 +1791,8 @@ typhoon_free_rx_rings(struct typhoon *tp)
for (i = 0; i < RXENT_ENTRIES; i++) {
struct rxbuff_ent *rxb = &tp->rxbuffers[i];
if (rxb->skb) {
- pci_unmap_single(tp->pdev, rxb->dma_addr, PKT_BUF_SZ,
- PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&tp->pdev->dev, rxb->dma_addr,
+ PKT_BUF_SZ, DMA_FROM_DEVICE);
dev_kfree_skb(rxb->skb);
rxb->skb = NULL;
}
@@ -2306,7 +2305,7 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
goto error_out_disable;
}
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
if (err < 0) {
err_msg = "No usable DMA configuration";
goto error_out_mwi;
@@ -2355,8 +2354,8 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* allocate pci dma space for rx and tx descriptor rings
*/
- shared = pci_alloc_consistent(pdev, sizeof(struct typhoon_shared),
- &shared_dma);
+ shared = dma_alloc_coherent(&pdev->dev, sizeof(struct typhoon_shared),
+ &shared_dma, GFP_KERNEL);
if (!shared) {
err_msg = "could not allocate DMA memory";
err = -ENOMEM;
@@ -2509,8 +2508,8 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
typhoon_reset(ioaddr, NoWait);
error_out_dma:
- pci_free_consistent(pdev, sizeof(struct typhoon_shared),
- shared, shared_dma);
+ dma_free_coherent(&pdev->dev, sizeof(struct typhoon_shared), shared,
+ shared_dma);
error_out_remap:
pci_iounmap(pdev, ioaddr);
error_out_regions:
@@ -2537,8 +2536,8 @@ typhoon_remove_one(struct pci_dev *pdev)
pci_restore_state(pdev);
typhoon_reset(tp->ioaddr, NoWait);
pci_iounmap(pdev, tp->ioaddr);
- pci_free_consistent(pdev, sizeof(struct typhoon_shared),
- tp->shared, tp->shared_dma);
+ dma_free_coherent(&pdev->dev, sizeof(struct typhoon_shared),
+ tp->shared, tp->shared_dma);
pci_release_regions(pdev);
pci_clear_mwi(pdev);
pci_disable_device(pdev);
diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c
index a00b36f..2488bfd 100644
--- a/drivers/net/ethernet/8390/axnet_cs.c
+++ b/drivers/net/ethernet/8390/axnet_cs.c
@@ -657,8 +657,10 @@ static void block_input(struct net_device *dev, int count,
outb_p(E8390_RREAD+E8390_START, nic_base + AXNET_CMD);
insw(nic_base + AXNET_DATAPORT,buf,count>>1);
- if (count & 0x01)
- buf[count-1] = inb(nic_base + AXNET_DATAPORT), xfer_count++;
+ if (count & 0x01) {
+ buf[count-1] = inb(nic_base + AXNET_DATAPORT);
+ xfer_count++;
+ }
}
@@ -1270,10 +1272,12 @@ static void ei_tx_intr(struct net_device *dev)
ei_local->txing = 1;
NS8390_trigger_send(dev, ei_local->tx2, ei_local->tx_start_page + 6);
netif_trans_update(dev);
- ei_local->tx2 = -1,
+ ei_local->tx2 = -1;
ei_local->lasttx = 2;
+ } else {
+ ei_local->lasttx = 20;
+ ei_local->txing = 0;
}
- else ei_local->lasttx = 20, ei_local->txing = 0;
}
else if (ei_local->tx2 < 0)
{
@@ -1289,9 +1293,10 @@ static void ei_tx_intr(struct net_device *dev)
netif_trans_update(dev);
ei_local->tx1 = -1;
ei_local->lasttx = 1;
+ } else {
+ ei_local->lasttx = 10;
+ ei_local->txing = 0;
}
- else
- ei_local->lasttx = 10, ei_local->txing = 0;
}
// else
// netdev_warn(dev, "unexpected TX-done interrupt, lasttx=%d\n",
diff --git a/drivers/net/ethernet/8390/lib8390.c b/drivers/net/ethernet/8390/lib8390.c
index babc92e..1f48d7f 100644
--- a/drivers/net/ethernet/8390/lib8390.c
+++ b/drivers/net/ethernet/8390/lib8390.c
@@ -597,10 +597,12 @@ static void ei_tx_intr(struct net_device *dev)
ei_local->txing = 1;
NS8390_trigger_send(dev, ei_local->tx2, ei_local->tx_start_page + 6);
netif_trans_update(dev);
- ei_local->tx2 = -1,
+ ei_local->tx2 = -1;
ei_local->lasttx = 2;
- } else
- ei_local->lasttx = 20, ei_local->txing = 0;
+ } else {
+ ei_local->lasttx = 20;
+ ei_local->txing = 0;
+ }
} else if (ei_local->tx2 < 0) {
if (ei_local->lasttx != 2 && ei_local->lasttx != -2)
pr_err("%s: bogus last_tx_buffer %d, tx2=%d\n",
@@ -612,8 +614,10 @@ static void ei_tx_intr(struct net_device *dev)
netif_trans_update(dev);
ei_local->tx1 = -1;
ei_local->lasttx = 1;
- } else
- ei_local->lasttx = 10, ei_local->txing = 0;
+ } else {
+ ei_local->lasttx = 10;
+ ei_local->txing = 0;
+ }
} /* else
netdev_warn(dev, "unexpected TX-done interrupt, lasttx=%d\n",
ei_local->lasttx);
diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c
index 164c3ed..9d3b1e0 100644
--- a/drivers/net/ethernet/8390/pcnet_cs.c
+++ b/drivers/net/ethernet/8390/pcnet_cs.c
@@ -1178,8 +1178,10 @@ static void dma_block_input(struct net_device *dev, int count,
outb_p(E8390_RREAD+E8390_START, nic_base + PCNET_CMD);
insw(nic_base + PCNET_DATAPORT,buf,count>>1);
- if (count & 0x01)
- buf[count-1] = inb(nic_base + PCNET_DATAPORT), xfer_count++;
+ if (count & 0x01) {
+ buf[count-1] = inb(nic_base + PCNET_DATAPORT);
+ xfer_count++;
+ }
/* This was for the ALPHA version only, but enough people have been
encountering problems that it is still here. */
diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c
index ba0055b..5552997 100644
--- a/drivers/net/ethernet/adaptec/starfire.c
+++ b/drivers/net/ethernet/adaptec/starfire.c
@@ -886,7 +886,9 @@ static int netdev_open(struct net_device *dev)
tx_ring_size = ((sizeof(starfire_tx_desc) * TX_RING_SIZE + QUEUE_ALIGN - 1) / QUEUE_ALIGN) * QUEUE_ALIGN;
rx_ring_size = sizeof(struct starfire_rx_desc) * RX_RING_SIZE;
np->queue_mem_size = tx_done_q_size + rx_done_q_size + tx_ring_size + rx_ring_size;
- np->queue_mem = pci_alloc_consistent(np->pci_dev, np->queue_mem_size, &np->queue_mem_dma);
+ np->queue_mem = dma_alloc_coherent(&np->pci_dev->dev,
+ np->queue_mem_size,
+ &np->queue_mem_dma, GFP_ATOMIC);
if (np->queue_mem == NULL) {
free_irq(irq, dev);
return -ENOMEM;
@@ -1136,9 +1138,11 @@ static void init_ring(struct net_device *dev)
np->rx_info[i].skb = skb;
if (skb == NULL)
break;
- np->rx_info[i].mapping = pci_map_single(np->pci_dev, skb->data, np->rx_buf_sz, PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(np->pci_dev,
- np->rx_info[i].mapping)) {
+ np->rx_info[i].mapping = dma_map_single(&np->pci_dev->dev,
+ skb->data,
+ np->rx_buf_sz,
+ DMA_FROM_DEVICE);
+ if (dma_mapping_error(&np->pci_dev->dev, np->rx_info[i].mapping)) {
dev_kfree_skb(skb);
np->rx_info[i].skb = NULL;
break;
@@ -1217,18 +1221,19 @@ static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev)
status |= skb_first_frag_len(skb) | (skb_num_frags(skb) << 16);
np->tx_info[entry].mapping =
- pci_map_single(np->pci_dev, skb->data, skb_first_frag_len(skb), PCI_DMA_TODEVICE);
+ dma_map_single(&np->pci_dev->dev, skb->data,
+ skb_first_frag_len(skb),
+ DMA_TO_DEVICE);
} else {
const skb_frag_t *this_frag = &skb_shinfo(skb)->frags[i - 1];
status |= skb_frag_size(this_frag);
np->tx_info[entry].mapping =
- pci_map_single(np->pci_dev,
+ dma_map_single(&np->pci_dev->dev,
skb_frag_address(this_frag),
skb_frag_size(this_frag),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
}
- if (pci_dma_mapping_error(np->pci_dev,
- np->tx_info[entry].mapping)) {
+ if (dma_mapping_error(&np->pci_dev->dev, np->tx_info[entry].mapping)) {
dev->stats.tx_dropped++;
goto err_out;
}
@@ -1271,18 +1276,16 @@ static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev)
entry = prev_tx % TX_RING_SIZE;
np->tx_info[entry].skb = NULL;
if (i > 0) {
- pci_unmap_single(np->pci_dev,
+ dma_unmap_single(&np->pci_dev->dev,
np->tx_info[entry].mapping,
- skb_first_frag_len(skb),
- PCI_DMA_TODEVICE);
+ skb_first_frag_len(skb), DMA_TO_DEVICE);
np->tx_info[entry].mapping = 0;
entry = (entry + np->tx_info[entry].used_slots) % TX_RING_SIZE;
for (j = 1; j < i; j++) {
- pci_unmap_single(np->pci_dev,
+ dma_unmap_single(&np->pci_dev->dev,
np->tx_info[entry].mapping,
- skb_frag_size(
- &skb_shinfo(skb)->frags[j-1]),
- PCI_DMA_TODEVICE);
+ skb_frag_size(&skb_shinfo(skb)->frags[j - 1]),
+ DMA_TO_DEVICE);
entry++;
}
}
@@ -1356,20 +1359,20 @@ static irqreturn_t intr_handler(int irq, void *dev_instance)
u16 entry = (tx_status & 0x7fff) / sizeof(starfire_tx_desc);
struct sk_buff *skb = np->tx_info[entry].skb;
np->tx_info[entry].skb = NULL;
- pci_unmap_single(np->pci_dev,
+ dma_unmap_single(&np->pci_dev->dev,
np->tx_info[entry].mapping,
skb_first_frag_len(skb),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
np->tx_info[entry].mapping = 0;
np->dirty_tx += np->tx_info[entry].used_slots;
entry = (entry + np->tx_info[entry].used_slots) % TX_RING_SIZE;
{
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- pci_unmap_single(np->pci_dev,
+ dma_unmap_single(&np->pci_dev->dev,
np->tx_info[entry].mapping,
skb_frag_size(&skb_shinfo(skb)->frags[i]),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
np->dirty_tx++;
entry++;
}
@@ -1461,16 +1464,18 @@ static int __netdev_rx(struct net_device *dev, int *quota)
if (pkt_len < rx_copybreak &&
(skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) {
skb_reserve(skb, 2); /* 16 byte align the IP header */
- pci_dma_sync_single_for_cpu(np->pci_dev,
- np->rx_info[entry].mapping,
- pkt_len, PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_cpu(&np->pci_dev->dev,
+ np->rx_info[entry].mapping,
+ pkt_len, DMA_FROM_DEVICE);
skb_copy_to_linear_data(skb, np->rx_info[entry].skb->data, pkt_len);
- pci_dma_sync_single_for_device(np->pci_dev,
- np->rx_info[entry].mapping,
- pkt_len, PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_device(&np->pci_dev->dev,
+ np->rx_info[entry].mapping,
+ pkt_len, DMA_FROM_DEVICE);
skb_put(skb, pkt_len);
} else {
- pci_unmap_single(np->pci_dev, np->rx_info[entry].mapping, np->rx_buf_sz, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&np->pci_dev->dev,
+ np->rx_info[entry].mapping,
+ np->rx_buf_sz, DMA_FROM_DEVICE);
skb = np->rx_info[entry].skb;
skb_put(skb, pkt_len);
np->rx_info[entry].skb = NULL;
@@ -1588,9 +1593,9 @@ static void refill_rx_ring(struct net_device *dev)
if (skb == NULL)
break; /* Better luck next round. */
np->rx_info[entry].mapping =
- pci_map_single(np->pci_dev, skb->data, np->rx_buf_sz, PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(np->pci_dev,
- np->rx_info[entry].mapping)) {
+ dma_map_single(&np->pci_dev->dev, skb->data,
+ np->rx_buf_sz, DMA_FROM_DEVICE);
+ if (dma_mapping_error(&np->pci_dev->dev, np->rx_info[entry].mapping)) {
dev_kfree_skb(skb);
np->rx_info[entry].skb = NULL;
break;
@@ -1963,7 +1968,9 @@ static int netdev_close(struct net_device *dev)
for (i = 0; i < RX_RING_SIZE; i++) {
np->rx_ring[i].rxaddr = cpu_to_dma(0xBADF00D0); /* An invalid address. */
if (np->rx_info[i].skb != NULL) {
- pci_unmap_single(np->pci_dev, np->rx_info[i].mapping, np->rx_buf_sz, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&np->pci_dev->dev,
+ np->rx_info[i].mapping,
+ np->rx_buf_sz, DMA_FROM_DEVICE);
dev_kfree_skb(np->rx_info[i].skb);
}
np->rx_info[i].skb = NULL;
@@ -1973,9 +1980,8 @@ static int netdev_close(struct net_device *dev)
struct sk_buff *skb = np->tx_info[i].skb;
if (skb == NULL)
continue;
- pci_unmap_single(np->pci_dev,
- np->tx_info[i].mapping,
- skb_first_frag_len(skb), PCI_DMA_TODEVICE);
+ dma_unmap_single(&np->pci_dev->dev, np->tx_info[i].mapping,
+ skb_first_frag_len(skb), DMA_TO_DEVICE);
np->tx_info[i].mapping = 0;
dev_kfree_skb(skb);
np->tx_info[i].skb = NULL;
@@ -2018,7 +2024,8 @@ static void starfire_remove_one(struct pci_dev *pdev)
unregister_netdev(dev);
if (np->queue_mem)
- pci_free_consistent(pdev, np->queue_mem_size, np->queue_mem, np->queue_mem_dma);
+ dma_free_coherent(&pdev->dev, np->queue_mem_size,
+ np->queue_mem, np->queue_mem_dma);
/* XXX: add wakeup code -- requires firmware for MagicPacket */
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
index d35a338..643f5e6 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
@@ -18,6 +18,7 @@
#include <linux/of_platform.h>
#include <linux/of_net.h>
#include <linux/of_mdio.h>
+#include <linux/mdio/mdio-xgene.h>
#include <linux/module.h>
#include <net/ip.h>
#include <linux/prefetch.h>
@@ -26,7 +27,6 @@
#include "xgene_enet_hw.h"
#include "xgene_enet_cle.h"
#include "xgene_enet_ring2.h"
-#include "../../../phy/mdio-xgene.h"
#define ETHER_MIN_PACKET 64
#define ETHER_STD_PACKET 1518
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index decab9a8..c7288e1f 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -826,16 +826,16 @@ static inline void atl1c_clean_buffer(struct pci_dev *pdev,
return;
if (buffer_info->dma) {
if (buffer_info->flags & ATL1C_PCIMAP_FROMDEVICE)
- pci_driection = PCI_DMA_FROMDEVICE;
+ pci_driection = DMA_FROM_DEVICE;
else
- pci_driection = PCI_DMA_TODEVICE;
+ pci_driection = DMA_TO_DEVICE;
if (buffer_info->flags & ATL1C_PCIMAP_SINGLE)
- pci_unmap_single(pdev, buffer_info->dma,
- buffer_info->length, pci_driection);
+ dma_unmap_single(&pdev->dev, buffer_info->dma,
+ buffer_info->length, pci_driection);
else if (buffer_info->flags & ATL1C_PCIMAP_PAGE)
- pci_unmap_page(pdev, buffer_info->dma,
- buffer_info->length, pci_driection);
+ dma_unmap_page(&pdev->dev, buffer_info->dma,
+ buffer_info->length, pci_driection);
}
if (buffer_info->skb)
dev_consume_skb_any(buffer_info->skb);
@@ -933,9 +933,8 @@ static void atl1c_free_ring_resources(struct atl1c_adapter *adapter)
{
struct pci_dev *pdev = adapter->pdev;
- pci_free_consistent(pdev, adapter->ring_header.size,
- adapter->ring_header.desc,
- adapter->ring_header.dma);
+ dma_free_coherent(&pdev->dev, adapter->ring_header.size,
+ adapter->ring_header.desc, adapter->ring_header.dma);
adapter->ring_header.desc = NULL;
/* Note: just free tdp_ring.buffer_info,
@@ -1717,10 +1716,9 @@ static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter)
ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY);
buffer_info->skb = skb;
buffer_info->length = adapter->rx_buffer_len;
- mapping = pci_map_single(pdev, vir_addr,
- buffer_info->length,
- PCI_DMA_FROMDEVICE);
- if (unlikely(pci_dma_mapping_error(pdev, mapping))) {
+ mapping = dma_map_single(&pdev->dev, vir_addr,
+ buffer_info->length, DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(&pdev->dev, mapping))) {
dev_kfree_skb(skb);
buffer_info->skb = NULL;
buffer_info->length = 0;
@@ -1831,8 +1829,8 @@ static void atl1c_clean_rx_irq(struct atl1c_adapter *adapter,
rfd_index = (rrs->word0 >> RRS_RX_RFD_INDEX_SHIFT) &
RRS_RX_RFD_INDEX_MASK;
buffer_info = &rfd_ring->buffer_info[rfd_index];
- pci_unmap_single(pdev, buffer_info->dma,
- buffer_info->length, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&pdev->dev, buffer_info->dma,
+ buffer_info->length, DMA_FROM_DEVICE);
skb = buffer_info->skb;
} else {
/* TODO */
@@ -2106,10 +2104,10 @@ static int atl1c_tx_map(struct atl1c_adapter *adapter,
buffer_info = atl1c_get_tx_buffer(adapter, use_tpd);
buffer_info->length = map_len;
- buffer_info->dma = pci_map_single(adapter->pdev,
- skb->data, hdr_len, PCI_DMA_TODEVICE);
- if (unlikely(pci_dma_mapping_error(adapter->pdev,
- buffer_info->dma)))
+ buffer_info->dma = dma_map_single(&adapter->pdev->dev,
+ skb->data, hdr_len,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)))
goto err_dma;
ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY);
ATL1C_SET_PCIMAP_TYPE(buffer_info, ATL1C_PCIMAP_SINGLE,
@@ -2131,10 +2129,10 @@ static int atl1c_tx_map(struct atl1c_adapter *adapter,
buffer_info = atl1c_get_tx_buffer(adapter, use_tpd);
buffer_info->length = buf_len - mapped_len;
buffer_info->dma =
- pci_map_single(adapter->pdev, skb->data + mapped_len,
- buffer_info->length, PCI_DMA_TODEVICE);
- if (unlikely(pci_dma_mapping_error(adapter->pdev,
- buffer_info->dma)))
+ dma_map_single(&adapter->pdev->dev,
+ skb->data + mapped_len,
+ buffer_info->length, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)))
goto err_dma;
ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY);
@@ -2542,8 +2540,8 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
* various kernel subsystems to support the mechanics required by a
* fixed-high-32-bit system.
*/
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) ||
- (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)) != 0)) {
+ if ((dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0) ||
+ (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0)) {
dev_err(&pdev->dev, "No usable DMA configuration,aborting\n");
goto err_dma;
}
diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
index 223ef84..fb78f6c 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
@@ -654,11 +654,13 @@ static void atl1e_clean_tx_ring(struct atl1e_adapter *adapter)
tx_buffer = &tx_ring->tx_buffer[index];
if (tx_buffer->dma) {
if (tx_buffer->flags & ATL1E_TX_PCIMAP_SINGLE)
- pci_unmap_single(pdev, tx_buffer->dma,
- tx_buffer->length, PCI_DMA_TODEVICE);
+ dma_unmap_single(&pdev->dev, tx_buffer->dma,
+ tx_buffer->length,
+ DMA_TO_DEVICE);
else if (tx_buffer->flags & ATL1E_TX_PCIMAP_PAGE)
- pci_unmap_page(pdev, tx_buffer->dma,
- tx_buffer->length, PCI_DMA_TODEVICE);
+ dma_unmap_page(&pdev->dev, tx_buffer->dma,
+ tx_buffer->length,
+ DMA_TO_DEVICE);
tx_buffer->dma = 0;
}
}
@@ -774,8 +776,8 @@ static void atl1e_free_ring_resources(struct atl1e_adapter *adapter)
atl1e_clean_rx_ring(adapter);
if (adapter->ring_vir_addr) {
- pci_free_consistent(pdev, adapter->ring_size,
- adapter->ring_vir_addr, adapter->ring_dma);
+ dma_free_coherent(&pdev->dev, adapter->ring_size,
+ adapter->ring_vir_addr, adapter->ring_dma);
adapter->ring_vir_addr = NULL;
}
@@ -810,11 +812,12 @@ static int atl1e_setup_ring_resources(struct atl1e_adapter *adapter)
/* real ring DMA buffer */
size = adapter->ring_size;
- adapter->ring_vir_addr = pci_zalloc_consistent(pdev, adapter->ring_size,
- &adapter->ring_dma);
+ adapter->ring_vir_addr = dma_alloc_coherent(&pdev->dev,
+ adapter->ring_size,
+ &adapter->ring_dma, GFP_KERNEL);
if (adapter->ring_vir_addr == NULL) {
netdev_err(adapter->netdev,
- "pci_alloc_consistent failed, size = D%d\n", size);
+ "dma_alloc_coherent failed, size = D%d\n", size);
return -ENOMEM;
}
@@ -870,8 +873,8 @@ static int atl1e_setup_ring_resources(struct atl1e_adapter *adapter)
return 0;
failed:
if (adapter->ring_vir_addr != NULL) {
- pci_free_consistent(pdev, adapter->ring_size,
- adapter->ring_vir_addr, adapter->ring_dma);
+ dma_free_coherent(&pdev->dev, adapter->ring_size,
+ adapter->ring_vir_addr, adapter->ring_dma);
adapter->ring_vir_addr = NULL;
}
return err;
@@ -1233,11 +1236,15 @@ static bool atl1e_clean_tx_irq(struct atl1e_adapter *adapter)
tx_buffer = &tx_ring->tx_buffer[next_to_clean];
if (tx_buffer->dma) {
if (tx_buffer->flags & ATL1E_TX_PCIMAP_SINGLE)
- pci_unmap_single(adapter->pdev, tx_buffer->dma,
- tx_buffer->length, PCI_DMA_TODEVICE);
+ dma_unmap_single(&adapter->pdev->dev,
+ tx_buffer->dma,
+ tx_buffer->length,
+ DMA_TO_DEVICE);
else if (tx_buffer->flags & ATL1E_TX_PCIMAP_PAGE)
- pci_unmap_page(adapter->pdev, tx_buffer->dma,
- tx_buffer->length, PCI_DMA_TODEVICE);
+ dma_unmap_page(&adapter->pdev->dev,
+ tx_buffer->dma,
+ tx_buffer->length,
+ DMA_TO_DEVICE);
tx_buffer->dma = 0;
}
@@ -1710,8 +1717,9 @@ static int atl1e_tx_map(struct atl1e_adapter *adapter,
tx_buffer = atl1e_get_tx_buffer(adapter, use_tpd);
tx_buffer->length = map_len;
- tx_buffer->dma = pci_map_single(adapter->pdev,
- skb->data, hdr_len, PCI_DMA_TODEVICE);
+ tx_buffer->dma = dma_map_single(&adapter->pdev->dev,
+ skb->data, hdr_len,
+ DMA_TO_DEVICE);
if (dma_mapping_error(&adapter->pdev->dev, tx_buffer->dma))
return -ENOSPC;
@@ -1739,8 +1747,9 @@ static int atl1e_tx_map(struct atl1e_adapter *adapter,
((buf_len - mapped_len) >= MAX_TX_BUF_LEN) ?
MAX_TX_BUF_LEN : (buf_len - mapped_len);
tx_buffer->dma =
- pci_map_single(adapter->pdev, skb->data + mapped_len,
- map_len, PCI_DMA_TODEVICE);
+ dma_map_single(&adapter->pdev->dev,
+ skb->data + mapped_len, map_len,
+ DMA_TO_DEVICE);
if (dma_mapping_error(&adapter->pdev->dev, tx_buffer->dma)) {
/* We need to unwind the mappings we've done */
@@ -1749,8 +1758,10 @@ static int atl1e_tx_map(struct atl1e_adapter *adapter,
while (adapter->tx_ring.next_to_use != ring_end) {
tpd = atl1e_get_tpd(adapter);
tx_buffer = atl1e_get_tx_buffer(adapter, tpd);
- pci_unmap_single(adapter->pdev, tx_buffer->dma,
- tx_buffer->length, PCI_DMA_TODEVICE);
+ dma_unmap_single(&adapter->pdev->dev,
+ tx_buffer->dma,
+ tx_buffer->length,
+ DMA_TO_DEVICE);
}
/* Reset the tx rings next pointer */
adapter->tx_ring.next_to_use = ring_start;
@@ -2300,8 +2311,8 @@ static int atl1e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
* various kernel subsystems to support the mechanics required by a
* fixed-high-32-bit system.
*/
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) ||
- (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)) != 0)) {
+ if ((dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0) ||
+ (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0)) {
dev_err(&pdev->dev, "No usable DMA configuration,aborting\n");
goto err_dma;
}
diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c
index b35fcfc..60f8aa7 100644
--- a/drivers/net/ethernet/atheros/atlx/atl1.c
+++ b/drivers/net/ethernet/atheros/atlx/atl1.c
@@ -1050,11 +1050,11 @@ static s32 atl1_setup_ring_resources(struct atl1_adapter *adapter)
+ sizeof(struct stats_msg_block)
+ 40;
- ring_header->desc = pci_alloc_consistent(pdev, ring_header->size,
- &ring_header->dma);
+ ring_header->desc = dma_alloc_coherent(&pdev->dev, ring_header->size,
+ &ring_header->dma, GFP_KERNEL);
if (unlikely(!ring_header->desc)) {
if (netif_msg_drv(adapter))
- dev_err(&pdev->dev, "pci_alloc_consistent failed\n");
+ dev_err(&pdev->dev, "dma_alloc_coherent failed\n");
goto err_nomem;
}
@@ -1136,8 +1136,8 @@ static void atl1_clean_rx_ring(struct atl1_adapter *adapter)
for (i = 0; i < rfd_ring->count; i++) {
buffer_info = &rfd_ring->buffer_info[i];
if (buffer_info->dma) {
- pci_unmap_page(pdev, buffer_info->dma,
- buffer_info->length, PCI_DMA_FROMDEVICE);
+ dma_unmap_page(&pdev->dev, buffer_info->dma,
+ buffer_info->length, DMA_FROM_DEVICE);
buffer_info->dma = 0;
}
if (buffer_info->skb) {
@@ -1175,8 +1175,8 @@ static void atl1_clean_tx_ring(struct atl1_adapter *adapter)
for (i = 0; i < tpd_ring->count; i++) {
buffer_info = &tpd_ring->buffer_info[i];
if (buffer_info->dma) {
- pci_unmap_page(pdev, buffer_info->dma,
- buffer_info->length, PCI_DMA_TODEVICE);
+ dma_unmap_page(&pdev->dev, buffer_info->dma,
+ buffer_info->length, DMA_TO_DEVICE);
buffer_info->dma = 0;
}
}
@@ -1217,8 +1217,8 @@ static void atl1_free_ring_resources(struct atl1_adapter *adapter)
atl1_clean_rx_ring(adapter);
kfree(tpd_ring->buffer_info);
- pci_free_consistent(pdev, ring_header->size, ring_header->desc,
- ring_header->dma);
+ dma_free_coherent(&pdev->dev, ring_header->size, ring_header->desc,
+ ring_header->dma);
tpd_ring->buffer_info = NULL;
tpd_ring->desc = NULL;
@@ -1866,9 +1866,9 @@ static u16 atl1_alloc_rx_buffers(struct atl1_adapter *adapter)
buffer_info->length = (u16) adapter->rx_buffer_len;
page = virt_to_page(skb->data);
offset = offset_in_page(skb->data);
- buffer_info->dma = pci_map_page(pdev, page, offset,
+ buffer_info->dma = dma_map_page(&pdev->dev, page, offset,
adapter->rx_buffer_len,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
rfd_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
rfd_desc->buf_len = cpu_to_le16(adapter->rx_buffer_len);
rfd_desc->coalese = 0;
@@ -1992,8 +1992,8 @@ static int atl1_intr_rx(struct atl1_adapter *adapter, int budget)
}
/* Good Receive */
- pci_unmap_page(adapter->pdev, buffer_info->dma,
- buffer_info->length, PCI_DMA_FROMDEVICE);
+ dma_unmap_page(&adapter->pdev->dev, buffer_info->dma,
+ buffer_info->length, DMA_FROM_DEVICE);
buffer_info->dma = 0;
skb = buffer_info->skb;
length = le16_to_cpu(rrd->xsz.xsum_sz.pkt_size);
@@ -2062,8 +2062,8 @@ static int atl1_intr_tx(struct atl1_adapter *adapter)
while (cmb_tpd_next_to_clean != sw_tpd_next_to_clean) {
buffer_info = &tpd_ring->buffer_info[sw_tpd_next_to_clean];
if (buffer_info->dma) {
- pci_unmap_page(adapter->pdev, buffer_info->dma,
- buffer_info->length, PCI_DMA_TODEVICE);
+ dma_unmap_page(&adapter->pdev->dev, buffer_info->dma,
+ buffer_info->length, DMA_TO_DEVICE);
buffer_info->dma = 0;
}
@@ -2210,9 +2210,9 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
buffer_info->length = hdr_len;
page = virt_to_page(skb->data);
offset = offset_in_page(skb->data);
- buffer_info->dma = pci_map_page(adapter->pdev, page,
+ buffer_info->dma = dma_map_page(&adapter->pdev->dev, page,
offset, hdr_len,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
if (++next_to_use == tpd_ring->count)
next_to_use = 0;
@@ -2235,9 +2235,10 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
(hdr_len + i * ATL1_MAX_TX_BUF_LEN));
offset = offset_in_page(skb->data +
(hdr_len + i * ATL1_MAX_TX_BUF_LEN));
- buffer_info->dma = pci_map_page(adapter->pdev,
- page, offset, buffer_info->length,
- PCI_DMA_TODEVICE);
+ buffer_info->dma = dma_map_page(&adapter->pdev->dev,
+ page, offset,
+ buffer_info->length,
+ DMA_TO_DEVICE);
if (++next_to_use == tpd_ring->count)
next_to_use = 0;
}
@@ -2247,8 +2248,9 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
buffer_info->length = buf_len;
page = virt_to_page(skb->data);
offset = offset_in_page(skb->data);
- buffer_info->dma = pci_map_page(adapter->pdev, page,
- offset, buf_len, PCI_DMA_TODEVICE);
+ buffer_info->dma = dma_map_page(&adapter->pdev->dev, page,
+ offset, buf_len,
+ DMA_TO_DEVICE);
if (++next_to_use == tpd_ring->count)
next_to_use = 0;
}
@@ -2922,7 +2924,7 @@ static int atl1_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
* various kernel subsystems to support the mechanics required by a
* fixed-high-32-bit system.
*/
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "no usable DMA configuration\n");
goto err_dma;
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c
index c915852..e2526c0 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c
@@ -281,8 +281,8 @@ static s32 atl2_setup_ring_resources(struct atl2_adapter *adapter)
adapter->txs_ring_size * 4 + 7 + /* dword align */
adapter->rxd_ring_size * 1536 + 127; /* 128bytes align */
- adapter->ring_vir_addr = pci_alloc_consistent(pdev, size,
- &adapter->ring_dma);
+ adapter->ring_vir_addr = dma_alloc_coherent(&pdev->dev, size,
+ &adapter->ring_dma, GFP_KERNEL);
if (!adapter->ring_vir_addr)
return -ENOMEM;
@@ -663,8 +663,8 @@ static int atl2_request_irq(struct atl2_adapter *adapter)
static void atl2_free_ring_resources(struct atl2_adapter *adapter)
{
struct pci_dev *pdev = adapter->pdev;
- pci_free_consistent(pdev, adapter->ring_size, adapter->ring_vir_addr,
- adapter->ring_dma);
+ dma_free_coherent(&pdev->dev, adapter->ring_size,
+ adapter->ring_vir_addr, adapter->ring_dma);
}
/**
@@ -1328,8 +1328,8 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
* until the kernel has the proper infrastructure to support 64-bit DMA
* on these devices.
*/
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) &&
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) {
+ if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)) &&
+ dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32))) {
printk(KERN_ERR "atl2: No usable DMA configuration, aborting\n");
err = -EIO;
goto err_dma;
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 0762d5d..0fdd19d 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -20,6 +20,7 @@
#include <linux/phy.h>
#include <linux/phy_fixed.h>
#include <net/dsa.h>
+#include <linux/clk.h>
#include <net/ip.h>
#include <net/ipv6.h>
@@ -186,6 +187,11 @@ static int bcm_sysport_set_features(struct net_device *dev,
netdev_features_t features)
{
struct bcm_sysport_priv *priv = netdev_priv(dev);
+ int ret;
+
+ ret = clk_prepare_enable(priv->clk);
+ if (ret)
+ return ret;
/* Read CRC forward */
if (!priv->is_lite)
@@ -197,6 +203,8 @@ static int bcm_sysport_set_features(struct net_device *dev,
bcm_sysport_set_rx_csum(dev, features);
bcm_sysport_set_tx_csum(dev, features);
+ clk_disable_unprepare(priv->clk);
+
return 0;
}
@@ -1940,6 +1948,8 @@ static int bcm_sysport_open(struct net_device *dev)
unsigned int i;
int ret;
+ clk_prepare_enable(priv->clk);
+
/* Reset UniMAC */
umac_reset(priv);
@@ -1970,7 +1980,8 @@ static int bcm_sysport_open(struct net_device *dev)
0, priv->phy_interface);
if (!phydev) {
netdev_err(dev, "could not attach to PHY\n");
- return -ENODEV;
+ ret = -ENODEV;
+ goto out_clk_disable;
}
/* Reset house keeping link status */
@@ -2048,6 +2059,8 @@ static int bcm_sysport_open(struct net_device *dev)
free_irq(priv->irq0, dev);
out_phy_disconnect:
phy_disconnect(phydev);
+out_clk_disable:
+ clk_disable_unprepare(priv->clk);
return ret;
}
@@ -2106,6 +2119,8 @@ static int bcm_sysport_stop(struct net_device *dev)
/* Disconnect from PHY */
phy_disconnect(dev->phydev);
+ clk_disable_unprepare(priv->clk);
+
return 0;
}
@@ -2487,6 +2502,10 @@ static int bcm_sysport_probe(struct platform_device *pdev)
/* Initialize private members */
priv = netdev_priv(dev);
+ priv->clk = devm_clk_get_optional(&pdev->dev, "sw_sysport");
+ if (IS_ERR(priv->clk))
+ return PTR_ERR(priv->clk);
+
/* Allocate number of TX rings */
priv->tx_rings = devm_kcalloc(&pdev->dev, txq,
sizeof(struct bcm_sysport_tx_ring),
@@ -2566,6 +2585,10 @@ static int bcm_sysport_probe(struct platform_device *pdev)
if (!ret)
device_set_wakeup_capable(&pdev->dev, 1);
+ priv->wol_clk = devm_clk_get_optional(&pdev->dev, "sw_sysportwol");
+ if (IS_ERR(priv->wol_clk))
+ return PTR_ERR(priv->wol_clk);
+
/* Set the needed headroom once and for all */
BUILD_BUG_ON(sizeof(struct bcm_tsb) != 8);
dev->needed_headroom += sizeof(struct bcm_tsb);
@@ -2590,6 +2613,8 @@ static int bcm_sysport_probe(struct platform_device *pdev)
goto err_deregister_notifier;
}
+ clk_prepare_enable(priv->clk);
+
priv->rev = topctrl_readl(priv, REV_CNTL) & REV_MASK;
dev_info(&pdev->dev,
"Broadcom SYSTEMPORT%s " REV_FMT
@@ -2598,6 +2623,8 @@ static int bcm_sysport_probe(struct platform_device *pdev)
(priv->rev >> 8) & 0xff, priv->rev & 0xff,
priv->irq0, priv->irq1, txq, rxq);
+ clk_disable_unprepare(priv->clk);
+
return 0;
err_deregister_notifier:
@@ -2751,8 +2778,12 @@ static int __maybe_unused bcm_sysport_suspend(struct device *d)
bcm_sysport_fini_rx_ring(priv);
/* Get prepared for Wake-on-LAN */
- if (device_may_wakeup(d) && priv->wolopts)
+ if (device_may_wakeup(d) && priv->wolopts) {
+ clk_prepare_enable(priv->wol_clk);
ret = bcm_sysport_suspend_to_wol(priv);
+ }
+
+ clk_disable_unprepare(priv->clk);
return ret;
}
@@ -2767,6 +2798,10 @@ static int __maybe_unused bcm_sysport_resume(struct device *d)
if (!netif_running(dev))
return 0;
+ clk_prepare_enable(priv->clk);
+ if (priv->wolopts)
+ clk_disable_unprepare(priv->wol_clk);
+
umac_reset(priv);
/* Disable the UniMAC RX/TX */
@@ -2846,6 +2881,7 @@ static int __maybe_unused bcm_sysport_resume(struct device *d)
out_free_tx_rings:
for (i = 0; i < dev->num_tx_queues; i++)
bcm_sysport_fini_tx_ring(priv, i);
+ clk_disable_unprepare(priv->clk);
return ret;
}
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h
index 6d80735..3a5cb6f 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.h
+++ b/drivers/net/ethernet/broadcom/bcmsysport.h
@@ -770,6 +770,8 @@ struct bcm_sysport_priv {
u32 wolopts;
u8 sopass[SOPASS_MAX];
unsigned int wol_irq_disabled:1;
+ struct clk *clk;
+ struct clk *wol_clk;
/* MIB related fields */
struct bcm_sysport_mib mib;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index 0171690..cf4fe5b 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -280,13 +280,10 @@ int octeon_init_droq(struct octeon_device *oct,
dev_dbg(&oct->pci_dev->dev, "droq[%d]: num_desc: %d\n", q_no,
droq->max_count);
- droq->recv_buf_list = (struct octeon_recv_buffer *)
- vzalloc_node(array_size(droq->max_count, OCT_DROQ_RECVBUF_SIZE),
- numa_node);
+ droq->recv_buf_list = vzalloc_node(array_size(droq->max_count, OCT_DROQ_RECVBUF_SIZE),
+ numa_node);
if (!droq->recv_buf_list)
- droq->recv_buf_list = (struct octeon_recv_buffer *)
- vzalloc(array_size(droq->max_count,
- OCT_DROQ_RECVBUF_SIZE));
+ droq->recv_buf_list = vzalloc(array_size(droq->max_count, OCT_DROQ_RECVBUF_SIZE));
if (!droq->recv_buf_list) {
dev_err(&oct->pci_dev->dev, "Output queue recv buf list alloc failed\n");
goto init_droq_fail;
diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig
index f6f3ef9..87cc0ef 100644
--- a/drivers/net/ethernet/chelsio/Kconfig
+++ b/drivers/net/ethernet/chelsio/Kconfig
@@ -134,4 +134,6 @@
help
Common library for Chelsio drivers.
+source "drivers/net/ethernet/chelsio/inline_crypto/Kconfig"
+
endif # NET_VENDOR_CHELSIO
diff --git a/drivers/net/ethernet/chelsio/Makefile b/drivers/net/ethernet/chelsio/Makefile
index c0f978d..1a6fd8b 100644
--- a/drivers/net/ethernet/chelsio/Makefile
+++ b/drivers/net/ethernet/chelsio/Makefile
@@ -8,3 +8,4 @@
obj-$(CONFIG_CHELSIO_T4) += cxgb4/
obj-$(CONFIG_CHELSIO_T4VF) += cxgb4vf/
obj-$(CONFIG_CHELSIO_LIB) += libcxgb/
+obj-$(CONFIG_CHELSIO_INLINE_CRYPTO) += inline_crypto/
diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
index 9973679..0e4a0f4 100644
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
@@ -997,17 +997,17 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
goto out_disable_pdev;
}
- if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
+ if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))) {
pci_using_dac = 1;
- if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
- pr_err("%s: unable to obtain 64-bit DMA for "
- "consistent allocations\n", pci_name(pdev));
+ if (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) {
+ pr_err("%s: unable to obtain 64-bit DMA for coherent allocations\n",
+ pci_name(pdev));
err = -ENODEV;
goto out_disable_pdev;
}
- } else if ((err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) != 0) {
+ } else if ((err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) != 0) {
pr_err("%s: no usable DMA configuration\n", pci_name(pdev));
goto out_disable_pdev;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
index 47b5c8e..21016de 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -509,9 +509,8 @@ static void free_freelQ_buffers(struct pci_dev *pdev, struct freelQ *q)
while (q->credits--) {
struct freelQ_ce *ce = &q->centries[cidx];
- pci_unmap_single(pdev, dma_unmap_addr(ce, dma_addr),
- dma_unmap_len(ce, dma_len),
- PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&pdev->dev, dma_unmap_addr(ce, dma_addr),
+ dma_unmap_len(ce, dma_len), DMA_FROM_DEVICE);
dev_kfree_skb(ce->skb);
ce->skb = NULL;
if (++cidx == q->size)
@@ -529,8 +528,8 @@ static void free_rx_resources(struct sge *sge)
if (sge->respQ.entries) {
size = sizeof(struct respQ_e) * sge->respQ.size;
- pci_free_consistent(pdev, size, sge->respQ.entries,
- sge->respQ.dma_addr);
+ dma_free_coherent(&pdev->dev, size, sge->respQ.entries,
+ sge->respQ.dma_addr);
}
for (i = 0; i < SGE_FREELQ_N; i++) {
@@ -542,8 +541,8 @@ static void free_rx_resources(struct sge *sge)
}
if (q->entries) {
size = sizeof(struct freelQ_e) * q->size;
- pci_free_consistent(pdev, size, q->entries,
- q->dma_addr);
+ dma_free_coherent(&pdev->dev, size, q->entries,
+ q->dma_addr);
}
}
}
@@ -564,7 +563,8 @@ static int alloc_rx_resources(struct sge *sge, struct sge_params *p)
q->size = p->freelQ_size[i];
q->dma_offset = sge->rx_pkt_pad ? 0 : NET_IP_ALIGN;
size = sizeof(struct freelQ_e) * q->size;
- q->entries = pci_alloc_consistent(pdev, size, &q->dma_addr);
+ q->entries = dma_alloc_coherent(&pdev->dev, size,
+ &q->dma_addr, GFP_KERNEL);
if (!q->entries)
goto err_no_mem;
@@ -601,7 +601,8 @@ static int alloc_rx_resources(struct sge *sge, struct sge_params *p)
sge->respQ.credits = 0;
size = sizeof(struct respQ_e) * sge->respQ.size;
sge->respQ.entries =
- pci_alloc_consistent(pdev, size, &sge->respQ.dma_addr);
+ dma_alloc_coherent(&pdev->dev, size, &sge->respQ.dma_addr,
+ GFP_KERNEL);
if (!sge->respQ.entries)
goto err_no_mem;
return 0;
@@ -624,9 +625,10 @@ static void free_cmdQ_buffers(struct sge *sge, struct cmdQ *q, unsigned int n)
ce = &q->centries[cidx];
while (n--) {
if (likely(dma_unmap_len(ce, dma_len))) {
- pci_unmap_single(pdev, dma_unmap_addr(ce, dma_addr),
+ dma_unmap_single(&pdev->dev,
+ dma_unmap_addr(ce, dma_addr),
dma_unmap_len(ce, dma_len),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
if (q->sop)
q->sop = 0;
}
@@ -663,8 +665,8 @@ static void free_tx_resources(struct sge *sge)
}
if (q->entries) {
size = sizeof(struct cmdQ_e) * q->size;
- pci_free_consistent(pdev, size, q->entries,
- q->dma_addr);
+ dma_free_coherent(&pdev->dev, size, q->entries,
+ q->dma_addr);
}
}
}
@@ -689,7 +691,8 @@ static int alloc_tx_resources(struct sge *sge, struct sge_params *p)
q->stop_thres = 0;
spin_lock_init(&q->lock);
size = sizeof(struct cmdQ_e) * q->size;
- q->entries = pci_alloc_consistent(pdev, size, &q->dma_addr);
+ q->entries = dma_alloc_coherent(&pdev->dev, size,
+ &q->dma_addr, GFP_KERNEL);
if (!q->entries)
goto err_no_mem;
@@ -837,8 +840,8 @@ static void refill_free_list(struct sge *sge, struct freelQ *q)
break;
skb_reserve(skb, q->dma_offset);
- mapping = pci_map_single(pdev, skb->data, dma_len,
- PCI_DMA_FROMDEVICE);
+ mapping = dma_map_single(&pdev->dev, skb->data, dma_len,
+ DMA_FROM_DEVICE);
skb_reserve(skb, sge->rx_pkt_pad);
ce->skb = skb;
@@ -1049,15 +1052,15 @@ static inline struct sk_buff *get_packet(struct adapter *adapter,
goto use_orig_buf;
skb_put(skb, len);
- pci_dma_sync_single_for_cpu(pdev,
- dma_unmap_addr(ce, dma_addr),
- dma_unmap_len(ce, dma_len),
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_cpu(&pdev->dev,
+ dma_unmap_addr(ce, dma_addr),
+ dma_unmap_len(ce, dma_len),
+ DMA_FROM_DEVICE);
skb_copy_from_linear_data(ce->skb, skb->data, len);
- pci_dma_sync_single_for_device(pdev,
- dma_unmap_addr(ce, dma_addr),
- dma_unmap_len(ce, dma_len),
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_device(&pdev->dev,
+ dma_unmap_addr(ce, dma_addr),
+ dma_unmap_len(ce, dma_len),
+ DMA_FROM_DEVICE);
recycle_fl_buf(fl, fl->cidx);
return skb;
}
@@ -1068,8 +1071,8 @@ static inline struct sk_buff *get_packet(struct adapter *adapter,
return NULL;
}
- pci_unmap_single(pdev, dma_unmap_addr(ce, dma_addr),
- dma_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&pdev->dev, dma_unmap_addr(ce, dma_addr),
+ dma_unmap_len(ce, dma_len), DMA_FROM_DEVICE);
skb = ce->skb;
prefetch(skb->data);
@@ -1091,8 +1094,9 @@ static void unexpected_offload(struct adapter *adapter, struct freelQ *fl)
struct freelQ_ce *ce = &fl->centries[fl->cidx];
struct sk_buff *skb = ce->skb;
- pci_dma_sync_single_for_cpu(adapter->pdev, dma_unmap_addr(ce, dma_addr),
- dma_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_cpu(&adapter->pdev->dev,
+ dma_unmap_addr(ce, dma_addr),
+ dma_unmap_len(ce, dma_len), DMA_FROM_DEVICE);
pr_err("%s: unexpected offload packet, cmd %u\n",
adapter->name, *skb->data);
recycle_fl_buf(fl, fl->cidx);
@@ -1209,8 +1213,8 @@ static inline void write_tx_descs(struct adapter *adapter, struct sk_buff *skb,
e = e1 = &q->entries[pidx];
ce = &q->centries[pidx];
- mapping = pci_map_single(adapter->pdev, skb->data,
- skb_headlen(skb), PCI_DMA_TODEVICE);
+ mapping = dma_map_single(&adapter->pdev->dev, skb->data,
+ skb_headlen(skb), DMA_TO_DEVICE);
desc_mapping = mapping;
desc_len = skb_headlen(skb);
diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c
index 6dabbf1..ee6188d 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
@@ -2372,10 +2372,7 @@ static int process_responses(struct adapter *adap, struct sge_qset *qs,
if (fl->use_pages) {
void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
- prefetch(addr);
-#if L1_CACHE_BYTES < 128
- prefetch(addr + L1_CACHE_BYTES);
-#endif
+ net_prefetch(addr);
__refill_fl(adap, fl);
if (lro > 0) {
lro_add_page(adap, qs, fl,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 9cb8b22..e5d5c0f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1196,6 +1196,9 @@ struct adapter {
struct cxgb4_tc_u32_table *tc_u32;
struct chcr_ktls chcr_ktls;
struct chcr_stats_debug chcr_stats;
+#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE)
+ struct ch_ipsec_stats_debug ch_ipsec_stats;
+#endif
/* TC flower offload */
bool tc_flower_initialized;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 05f33b7..42112e8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -3542,14 +3542,17 @@ static int chcr_stats_show(struct seq_file *seq, void *v)
atomic_read(&adap->chcr_stats.error));
seq_printf(seq, "Fallback: %10u \n",
atomic_read(&adap->chcr_stats.fallback));
- seq_printf(seq, "IPSec PDU: %10u\n",
- atomic_read(&adap->chcr_stats.ipsec_cnt));
seq_printf(seq, "TLS PDU Tx: %10u\n",
atomic_read(&adap->chcr_stats.tls_pdu_tx));
seq_printf(seq, "TLS PDU Rx: %10u\n",
atomic_read(&adap->chcr_stats.tls_pdu_rx));
seq_printf(seq, "TLS Keys (DDR) Count: %10u\n",
atomic_read(&adap->chcr_stats.tls_key));
+#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE)
+ seq_puts(seq, "\nChelsio Inline IPsec Crypto Accelerator Stats\n");
+ seq_printf(seq, "IPSec PDU: %10u\n",
+ atomic_read(&adap->ch_ipsec_stats.ipsec_cnt));
+#endif
#ifdef CONFIG_CHELSIO_TLS_DEVICE
seq_puts(seq, "\nChelsio KTLS Crypto Accelerator Stats\n");
seq_printf(seq, "Tx TLS offload refcount: %20u\n",
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 650db92..f6c1ec1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -604,17 +604,14 @@ int cxgb4_get_free_ftid(struct net_device *dev, u8 family, bool hash_en,
/* If the new rule wants to get inserted into
* HPFILTER region, but its prio is greater
* than the rule with the highest prio in HASH
- * region, then reject the rule.
+ * region, or if there's not enough slots
+ * available in HPFILTER region, then skip
+ * trying to insert this rule into HPFILTER
+ * region and directly go to the next region.
*/
- if (t->tc_hash_tids_max_prio &&
- tc_prio > t->tc_hash_tids_max_prio)
- break;
-
- /* If there's not enough slots available
- * in HPFILTER region, then move on to
- * normal FILTER region immediately.
- */
- if (ftid + n > t->nhpftids) {
+ if ((t->tc_hash_tids_max_prio &&
+ tc_prio > t->tc_hash_tids_max_prio) ||
+ (ftid + n) > t->nhpftids) {
ftid = t->nhpftids;
continue;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index a963fd0..83c8189 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -302,6 +302,7 @@ enum cxgb4_uld {
CXGB4_ULD_ISCSI,
CXGB4_ULD_ISCSIT,
CXGB4_ULD_CRYPTO,
+ CXGB4_ULD_IPSEC,
CXGB4_ULD_TLS,
CXGB4_ULD_MAX
};
@@ -368,7 +369,6 @@ struct chcr_stats_debug {
atomic_t complete;
atomic_t error;
atomic_t fallback;
- atomic_t ipsec_cnt;
atomic_t tls_pdu_tx;
atomic_t tls_pdu_rx;
atomic_t tls_key;
@@ -394,6 +394,12 @@ struct chcr_stats_debug {
#endif
};
+#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE)
+struct ch_ipsec_stats_debug {
+ atomic_t ipsec_cnt;
+};
+#endif
+
#define OCQ_WIN_OFFSET(pdev, vres) \
(pci_resource_len((pdev), 2) - roundup_pow_of_two((vres)->ocq.size))
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 869431a..fddd70e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -1416,9 +1416,9 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
pi = netdev_priv(dev);
adap = pi->adapter;
ssi = skb_shinfo(skb);
-#ifdef CONFIG_CHELSIO_IPSEC_INLINE
+#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE)
if (xfrm_offload(skb) && !ssi->gso_size)
- return adap->uld[CXGB4_ULD_CRYPTO].tx_handler(skb, dev);
+ return adap->uld[CXGB4_ULD_IPSEC].tx_handler(skb, dev);
#endif /* CHELSIO_IPSEC_INLINE */
#ifdef CONFIG_CHELSIO_TLS_DEVICE
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index fa33679..98d01a7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -4745,9 +4745,11 @@ static void le_intr_handler(struct adapter *adap)
static struct intr_info t6_le_intr_info[] = {
{ T6_LIPMISS_F, "LE LIP miss", -1, 0 },
{ T6_LIP0_F, "LE 0 LIP error", -1, 0 },
+ { CMDTIDERR_F, "LE cmd tid error", -1, 1 },
{ TCAMINTPERR_F, "LE parity error", -1, 1 },
{ T6_UNKNOWNCMD_F, "LE unknown command", -1, 1 },
{ SSRAMINTPERR_F, "LE request queue parity error", -1, 1 },
+ { HASHTBLMEMCRCERR_F, "LE hash table mem crc error", -1, 0 },
{ 0 }
};
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
index 065c01c..b11a172 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
@@ -3017,6 +3017,14 @@
#define REV_V(x) ((x) << REV_S)
#define REV_G(x) (((x) >> REV_S) & REV_M)
+#define HASHTBLMEMCRCERR_S 27
+#define HASHTBLMEMCRCERR_V(x) ((x) << HASHTBLMEMCRCERR_S)
+#define HASHTBLMEMCRCERR_F HASHTBLMEMCRCERR_V(1U)
+
+#define CMDTIDERR_S 22
+#define CMDTIDERR_V(x) ((x) << CMDTIDERR_S)
+#define CMDTIDERR_F CMDTIDERR_V(1U)
+
#define T6_UNKNOWNCMD_S 3
#define T6_UNKNOWNCMD_V(x) ((x) << T6_UNKNOWNCMD_S)
#define T6_UNKNOWNCMD_F T6_UNKNOWNCMD_V(1U)
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/Kconfig b/drivers/net/ethernet/chelsio/inline_crypto/Kconfig
new file mode 100644
index 0000000..be70b59
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/inline_crypto/Kconfig
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Chelsio inline crypto configuration
+#
+
+config CHELSIO_INLINE_CRYPTO
+ bool "Chelsio Inline Crypto support"
+ depends on CHELSIO_T4
+ default y
+ help
+ Enable support for inline crypto.
+ Allows enable/disable from list of inline crypto drivers.
+
+if CHELSIO_INLINE_CRYPTO
+
+config CRYPTO_DEV_CHELSIO_TLS
+ tristate "Chelsio Crypto Inline TLS Driver"
+ depends on TLS_TOE
+ help
+ Support Chelsio Inline TLS with Chelsio crypto accelerator.
+ Enable inline TLS support for Tx and Rx.
+
+ To compile this driver as a module, choose M here: the module
+ will be called chtls.
+
+config CHELSIO_IPSEC_INLINE
+ tristate "Chelsio IPSec XFRM Tx crypto offload"
+ depends on XFRM_OFFLOAD
+ depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
+ help
+ Support Chelsio Inline IPsec with Chelsio crypto accelerator.
+ Enable inline IPsec support for Tx.
+
+ To compile this driver as a module, choose M here: the module
+ will be called ch_ipsec.
+
+endif # CHELSIO_INLINE_CRYPTO
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/Makefile b/drivers/net/ethernet/chelsio/inline_crypto/Makefile
new file mode 100644
index 0000000..9a86ee8
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/inline_crypto/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_CRYPTO_DEV_CHELSIO_TLS) += chtls/
+obj-$(CONFIG_CHELSIO_IPSEC_INLINE) += ch_ipsec/
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/Makefile b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/Makefile
new file mode 100644
index 0000000..efdcaae
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
+ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb4 \
+ -I $(srctree)/drivers/crypto/chelsio
+
+obj-$(CONFIG_CHELSIO_IPSEC_INLINE) += ch_ipsec.o
+ch_ipsec-objs := chcr_ipsec.o
+
+
diff --git a/drivers/crypto/chelsio/chcr_ipsec.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c
similarity index 88%
rename from drivers/crypto/chelsio/chcr_ipsec.c
rename to drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c
index 967babd..276f884 100644
--- a/drivers/crypto/chelsio/chcr_ipsec.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c
@@ -60,9 +60,7 @@
#include <crypto/scatterwalk.h>
#include <crypto/internal/hash.h>
-#include "chcr_core.h"
-#include "chcr_algo.h"
-#include "chcr_crypto.h"
+#include "chcr_ipsec.h"
/*
* Max Tx descriptor space we allow for an Ethernet packet to be inlined
@@ -71,11 +69,17 @@
#define MAX_IMM_TX_PKT_LEN 256
#define GCM_ESP_IV_SIZE 8
+static LIST_HEAD(uld_ctx_list);
+static DEFINE_MUTEX(dev_mutex);
+
static int chcr_xfrm_add_state(struct xfrm_state *x);
static void chcr_xfrm_del_state(struct xfrm_state *x);
static void chcr_xfrm_free_state(struct xfrm_state *x);
static bool chcr_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
static void chcr_advance_esn_state(struct xfrm_state *x);
+static int ch_ipsec_uld_state_change(void *handle, enum cxgb4_state new_state);
+static void *ch_ipsec_uld_add(const struct cxgb4_lld_info *infop);
+static void update_netdev_features(void);
static const struct xfrmdev_ops chcr_xfrmdev_ops = {
.xdo_dev_state_add = chcr_xfrm_add_state,
@@ -102,6 +106,57 @@ void chcr_add_xfrmops(const struct cxgb4_lld_info *lld)
}
}
+static struct cxgb4_uld_info ch_ipsec_uld_info = {
+ .name = CHIPSEC_DRV_MODULE_NAME,
+ .nrxq = MAX_ULD_QSETS,
+ /* Max ntxq will be derived from fw config file*/
+ .rxq_size = 1024,
+ .add = ch_ipsec_uld_add,
+ .state_change = ch_ipsec_uld_state_change,
+ .tx_handler = chcr_ipsec_xmit,
+};
+
+static void *ch_ipsec_uld_add(const struct cxgb4_lld_info *infop)
+{
+ struct ipsec_uld_ctx *u_ctx;
+
+ pr_info_once("%s - version %s\n", CHIPSEC_DRV_DESC,
+ CHIPSEC_DRV_VERSION);
+ u_ctx = kzalloc(sizeof(*u_ctx), GFP_KERNEL);
+ if (!u_ctx) {
+ u_ctx = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+ u_ctx->lldi = *infop;
+out:
+ return u_ctx;
+}
+
+static int ch_ipsec_uld_state_change(void *handle, enum cxgb4_state new_state)
+{
+ struct ipsec_uld_ctx *u_ctx = handle;
+
+ pr_info("new_state %u\n", new_state);
+ switch (new_state) {
+ case CXGB4_STATE_UP:
+ pr_info("%s: Up\n", pci_name(u_ctx->lldi.pdev));
+ mutex_lock(&dev_mutex);
+ list_add_tail(&u_ctx->entry, &uld_ctx_list);
+ mutex_unlock(&dev_mutex);
+ break;
+ case CXGB4_STATE_START_RECOVERY:
+ case CXGB4_STATE_DOWN:
+ case CXGB4_STATE_DETACH:
+ pr_info("%s: Down\n", pci_name(u_ctx->lldi.pdev));
+ list_del(&u_ctx->entry);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
static inline int chcr_ipsec_setauthsize(struct xfrm_state *x,
struct ipsec_sa_entry *sa_entry)
{
@@ -538,7 +593,7 @@ inline void *chcr_crypto_wreq(struct sk_buff *skb,
unsigned int kctx_len = sa_entry->kctx_len;
int qid = q->q.cntxt_id;
- atomic_inc(&adap->chcr_stats.ipsec_cnt);
+ atomic_inc(&adap->ch_ipsec_stats.ipsec_cnt);
flits = calc_tx_sec_flits(skb, sa_entry, &immediate);
ndesc = DIV_ROUND_UP(flits, 2);
@@ -752,3 +807,51 @@ out_free: dev_kfree_skb_any(skb);
cxgb4_ring_tx_db(adap, &q->q, ndesc);
return NETDEV_TX_OK;
}
+
+static void update_netdev_features(void)
+{
+ struct ipsec_uld_ctx *u_ctx, *tmp;
+
+ mutex_lock(&dev_mutex);
+ list_for_each_entry_safe(u_ctx, tmp, &uld_ctx_list, entry) {
+ if (u_ctx->lldi.crypto & ULP_CRYPTO_IPSEC_INLINE)
+ chcr_add_xfrmops(&u_ctx->lldi);
+ }
+ mutex_unlock(&dev_mutex);
+}
+
+static int __init chcr_ipsec_init(void)
+{
+ cxgb4_register_uld(CXGB4_ULD_IPSEC, &ch_ipsec_uld_info);
+
+ rtnl_lock();
+ update_netdev_features();
+ rtnl_unlock();
+
+ return 0;
+}
+
+static void __exit chcr_ipsec_exit(void)
+{
+ struct ipsec_uld_ctx *u_ctx, *tmp;
+ struct adapter *adap;
+
+ mutex_lock(&dev_mutex);
+ list_for_each_entry_safe(u_ctx, tmp, &uld_ctx_list, entry) {
+ adap = pci_get_drvdata(u_ctx->lldi.pdev);
+ atomic_set(&adap->ch_ipsec_stats.ipsec_cnt, 0);
+ list_del(&u_ctx->entry);
+ kfree(u_ctx);
+ }
+ mutex_unlock(&dev_mutex);
+ cxgb4_unregister_uld(CXGB4_ULD_IPSEC);
+}
+
+module_init(chcr_ipsec_init);
+module_exit(chcr_ipsec_exit);
+
+MODULE_DESCRIPTION("Crypto IPSEC for Chelsio Terminator cards.");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Chelsio Communications");
+MODULE_VERSION(CHIPSEC_DRV_VERSION);
+
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.h b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.h
new file mode 100644
index 0000000..1d110d2
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2018 Chelsio Communications, Inc. */
+
+#ifndef __CHCR_IPSEC_H__
+#define __CHCR_IPSEC_H__
+
+#include <crypto/algapi.h>
+#include "t4_hw.h"
+#include "cxgb4.h"
+#include "t4_msg.h"
+#include "cxgb4_uld.h"
+
+#include "chcr_core.h"
+#include "chcr_algo.h"
+#include "chcr_crypto.h"
+
+#define CHIPSEC_DRV_MODULE_NAME "ch_ipsec"
+#define CHIPSEC_DRV_VERSION "1.0.0.0-ko"
+#define CHIPSEC_DRV_DESC "Chelsio T6 Crypto Ipsec offload Driver"
+
+struct ipsec_uld_ctx {
+ struct list_head entry;
+ struct cxgb4_lld_info lldi;
+};
+
+struct chcr_ipsec_req {
+ struct ulp_txpkt ulptx;
+ struct ulptx_idata sc_imm;
+ struct cpl_tx_sec_pdu sec_cpl;
+ struct _key_ctx key_ctx;
+};
+
+struct chcr_ipsec_wr {
+ struct fw_ulptx_wr wreq;
+ struct chcr_ipsec_req req;
+};
+
+#define ESN_IV_INSERT_OFFSET 12
+struct chcr_ipsec_aadiv {
+ __be32 spi;
+ u8 seq_no[8];
+ u8 iv[8];
+};
+
+struct ipsec_sa_entry {
+ int hmac_ctrl;
+ u16 esn;
+ u16 resv;
+ unsigned int enckey_len;
+ unsigned int kctx_len;
+ unsigned int authsize;
+ __be32 key_ctx_hdr;
+ char salt[MAX_SALT];
+ char key[2 * AES_MAX_KEY_SIZE];
+};
+
+#endif /* __CHCR_IPSEC_H__ */
+
diff --git a/drivers/crypto/chelsio/chtls/Makefile b/drivers/net/ethernet/chelsio/inline_crypto/chtls/Makefile
similarity index 100%
rename from drivers/crypto/chelsio/chtls/Makefile
rename to drivers/net/ethernet/chelsio/inline_crypto/chtls/Makefile
diff --git a/drivers/crypto/chelsio/chtls/chtls.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h
similarity index 80%
rename from drivers/crypto/chelsio/chtls/chtls.h
rename to drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h
index 4594427..2d3dfdd 100644
--- a/drivers/crypto/chelsio/chtls/chtls.h
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h
@@ -32,6 +32,94 @@
#include "chcr_core.h"
#include "chcr_crypto.h"
+#define CHTLS_DRV_VERSION "1.0.0.0-ko"
+
+#define TLS_KEYCTX_RXFLIT_CNT_S 24
+#define TLS_KEYCTX_RXFLIT_CNT_V(x) ((x) << TLS_KEYCTX_RXFLIT_CNT_S)
+
+#define TLS_KEYCTX_RXPROT_VER_S 20
+#define TLS_KEYCTX_RXPROT_VER_M 0xf
+#define TLS_KEYCTX_RXPROT_VER_V(x) ((x) << TLS_KEYCTX_RXPROT_VER_S)
+
+#define TLS_KEYCTX_RXCIPH_MODE_S 16
+#define TLS_KEYCTX_RXCIPH_MODE_M 0xf
+#define TLS_KEYCTX_RXCIPH_MODE_V(x) ((x) << TLS_KEYCTX_RXCIPH_MODE_S)
+
+#define TLS_KEYCTX_RXAUTH_MODE_S 12
+#define TLS_KEYCTX_RXAUTH_MODE_M 0xf
+#define TLS_KEYCTX_RXAUTH_MODE_V(x) ((x) << TLS_KEYCTX_RXAUTH_MODE_S)
+
+#define TLS_KEYCTX_RXCIAU_CTRL_S 11
+#define TLS_KEYCTX_RXCIAU_CTRL_V(x) ((x) << TLS_KEYCTX_RXCIAU_CTRL_S)
+
+#define TLS_KEYCTX_RX_SEQCTR_S 9
+#define TLS_KEYCTX_RX_SEQCTR_M 0x3
+#define TLS_KEYCTX_RX_SEQCTR_V(x) ((x) << TLS_KEYCTX_RX_SEQCTR_S)
+
+#define TLS_KEYCTX_RX_VALID_S 8
+#define TLS_KEYCTX_RX_VALID_V(x) ((x) << TLS_KEYCTX_RX_VALID_S)
+
+#define TLS_KEYCTX_RXCK_SIZE_S 3
+#define TLS_KEYCTX_RXCK_SIZE_M 0x7
+#define TLS_KEYCTX_RXCK_SIZE_V(x) ((x) << TLS_KEYCTX_RXCK_SIZE_S)
+
+#define TLS_KEYCTX_RXMK_SIZE_S 0
+#define TLS_KEYCTX_RXMK_SIZE_M 0x7
+#define TLS_KEYCTX_RXMK_SIZE_V(x) ((x) << TLS_KEYCTX_RXMK_SIZE_S)
+
+#define KEYCTX_TX_WR_IV_S 55
+#define KEYCTX_TX_WR_IV_M 0x1ffULL
+#define KEYCTX_TX_WR_IV_V(x) ((x) << KEYCTX_TX_WR_IV_S)
+#define KEYCTX_TX_WR_IV_G(x) \
+ (((x) >> KEYCTX_TX_WR_IV_S) & KEYCTX_TX_WR_IV_M)
+
+#define KEYCTX_TX_WR_AAD_S 47
+#define KEYCTX_TX_WR_AAD_M 0xffULL
+#define KEYCTX_TX_WR_AAD_V(x) ((x) << KEYCTX_TX_WR_AAD_S)
+#define KEYCTX_TX_WR_AAD_G(x) (((x) >> KEYCTX_TX_WR_AAD_S) & \
+ KEYCTX_TX_WR_AAD_M)
+
+#define KEYCTX_TX_WR_AADST_S 39
+#define KEYCTX_TX_WR_AADST_M 0xffULL
+#define KEYCTX_TX_WR_AADST_V(x) ((x) << KEYCTX_TX_WR_AADST_S)
+#define KEYCTX_TX_WR_AADST_G(x) \
+ (((x) >> KEYCTX_TX_WR_AADST_S) & KEYCTX_TX_WR_AADST_M)
+
+#define KEYCTX_TX_WR_CIPHER_S 30
+#define KEYCTX_TX_WR_CIPHER_M 0x1ffULL
+#define KEYCTX_TX_WR_CIPHER_V(x) ((x) << KEYCTX_TX_WR_CIPHER_S)
+#define KEYCTX_TX_WR_CIPHER_G(x) \
+ (((x) >> KEYCTX_TX_WR_CIPHER_S) & KEYCTX_TX_WR_CIPHER_M)
+
+#define KEYCTX_TX_WR_CIPHERST_S 23
+#define KEYCTX_TX_WR_CIPHERST_M 0x7f
+#define KEYCTX_TX_WR_CIPHERST_V(x) ((x) << KEYCTX_TX_WR_CIPHERST_S)
+#define KEYCTX_TX_WR_CIPHERST_G(x) \
+ (((x) >> KEYCTX_TX_WR_CIPHERST_S) & KEYCTX_TX_WR_CIPHERST_M)
+
+#define KEYCTX_TX_WR_AUTH_S 14
+#define KEYCTX_TX_WR_AUTH_M 0x1ff
+#define KEYCTX_TX_WR_AUTH_V(x) ((x) << KEYCTX_TX_WR_AUTH_S)
+#define KEYCTX_TX_WR_AUTH_G(x) \
+ (((x) >> KEYCTX_TX_WR_AUTH_S) & KEYCTX_TX_WR_AUTH_M)
+
+#define KEYCTX_TX_WR_AUTHST_S 7
+#define KEYCTX_TX_WR_AUTHST_M 0x7f
+#define KEYCTX_TX_WR_AUTHST_V(x) ((x) << KEYCTX_TX_WR_AUTHST_S)
+#define KEYCTX_TX_WR_AUTHST_G(x) \
+ (((x) >> KEYCTX_TX_WR_AUTHST_S) & KEYCTX_TX_WR_AUTHST_M)
+
+#define KEYCTX_TX_WR_AUTHIN_S 0
+#define KEYCTX_TX_WR_AUTHIN_M 0x7f
+#define KEYCTX_TX_WR_AUTHIN_V(x) ((x) << KEYCTX_TX_WR_AUTHIN_S)
+#define KEYCTX_TX_WR_AUTHIN_G(x) \
+ (((x) >> KEYCTX_TX_WR_AUTHIN_S) & KEYCTX_TX_WR_AUTHIN_M)
+
+struct sge_opaque_hdr {
+ void *dev;
+ dma_addr_t addr[MAX_SKB_FRAGS + 1];
+};
+
#define MAX_IVS_PAGE 256
#define TLS_KEY_CONTEXT_SZ 64
#define CIPHER_BLOCK_SIZE 16
diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
similarity index 100%
rename from drivers/crypto/chelsio/chtls/chtls_cm.c
rename to drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h
similarity index 100%
rename from drivers/crypto/chelsio/chtls/chtls_cm.h
rename to drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h
diff --git a/drivers/crypto/chelsio/chtls/chtls_hw.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c
similarity index 100%
rename from drivers/crypto/chelsio/chtls/chtls_hw.c
rename to drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c
diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c
similarity index 100%
rename from drivers/crypto/chelsio/chtls/chtls_io.c
rename to drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c
diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c
similarity index 99%
rename from drivers/crypto/chelsio/chtls/chtls_main.c
rename to drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c
index 66d247e..9098b3e 100644
--- a/drivers/crypto/chelsio/chtls/chtls_main.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c
@@ -638,4 +638,4 @@ module_exit(chtls_unregister);
MODULE_DESCRIPTION("Chelsio TLS Inline driver");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Chelsio Communications");
-MODULE_VERSION(DRV_VERSION);
+MODULE_VERSION(CHTLS_DRV_VERSION);
diff --git a/drivers/net/ethernet/cirrus/cs89x0.h b/drivers/net/ethernet/cirrus/cs89x0.h
index 91423b7..210f9ec 100644
--- a/drivers/net/ethernet/cirrus/cs89x0.h
+++ b/drivers/net/ethernet/cirrus/cs89x0.h
@@ -459,7 +459,3 @@
#define PNP_CNF_INT 0x70
#define PNP_CNF_DMA 0x74
#define PNP_CNF_MEM 0x48
-
-#define BIT0 1
-#define BIT15 0x8000
-
diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c
index be6d8a9..e8e563d 100644
--- a/drivers/net/ethernet/dlink/dl2k.c
+++ b/drivers/net/ethernet/dlink/dl2k.c
@@ -7,7 +7,6 @@
*/
-#define DRV_NAME "DL2000/TC902x-based linux driver"
#include "dl2k.h"
#include <linux/dma-mapping.h>
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-dcb.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-dcb.c
index 83dee57..84de064 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-dcb.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-dcb.c
@@ -17,12 +17,12 @@ static int dpaa2_eth_dcbnl_ieee_getpfc(struct net_device *net_dev,
return 0;
}
-static inline bool is_prio_enabled(u8 pfc_en, u8 tc)
+static inline bool dpaa2_eth_is_prio_enabled(u8 pfc_en, u8 tc)
{
return !!(pfc_en & (1 << tc));
}
-static int set_pfc_cn(struct dpaa2_eth_priv *priv, u8 pfc_en)
+static int dpaa2_eth_set_pfc_cn(struct dpaa2_eth_priv *priv, u8 pfc_en)
{
struct dpni_congestion_notification_cfg cfg = {0};
int i, err;
@@ -33,7 +33,7 @@ static int set_pfc_cn(struct dpaa2_eth_priv *priv, u8 pfc_en)
cfg.message_ctx = 0ULL;
for (i = 0; i < dpaa2_eth_tc_count(priv); i++) {
- if (is_prio_enabled(pfc_en, i)) {
+ if (dpaa2_eth_is_prio_enabled(pfc_en, i)) {
cfg.threshold_entry = DPAA2_ETH_CN_THRESH_ENTRY(priv);
cfg.threshold_exit = DPAA2_ETH_CN_THRESH_EXIT(priv);
} else {
@@ -93,7 +93,7 @@ static int dpaa2_eth_dcbnl_ieee_setpfc(struct net_device *net_dev,
}
/* Configure congestion notifications for the enabled priorities */
- err = set_pfc_cn(priv, pfc->pfc_en);
+ err = dpaa2_eth_set_pfc_cn(priv, pfc->pfc_en);
if (err)
return err;
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index cf5383bb..ceaf761 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -40,9 +40,9 @@ static void *dpaa2_iova_to_virt(struct iommu_domain *domain,
return phys_to_virt(phys_addr);
}
-static void validate_rx_csum(struct dpaa2_eth_priv *priv,
- u32 fd_status,
- struct sk_buff *skb)
+static void dpaa2_eth_validate_rx_csum(struct dpaa2_eth_priv *priv,
+ u32 fd_status,
+ struct sk_buff *skb)
{
skb_checksum_none_assert(skb);
@@ -62,9 +62,9 @@ static void validate_rx_csum(struct dpaa2_eth_priv *priv,
/* Free a received FD.
* Not to be used for Tx conf FDs or on any other paths.
*/
-static void free_rx_fd(struct dpaa2_eth_priv *priv,
- const struct dpaa2_fd *fd,
- void *vaddr)
+static void dpaa2_eth_free_rx_fd(struct dpaa2_eth_priv *priv,
+ const struct dpaa2_fd *fd,
+ void *vaddr)
{
struct device *dev = priv->net_dev->dev.parent;
dma_addr_t addr = dpaa2_fd_get_addr(fd);
@@ -100,9 +100,9 @@ static void free_rx_fd(struct dpaa2_eth_priv *priv,
}
/* Build a linear skb based on a single-buffer frame descriptor */
-static struct sk_buff *build_linear_skb(struct dpaa2_eth_channel *ch,
- const struct dpaa2_fd *fd,
- void *fd_vaddr)
+static struct sk_buff *dpaa2_eth_build_linear_skb(struct dpaa2_eth_channel *ch,
+ const struct dpaa2_fd *fd,
+ void *fd_vaddr)
{
struct sk_buff *skb = NULL;
u16 fd_offset = dpaa2_fd_get_offset(fd);
@@ -121,9 +121,9 @@ static struct sk_buff *build_linear_skb(struct dpaa2_eth_channel *ch,
}
/* Build a non linear (fragmented) skb based on a S/G table */
-static struct sk_buff *build_frag_skb(struct dpaa2_eth_priv *priv,
- struct dpaa2_eth_channel *ch,
- struct dpaa2_sg_entry *sgt)
+static struct sk_buff *dpaa2_eth_build_frag_skb(struct dpaa2_eth_priv *priv,
+ struct dpaa2_eth_channel *ch,
+ struct dpaa2_sg_entry *sgt)
{
struct sk_buff *skb = NULL;
struct device *dev = priv->net_dev->dev.parent;
@@ -204,7 +204,8 @@ static struct sk_buff *build_frag_skb(struct dpaa2_eth_priv *priv,
/* Free buffers acquired from the buffer pool or which were meant to
* be released in the pool
*/
-static void free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array, int count)
+static void dpaa2_eth_free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array,
+ int count)
{
struct device *dev = priv->net_dev->dev.parent;
void *vaddr;
@@ -218,9 +219,9 @@ static void free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array, int count)
}
}
-static void xdp_release_buf(struct dpaa2_eth_priv *priv,
- struct dpaa2_eth_channel *ch,
- dma_addr_t addr)
+static void dpaa2_eth_xdp_release_buf(struct dpaa2_eth_priv *priv,
+ struct dpaa2_eth_channel *ch,
+ dma_addr_t addr)
{
int retries = 0;
int err;
@@ -238,7 +239,7 @@ static void xdp_release_buf(struct dpaa2_eth_priv *priv,
}
if (err) {
- free_bufs(priv, ch->xdp.drop_bufs, ch->xdp.drop_cnt);
+ dpaa2_eth_free_bufs(priv, ch->xdp.drop_bufs, ch->xdp.drop_cnt);
ch->buf_count -= ch->xdp.drop_cnt;
}
@@ -274,9 +275,9 @@ static int dpaa2_eth_xdp_flush(struct dpaa2_eth_priv *priv,
return total_enqueued;
}
-static void xdp_tx_flush(struct dpaa2_eth_priv *priv,
- struct dpaa2_eth_channel *ch,
- struct dpaa2_eth_fq *fq)
+static void dpaa2_eth_xdp_tx_flush(struct dpaa2_eth_priv *priv,
+ struct dpaa2_eth_channel *ch,
+ struct dpaa2_eth_fq *fq)
{
struct rtnl_link_stats64 *percpu_stats;
struct dpaa2_fd *fds;
@@ -295,17 +296,17 @@ static void xdp_tx_flush(struct dpaa2_eth_priv *priv,
ch->stats.xdp_tx++;
}
for (i = enqueued; i < fq->xdp_tx_fds.num; i++) {
- xdp_release_buf(priv, ch, dpaa2_fd_get_addr(&fds[i]));
+ dpaa2_eth_xdp_release_buf(priv, ch, dpaa2_fd_get_addr(&fds[i]));
percpu_stats->tx_errors++;
ch->stats.xdp_tx_err++;
}
fq->xdp_tx_fds.num = 0;
}
-static void xdp_enqueue(struct dpaa2_eth_priv *priv,
- struct dpaa2_eth_channel *ch,
- struct dpaa2_fd *fd,
- void *buf_start, u16 queue_id)
+static void dpaa2_eth_xdp_enqueue(struct dpaa2_eth_priv *priv,
+ struct dpaa2_eth_channel *ch,
+ struct dpaa2_fd *fd,
+ void *buf_start, u16 queue_id)
{
struct dpaa2_faead *faead;
struct dpaa2_fd *dest_fd;
@@ -333,13 +334,13 @@ static void xdp_enqueue(struct dpaa2_eth_priv *priv,
if (fq->xdp_tx_fds.num < DEV_MAP_BULK_SIZE)
return;
- xdp_tx_flush(priv, ch, fq);
+ dpaa2_eth_xdp_tx_flush(priv, ch, fq);
}
-static u32 run_xdp(struct dpaa2_eth_priv *priv,
- struct dpaa2_eth_channel *ch,
- struct dpaa2_eth_fq *rx_fq,
- struct dpaa2_fd *fd, void *vaddr)
+static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv,
+ struct dpaa2_eth_channel *ch,
+ struct dpaa2_eth_fq *rx_fq,
+ struct dpaa2_fd *fd, void *vaddr)
{
dma_addr_t addr = dpaa2_fd_get_addr(fd);
struct bpf_prog *xdp_prog;
@@ -372,7 +373,7 @@ static u32 run_xdp(struct dpaa2_eth_priv *priv,
case XDP_PASS:
break;
case XDP_TX:
- xdp_enqueue(priv, ch, fd, vaddr, rx_fq->flowid);
+ dpaa2_eth_xdp_enqueue(priv, ch, fd, vaddr, rx_fq->flowid);
break;
default:
bpf_warn_invalid_xdp_action(xdp_act);
@@ -381,7 +382,7 @@ static u32 run_xdp(struct dpaa2_eth_priv *priv,
trace_xdp_exception(priv->net_dev, xdp_prog, xdp_act);
fallthrough;
case XDP_DROP:
- xdp_release_buf(priv, ch, addr);
+ dpaa2_eth_xdp_release_buf(priv, ch, addr);
ch->stats.xdp_drop++;
break;
case XDP_REDIRECT:
@@ -441,7 +442,7 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
percpu_extras = this_cpu_ptr(priv->percpu_extras);
if (fd_format == dpaa2_fd_single) {
- xdp_act = run_xdp(priv, ch, fq, (struct dpaa2_fd *)fd, vaddr);
+ xdp_act = dpaa2_eth_run_xdp(priv, ch, fq, (struct dpaa2_fd *)fd, vaddr);
if (xdp_act != XDP_PASS) {
percpu_stats->rx_packets++;
percpu_stats->rx_bytes += dpaa2_fd_get_len(fd);
@@ -450,13 +451,13 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
dma_unmap_page(dev, addr, priv->rx_buf_size,
DMA_BIDIRECTIONAL);
- skb = build_linear_skb(ch, fd, vaddr);
+ skb = dpaa2_eth_build_linear_skb(ch, fd, vaddr);
} else if (fd_format == dpaa2_fd_sg) {
WARN_ON(priv->xdp_prog);
dma_unmap_page(dev, addr, priv->rx_buf_size,
DMA_BIDIRECTIONAL);
- skb = build_frag_skb(priv, ch, buf_data);
+ skb = dpaa2_eth_build_frag_skb(priv, ch, buf_data);
free_pages((unsigned long)vaddr, 0);
percpu_extras->rx_sg_frames++;
percpu_extras->rx_sg_bytes += dpaa2_fd_get_len(fd);
@@ -485,7 +486,7 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
/* Check if we need to validate the L4 csum */
if (likely(dpaa2_fd_get_frc(fd) & DPAA2_FD_FRC_FASV)) {
status = le32_to_cpu(fas->status);
- validate_rx_csum(priv, status, skb);
+ dpaa2_eth_validate_rx_csum(priv, status, skb);
}
skb->protocol = eth_type_trans(skb, priv->net_dev);
@@ -499,7 +500,7 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
return;
err_build_skb:
- free_rx_fd(priv, fd, vaddr);
+ dpaa2_eth_free_rx_fd(priv, fd, vaddr);
err_frame_format:
percpu_stats->rx_dropped++;
}
@@ -510,8 +511,8 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
*
* Observance of NAPI budget is not our concern, leaving that to the caller.
*/
-static int consume_frames(struct dpaa2_eth_channel *ch,
- struct dpaa2_eth_fq **src)
+static int dpaa2_eth_consume_frames(struct dpaa2_eth_channel *ch,
+ struct dpaa2_eth_fq **src)
{
struct dpaa2_eth_priv *priv = ch->priv;
struct dpaa2_eth_fq *fq = NULL;
@@ -560,7 +561,7 @@ static int consume_frames(struct dpaa2_eth_channel *ch,
}
/* Configure the egress frame annotation for timestamp update */
-static void enable_tx_tstamp(struct dpaa2_fd *fd, void *buf_start)
+static void dpaa2_eth_enable_tx_tstamp(struct dpaa2_fd *fd, void *buf_start)
{
struct dpaa2_faead *faead;
u32 ctrl, frc;
@@ -582,9 +583,9 @@ static void enable_tx_tstamp(struct dpaa2_fd *fd, void *buf_start)
}
/* Create a frame descriptor based on a fragmented skb */
-static int build_sg_fd(struct dpaa2_eth_priv *priv,
- struct sk_buff *skb,
- struct dpaa2_fd *fd)
+static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv,
+ struct sk_buff *skb,
+ struct dpaa2_fd *fd)
{
struct device *dev = priv->net_dev->dev.parent;
void *sgt_buf = NULL;
@@ -673,7 +674,7 @@ static int build_sg_fd(struct dpaa2_eth_priv *priv,
dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA);
if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)
- enable_tx_tstamp(fd, sgt_buf);
+ dpaa2_eth_enable_tx_tstamp(fd, sgt_buf);
return 0;
@@ -692,9 +693,9 @@ static int build_sg_fd(struct dpaa2_eth_priv *priv,
* enough for the HW requirements, thus instead of realloc-ing the skb we
* create a SG frame descriptor with only one entry.
*/
-static int build_sg_fd_single_buf(struct dpaa2_eth_priv *priv,
- struct sk_buff *skb,
- struct dpaa2_fd *fd)
+static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv,
+ struct sk_buff *skb,
+ struct dpaa2_fd *fd)
{
struct device *dev = priv->net_dev->dev.parent;
struct dpaa2_eth_sgt_cache *sgt_cache;
@@ -751,7 +752,7 @@ static int build_sg_fd_single_buf(struct dpaa2_eth_priv *priv,
dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA);
if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)
- enable_tx_tstamp(fd, sgt_buf);
+ dpaa2_eth_enable_tx_tstamp(fd, sgt_buf);
return 0;
@@ -767,9 +768,9 @@ static int build_sg_fd_single_buf(struct dpaa2_eth_priv *priv,
}
/* Create a frame descriptor based on a linear skb */
-static int build_single_fd(struct dpaa2_eth_priv *priv,
- struct sk_buff *skb,
- struct dpaa2_fd *fd)
+static int dpaa2_eth_build_single_fd(struct dpaa2_eth_priv *priv,
+ struct sk_buff *skb,
+ struct dpaa2_fd *fd)
{
struct device *dev = priv->net_dev->dev.parent;
u8 *buffer_start, *aligned_start;
@@ -807,7 +808,7 @@ static int build_single_fd(struct dpaa2_eth_priv *priv,
dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA);
if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)
- enable_tx_tstamp(fd, buffer_start);
+ dpaa2_eth_enable_tx_tstamp(fd, buffer_start);
return 0;
}
@@ -819,9 +820,9 @@ static int build_single_fd(struct dpaa2_eth_priv *priv,
* This can be called either from dpaa2_eth_tx_conf() or on the error path of
* dpaa2_eth_tx().
*/
-static void free_tx_fd(const struct dpaa2_eth_priv *priv,
- struct dpaa2_eth_fq *fq,
- const struct dpaa2_fd *fd, bool in_napi)
+static void dpaa2_eth_free_tx_fd(const struct dpaa2_eth_priv *priv,
+ struct dpaa2_eth_fq *fq,
+ const struct dpaa2_fd *fd, bool in_napi)
{
struct device *dev = priv->net_dev->dev.parent;
dma_addr_t fd_addr, sg_addr;
@@ -954,17 +955,17 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
memset(&fd, 0, sizeof(fd));
if (skb_is_nonlinear(skb)) {
- err = build_sg_fd(priv, skb, &fd);
+ err = dpaa2_eth_build_sg_fd(priv, skb, &fd);
percpu_extras->tx_sg_frames++;
percpu_extras->tx_sg_bytes += skb->len;
} else if (skb_headroom(skb) < needed_headroom) {
- err = build_sg_fd_single_buf(priv, skb, &fd);
+ err = dpaa2_eth_build_sg_fd_single_buf(priv, skb, &fd);
percpu_extras->tx_sg_frames++;
percpu_extras->tx_sg_bytes += skb->len;
percpu_extras->tx_converted_sg_frames++;
percpu_extras->tx_converted_sg_bytes += skb->len;
} else {
- err = build_single_fd(priv, skb, &fd);
+ err = dpaa2_eth_build_single_fd(priv, skb, &fd);
}
if (unlikely(err)) {
@@ -1010,7 +1011,7 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
if (unlikely(err < 0)) {
percpu_stats->tx_errors++;
/* Clean up everything, including freeing the skb */
- free_tx_fd(priv, fq, &fd, false);
+ dpaa2_eth_free_tx_fd(priv, fq, &fd, false);
netdev_tx_completed_queue(nq, 1, fd_len);
} else {
percpu_stats->tx_packets++;
@@ -1045,7 +1046,7 @@ static void dpaa2_eth_tx_conf(struct dpaa2_eth_priv *priv,
/* Check frame errors in the FD field */
fd_errors = dpaa2_fd_get_ctrl(fd) & DPAA2_FD_TX_ERR_MASK;
- free_tx_fd(priv, fq, fd, true);
+ dpaa2_eth_free_tx_fd(priv, fq, fd, true);
if (likely(!fd_errors))
return;
@@ -1059,7 +1060,7 @@ static void dpaa2_eth_tx_conf(struct dpaa2_eth_priv *priv,
percpu_stats->tx_errors++;
}
-static int set_rx_csum(struct dpaa2_eth_priv *priv, bool enable)
+static int dpaa2_eth_set_rx_csum(struct dpaa2_eth_priv *priv, bool enable)
{
int err;
@@ -1082,7 +1083,7 @@ static int set_rx_csum(struct dpaa2_eth_priv *priv, bool enable)
return 0;
}
-static int set_tx_csum(struct dpaa2_eth_priv *priv, bool enable)
+static int dpaa2_eth_set_tx_csum(struct dpaa2_eth_priv *priv, bool enable)
{
int err;
@@ -1106,8 +1107,8 @@ static int set_tx_csum(struct dpaa2_eth_priv *priv, bool enable)
/* Perform a single release command to add buffers
* to the specified buffer pool
*/
-static int add_bufs(struct dpaa2_eth_priv *priv,
- struct dpaa2_eth_channel *ch, u16 bpid)
+static int dpaa2_eth_add_bufs(struct dpaa2_eth_priv *priv,
+ struct dpaa2_eth_channel *ch, u16 bpid)
{
struct device *dev = priv->net_dev->dev.parent;
u64 buf_array[DPAA2_ETH_BUFS_PER_CMD];
@@ -1155,7 +1156,7 @@ static int add_bufs(struct dpaa2_eth_priv *priv,
* not much else we can do about it
*/
if (err) {
- free_bufs(priv, buf_array, i);
+ dpaa2_eth_free_bufs(priv, buf_array, i);
return 0;
}
@@ -1173,7 +1174,7 @@ static int add_bufs(struct dpaa2_eth_priv *priv,
return 0;
}
-static int seed_pool(struct dpaa2_eth_priv *priv, u16 bpid)
+static int dpaa2_eth_seed_pool(struct dpaa2_eth_priv *priv, u16 bpid)
{
int i, j;
int new_count;
@@ -1181,7 +1182,7 @@ static int seed_pool(struct dpaa2_eth_priv *priv, u16 bpid)
for (j = 0; j < priv->num_channels; j++) {
for (i = 0; i < DPAA2_ETH_NUM_BUFS;
i += DPAA2_ETH_BUFS_PER_CMD) {
- new_count = add_bufs(priv, priv->channel[j], bpid);
+ new_count = dpaa2_eth_add_bufs(priv, priv->channel[j], bpid);
priv->channel[j]->buf_count += new_count;
if (new_count < DPAA2_ETH_BUFS_PER_CMD) {
@@ -1197,7 +1198,7 @@ static int seed_pool(struct dpaa2_eth_priv *priv, u16 bpid)
* Drain the specified number of buffers from the DPNI's private buffer pool.
* @count must not exceeed DPAA2_ETH_BUFS_PER_CMD
*/
-static void drain_bufs(struct dpaa2_eth_priv *priv, int count)
+static void dpaa2_eth_drain_bufs(struct dpaa2_eth_priv *priv, int count)
{
u64 buf_array[DPAA2_ETH_BUFS_PER_CMD];
int retries = 0;
@@ -1213,17 +1214,17 @@ static void drain_bufs(struct dpaa2_eth_priv *priv, int count)
netdev_err(priv->net_dev, "dpaa2_io_service_acquire() failed\n");
return;
}
- free_bufs(priv, buf_array, ret);
+ dpaa2_eth_free_bufs(priv, buf_array, ret);
retries = 0;
} while (ret);
}
-static void drain_pool(struct dpaa2_eth_priv *priv)
+static void dpaa2_eth_drain_pool(struct dpaa2_eth_priv *priv)
{
int i;
- drain_bufs(priv, DPAA2_ETH_BUFS_PER_CMD);
- drain_bufs(priv, 1);
+ dpaa2_eth_drain_bufs(priv, DPAA2_ETH_BUFS_PER_CMD);
+ dpaa2_eth_drain_bufs(priv, 1);
for (i = 0; i < priv->num_channels; i++)
priv->channel[i]->buf_count = 0;
@@ -1232,9 +1233,9 @@ static void drain_pool(struct dpaa2_eth_priv *priv)
/* Function is called from softirq context only, so we don't need to guard
* the access to percpu count
*/
-static int refill_pool(struct dpaa2_eth_priv *priv,
- struct dpaa2_eth_channel *ch,
- u16 bpid)
+static int dpaa2_eth_refill_pool(struct dpaa2_eth_priv *priv,
+ struct dpaa2_eth_channel *ch,
+ u16 bpid)
{
int new_count;
@@ -1242,7 +1243,7 @@ static int refill_pool(struct dpaa2_eth_priv *priv,
return 0;
do {
- new_count = add_bufs(priv, ch, bpid);
+ new_count = dpaa2_eth_add_bufs(priv, ch, bpid);
if (unlikely(!new_count)) {
/* Out of memory; abort for now, we'll try later on */
break;
@@ -1272,7 +1273,7 @@ static void dpaa2_eth_sgt_cache_drain(struct dpaa2_eth_priv *priv)
}
}
-static int pull_channel(struct dpaa2_eth_channel *ch)
+static int dpaa2_eth_pull_channel(struct dpaa2_eth_channel *ch)
{
int err;
int dequeues = -1;
@@ -1319,14 +1320,14 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
ch->rx_list = &rx_list;
do {
- err = pull_channel(ch);
+ err = dpaa2_eth_pull_channel(ch);
if (unlikely(err))
break;
/* Refill pool if appropriate */
- refill_pool(priv, ch, priv->bpid);
+ dpaa2_eth_refill_pool(priv, ch, priv->bpid);
- store_cleaned = consume_frames(ch, &fq);
+ store_cleaned = dpaa2_eth_consume_frames(ch, &fq);
if (store_cleaned <= 0)
break;
if (fq->type == DPAA2_RX_FQ) {
@@ -1375,12 +1376,12 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
if (ch->xdp.res & XDP_REDIRECT)
xdp_do_flush_map();
else if (rx_cleaned && ch->xdp.res & XDP_TX)
- xdp_tx_flush(priv, ch, &priv->fq[flowid]);
+ dpaa2_eth_xdp_tx_flush(priv, ch, &priv->fq[flowid]);
return work_done;
}
-static void enable_ch_napi(struct dpaa2_eth_priv *priv)
+static void dpaa2_eth_enable_ch_napi(struct dpaa2_eth_priv *priv)
{
struct dpaa2_eth_channel *ch;
int i;
@@ -1391,7 +1392,7 @@ static void enable_ch_napi(struct dpaa2_eth_priv *priv)
}
}
-static void disable_ch_napi(struct dpaa2_eth_priv *priv)
+static void dpaa2_eth_disable_ch_napi(struct dpaa2_eth_priv *priv)
{
struct dpaa2_eth_channel *ch;
int i;
@@ -1465,7 +1466,7 @@ void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv,
priv->rx_cgtd_enabled = td.enable;
}
-static int link_state_update(struct dpaa2_eth_priv *priv)
+static int dpaa2_eth_link_state_update(struct dpaa2_eth_priv *priv)
{
struct dpni_link_state state = {0};
bool tx_pause;
@@ -1517,7 +1518,7 @@ static int dpaa2_eth_open(struct net_device *net_dev)
struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
int err;
- err = seed_pool(priv, priv->bpid);
+ err = dpaa2_eth_seed_pool(priv, priv->bpid);
if (err) {
/* Not much to do; the buffer pool, though not filled up,
* may still contain some buffers which would enable us
@@ -1541,7 +1542,7 @@ static int dpaa2_eth_open(struct net_device *net_dev)
*/
netif_carrier_off(net_dev);
}
- enable_ch_napi(priv);
+ dpaa2_eth_enable_ch_napi(priv);
err = dpni_enable(priv->mc_io, 0, priv->mc_token);
if (err < 0) {
@@ -1553,7 +1554,7 @@ static int dpaa2_eth_open(struct net_device *net_dev)
/* If the DPMAC object has already processed the link up
* interrupt, we have to learn the link state ourselves.
*/
- err = link_state_update(priv);
+ err = dpaa2_eth_link_state_update(priv);
if (err < 0) {
netdev_err(net_dev, "Can't update link state\n");
goto link_state_err;
@@ -1566,13 +1567,13 @@ static int dpaa2_eth_open(struct net_device *net_dev)
link_state_err:
enable_err:
- disable_ch_napi(priv);
- drain_pool(priv);
+ dpaa2_eth_disable_ch_napi(priv);
+ dpaa2_eth_drain_pool(priv);
return err;
}
/* Total number of in-flight frames on ingress queues */
-static u32 ingress_fq_count(struct dpaa2_eth_priv *priv)
+static u32 dpaa2_eth_ingress_fq_count(struct dpaa2_eth_priv *priv)
{
struct dpaa2_eth_fq *fq;
u32 fcnt = 0, bcnt = 0, total = 0;
@@ -1591,13 +1592,13 @@ static u32 ingress_fq_count(struct dpaa2_eth_priv *priv)
return total;
}
-static void wait_for_ingress_fq_empty(struct dpaa2_eth_priv *priv)
+static void dpaa2_eth_wait_for_ingress_fq_empty(struct dpaa2_eth_priv *priv)
{
int retries = 10;
u32 pending;
do {
- pending = ingress_fq_count(priv);
+ pending = dpaa2_eth_ingress_fq_count(priv);
if (pending)
msleep(100);
} while (pending && --retries);
@@ -1605,7 +1606,7 @@ static void wait_for_ingress_fq_empty(struct dpaa2_eth_priv *priv)
#define DPNI_TX_PENDING_VER_MAJOR 7
#define DPNI_TX_PENDING_VER_MINOR 13
-static void wait_for_egress_fq_empty(struct dpaa2_eth_priv *priv)
+static void dpaa2_eth_wait_for_egress_fq_empty(struct dpaa2_eth_priv *priv)
{
union dpni_statistics stats;
int retries = 10;
@@ -1651,7 +1652,7 @@ static int dpaa2_eth_stop(struct net_device *net_dev)
* on WRIOP. After it finishes, wait until all remaining frames on Rx
* and Tx conf queues are consumed on NAPI poll.
*/
- wait_for_egress_fq_empty(priv);
+ dpaa2_eth_wait_for_egress_fq_empty(priv);
do {
dpni_disable(priv->mc_io, 0, priv->mc_token);
@@ -1667,11 +1668,11 @@ static int dpaa2_eth_stop(struct net_device *net_dev)
*/
}
- wait_for_ingress_fq_empty(priv);
- disable_ch_napi(priv);
+ dpaa2_eth_wait_for_ingress_fq_empty(priv);
+ dpaa2_eth_disable_ch_napi(priv);
/* Empty the buffer pool */
- drain_pool(priv);
+ dpaa2_eth_drain_pool(priv);
/* Empty the Scatter-Gather Buffer cache */
dpaa2_eth_sgt_cache_drain(priv);
@@ -1725,8 +1726,8 @@ static void dpaa2_eth_get_stats(struct net_device *net_dev,
/* Copy mac unicast addresses from @net_dev to @priv.
* Its sole purpose is to make dpaa2_eth_set_rx_mode() more readable.
*/
-static void add_uc_hw_addr(const struct net_device *net_dev,
- struct dpaa2_eth_priv *priv)
+static void dpaa2_eth_add_uc_hw_addr(const struct net_device *net_dev,
+ struct dpaa2_eth_priv *priv)
{
struct netdev_hw_addr *ha;
int err;
@@ -1744,8 +1745,8 @@ static void add_uc_hw_addr(const struct net_device *net_dev,
/* Copy mac multicast addresses from @net_dev to @priv
* Its sole purpose is to make dpaa2_eth_set_rx_mode() more readable.
*/
-static void add_mc_hw_addr(const struct net_device *net_dev,
- struct dpaa2_eth_priv *priv)
+static void dpaa2_eth_add_mc_hw_addr(const struct net_device *net_dev,
+ struct dpaa2_eth_priv *priv)
{
struct netdev_hw_addr *ha;
int err;
@@ -1810,7 +1811,7 @@ static void dpaa2_eth_set_rx_mode(struct net_device *net_dev)
err = dpni_clear_mac_filters(mc_io, 0, mc_token, 1, 0);
if (err)
netdev_warn(net_dev, "Can't clear uc filters\n");
- add_uc_hw_addr(net_dev, priv);
+ dpaa2_eth_add_uc_hw_addr(net_dev, priv);
/* Finally, clear uc promisc and set mc promisc as requested. */
err = dpni_set_unicast_promisc(mc_io, 0, mc_token, 0);
@@ -1833,8 +1834,8 @@ static void dpaa2_eth_set_rx_mode(struct net_device *net_dev)
err = dpni_clear_mac_filters(mc_io, 0, mc_token, 1, 1);
if (err)
netdev_warn(net_dev, "Can't clear mac filters\n");
- add_mc_hw_addr(net_dev, priv);
- add_uc_hw_addr(net_dev, priv);
+ dpaa2_eth_add_mc_hw_addr(net_dev, priv);
+ dpaa2_eth_add_uc_hw_addr(net_dev, priv);
/* Now we can clear both ucast and mcast promisc, without risking
* to drop legitimate frames anymore.
@@ -1868,14 +1869,14 @@ static int dpaa2_eth_set_features(struct net_device *net_dev,
if (changed & NETIF_F_RXCSUM) {
enable = !!(features & NETIF_F_RXCSUM);
- err = set_rx_csum(priv, enable);
+ err = dpaa2_eth_set_rx_csum(priv, enable);
if (err)
return err;
}
if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) {
enable = !!(features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM));
- err = set_tx_csum(priv, enable);
+ err = dpaa2_eth_set_tx_csum(priv, enable);
if (err)
return err;
}
@@ -1944,7 +1945,7 @@ static bool xdp_mtu_valid(struct dpaa2_eth_priv *priv, int mtu)
return true;
}
-static int set_rx_mfl(struct dpaa2_eth_priv *priv, int mtu, bool has_xdp)
+static int dpaa2_eth_set_rx_mfl(struct dpaa2_eth_priv *priv, int mtu, bool has_xdp)
{
int mfl, err;
@@ -1978,7 +1979,7 @@ static int dpaa2_eth_change_mtu(struct net_device *dev, int new_mtu)
if (!xdp_mtu_valid(priv, new_mtu))
return -EINVAL;
- err = set_rx_mfl(priv, new_mtu, true);
+ err = dpaa2_eth_set_rx_mfl(priv, new_mtu, true);
if (err)
return err;
@@ -1987,7 +1988,7 @@ static int dpaa2_eth_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}
-static int update_rx_buffer_headroom(struct dpaa2_eth_priv *priv, bool has_xdp)
+static int dpaa2_eth_update_rx_buffer_headroom(struct dpaa2_eth_priv *priv, bool has_xdp)
{
struct dpni_buffer_layout buf_layout = {0};
int err;
@@ -2013,7 +2014,7 @@ static int update_rx_buffer_headroom(struct dpaa2_eth_priv *priv, bool has_xdp)
return 0;
}
-static int setup_xdp(struct net_device *dev, struct bpf_prog *prog)
+static int dpaa2_eth_setup_xdp(struct net_device *dev, struct bpf_prog *prog)
{
struct dpaa2_eth_priv *priv = netdev_priv(dev);
struct dpaa2_eth_channel *ch;
@@ -2039,10 +2040,10 @@ static int setup_xdp(struct net_device *dev, struct bpf_prog *prog)
* so we are sure no old format buffers will be used from now on.
*/
if (need_update) {
- err = set_rx_mfl(priv, dev->mtu, !!prog);
+ err = dpaa2_eth_set_rx_mfl(priv, dev->mtu, !!prog);
if (err)
goto out_err;
- err = update_rx_buffer_headroom(priv, !!prog);
+ err = dpaa2_eth_update_rx_buffer_headroom(priv, !!prog);
if (err)
goto out_err;
}
@@ -2079,7 +2080,7 @@ static int dpaa2_eth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
switch (xdp->command) {
case XDP_SETUP_PROG:
- return setup_xdp(dev, xdp->prog);
+ return dpaa2_eth_setup_xdp(dev, xdp->prog);
default:
return -EINVAL;
}
@@ -2316,7 +2317,7 @@ static const struct net_device_ops dpaa2_eth_ops = {
.ndo_setup_tc = dpaa2_eth_setup_tc,
};
-static void cdan_cb(struct dpaa2_io_notification_ctx *ctx)
+static void dpaa2_eth_cdan_cb(struct dpaa2_io_notification_ctx *ctx)
{
struct dpaa2_eth_channel *ch;
@@ -2329,7 +2330,7 @@ static void cdan_cb(struct dpaa2_io_notification_ctx *ctx)
}
/* Allocate and configure a DPCON object */
-static struct fsl_mc_device *setup_dpcon(struct dpaa2_eth_priv *priv)
+static struct fsl_mc_device *dpaa2_eth_setup_dpcon(struct dpaa2_eth_priv *priv)
{
struct fsl_mc_device *dpcon;
struct device *dev = priv->net_dev->dev.parent;
@@ -2373,16 +2374,15 @@ static struct fsl_mc_device *setup_dpcon(struct dpaa2_eth_priv *priv)
return ERR_PTR(err);
}
-static void free_dpcon(struct dpaa2_eth_priv *priv,
- struct fsl_mc_device *dpcon)
+static void dpaa2_eth_free_dpcon(struct dpaa2_eth_priv *priv,
+ struct fsl_mc_device *dpcon)
{
dpcon_disable(priv->mc_io, 0, dpcon->mc_handle);
dpcon_close(priv->mc_io, 0, dpcon->mc_handle);
fsl_mc_object_free(dpcon);
}
-static struct dpaa2_eth_channel *
-alloc_channel(struct dpaa2_eth_priv *priv)
+static struct dpaa2_eth_channel *dpaa2_eth_alloc_channel(struct dpaa2_eth_priv *priv)
{
struct dpaa2_eth_channel *channel;
struct dpcon_attr attr;
@@ -2393,7 +2393,7 @@ alloc_channel(struct dpaa2_eth_priv *priv)
if (!channel)
return NULL;
- channel->dpcon = setup_dpcon(priv);
+ channel->dpcon = dpaa2_eth_setup_dpcon(priv);
if (IS_ERR(channel->dpcon)) {
err = PTR_ERR(channel->dpcon);
goto err_setup;
@@ -2413,23 +2413,23 @@ alloc_channel(struct dpaa2_eth_priv *priv)
return channel;
err_get_attr:
- free_dpcon(priv, channel->dpcon);
+ dpaa2_eth_free_dpcon(priv, channel->dpcon);
err_setup:
kfree(channel);
return ERR_PTR(err);
}
-static void free_channel(struct dpaa2_eth_priv *priv,
- struct dpaa2_eth_channel *channel)
+static void dpaa2_eth_free_channel(struct dpaa2_eth_priv *priv,
+ struct dpaa2_eth_channel *channel)
{
- free_dpcon(priv, channel->dpcon);
+ dpaa2_eth_free_dpcon(priv, channel->dpcon);
kfree(channel);
}
/* DPIO setup: allocate and configure QBMan channels, setup core affinity
* and register data availability notifications
*/
-static int setup_dpio(struct dpaa2_eth_priv *priv)
+static int dpaa2_eth_setup_dpio(struct dpaa2_eth_priv *priv)
{
struct dpaa2_io_notification_ctx *nctx;
struct dpaa2_eth_channel *channel;
@@ -2449,7 +2449,7 @@ static int setup_dpio(struct dpaa2_eth_priv *priv)
cpumask_clear(&priv->dpio_cpumask);
for_each_online_cpu(i) {
/* Try to allocate a channel */
- channel = alloc_channel(priv);
+ channel = dpaa2_eth_alloc_channel(priv);
if (IS_ERR_OR_NULL(channel)) {
err = PTR_ERR_OR_ZERO(channel);
if (err != -EPROBE_DEFER)
@@ -2462,7 +2462,7 @@ static int setup_dpio(struct dpaa2_eth_priv *priv)
nctx = &channel->nctx;
nctx->is_cdan = 1;
- nctx->cb = cdan_cb;
+ nctx->cb = dpaa2_eth_cdan_cb;
nctx->id = channel->ch_id;
nctx->desired_cpu = i;
@@ -2510,14 +2510,14 @@ static int setup_dpio(struct dpaa2_eth_priv *priv)
err_set_cdan:
dpaa2_io_service_deregister(channel->dpio, nctx, dev);
err_service_reg:
- free_channel(priv, channel);
+ dpaa2_eth_free_channel(priv, channel);
err_alloc_ch:
if (err == -EPROBE_DEFER) {
for (i = 0; i < priv->num_channels; i++) {
channel = priv->channel[i];
nctx = &channel->nctx;
dpaa2_io_service_deregister(channel->dpio, nctx, dev);
- free_channel(priv, channel);
+ dpaa2_eth_free_channel(priv, channel);
}
priv->num_channels = 0;
return err;
@@ -2534,7 +2534,7 @@ static int setup_dpio(struct dpaa2_eth_priv *priv)
return 0;
}
-static void free_dpio(struct dpaa2_eth_priv *priv)
+static void dpaa2_eth_free_dpio(struct dpaa2_eth_priv *priv)
{
struct device *dev = priv->net_dev->dev.parent;
struct dpaa2_eth_channel *ch;
@@ -2544,12 +2544,12 @@ static void free_dpio(struct dpaa2_eth_priv *priv)
for (i = 0; i < priv->num_channels; i++) {
ch = priv->channel[i];
dpaa2_io_service_deregister(ch->dpio, &ch->nctx, dev);
- free_channel(priv, ch);
+ dpaa2_eth_free_channel(priv, ch);
}
}
-static struct dpaa2_eth_channel *get_affine_channel(struct dpaa2_eth_priv *priv,
- int cpu)
+static struct dpaa2_eth_channel *dpaa2_eth_get_affine_channel(struct dpaa2_eth_priv *priv,
+ int cpu)
{
struct device *dev = priv->net_dev->dev.parent;
int i;
@@ -2566,7 +2566,7 @@ static struct dpaa2_eth_channel *get_affine_channel(struct dpaa2_eth_priv *priv,
return priv->channel[0];
}
-static void set_fq_affinity(struct dpaa2_eth_priv *priv)
+static void dpaa2_eth_set_fq_affinity(struct dpaa2_eth_priv *priv)
{
struct device *dev = priv->net_dev->dev.parent;
struct dpaa2_eth_fq *fq;
@@ -2597,13 +2597,13 @@ static void set_fq_affinity(struct dpaa2_eth_priv *priv)
default:
dev_err(dev, "Unknown FQ type: %d\n", fq->type);
}
- fq->channel = get_affine_channel(priv, fq->target_cpu);
+ fq->channel = dpaa2_eth_get_affine_channel(priv, fq->target_cpu);
}
update_xps(priv);
}
-static void setup_fqs(struct dpaa2_eth_priv *priv)
+static void dpaa2_eth_setup_fqs(struct dpaa2_eth_priv *priv)
{
int i, j;
@@ -2627,11 +2627,11 @@ static void setup_fqs(struct dpaa2_eth_priv *priv)
}
/* For each FQ, decide on which core to process incoming frames */
- set_fq_affinity(priv);
+ dpaa2_eth_set_fq_affinity(priv);
}
/* Allocate and configure one buffer pool for each interface */
-static int setup_dpbp(struct dpaa2_eth_priv *priv)
+static int dpaa2_eth_setup_dpbp(struct dpaa2_eth_priv *priv)
{
int err;
struct fsl_mc_device *dpbp_dev;
@@ -2690,15 +2690,15 @@ static int setup_dpbp(struct dpaa2_eth_priv *priv)
return err;
}
-static void free_dpbp(struct dpaa2_eth_priv *priv)
+static void dpaa2_eth_free_dpbp(struct dpaa2_eth_priv *priv)
{
- drain_pool(priv);
+ dpaa2_eth_drain_pool(priv);
dpbp_disable(priv->mc_io, 0, priv->dpbp_dev->mc_handle);
dpbp_close(priv->mc_io, 0, priv->dpbp_dev->mc_handle);
fsl_mc_object_free(priv->dpbp_dev);
}
-static int set_buffer_layout(struct dpaa2_eth_priv *priv)
+static int dpaa2_eth_set_buffer_layout(struct dpaa2_eth_priv *priv)
{
struct device *dev = priv->net_dev->dev.parent;
struct dpni_buffer_layout buf_layout = {0};
@@ -2815,7 +2815,7 @@ static inline int dpaa2_eth_enqueue_fq_multiple(struct dpaa2_eth_priv *priv,
return 0;
}
-static void set_enqueue_mode(struct dpaa2_eth_priv *priv)
+static void dpaa2_eth_set_enqueue_mode(struct dpaa2_eth_priv *priv)
{
if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_ENQUEUE_FQID_VER_MAJOR,
DPNI_ENQUEUE_FQID_VER_MINOR) < 0)
@@ -2824,7 +2824,7 @@ static void set_enqueue_mode(struct dpaa2_eth_priv *priv)
priv->enqueue = dpaa2_eth_enqueue_fq_multiple;
}
-static int set_pause(struct dpaa2_eth_priv *priv)
+static int dpaa2_eth_set_pause(struct dpaa2_eth_priv *priv)
{
struct device *dev = priv->net_dev->dev.parent;
struct dpni_link_cfg link_cfg = {0};
@@ -2851,7 +2851,7 @@ static int set_pause(struct dpaa2_eth_priv *priv)
return 0;
}
-static void update_tx_fqids(struct dpaa2_eth_priv *priv)
+static void dpaa2_eth_update_tx_fqids(struct dpaa2_eth_priv *priv)
{
struct dpni_queue_id qid = {0};
struct dpaa2_eth_fq *fq;
@@ -2893,7 +2893,7 @@ static void update_tx_fqids(struct dpaa2_eth_priv *priv)
}
/* Configure ingress classification based on VLAN PCP */
-static int set_vlan_qos(struct dpaa2_eth_priv *priv)
+static int dpaa2_eth_set_vlan_qos(struct dpaa2_eth_priv *priv)
{
struct device *dev = priv->net_dev->dev.parent;
struct dpkg_profile_cfg kg_cfg = {0};
@@ -3005,7 +3005,7 @@ static int set_vlan_qos(struct dpaa2_eth_priv *priv)
}
/* Configure the DPNI object this interface is associated with */
-static int setup_dpni(struct fsl_mc_device *ls_dev)
+static int dpaa2_eth_setup_dpni(struct fsl_mc_device *ls_dev)
{
struct device *dev = &ls_dev->dev;
struct dpaa2_eth_priv *priv;
@@ -3053,20 +3053,20 @@ static int setup_dpni(struct fsl_mc_device *ls_dev)
goto close;
}
- err = set_buffer_layout(priv);
+ err = dpaa2_eth_set_buffer_layout(priv);
if (err)
goto close;
- set_enqueue_mode(priv);
+ dpaa2_eth_set_enqueue_mode(priv);
/* Enable pause frame support */
if (dpaa2_eth_has_pause_support(priv)) {
- err = set_pause(priv);
+ err = dpaa2_eth_set_pause(priv);
if (err)
goto close;
}
- err = set_vlan_qos(priv);
+ err = dpaa2_eth_set_vlan_qos(priv);
if (err && err != -EOPNOTSUPP)
goto close;
@@ -3086,7 +3086,7 @@ static int setup_dpni(struct fsl_mc_device *ls_dev)
return err;
}
-static void free_dpni(struct dpaa2_eth_priv *priv)
+static void dpaa2_eth_free_dpni(struct dpaa2_eth_priv *priv)
{
int err;
@@ -3098,8 +3098,8 @@ static void free_dpni(struct dpaa2_eth_priv *priv)
dpni_close(priv->mc_io, 0, priv->mc_token);
}
-static int setup_rx_flow(struct dpaa2_eth_priv *priv,
- struct dpaa2_eth_fq *fq)
+static int dpaa2_eth_setup_rx_flow(struct dpaa2_eth_priv *priv,
+ struct dpaa2_eth_fq *fq)
{
struct device *dev = priv->net_dev->dev.parent;
struct dpni_queue queue;
@@ -3150,8 +3150,8 @@ static int setup_rx_flow(struct dpaa2_eth_priv *priv,
return 0;
}
-static int setup_tx_flow(struct dpaa2_eth_priv *priv,
- struct dpaa2_eth_fq *fq)
+static int dpaa2_eth_setup_tx_flow(struct dpaa2_eth_priv *priv,
+ struct dpaa2_eth_fq *fq)
{
struct device *dev = priv->net_dev->dev.parent;
struct dpni_queue queue;
@@ -3266,7 +3266,7 @@ static const struct dpaa2_eth_dist_fields dist_fields[] = {
};
/* Configure the Rx hash key using the legacy API */
-static int config_legacy_hash_key(struct dpaa2_eth_priv *priv, dma_addr_t key)
+static int dpaa2_eth_config_legacy_hash_key(struct dpaa2_eth_priv *priv, dma_addr_t key)
{
struct device *dev = priv->net_dev->dev.parent;
struct dpni_rx_tc_dist_cfg dist_cfg;
@@ -3291,7 +3291,7 @@ static int config_legacy_hash_key(struct dpaa2_eth_priv *priv, dma_addr_t key)
}
/* Configure the Rx hash key using the new API */
-static int config_hash_key(struct dpaa2_eth_priv *priv, dma_addr_t key)
+static int dpaa2_eth_config_hash_key(struct dpaa2_eth_priv *priv, dma_addr_t key)
{
struct device *dev = priv->net_dev->dev.parent;
struct dpni_rx_dist_cfg dist_cfg;
@@ -3317,7 +3317,7 @@ static int config_hash_key(struct dpaa2_eth_priv *priv, dma_addr_t key)
}
/* Configure the Rx flow classification key */
-static int config_cls_key(struct dpaa2_eth_priv *priv, dma_addr_t key)
+static int dpaa2_eth_config_cls_key(struct dpaa2_eth_priv *priv, dma_addr_t key)
{
struct device *dev = priv->net_dev->dev.parent;
struct dpni_rx_dist_cfg dist_cfg;
@@ -3452,11 +3452,11 @@ static int dpaa2_eth_set_dist_key(struct net_device *net_dev,
if (type == DPAA2_ETH_RX_DIST_HASH) {
if (dpaa2_eth_has_legacy_dist(priv))
- err = config_legacy_hash_key(priv, key_iova);
+ err = dpaa2_eth_config_legacy_hash_key(priv, key_iova);
else
- err = config_hash_key(priv, key_iova);
+ err = dpaa2_eth_config_hash_key(priv, key_iova);
} else {
- err = config_cls_key(priv, key_iova);
+ err = dpaa2_eth_config_cls_key(priv, key_iova);
}
dma_unmap_single(dev, key_iova, DPAA2_CLASSIFIER_DMA_SIZE,
@@ -3531,7 +3531,7 @@ static int dpaa2_eth_set_default_cls(struct dpaa2_eth_priv *priv)
/* Bind the DPNI to its needed objects and resources: buffer pool, DPIOs,
* frame queues and channels
*/
-static int bind_dpni(struct dpaa2_eth_priv *priv)
+static int dpaa2_eth_bind_dpni(struct dpaa2_eth_priv *priv)
{
struct net_device *net_dev = priv->net_dev;
struct device *dev = net_dev->dev.parent;
@@ -3579,10 +3579,10 @@ static int bind_dpni(struct dpaa2_eth_priv *priv)
for (i = 0; i < priv->num_fqs; i++) {
switch (priv->fq[i].type) {
case DPAA2_RX_FQ:
- err = setup_rx_flow(priv, &priv->fq[i]);
+ err = dpaa2_eth_setup_rx_flow(priv, &priv->fq[i]);
break;
case DPAA2_TX_CONF_FQ:
- err = setup_tx_flow(priv, &priv->fq[i]);
+ err = dpaa2_eth_setup_tx_flow(priv, &priv->fq[i]);
break;
default:
dev_err(dev, "Invalid FQ type %d\n", priv->fq[i].type);
@@ -3603,7 +3603,7 @@ static int bind_dpni(struct dpaa2_eth_priv *priv)
}
/* Allocate rings for storing incoming frame descriptors */
-static int alloc_rings(struct dpaa2_eth_priv *priv)
+static int dpaa2_eth_alloc_rings(struct dpaa2_eth_priv *priv)
{
struct net_device *net_dev = priv->net_dev;
struct device *dev = net_dev->dev.parent;
@@ -3630,7 +3630,7 @@ static int alloc_rings(struct dpaa2_eth_priv *priv)
return -ENOMEM;
}
-static void free_rings(struct dpaa2_eth_priv *priv)
+static void dpaa2_eth_free_rings(struct dpaa2_eth_priv *priv)
{
int i;
@@ -3638,7 +3638,7 @@ static void free_rings(struct dpaa2_eth_priv *priv)
dpaa2_io_store_destroy(priv->channel[i]->store);
}
-static int set_mac_addr(struct dpaa2_eth_priv *priv)
+static int dpaa2_eth_set_mac_addr(struct dpaa2_eth_priv *priv)
{
struct net_device *net_dev = priv->net_dev;
struct device *dev = net_dev->dev.parent;
@@ -3703,7 +3703,7 @@ static int set_mac_addr(struct dpaa2_eth_priv *priv)
return 0;
}
-static int netdev_init(struct net_device *net_dev)
+static int dpaa2_eth_netdev_init(struct net_device *net_dev)
{
struct device *dev = net_dev->dev.parent;
struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
@@ -3716,7 +3716,7 @@ static int netdev_init(struct net_device *net_dev)
net_dev->netdev_ops = &dpaa2_eth_ops;
net_dev->ethtool_ops = &dpaa2_ethtool_ops;
- err = set_mac_addr(priv);
+ err = dpaa2_eth_set_mac_addr(priv);
if (err)
return err;
@@ -3771,13 +3771,13 @@ static int netdev_init(struct net_device *net_dev)
return 0;
}
-static int poll_link_state(void *arg)
+static int dpaa2_eth_poll_link_state(void *arg)
{
struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)arg;
int err;
while (!kthread_should_stop()) {
- err = link_state_update(priv);
+ err = dpaa2_eth_link_state_update(priv);
if (unlikely(err))
return err;
@@ -3847,11 +3847,11 @@ static irqreturn_t dpni_irq0_handler_thread(int irq_num, void *arg)
}
if (status & DPNI_IRQ_EVENT_LINK_CHANGED)
- link_state_update(netdev_priv(net_dev));
+ dpaa2_eth_link_state_update(netdev_priv(net_dev));
if (status & DPNI_IRQ_EVENT_ENDPOINT_CHANGED) {
- set_mac_addr(netdev_priv(net_dev));
- update_tx_fqids(priv);
+ dpaa2_eth_set_mac_addr(netdev_priv(net_dev));
+ dpaa2_eth_update_tx_fqids(priv);
rtnl_lock();
if (priv->mac)
@@ -3864,7 +3864,7 @@ static irqreturn_t dpni_irq0_handler_thread(int irq_num, void *arg)
return IRQ_HANDLED;
}
-static int setup_irqs(struct fsl_mc_device *ls_dev)
+static int dpaa2_eth_setup_irqs(struct fsl_mc_device *ls_dev)
{
int err = 0;
struct fsl_mc_device_irq *irq;
@@ -3910,7 +3910,7 @@ static int setup_irqs(struct fsl_mc_device *ls_dev)
return err;
}
-static void add_ch_napi(struct dpaa2_eth_priv *priv)
+static void dpaa2_eth_add_ch_napi(struct dpaa2_eth_priv *priv)
{
int i;
struct dpaa2_eth_channel *ch;
@@ -3923,7 +3923,7 @@ static void add_ch_napi(struct dpaa2_eth_priv *priv)
}
}
-static void del_ch_napi(struct dpaa2_eth_priv *priv)
+static void dpaa2_eth_del_ch_napi(struct dpaa2_eth_priv *priv)
{
int i;
struct dpaa2_eth_channel *ch;
@@ -3970,26 +3970,26 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
}
/* MC objects initialization and configuration */
- err = setup_dpni(dpni_dev);
+ err = dpaa2_eth_setup_dpni(dpni_dev);
if (err)
goto err_dpni_setup;
- err = setup_dpio(priv);
+ err = dpaa2_eth_setup_dpio(priv);
if (err)
goto err_dpio_setup;
- setup_fqs(priv);
+ dpaa2_eth_setup_fqs(priv);
- err = setup_dpbp(priv);
+ err = dpaa2_eth_setup_dpbp(priv);
if (err)
goto err_dpbp_setup;
- err = bind_dpni(priv);
+ err = dpaa2_eth_bind_dpni(priv);
if (err)
goto err_bind;
/* Add a NAPI context for each channel */
- add_ch_napi(priv);
+ dpaa2_eth_add_ch_napi(priv);
/* Percpu statistics */
priv->percpu_stats = alloc_percpu(*priv->percpu_stats);
@@ -4012,21 +4012,21 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
goto err_alloc_sgt_cache;
}
- err = netdev_init(net_dev);
+ err = dpaa2_eth_netdev_init(net_dev);
if (err)
goto err_netdev_init;
/* Configure checksum offload based on current interface flags */
- err = set_rx_csum(priv, !!(net_dev->features & NETIF_F_RXCSUM));
+ err = dpaa2_eth_set_rx_csum(priv, !!(net_dev->features & NETIF_F_RXCSUM));
if (err)
goto err_csum;
- err = set_tx_csum(priv, !!(net_dev->features &
- (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)));
+ err = dpaa2_eth_set_tx_csum(priv,
+ !!(net_dev->features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)));
if (err)
goto err_csum;
- err = alloc_rings(priv);
+ err = dpaa2_eth_alloc_rings(priv);
if (err)
goto err_alloc_rings;
@@ -4039,10 +4039,10 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
}
#endif
- err = setup_irqs(dpni_dev);
+ err = dpaa2_eth_setup_irqs(dpni_dev);
if (err) {
netdev_warn(net_dev, "Failed to set link interrupt, fall back to polling\n");
- priv->poll_thread = kthread_run(poll_link_state, priv,
+ priv->poll_thread = kthread_run(dpaa2_eth_poll_link_state, priv,
"%s_poll_link", net_dev->name);
if (IS_ERR(priv->poll_thread)) {
dev_err(dev, "Error starting polling thread\n");
@@ -4076,7 +4076,7 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
else
fsl_mc_free_irqs(dpni_dev);
err_poll_thread:
- free_rings(priv);
+ dpaa2_eth_free_rings(priv);
err_alloc_rings:
err_csum:
err_netdev_init:
@@ -4086,13 +4086,13 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
err_alloc_percpu_extras:
free_percpu(priv->percpu_stats);
err_alloc_percpu_stats:
- del_ch_napi(priv);
+ dpaa2_eth_del_ch_napi(priv);
err_bind:
- free_dpbp(priv);
+ dpaa2_eth_free_dpbp(priv);
err_dpbp_setup:
- free_dpio(priv);
+ dpaa2_eth_free_dpio(priv);
err_dpio_setup:
- free_dpni(priv);
+ dpaa2_eth_free_dpni(priv);
err_dpni_setup:
fsl_mc_portal_free(priv->mc_io);
err_portal_alloc:
@@ -4126,15 +4126,15 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
else
fsl_mc_free_irqs(ls_dev);
- free_rings(priv);
+ dpaa2_eth_free_rings(priv);
free_percpu(priv->sgt_cache);
free_percpu(priv->percpu_stats);
free_percpu(priv->percpu_extras);
- del_ch_napi(priv);
- free_dpbp(priv);
- free_dpio(priv);
- free_dpni(priv);
+ dpaa2_eth_del_ch_napi(priv);
+ dpaa2_eth_free_dpbp(priv);
+ dpaa2_eth_free_dpio(priv);
+ dpaa2_eth_free_dpni(priv);
fsl_mc_portal_free(priv->mc_io);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
index 8356f1f..26bd99b 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
@@ -316,8 +316,8 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
dpaa2_mac_get_ethtool_stats(priv->mac, data + i);
}
-static int prep_eth_rule(struct ethhdr *eth_value, struct ethhdr *eth_mask,
- void *key, void *mask, u64 *fields)
+static int dpaa2_eth_prep_eth_rule(struct ethhdr *eth_value, struct ethhdr *eth_mask,
+ void *key, void *mask, u64 *fields)
{
int off;
@@ -345,9 +345,9 @@ static int prep_eth_rule(struct ethhdr *eth_value, struct ethhdr *eth_mask,
return 0;
}
-static int prep_uip_rule(struct ethtool_usrip4_spec *uip_value,
- struct ethtool_usrip4_spec *uip_mask,
- void *key, void *mask, u64 *fields)
+static int dpaa2_eth_prep_uip_rule(struct ethtool_usrip4_spec *uip_value,
+ struct ethtool_usrip4_spec *uip_mask,
+ void *key, void *mask, u64 *fields)
{
int off;
u32 tmp_value, tmp_mask;
@@ -400,9 +400,9 @@ static int prep_uip_rule(struct ethtool_usrip4_spec *uip_value,
return 0;
}
-static int prep_l4_rule(struct ethtool_tcpip4_spec *l4_value,
- struct ethtool_tcpip4_spec *l4_mask,
- void *key, void *mask, u8 l4_proto, u64 *fields)
+static int dpaa2_eth_prep_l4_rule(struct ethtool_tcpip4_spec *l4_value,
+ struct ethtool_tcpip4_spec *l4_mask,
+ void *key, void *mask, u8 l4_proto, u64 *fields)
{
int off;
@@ -451,9 +451,9 @@ static int prep_l4_rule(struct ethtool_tcpip4_spec *l4_value,
return 0;
}
-static int prep_ext_rule(struct ethtool_flow_ext *ext_value,
- struct ethtool_flow_ext *ext_mask,
- void *key, void *mask, u64 *fields)
+static int dpaa2_eth_prep_ext_rule(struct ethtool_flow_ext *ext_value,
+ struct ethtool_flow_ext *ext_mask,
+ void *key, void *mask, u64 *fields)
{
int off;
@@ -470,9 +470,9 @@ static int prep_ext_rule(struct ethtool_flow_ext *ext_value,
return 0;
}
-static int prep_mac_ext_rule(struct ethtool_flow_ext *ext_value,
- struct ethtool_flow_ext *ext_mask,
- void *key, void *mask, u64 *fields)
+static int dpaa2_eth_prep_mac_ext_rule(struct ethtool_flow_ext *ext_value,
+ struct ethtool_flow_ext *ext_mask,
+ void *key, void *mask, u64 *fields)
{
int off;
@@ -486,32 +486,32 @@ static int prep_mac_ext_rule(struct ethtool_flow_ext *ext_value,
return 0;
}
-static int prep_cls_rule(struct ethtool_rx_flow_spec *fs, void *key, void *mask,
- u64 *fields)
+static int dpaa2_eth_prep_cls_rule(struct ethtool_rx_flow_spec *fs, void *key,
+ void *mask, u64 *fields)
{
int err;
switch (fs->flow_type & 0xFF) {
case ETHER_FLOW:
- err = prep_eth_rule(&fs->h_u.ether_spec, &fs->m_u.ether_spec,
- key, mask, fields);
+ err = dpaa2_eth_prep_eth_rule(&fs->h_u.ether_spec, &fs->m_u.ether_spec,
+ key, mask, fields);
break;
case IP_USER_FLOW:
- err = prep_uip_rule(&fs->h_u.usr_ip4_spec,
- &fs->m_u.usr_ip4_spec, key, mask, fields);
+ err = dpaa2_eth_prep_uip_rule(&fs->h_u.usr_ip4_spec,
+ &fs->m_u.usr_ip4_spec, key, mask, fields);
break;
case TCP_V4_FLOW:
- err = prep_l4_rule(&fs->h_u.tcp_ip4_spec, &fs->m_u.tcp_ip4_spec,
- key, mask, IPPROTO_TCP, fields);
+ err = dpaa2_eth_prep_l4_rule(&fs->h_u.tcp_ip4_spec, &fs->m_u.tcp_ip4_spec,
+ key, mask, IPPROTO_TCP, fields);
break;
case UDP_V4_FLOW:
- err = prep_l4_rule(&fs->h_u.udp_ip4_spec, &fs->m_u.udp_ip4_spec,
- key, mask, IPPROTO_UDP, fields);
+ err = dpaa2_eth_prep_l4_rule(&fs->h_u.udp_ip4_spec, &fs->m_u.udp_ip4_spec,
+ key, mask, IPPROTO_UDP, fields);
break;
case SCTP_V4_FLOW:
- err = prep_l4_rule(&fs->h_u.sctp_ip4_spec,
- &fs->m_u.sctp_ip4_spec, key, mask,
- IPPROTO_SCTP, fields);
+ err = dpaa2_eth_prep_l4_rule(&fs->h_u.sctp_ip4_spec,
+ &fs->m_u.sctp_ip4_spec, key, mask,
+ IPPROTO_SCTP, fields);
break;
default:
return -EOPNOTSUPP;
@@ -521,14 +521,14 @@ static int prep_cls_rule(struct ethtool_rx_flow_spec *fs, void *key, void *mask,
return err;
if (fs->flow_type & FLOW_EXT) {
- err = prep_ext_rule(&fs->h_ext, &fs->m_ext, key, mask, fields);
+ err = dpaa2_eth_prep_ext_rule(&fs->h_ext, &fs->m_ext, key, mask, fields);
if (err)
return err;
}
if (fs->flow_type & FLOW_MAC_EXT) {
- err = prep_mac_ext_rule(&fs->h_ext, &fs->m_ext, key, mask,
- fields);
+ err = dpaa2_eth_prep_mac_ext_rule(&fs->h_ext, &fs->m_ext, key,
+ mask, fields);
if (err)
return err;
}
@@ -536,9 +536,9 @@ static int prep_cls_rule(struct ethtool_rx_flow_spec *fs, void *key, void *mask,
return 0;
}
-static int do_cls_rule(struct net_device *net_dev,
- struct ethtool_rx_flow_spec *fs,
- bool add)
+static int dpaa2_eth_do_cls_rule(struct net_device *net_dev,
+ struct ethtool_rx_flow_spec *fs,
+ bool add)
{
struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
struct device *dev = net_dev->dev.parent;
@@ -561,7 +561,7 @@ static int do_cls_rule(struct net_device *net_dev,
return -ENOMEM;
/* Fill the key and mask memory areas */
- err = prep_cls_rule(fs, key_buf, key_buf + rule_cfg.key_size, &fields);
+ err = dpaa2_eth_prep_cls_rule(fs, key_buf, key_buf + rule_cfg.key_size, &fields);
if (err)
goto free_mem;
@@ -629,7 +629,7 @@ static int do_cls_rule(struct net_device *net_dev,
return err;
}
-static int num_rules(struct dpaa2_eth_priv *priv)
+static int dpaa2_eth_num_cls_rules(struct dpaa2_eth_priv *priv)
{
int i, rules = 0;
@@ -640,9 +640,9 @@ static int num_rules(struct dpaa2_eth_priv *priv)
return rules;
}
-static int update_cls_rule(struct net_device *net_dev,
- struct ethtool_rx_flow_spec *new_fs,
- unsigned int location)
+static int dpaa2_eth_update_cls_rule(struct net_device *net_dev,
+ struct ethtool_rx_flow_spec *new_fs,
+ unsigned int location)
{
struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
struct dpaa2_eth_cls_rule *rule;
@@ -658,13 +658,14 @@ static int update_cls_rule(struct net_device *net_dev,
/* If a rule is present at the specified location, delete it. */
if (rule->in_use) {
- err = do_cls_rule(net_dev, &rule->fs, false);
+ err = dpaa2_eth_do_cls_rule(net_dev, &rule->fs, false);
if (err)
return err;
rule->in_use = 0;
- if (!dpaa2_eth_fs_mask_enabled(priv) && !num_rules(priv))
+ if (!dpaa2_eth_fs_mask_enabled(priv) &&
+ !dpaa2_eth_num_cls_rules(priv))
priv->rx_cls_fields = 0;
}
@@ -672,7 +673,7 @@ static int update_cls_rule(struct net_device *net_dev,
if (!new_fs)
return err;
- err = do_cls_rule(net_dev, new_fs, true);
+ err = dpaa2_eth_do_cls_rule(net_dev, new_fs, true);
if (err)
return err;
@@ -702,7 +703,7 @@ static int dpaa2_eth_get_rxnfc(struct net_device *net_dev,
break;
case ETHTOOL_GRXCLSRLCNT:
rxnfc->rule_cnt = 0;
- rxnfc->rule_cnt = num_rules(priv);
+ rxnfc->rule_cnt = dpaa2_eth_num_cls_rules(priv);
rxnfc->data = max_rules;
break;
case ETHTOOL_GRXCLSRULE:
@@ -744,10 +745,10 @@ static int dpaa2_eth_set_rxnfc(struct net_device *net_dev,
err = dpaa2_eth_set_hash(net_dev, rxnfc->data);
break;
case ETHTOOL_SRXCLSRLINS:
- err = update_cls_rule(net_dev, &rxnfc->fs, rxnfc->fs.location);
+ err = dpaa2_eth_update_cls_rule(net_dev, &rxnfc->fs, rxnfc->fs.location);
break;
case ETHTOOL_SRXCLSRLDEL:
- err = update_cls_rule(net_dev, NULL, rxnfc->fs.location);
+ err = dpaa2_eth_update_cls_rule(net_dev, NULL, rxnfc->fs.location);
break;
default:
err = -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index bf846b4..78e008b 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -562,10 +562,13 @@ fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev)
BD_ENET_TX_TC);
CBDS_SC(bdp, BD_ENET_TX_READY);
- if ((CBDR_SC(bdp) & BD_ENET_TX_WRAP) == 0)
- bdp++, curidx++;
- else
- bdp = fep->tx_bd_base, curidx = 0;
+ if ((CBDR_SC(bdp) & BD_ENET_TX_WRAP) == 0) {
+ bdp++;
+ curidx++;
+ } else {
+ bdp = fep->tx_bd_base;
+ curidx = 0;
+ }
len = skb_frag_size(frag);
CBDW_BUFADDR(bdp, skb_frag_dma_map(fep->dev, frag, 0, len,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index b43dec0..b98244f 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -13,8 +13,6 @@
#include "hns_dsaf_ppe.h"
#include "hns_dsaf_rcb.h"
-#define AE_NAME_PORT_ID_IDX 6
-
static struct hns_mac_cb *hns_get_mac_cb(struct hnae_handle *handle)
{
struct hnae_vf_cb *vf_cb = hns_ae_get_vf_cb(handle);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 22522f8..b13f3a5 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -557,10 +557,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
va = (unsigned char *)desc_cb->buf + desc_cb->page_offset;
/* prefetch first cache line of first page */
- prefetch(va);
-#if L1_CACHE_BYTES < 128
- prefetch(va + L1_CACHE_BYTES);
-#endif
+ net_prefetch(va);
skb = *out_skb = napi_alloc_skb(&ring_data->napi,
HNS_RX_HEAD_SIZE);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index a4f1d51..47ab2a5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3093,10 +3093,7 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring)
* lines. In such a case, single fetch would suffice to cache in the
* relevant part of the header.
*/
- prefetch(ring->va);
-#if L1_CACHE_BYTES < 128
- prefetch(ring->va + L1_CACHE_BYTES);
-#endif
+ net_prefetch(ring->va);
if (!skb) {
ret = hns3_alloc_skb(ring, length, ring->va);
diff --git a/drivers/net/ethernet/huawei/hinic/Makefile b/drivers/net/ethernet/huawei/hinic/Makefile
index 67b59d0..2f89119 100644
--- a/drivers/net/ethernet/huawei/hinic/Makefile
+++ b/drivers/net/ethernet/huawei/hinic/Makefile
@@ -4,4 +4,5 @@
hinic-y := hinic_main.o hinic_tx.o hinic_rx.o hinic_port.o hinic_hw_dev.o \
hinic_hw_io.o hinic_hw_qp.o hinic_hw_cmdq.o hinic_hw_wq.o \
hinic_hw_mgmt.o hinic_hw_api_cmd.o hinic_hw_eqs.o hinic_hw_if.o \
- hinic_common.o hinic_ethtool.o hinic_devlink.o hinic_hw_mbox.o hinic_sriov.o
+ hinic_common.o hinic_ethtool.o hinic_devlink.o hinic_hw_mbox.o \
+ hinic_sriov.o hinic_debugfs.o
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_debugfs.c b/drivers/net/ethernet/huawei/hinic/hinic_debugfs.c
new file mode 100644
index 0000000..19eb839
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_debugfs.c
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ */
+
+#include <linux/debugfs.h>
+#include <linux/device.h>
+
+#include "hinic_debugfs.h"
+
+static struct dentry *hinic_dbgfs_root;
+
+enum sq_dbg_info {
+ GLB_SQ_ID,
+ SQ_PI,
+ SQ_CI,
+ SQ_FI,
+ SQ_MSIX_ENTRY,
+};
+
+static char *sq_fields[] = {"glb_sq_id", "sq_pi", "sq_ci", "sq_fi", "sq_msix_entry"};
+
+static u64 hinic_dbg_get_sq_info(struct hinic_dev *nic_dev, struct hinic_sq *sq, int idx)
+{
+ struct hinic_wq *wq = sq->wq;
+
+ switch (idx) {
+ case GLB_SQ_ID:
+ return nic_dev->hwdev->func_to_io.global_qpn + sq->qid;
+ case SQ_PI:
+ return atomic_read(&wq->prod_idx) & wq->mask;
+ case SQ_CI:
+ return atomic_read(&wq->cons_idx) & wq->mask;
+ case SQ_FI:
+ return be16_to_cpu(*(__be16 *)(sq->hw_ci_addr)) & wq->mask;
+ case SQ_MSIX_ENTRY:
+ return sq->msix_entry;
+ }
+
+ return 0;
+}
+
+enum rq_dbg_info {
+ GLB_RQ_ID,
+ RQ_HW_PI,
+ RQ_SW_CI,
+ RQ_SW_PI,
+ RQ_MSIX_ENTRY,
+};
+
+static char *rq_fields[] = {"glb_rq_id", "rq_hw_pi", "rq_sw_ci", "rq_sw_pi", "rq_msix_entry"};
+
+static u64 hinic_dbg_get_rq_info(struct hinic_dev *nic_dev, struct hinic_rq *rq, int idx)
+{
+ struct hinic_wq *wq = rq->wq;
+
+ switch (idx) {
+ case GLB_RQ_ID:
+ return nic_dev->hwdev->func_to_io.global_qpn + rq->qid;
+ case RQ_HW_PI:
+ return be16_to_cpu(*(__be16 *)(rq->pi_virt_addr)) & wq->mask;
+ case RQ_SW_CI:
+ return atomic_read(&wq->cons_idx) & wq->mask;
+ case RQ_SW_PI:
+ return atomic_read(&wq->prod_idx) & wq->mask;
+ case RQ_MSIX_ENTRY:
+ return rq->msix_entry;
+ }
+
+ return 0;
+}
+
+enum func_tbl_info {
+ VALID,
+ RX_MODE,
+ MTU,
+ RQ_DEPTH,
+ QUEUE_NUM,
+};
+
+static char *func_table_fields[] = {"valid", "rx_mode", "mtu", "rq_depth", "cfg_q_num"};
+
+static int hinic_dbg_get_func_table(struct hinic_dev *nic_dev, int idx)
+{
+ struct tag_sml_funcfg_tbl *funcfg_table_elem;
+ struct hinic_cmd_lt_rd *read_data;
+ u16 out_size = sizeof(*read_data);
+ int err;
+
+ read_data = kzalloc(sizeof(*read_data), GFP_KERNEL);
+ if (!read_data)
+ return ~0;
+
+ read_data->node = TBL_ID_FUNC_CFG_SM_NODE;
+ read_data->inst = TBL_ID_FUNC_CFG_SM_INST;
+ read_data->entry_size = HINIC_FUNCTION_CONFIGURE_TABLE_SIZE;
+ read_data->lt_index = HINIC_HWIF_FUNC_IDX(nic_dev->hwdev->hwif);
+ read_data->len = HINIC_FUNCTION_CONFIGURE_TABLE_SIZE;
+
+ err = hinic_port_msg_cmd(nic_dev->hwdev, HINIC_PORT_CMD_RD_LINE_TBL, read_data,
+ sizeof(*read_data), read_data, &out_size);
+ if (err || out_size != sizeof(*read_data) || read_data->status) {
+ netif_err(nic_dev, drv, nic_dev->netdev,
+ "Failed to get func table, err: %d, status: 0x%x, out size: 0x%x\n",
+ err, read_data->status, out_size);
+ kfree(read_data);
+ return ~0;
+ }
+
+ funcfg_table_elem = (struct tag_sml_funcfg_tbl *)read_data->data;
+
+ switch (idx) {
+ case VALID:
+ return funcfg_table_elem->dw0.bs.valid;
+ case RX_MODE:
+ return funcfg_table_elem->dw0.bs.nic_rx_mode;
+ case MTU:
+ return funcfg_table_elem->dw1.bs.mtu;
+ case RQ_DEPTH:
+ return funcfg_table_elem->dw13.bs.cfg_rq_depth;
+ case QUEUE_NUM:
+ return funcfg_table_elem->dw13.bs.cfg_q_num;
+ }
+
+ kfree(read_data);
+
+ return ~0;
+}
+
+static ssize_t hinic_dbg_cmd_read(struct file *filp, char __user *buffer, size_t count,
+ loff_t *ppos)
+{
+ struct hinic_debug_priv *dbg;
+ char ret_buf[20];
+ int *desc;
+ u64 out;
+ int ret;
+
+ desc = filp->private_data;
+ dbg = container_of(desc, struct hinic_debug_priv, field_id[*desc]);
+
+ switch (dbg->type) {
+ case HINIC_DBG_SQ_INFO:
+ out = hinic_dbg_get_sq_info(dbg->dev, dbg->object, *desc);
+ break;
+
+ case HINIC_DBG_RQ_INFO:
+ out = hinic_dbg_get_rq_info(dbg->dev, dbg->object, *desc);
+ break;
+
+ case HINIC_DBG_FUNC_TABLE:
+ out = hinic_dbg_get_func_table(dbg->dev, *desc);
+ break;
+
+ default:
+ netif_warn(dbg->dev, drv, dbg->dev->netdev, "Invalid hinic debug cmd: %d\n",
+ dbg->type);
+ return -EINVAL;
+ }
+
+ ret = snprintf(ret_buf, sizeof(ret_buf), "0x%llx\n", out);
+
+ return simple_read_from_buffer(buffer, count, ppos, ret_buf, ret);
+}
+
+static const struct file_operations hinic_dbg_cmd_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = hinic_dbg_cmd_read,
+};
+
+static int create_dbg_files(struct hinic_dev *dev, enum hinic_dbg_type type, void *data,
+ struct dentry *root, struct hinic_debug_priv **dbg, char **field,
+ int nfile)
+{
+ struct hinic_debug_priv *tmp;
+ int i;
+
+ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ tmp->dev = dev;
+ tmp->object = data;
+ tmp->type = type;
+ tmp->root = root;
+
+ for (i = 0; i < nfile; i++) {
+ tmp->field_id[i] = i;
+ debugfs_create_file(field[i], 0400, root, &tmp->field_id[i], &hinic_dbg_cmd_fops);
+ }
+
+ *dbg = tmp;
+
+ return 0;
+}
+
+static void rem_dbg_files(struct hinic_debug_priv *dbg)
+{
+ if (dbg->type != HINIC_DBG_FUNC_TABLE)
+ debugfs_remove_recursive(dbg->root);
+
+ kfree(dbg);
+}
+
+int hinic_sq_debug_add(struct hinic_dev *dev, u16 sq_id)
+{
+ struct hinic_sq *sq;
+ struct dentry *root;
+ char sub_dir[16];
+
+ sq = dev->txqs[sq_id].sq;
+
+ sprintf(sub_dir, "0x%x", sq_id);
+
+ root = debugfs_create_dir(sub_dir, dev->sq_dbgfs);
+
+ return create_dbg_files(dev, HINIC_DBG_SQ_INFO, sq, root, &sq->dbg, sq_fields,
+ ARRAY_SIZE(sq_fields));
+}
+
+void hinic_sq_debug_rem(struct hinic_sq *sq)
+{
+ if (sq->dbg)
+ rem_dbg_files(sq->dbg);
+}
+
+int hinic_rq_debug_add(struct hinic_dev *dev, u16 rq_id)
+{
+ struct hinic_rq *rq;
+ struct dentry *root;
+ char sub_dir[16];
+
+ rq = dev->rxqs[rq_id].rq;
+
+ sprintf(sub_dir, "0x%x", rq_id);
+
+ root = debugfs_create_dir(sub_dir, dev->rq_dbgfs);
+
+ return create_dbg_files(dev, HINIC_DBG_RQ_INFO, rq, root, &rq->dbg, rq_fields,
+ ARRAY_SIZE(rq_fields));
+}
+
+void hinic_rq_debug_rem(struct hinic_rq *rq)
+{
+ if (rq->dbg)
+ rem_dbg_files(rq->dbg);
+}
+
+int hinic_func_table_debug_add(struct hinic_dev *dev)
+{
+ if (HINIC_IS_VF(dev->hwdev->hwif))
+ return 0;
+
+ return create_dbg_files(dev, HINIC_DBG_FUNC_TABLE, dev, dev->func_tbl_dbgfs, &dev->dbg,
+ func_table_fields, ARRAY_SIZE(func_table_fields));
+}
+
+void hinic_func_table_debug_rem(struct hinic_dev *dev)
+{
+ if (!HINIC_IS_VF(dev->hwdev->hwif) && dev->dbg)
+ rem_dbg_files(dev->dbg);
+}
+
+void hinic_sq_dbgfs_init(struct hinic_dev *nic_dev)
+{
+ nic_dev->sq_dbgfs = debugfs_create_dir("SQs", nic_dev->dbgfs_root);
+}
+
+void hinic_sq_dbgfs_uninit(struct hinic_dev *nic_dev)
+{
+ debugfs_remove_recursive(nic_dev->sq_dbgfs);
+}
+
+void hinic_rq_dbgfs_init(struct hinic_dev *nic_dev)
+{
+ nic_dev->rq_dbgfs = debugfs_create_dir("RQs", nic_dev->dbgfs_root);
+}
+
+void hinic_rq_dbgfs_uninit(struct hinic_dev *nic_dev)
+{
+ debugfs_remove_recursive(nic_dev->rq_dbgfs);
+}
+
+void hinic_func_tbl_dbgfs_init(struct hinic_dev *nic_dev)
+{
+ if (!HINIC_IS_VF(nic_dev->hwdev->hwif))
+ nic_dev->func_tbl_dbgfs = debugfs_create_dir("func_table", nic_dev->dbgfs_root);
+}
+
+void hinic_func_tbl_dbgfs_uninit(struct hinic_dev *nic_dev)
+{
+ if (!HINIC_IS_VF(nic_dev->hwdev->hwif))
+ debugfs_remove_recursive(nic_dev->func_tbl_dbgfs);
+}
+
+void hinic_dbg_init(struct hinic_dev *nic_dev)
+{
+ nic_dev->dbgfs_root = debugfs_create_dir(pci_name(nic_dev->hwdev->hwif->pdev),
+ hinic_dbgfs_root);
+}
+
+void hinic_dbg_uninit(struct hinic_dev *nic_dev)
+{
+ debugfs_remove_recursive(nic_dev->dbgfs_root);
+ nic_dev->dbgfs_root = NULL;
+}
+
+void hinic_dbg_register_debugfs(const char *debugfs_dir_name)
+{
+ hinic_dbgfs_root = debugfs_create_dir(debugfs_dir_name, NULL);
+}
+
+void hinic_dbg_unregister_debugfs(void)
+{
+ debugfs_remove_recursive(hinic_dbgfs_root);
+ hinic_dbgfs_root = NULL;
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_debugfs.h b/drivers/net/ethernet/huawei/hinic/hinic_debugfs.h
new file mode 100644
index 0000000..e9e00cf
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_debugfs.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ */
+
+#ifndef HINIC_DEBUGFS_H
+#define HINIC_DEBUGFS_H
+
+#include "hinic_dev.h"
+
+#define TBL_ID_FUNC_CFG_SM_NODE 11
+#define TBL_ID_FUNC_CFG_SM_INST 1
+
+#define HINIC_FUNCTION_CONFIGURE_TABLE_SIZE 64
+#define HINIC_FUNCTION_CONFIGURE_TABLE 1
+
+struct hinic_cmd_lt_rd {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ unsigned char node;
+ unsigned char inst;
+ unsigned char entry_size;
+ unsigned char rsvd;
+ unsigned int lt_index;
+ unsigned int offset;
+ unsigned int len;
+ unsigned char data[100];
+};
+
+struct tag_sml_funcfg_tbl {
+ union {
+ struct {
+ u32 rsvd0 :8;
+ u32 nic_rx_mode :5;
+ u32 rsvd1 :18;
+ u32 valid :1;
+ } bs;
+
+ u32 value;
+ } dw0;
+
+ union {
+ struct {
+ u32 vlan_id :12;
+ u32 vlan_mode :3;
+ u32 fast_recycled_mode :1;
+ u32 mtu :16;
+ } bs;
+
+ u32 value;
+ } dw1;
+
+ u32 dw2;
+ u32 dw3;
+ u32 dw4;
+ u32 dw5;
+ u32 dw6;
+ u32 dw7;
+ u32 dw8;
+ u32 dw9;
+ u32 dw10;
+ u32 dw11;
+ u32 dw12;
+
+ union {
+ struct {
+ u32 rsvd2 :15;
+ u32 cfg_q_num :9;
+ u32 cfg_rq_depth :6;
+ u32 vhd_type :2;
+ } bs;
+
+ u32 value;
+ } dw13;
+
+ u32 dw14;
+ u32 dw15;
+};
+
+int hinic_sq_debug_add(struct hinic_dev *dev, u16 sq_id);
+
+void hinic_sq_debug_rem(struct hinic_sq *sq);
+
+int hinic_rq_debug_add(struct hinic_dev *dev, u16 rq_id);
+
+void hinic_rq_debug_rem(struct hinic_rq *rq);
+
+int hinic_func_table_debug_add(struct hinic_dev *dev);
+
+void hinic_func_table_debug_rem(struct hinic_dev *dev);
+
+void hinic_sq_dbgfs_init(struct hinic_dev *nic_dev);
+
+void hinic_sq_dbgfs_uninit(struct hinic_dev *nic_dev);
+
+void hinic_rq_dbgfs_init(struct hinic_dev *nic_dev);
+
+void hinic_rq_dbgfs_uninit(struct hinic_dev *nic_dev);
+
+void hinic_func_tbl_dbgfs_init(struct hinic_dev *nic_dev);
+
+void hinic_func_tbl_dbgfs_uninit(struct hinic_dev *nic_dev);
+
+void hinic_dbg_init(struct hinic_dev *nic_dev);
+
+void hinic_dbg_uninit(struct hinic_dev *nic_dev);
+
+void hinic_dbg_register_debugfs(const char *debugfs_dir_name);
+
+void hinic_dbg_unregister_debugfs(void);
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
index 0a1e20e..fb3e891 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
@@ -58,6 +58,20 @@ struct hinic_intr_coal_info {
u8 resend_timer_cfg;
};
+enum hinic_dbg_type {
+ HINIC_DBG_SQ_INFO,
+ HINIC_DBG_RQ_INFO,
+ HINIC_DBG_FUNC_TABLE,
+};
+
+struct hinic_debug_priv {
+ struct hinic_dev *dev;
+ void *object;
+ enum hinic_dbg_type type;
+ struct dentry *root;
+ int field_id[64];
+};
+
struct hinic_dev {
struct net_device *netdev;
struct hinic_hwdev *hwdev;
@@ -97,6 +111,12 @@ struct hinic_dev {
int lb_test_rx_idx;
int lb_pkt_len;
u8 *lb_test_rx_buf;
+
+ struct dentry *dbgfs_root;
+ struct dentry *sq_dbgfs;
+ struct dentry *rq_dbgfs;
+ struct dentry *func_tbl_dbgfs;
+ struct hinic_debug_priv *dbg;
struct devlink *devlink;
bool cable_unplugged;
bool module_unrecognized;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index 0c73776..2396851 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -465,6 +465,7 @@ int hinic_hwdev_ifup(struct hinic_hwdev *hwdev, u16 sq_depth, u16 rq_depth)
func_to_io->hwdev = hwdev;
func_to_io->sq_depth = sq_depth;
func_to_io->rq_depth = rq_depth;
+ func_to_io->global_qpn = base_qpn;
err = hinic_io_init(func_to_io, hwif, nic_cap->max_qps, num_ceqs,
ceq_msix_entries);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
index 701eb81..416492e 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
@@ -96,6 +96,8 @@ enum hinic_port_cmd {
HINIC_PORT_CMD_RSS_TEMP_MGR = 49,
+ HINIC_PORT_CMD_RD_LINE_TBL = 57,
+
HINIC_PORT_CMD_RSS_CFG = 66,
HINIC_PORT_CMD_FWCTXT_INIT = 69,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
index 3e3fa74..6772d89 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
@@ -305,6 +305,7 @@ static int init_qp(struct hinic_func_to_io *func_to_io,
func_to_io->sq_db[q_id] = db_base;
+ qp->sq.qid = q_id;
err = hinic_init_sq(&qp->sq, hwif, &func_to_io->sq_wq[q_id],
sq_msix_entry,
CI_ADDR(func_to_io->ci_addr_base, q_id),
@@ -314,6 +315,7 @@ static int init_qp(struct hinic_func_to_io *func_to_io,
goto err_sq_init;
}
+ qp->rq.qid = q_id;
err = hinic_init_rq(&qp->rq, hwif, &func_to_io->rq_wq[q_id],
rq_msix_entry);
if (err) {
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h
index ee6d607..52159a9 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h
@@ -59,6 +59,7 @@ struct hinic_nic_cfg {
struct hinic_func_to_io {
struct hinic_hwif *hwif;
struct hinic_hwdev *hwdev;
+ u16 global_qpn;
struct hinic_ceqs ceqs;
struct hinic_wqs wqs;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
index ca3e2d0..0dfa51a 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
@@ -81,6 +81,8 @@ struct hinic_sq {
struct hinic_wq *wq;
+ u16 qid;
+
u32 irq;
u16 msix_entry;
@@ -90,6 +92,7 @@ struct hinic_sq {
void __iomem *db_base;
struct sk_buff **saved_skb;
+ struct hinic_debug_priv *dbg;
};
struct hinic_rq {
@@ -97,6 +100,8 @@ struct hinic_rq {
struct hinic_wq *wq;
+ u16 qid;
+
struct cpumask affinity_mask;
u32 irq;
u16 msix_entry;
@@ -110,6 +115,7 @@ struct hinic_rq {
u16 *pi_virt_addr;
dma_addr_t pi_dma_addr;
+ struct hinic_debug_priv *dbg;
};
struct hinic_qp {
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index 501056f..797c55a 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -24,6 +24,7 @@
#include <linux/delay.h>
#include <linux/err.h>
+#include "hinic_debugfs.h"
#include "hinic_hw_qp.h"
#include "hinic_hw_dev.h"
#include "hinic_devlink.h"
@@ -153,6 +154,8 @@ static int create_txqs(struct hinic_dev *nic_dev)
if (!nic_dev->txqs)
return -ENOMEM;
+ hinic_sq_dbgfs_init(nic_dev);
+
for (i = 0; i < num_txqs; i++) {
struct hinic_sq *sq = hinic_hwdev_get_sq(nic_dev->hwdev, i);
@@ -162,13 +165,27 @@ static int create_txqs(struct hinic_dev *nic_dev)
"Failed to init Txq\n");
goto err_init_txq;
}
+
+ err = hinic_sq_debug_add(nic_dev, i);
+ if (err) {
+ netif_err(nic_dev, drv, netdev,
+ "Failed to add SQ%d debug\n", i);
+ goto err_add_sq_dbg;
+ }
+
}
return 0;
+err_add_sq_dbg:
+ hinic_clean_txq(&nic_dev->txqs[i]);
err_init_txq:
- for (j = 0; j < i; j++)
+ for (j = 0; j < i; j++) {
+ hinic_sq_debug_rem(nic_dev->txqs[j].sq);
hinic_clean_txq(&nic_dev->txqs[j]);
+ }
+
+ hinic_sq_dbgfs_uninit(nic_dev);
devm_kfree(&netdev->dev, nic_dev->txqs);
return err;
@@ -186,8 +203,12 @@ static void free_txqs(struct hinic_dev *nic_dev)
if (!nic_dev->txqs)
return;
- for (i = 0; i < num_txqs; i++)
+ for (i = 0; i < num_txqs; i++) {
+ hinic_sq_debug_rem(nic_dev->txqs[i].sq);
hinic_clean_txq(&nic_dev->txqs[i]);
+ }
+
+ hinic_sq_dbgfs_uninit(nic_dev);
devm_kfree(&netdev->dev, nic_dev->txqs);
nic_dev->txqs = NULL;
@@ -213,6 +234,8 @@ static int create_rxqs(struct hinic_dev *nic_dev)
if (!nic_dev->rxqs)
return -ENOMEM;
+ hinic_rq_dbgfs_init(nic_dev);
+
for (i = 0; i < num_rxqs; i++) {
struct hinic_rq *rq = hinic_hwdev_get_rq(nic_dev->hwdev, i);
@@ -222,13 +245,26 @@ static int create_rxqs(struct hinic_dev *nic_dev)
"Failed to init rxq\n");
goto err_init_rxq;
}
+
+ err = hinic_rq_debug_add(nic_dev, i);
+ if (err) {
+ netif_err(nic_dev, drv, netdev,
+ "Failed to add RQ%d debug\n", i);
+ goto err_add_rq_dbg;
+ }
}
return 0;
+err_add_rq_dbg:
+ hinic_clean_rxq(&nic_dev->rxqs[i]);
err_init_rxq:
- for (j = 0; j < i; j++)
+ for (j = 0; j < i; j++) {
+ hinic_rq_debug_rem(nic_dev->rxqs[j].rq);
hinic_clean_rxq(&nic_dev->rxqs[j]);
+ }
+
+ hinic_rq_dbgfs_uninit(nic_dev);
devm_kfree(&netdev->dev, nic_dev->rxqs);
return err;
@@ -246,8 +282,12 @@ static void free_rxqs(struct hinic_dev *nic_dev)
if (!nic_dev->rxqs)
return;
- for (i = 0; i < num_rxqs; i++)
+ for (i = 0; i < num_rxqs; i++) {
+ hinic_rq_debug_rem(nic_dev->rxqs[i].rq);
hinic_clean_rxq(&nic_dev->rxqs[i]);
+ }
+
+ hinic_rq_dbgfs_uninit(nic_dev);
devm_kfree(&netdev->dev, nic_dev->rxqs);
nic_dev->rxqs = NULL;
@@ -1260,6 +1300,16 @@ static int nic_dev_init(struct pci_dev *pdev)
goto err_init_intr;
}
+ hinic_dbg_init(nic_dev);
+
+ hinic_func_tbl_dbgfs_init(nic_dev);
+
+ err = hinic_func_table_debug_add(nic_dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to add func_table debug\n");
+ goto err_add_func_table_dbg;
+ }
+
err = register_netdev(netdev);
if (err) {
dev_err(&pdev->dev, "Failed to register netdev\n");
@@ -1269,6 +1319,10 @@ static int nic_dev_init(struct pci_dev *pdev)
return 0;
err_reg_netdev:
+ hinic_func_table_debug_rem(nic_dev);
+err_add_func_table_dbg:
+ hinic_func_tbl_dbgfs_uninit(nic_dev);
+ hinic_dbg_uninit(nic_dev);
hinic_free_intr_coalesce(nic_dev);
err_init_intr:
err_set_pfc:
@@ -1391,6 +1445,12 @@ static void hinic_remove(struct pci_dev *pdev)
unregister_netdev(netdev);
+ hinic_func_table_debug_rem(nic_dev);
+
+ hinic_func_tbl_dbgfs_uninit(nic_dev);
+
+ hinic_dbg_uninit(nic_dev);
+
hinic_free_intr_coalesce(nic_dev);
hinic_port_del_mac(nic_dev, netdev->dev_addr, 0);
@@ -1445,4 +1505,17 @@ static struct pci_driver hinic_driver = {
.sriov_configure = hinic_pci_sriov_configure,
};
-module_pci_driver(hinic_driver);
+static int __init hinic_module_init(void)
+{
+ hinic_dbg_register_debugfs(HINIC_DRV_NAME);
+ return pci_register_driver(&hinic_driver);
+}
+
+static void __exit hinic_module_exit(void)
+{
+ pci_unregister_driver(&hinic_driver);
+ hinic_dbg_unregister_debugfs();
+}
+
+module_init(hinic_module_init);
+module_exit(hinic_module_exit);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index d3a7743..6b619c1 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -104,8 +104,7 @@ static int send_login(struct ibmvnic_adapter *adapter);
static void send_cap_queries(struct ibmvnic_adapter *adapter);
static int init_sub_crqs(struct ibmvnic_adapter *);
static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
-static int ibmvnic_init(struct ibmvnic_adapter *);
-static int ibmvnic_reset_init(struct ibmvnic_adapter *);
+static int ibmvnic_reset_init(struct ibmvnic_adapter *, bool reset);
static void release_crq_queue(struct ibmvnic_adapter *);
static int __ibmvnic_set_mac(struct net_device *, u8 *);
static int init_crq_queue(struct ibmvnic_adapter *adapter);
@@ -297,8 +296,7 @@ static void deactivate_rx_pools(struct ibmvnic_adapter *adapter)
{
int i;
- for (i = 0; i < be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
- i++)
+ for (i = 0; i < adapter->num_active_rx_pools; i++)
adapter->rx_pool[i].active = 0;
}
@@ -306,6 +304,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
struct ibmvnic_rx_pool *pool)
{
int count = pool->size - atomic_read(&pool->available);
+ u64 handle = adapter->rx_scrq[pool->index]->handle;
struct device *dev = &adapter->vdev->dev;
int buffers_added = 0;
unsigned long lpar_rc;
@@ -314,7 +313,6 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
unsigned int offset;
dma_addr_t dma_addr;
unsigned char *dst;
- u64 *handle_array;
int shift = 0;
int index;
int i;
@@ -322,10 +320,6 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
if (!pool->active)
return;
- handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
- be32_to_cpu(adapter->login_rsp_buf->
- off_rxadd_subcrqs));
-
for (i = 0; i < count; ++i) {
skb = alloc_skb(pool->buff_size, GFP_ATOMIC);
if (!skb) {
@@ -369,8 +363,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
#endif
sub_crq.rx_add.len = cpu_to_be32(pool->buff_size << shift);
- lpar_rc = send_subcrq(adapter, handle_array[pool->index],
- &sub_crq);
+ lpar_rc = send_subcrq(adapter, handle, &sub_crq);
if (lpar_rc != H_SUCCESS)
goto failure;
@@ -407,8 +400,7 @@ static void replenish_pools(struct ibmvnic_adapter *adapter)
int i;
adapter->replenish_task_cycles++;
- for (i = 0; i < be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
- i++) {
+ for (i = 0; i < adapter->num_active_rx_pools; i++) {
if (adapter->rx_pool[i].active)
replenish_rx_pool(adapter, &adapter->rx_pool[i]);
}
@@ -475,25 +467,23 @@ static int init_stats_token(struct ibmvnic_adapter *adapter)
static int reset_rx_pools(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_rx_pool *rx_pool;
+ u64 buff_size;
int rx_scrqs;
int i, j, rc;
- u64 *size_array;
if (!adapter->rx_pool)
return -1;
- size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
- be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
-
- rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
+ buff_size = adapter->cur_rx_buf_sz;
+ rx_scrqs = adapter->num_active_rx_pools;
for (i = 0; i < rx_scrqs; i++) {
rx_pool = &adapter->rx_pool[i];
netdev_dbg(adapter->netdev, "Re-setting rx_pool[%d]\n", i);
- if (rx_pool->buff_size != be64_to_cpu(size_array[i])) {
+ if (rx_pool->buff_size != buff_size) {
free_long_term_buff(adapter, &rx_pool->long_term_buff);
- rx_pool->buff_size = be64_to_cpu(size_array[i]);
+ rx_pool->buff_size = buff_size;
rc = alloc_long_term_buff(adapter,
&rx_pool->long_term_buff,
rx_pool->size *
@@ -561,13 +551,11 @@ static int init_rx_pools(struct net_device *netdev)
struct device *dev = &adapter->vdev->dev;
struct ibmvnic_rx_pool *rx_pool;
int rxadd_subcrqs;
- u64 *size_array;
+ u64 buff_size;
int i, j;
- rxadd_subcrqs =
- be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
- size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
- be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
+ rxadd_subcrqs = adapter->num_active_rx_scrqs;
+ buff_size = adapter->cur_rx_buf_sz;
adapter->rx_pool = kcalloc(rxadd_subcrqs,
sizeof(struct ibmvnic_rx_pool),
@@ -585,11 +573,11 @@ static int init_rx_pools(struct net_device *netdev)
netdev_dbg(adapter->netdev,
"Initializing rx_pool[%d], %lld buffs, %lld bytes each\n",
i, adapter->req_rx_add_entries_per_subcrq,
- be64_to_cpu(size_array[i]));
+ buff_size);
rx_pool->size = adapter->req_rx_add_entries_per_subcrq;
rx_pool->index = i;
- rx_pool->buff_size = be64_to_cpu(size_array[i]);
+ rx_pool->buff_size = buff_size;
rx_pool->active = 1;
rx_pool->free_map = kcalloc(rx_pool->size, sizeof(int),
@@ -655,7 +643,7 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter)
if (!adapter->tx_pool)
return -1;
- tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
+ tx_scrqs = adapter->num_active_tx_pools;
for (i = 0; i < tx_scrqs; i++) {
rc = reset_one_tx_pool(adapter, &adapter->tso_pool[i]);
if (rc)
@@ -744,7 +732,7 @@ static int init_tx_pools(struct net_device *netdev)
int tx_subcrqs;
int i, rc;
- tx_subcrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
+ tx_subcrqs = adapter->num_active_tx_scrqs;
adapter->tx_pool = kcalloc(tx_subcrqs,
sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
if (!adapter->tx_pool)
@@ -980,7 +968,7 @@ static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
return -1;
}
- if (adapter->init_done_rc == 1) {
+ if (adapter->init_done_rc == PARTIALSUCCESS) {
/* Partuial success, delay and re-send */
mdelay(1000);
resend = true;
@@ -1530,9 +1518,9 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
unsigned int offset;
int num_entries = 1;
unsigned char *dst;
- u64 *handle_array;
int index = 0;
u8 proto = 0;
+ u64 handle;
netdev_tx_t ret = NETDEV_TX_OK;
if (test_bit(0, &adapter->resetting)) {
@@ -1559,8 +1547,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_scrq = adapter->tx_scrq[queue_num];
txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
- handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
- be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs));
+ handle = tx_scrq->handle;
index = tx_pool->free_map[tx_pool->consumer_index];
@@ -1672,14 +1659,14 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
ret = NETDEV_TX_OK;
goto tx_err_out;
}
- lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num],
+ lpar_rc = send_subcrq_indirect(adapter, handle,
(u64)tx_buff->indir_dma,
(u64)num_entries);
dma_unmap_single(dev, tx_buff->indir_dma,
sizeof(tx_buff->indir_arr), DMA_TO_DEVICE);
} else {
tx_buff->num_entries = num_entries;
- lpar_rc = send_subcrq(adapter, handle_array[queue_num],
+ lpar_rc = send_subcrq(adapter, handle,
&tx_crq);
}
if (lpar_rc != H_SUCCESS) {
@@ -1874,7 +1861,7 @@ static int do_change_param_reset(struct ibmvnic_adapter *adapter,
return rc;
}
- rc = ibmvnic_reset_init(adapter);
+ rc = ibmvnic_reset_init(adapter, true);
if (rc)
return IBMVNIC_INIT_FAILED;
@@ -1992,7 +1979,7 @@ static int do_reset(struct ibmvnic_adapter *adapter,
goto out;
}
- rc = ibmvnic_reset_init(adapter);
+ rc = ibmvnic_reset_init(adapter, true);
if (rc) {
rc = IBMVNIC_INIT_FAILED;
goto out;
@@ -2106,7 +2093,7 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter,
return rc;
}
- rc = ibmvnic_init(adapter);
+ rc = ibmvnic_reset_init(adapter, false);
if (rc)
return rc;
@@ -3581,8 +3568,7 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter,
if (rc) {
if (rc == H_CLOSED) {
dev_warn(dev, "CRQ Queue closed\n");
- if (test_bit(0, &adapter->resetting))
- ibmvnic_reset(adapter, VNIC_RESET_FATAL);
+ /* do not reset, report the fail, wait for passive init from server */
}
dev_warn(dev, "Send error (rc=%d)\n", rc);
@@ -3593,14 +3579,31 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter,
static int ibmvnic_send_crq_init(struct ibmvnic_adapter *adapter)
{
+ struct device *dev = &adapter->vdev->dev;
union ibmvnic_crq crq;
+ int retries = 100;
+ int rc;
memset(&crq, 0, sizeof(crq));
crq.generic.first = IBMVNIC_CRQ_INIT_CMD;
crq.generic.cmd = IBMVNIC_CRQ_INIT;
netdev_dbg(adapter->netdev, "Sending CRQ init\n");
- return ibmvnic_send_crq(adapter, &crq);
+ do {
+ rc = ibmvnic_send_crq(adapter, &crq);
+ if (rc != H_CLOSED)
+ break;
+ retries--;
+ msleep(50);
+
+ } while (retries > 0);
+
+ if (rc) {
+ dev_err(dev, "Failed to send init request, rc = %d\n", rc);
+ return rc;
+ }
+
+ return 0;
}
static int send_version_xchg(struct ibmvnic_adapter *adapter)
@@ -4305,6 +4308,11 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
struct net_device *netdev = adapter->netdev;
struct ibmvnic_login_rsp_buffer *login_rsp = adapter->login_rsp_buf;
struct ibmvnic_login_buffer *login = adapter->login_buf;
+ u64 *tx_handle_array;
+ u64 *rx_handle_array;
+ int num_tx_pools;
+ int num_rx_pools;
+ u64 *size_array;
int i;
dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz,
@@ -4339,6 +4347,30 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
ibmvnic_remove(adapter->vdev);
return -EIO;
}
+ size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
+ be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
+ /* variable buffer sizes are not supported, so just read the
+ * first entry.
+ */
+ adapter->cur_rx_buf_sz = be64_to_cpu(size_array[0]);
+
+ num_tx_pools = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
+ num_rx_pools = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
+
+ tx_handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
+ be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs));
+ rx_handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
+ be32_to_cpu(adapter->login_rsp_buf->off_rxadd_subcrqs));
+
+ for (i = 0; i < num_tx_pools; i++)
+ adapter->tx_scrq[i]->handle = tx_handle_array[i];
+
+ for (i = 0; i < num_rx_pools; i++)
+ adapter->rx_scrq[i]->handle = rx_handle_array[i];
+
+ adapter->num_active_tx_scrqs = num_tx_pools;
+ adapter->num_active_rx_scrqs = num_rx_pools;
+ release_login_rsp_buffer(adapter);
release_login_buffer(adapter);
complete(&adapter->init_done);
@@ -4984,7 +5016,7 @@ static int init_crq_queue(struct ibmvnic_adapter *adapter)
return retrc;
}
-static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter)
+static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)
{
struct device *dev = &adapter->vdev->dev;
unsigned long timeout = msecs_to_jiffies(30000);
@@ -4993,12 +5025,19 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter)
adapter->from_passive_init = false;
- old_num_rx_queues = adapter->req_rx_queues;
- old_num_tx_queues = adapter->req_tx_queues;
+ if (reset) {
+ old_num_rx_queues = adapter->req_rx_queues;
+ old_num_tx_queues = adapter->req_tx_queues;
+ reinit_completion(&adapter->init_done);
+ }
- reinit_completion(&adapter->init_done);
adapter->init_done_rc = 0;
- ibmvnic_send_crq_init(adapter);
+ rc = ibmvnic_send_crq_init(adapter);
+ if (rc) {
+ dev_err(dev, "Send crq init failed with error %d\n", rc);
+ return rc;
+ }
+
if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
dev_err(dev, "Initialization sequence timed out\n");
return -1;
@@ -5009,13 +5048,8 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter)
return adapter->init_done_rc;
}
- if (adapter->from_passive_init) {
- adapter->state = VNIC_OPEN;
- adapter->from_passive_init = false;
- return -1;
- }
-
- if (test_bit(0, &adapter->resetting) && !adapter->wait_for_reset &&
+ if (reset &&
+ test_bit(0, &adapter->resetting) && !adapter->wait_for_reset &&
adapter->reset_reason != VNIC_RESET_MOBILITY) {
if (adapter->req_rx_queues != old_num_rx_queues ||
adapter->req_tx_queues != old_num_tx_queues) {
@@ -5043,48 +5077,6 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter)
return rc;
}
-static int ibmvnic_init(struct ibmvnic_adapter *adapter)
-{
- struct device *dev = &adapter->vdev->dev;
- unsigned long timeout = msecs_to_jiffies(30000);
- int rc;
-
- adapter->from_passive_init = false;
-
- adapter->init_done_rc = 0;
- ibmvnic_send_crq_init(adapter);
- if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
- dev_err(dev, "Initialization sequence timed out\n");
- return -1;
- }
-
- if (adapter->init_done_rc) {
- release_crq_queue(adapter);
- return adapter->init_done_rc;
- }
-
- if (adapter->from_passive_init) {
- adapter->state = VNIC_OPEN;
- adapter->from_passive_init = false;
- return -1;
- }
-
- rc = init_sub_crqs(adapter);
- if (rc) {
- dev_err(dev, "Initialization of sub crqs failed\n");
- release_crq_queue(adapter);
- return rc;
- }
-
- rc = init_sub_crq_irqs(adapter);
- if (rc) {
- dev_err(dev, "Failed to initialize sub crq irqs\n");
- release_crq_queue(adapter);
- }
-
- return rc;
-}
-
static struct device_attribute dev_attr_failover;
static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
@@ -5147,7 +5139,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
goto ibmvnic_init_fail;
}
- rc = ibmvnic_init(adapter);
+ rc = ibmvnic_reset_init(adapter, false);
if (rc && rc != EAGAIN)
goto ibmvnic_init_fail;
} while (rc == EAGAIN);
@@ -5297,8 +5289,7 @@ static unsigned long ibmvnic_get_desired_dma(struct vio_dev *vdev)
for (i = 0; i < adapter->req_tx_queues + adapter->req_rx_queues; i++)
ret += 4 * PAGE_SIZE; /* the scrq message queue */
- for (i = 0; i < be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
- i++)
+ for (i = 0; i < adapter->num_active_rx_pools; i++)
ret += adapter->rx_pool[i].size *
IOMMU_PAGE_ALIGN(adapter->rx_pool[i].buff_size, tbl);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index f8416e1..8da9879 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -875,6 +875,7 @@ struct ibmvnic_sub_crq_queue {
struct ibmvnic_adapter *adapter;
atomic_t used;
char name[32];
+ u64 handle;
};
struct ibmvnic_long_term_buff {
@@ -1075,6 +1076,7 @@ struct ibmvnic_adapter {
u32 num_active_rx_napi;
u32 num_active_tx_scrqs;
u32 num_active_tx_pools;
+ u32 cur_rx_buf_sz;
struct tasklet_struct tasklet;
enum vnic_state state;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index d88dd41..99b8252 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -310,10 +310,7 @@ static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring,
rx_buffer->page_offset;
/* prefetch first cache line of first page */
- prefetch(page_addr);
-#if L1_CACHE_BYTES < 128
- prefetch((void *)((u8 *)page_addr + L1_CACHE_BYTES));
-#endif
+ net_prefetch(page_addr);
/* allocate a skb to store the frags */
skb = napi_alloc_skb(&rx_ring->q_vector->napi,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 825c104..dc15771 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1967,7 +1967,7 @@ static int i40e_set_ringparam(struct net_device *netdev,
(new_rx_count == vsi->rx_rings[0]->count))
return 0;
- /* If there is a AF_XDP UMEM attached to any of Rx rings,
+ /* If there is a AF_XDP page pool attached to any of Rx rings,
* disallow changing the number of descriptors -- regardless
* if the netdev is running or not.
*/
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 2e433fd..05c6d3e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3122,12 +3122,12 @@ static void i40e_config_xps_tx_ring(struct i40e_ring *ring)
}
/**
- * i40e_xsk_umem - Retrieve the AF_XDP ZC if XDP and ZC is enabled
+ * i40e_xsk_pool - Retrieve the AF_XDP buffer pool if XDP and ZC is enabled
* @ring: The Tx or Rx ring
*
- * Returns the UMEM or NULL.
+ * Returns the AF_XDP buffer pool or NULL.
**/
-static struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring)
+static struct xsk_buff_pool *i40e_xsk_pool(struct i40e_ring *ring)
{
bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi);
int qid = ring->queue_index;
@@ -3138,7 +3138,7 @@ static struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring)
if (!xdp_on || !test_bit(qid, ring->vsi->af_xdp_zc_qps))
return NULL;
- return xdp_get_umem_from_qid(ring->vsi->netdev, qid);
+ return xsk_get_pool_from_qid(ring->vsi->netdev, qid);
}
/**
@@ -3157,7 +3157,7 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
u32 qtx_ctl = 0;
if (ring_is_xdp(ring))
- ring->xsk_umem = i40e_xsk_umem(ring);
+ ring->xsk_pool = i40e_xsk_pool(ring);
/* some ATR related tx ring init */
if (vsi->back->flags & I40E_FLAG_FD_ATR_ENABLED) {
@@ -3280,12 +3280,13 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
kfree(ring->rx_bi);
- ring->xsk_umem = i40e_xsk_umem(ring);
- if (ring->xsk_umem) {
+ ring->xsk_pool = i40e_xsk_pool(ring);
+ if (ring->xsk_pool) {
ret = i40e_alloc_rx_bi_zc(ring);
if (ret)
return ret;
- ring->rx_buf_len = xsk_umem_get_rx_frame_size(ring->xsk_umem);
+ ring->rx_buf_len =
+ xsk_pool_get_rx_frame_size(ring->xsk_pool);
/* For AF_XDP ZC, we disallow packets to span on
* multiple buffers, thus letting us skip that
* handling in the fast-path.
@@ -3368,8 +3369,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
writel(0, ring->tail);
- if (ring->xsk_umem) {
- xsk_buff_set_rxq_info(ring->xsk_umem, &ring->xdp_rxq);
+ if (ring->xsk_pool) {
+ xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
ok = i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring));
} else {
ok = !i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
@@ -3380,7 +3381,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
*/
dev_info(&vsi->back->pdev->dev,
"Failed to allocate some buffers on %sRx ring %d (pf_q %d)\n",
- ring->xsk_umem ? "UMEM enabled " : "",
+ ring->xsk_pool ? "AF_XDP ZC enabled " : "",
ring->queue_index, pf_q);
}
@@ -12644,7 +12645,7 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi,
*/
if (need_reset && prog)
for (i = 0; i < vsi->num_queue_pairs; i++)
- if (vsi->xdp_rings[i]->xsk_umem)
+ if (vsi->xdp_rings[i]->xsk_pool)
(void)i40e_xsk_wakeup(vsi->netdev, i,
XDP_WAKEUP_RX);
@@ -12923,8 +12924,8 @@ static int i40e_xdp(struct net_device *dev,
switch (xdp->command) {
case XDP_SETUP_PROG:
return i40e_xdp_setup(vsi, xdp->prog);
- case XDP_SETUP_XSK_UMEM:
- return i40e_xsk_umem_setup(vsi, xdp->xsk.umem,
+ case XDP_SETUP_XSK_POOL:
+ return i40e_xsk_pool_setup(vsi, xdp->xsk.pool,
xdp->xsk.queue_id);
default:
return -EINVAL;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 3e5c566..91ab824 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -636,7 +636,7 @@ void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
unsigned long bi_size;
u16 i;
- if (ring_is_xdp(tx_ring) && tx_ring->xsk_umem) {
+ if (ring_is_xdp(tx_ring) && tx_ring->xsk_pool) {
i40e_xsk_clean_tx_ring(tx_ring);
} else {
/* ring already cleared, nothing to do */
@@ -1335,7 +1335,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
rx_ring->skb = NULL;
}
- if (rx_ring->xsk_umem) {
+ if (rx_ring->xsk_pool) {
i40e_xsk_clean_rx_ring(rx_ring);
goto skip_free;
}
@@ -1369,7 +1369,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
}
skip_free:
- if (rx_ring->xsk_umem)
+ if (rx_ring->xsk_pool)
i40e_clear_rx_bi_zc(rx_ring);
else
i40e_clear_rx_bi(rx_ring);
@@ -1992,10 +1992,8 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
struct sk_buff *skb;
/* prefetch first cache line of first page */
- prefetch(xdp->data);
-#if L1_CACHE_BYTES < 128
- prefetch(xdp->data + L1_CACHE_BYTES);
-#endif
+ net_prefetch(xdp->data);
+
/* Note, we get here by enabling legacy-rx via:
*
* ethtool --set-priv-flags <dev> legacy-rx on
@@ -2078,10 +2076,8 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
* likely have a consumer accessing first few bytes of meta
* data, and then actual data.
*/
- prefetch(xdp->data_meta);
-#if L1_CACHE_BYTES < 128
- prefetch(xdp->data_meta + L1_CACHE_BYTES);
-#endif
+ net_prefetch(xdp->data_meta);
+
/* build an skb around the page buffer */
skb = build_skb(xdp->data_hard_start, truesize);
if (unlikely(!skb))
@@ -2579,7 +2575,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
* budget and be more aggressive about cleaning up the Tx descriptors.
*/
i40e_for_each_ring(ring, q_vector->tx) {
- bool wd = ring->xsk_umem ?
+ bool wd = ring->xsk_pool ?
i40e_clean_xdp_tx_irq(vsi, ring) :
i40e_clean_tx_irq(vsi, ring, budget);
@@ -2607,7 +2603,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
budget_per_ring = budget;
i40e_for_each_ring(ring, q_vector->rx) {
- int cleaned = ring->xsk_umem ?
+ int cleaned = ring->xsk_pool ?
i40e_clean_rx_irq_zc(ring, budget_per_ring) :
i40e_clean_rx_irq(ring, budget_per_ring);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 4036893..703b644 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -388,7 +388,7 @@ struct i40e_ring {
struct i40e_channel *ch;
struct xdp_rxq_info xdp_rxq;
- struct xdp_umem *xsk_umem;
+ struct xsk_buff_pool *xsk_pool;
} ____cacheline_internodealigned_in_smp;
static inline bool ring_uses_build_skb(struct i40e_ring *ring)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 8ce57b5..2a1153d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -29,14 +29,16 @@ static struct xdp_buff **i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
}
/**
- * i40e_xsk_umem_enable - Enable/associate a UMEM to a certain ring/qid
+ * i40e_xsk_pool_enable - Enable/associate an AF_XDP buffer pool to a
+ * certain ring/qid
* @vsi: Current VSI
- * @umem: UMEM
- * @qid: Rx ring to associate UMEM to
+ * @pool: buffer pool
+ * @qid: Rx ring to associate buffer pool with
*
* Returns 0 on success, <0 on failure
**/
-static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
+static int i40e_xsk_pool_enable(struct i40e_vsi *vsi,
+ struct xsk_buff_pool *pool,
u16 qid)
{
struct net_device *netdev = vsi->netdev;
@@ -53,7 +55,7 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
qid >= netdev->real_num_tx_queues)
return -EINVAL;
- err = xsk_buff_dma_map(umem, &vsi->back->pdev->dev, I40E_RX_DMA_ATTR);
+ err = xsk_pool_dma_map(pool, &vsi->back->pdev->dev, I40E_RX_DMA_ATTR);
if (err)
return err;
@@ -80,21 +82,22 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
}
/**
- * i40e_xsk_umem_disable - Disassociate a UMEM from a certain ring/qid
+ * i40e_xsk_pool_disable - Disassociate an AF_XDP buffer pool from a
+ * certain ring/qid
* @vsi: Current VSI
- * @qid: Rx ring to associate UMEM to
+ * @qid: Rx ring to associate buffer pool with
*
* Returns 0 on success, <0 on failure
**/
-static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid)
+static int i40e_xsk_pool_disable(struct i40e_vsi *vsi, u16 qid)
{
struct net_device *netdev = vsi->netdev;
- struct xdp_umem *umem;
+ struct xsk_buff_pool *pool;
bool if_running;
int err;
- umem = xdp_get_umem_from_qid(netdev, qid);
- if (!umem)
+ pool = xsk_get_pool_from_qid(netdev, qid);
+ if (!pool)
return -EINVAL;
if_running = netif_running(vsi->netdev) && i40e_enabled_xdp_vsi(vsi);
@@ -106,7 +109,7 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid)
}
clear_bit(qid, vsi->af_xdp_zc_qps);
- xsk_buff_dma_unmap(umem, I40E_RX_DMA_ATTR);
+ xsk_pool_dma_unmap(pool, I40E_RX_DMA_ATTR);
if (if_running) {
err = i40e_queue_pair_enable(vsi, qid);
@@ -118,20 +121,21 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid)
}
/**
- * i40e_xsk_umem_setup - Enable/disassociate a UMEM to/from a ring/qid
+ * i40e_xsk_pool_setup - Enable/disassociate an AF_XDP buffer pool to/from
+ * a ring/qid
* @vsi: Current VSI
- * @umem: UMEM to enable/associate to a ring, or NULL to disable
- * @qid: Rx ring to (dis)associate UMEM (from)to
+ * @pool: Buffer pool to enable/associate to a ring, or NULL to disable
+ * @qid: Rx ring to (dis)associate buffer pool (from)to
*
- * This function enables or disables a UMEM to a certain ring.
+ * This function enables or disables a buffer pool to a certain ring.
*
* Returns 0 on success, <0 on failure
**/
-int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
+int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool,
u16 qid)
{
- return umem ? i40e_xsk_umem_enable(vsi, umem, qid) :
- i40e_xsk_umem_disable(vsi, qid);
+ return pool ? i40e_xsk_pool_enable(vsi, pool, qid) :
+ i40e_xsk_pool_disable(vsi, qid);
}
/**
@@ -191,7 +195,7 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count)
rx_desc = I40E_RX_DESC(rx_ring, ntu);
bi = i40e_rx_bi(rx_ring, ntu);
do {
- xdp = xsk_buff_alloc(rx_ring->xsk_umem);
+ xdp = xsk_buff_alloc(rx_ring->xsk_pool);
if (!xdp) {
ok = false;
goto no_buffers;
@@ -310,7 +314,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
bi = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
(*bi)->data_end = (*bi)->data + size;
- xsk_buff_dma_sync_for_cpu(*bi);
+ xsk_buff_dma_sync_for_cpu(*bi, rx_ring->xsk_pool);
xdp_res = i40e_run_xdp_zc(rx_ring, *bi);
if (xdp_res) {
@@ -358,11 +362,11 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets);
- if (xsk_umem_uses_need_wakeup(rx_ring->xsk_umem)) {
+ if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
- xsk_set_rx_need_wakeup(rx_ring->xsk_umem);
+ xsk_set_rx_need_wakeup(rx_ring->xsk_pool);
else
- xsk_clear_rx_need_wakeup(rx_ring->xsk_umem);
+ xsk_clear_rx_need_wakeup(rx_ring->xsk_pool);
return (int)total_rx_packets;
}
@@ -385,11 +389,11 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
dma_addr_t dma;
while (budget-- > 0) {
- if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
+ if (!xsk_tx_peek_desc(xdp_ring->xsk_pool, &desc))
break;
- dma = xsk_buff_raw_get_dma(xdp_ring->xsk_umem, desc.addr);
- xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_umem, dma,
+ dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc.addr);
+ xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma,
desc.len);
tx_bi = &xdp_ring->tx_bi[xdp_ring->next_to_use];
@@ -416,7 +420,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
I40E_TXD_QW1_CMD_SHIFT);
i40e_xdp_ring_update_tail(xdp_ring);
- xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
+ xsk_tx_release(xdp_ring->xsk_pool);
i40e_update_tx_stats(xdp_ring, sent_frames, total_bytes);
}
@@ -448,7 +452,7 @@ static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring,
**/
bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring)
{
- struct xdp_umem *umem = tx_ring->xsk_umem;
+ struct xsk_buff_pool *bp = tx_ring->xsk_pool;
u32 i, completed_frames, xsk_frames = 0;
u32 head_idx = i40e_get_head(tx_ring);
struct i40e_tx_buffer *tx_bi;
@@ -488,13 +492,13 @@ bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring)
tx_ring->next_to_clean -= tx_ring->count;
if (xsk_frames)
- xsk_umem_complete_tx(umem, xsk_frames);
+ xsk_tx_completed(bp, xsk_frames);
i40e_arm_wb(tx_ring, vsi, completed_frames);
out_xmit:
- if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem))
- xsk_set_tx_need_wakeup(tx_ring->xsk_umem);
+ if (xsk_uses_need_wakeup(tx_ring->xsk_pool))
+ xsk_set_tx_need_wakeup(tx_ring->xsk_pool);
return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring));
}
@@ -526,7 +530,7 @@ int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
if (queue_id >= vsi->num_queue_pairs)
return -ENXIO;
- if (!vsi->xdp_rings[queue_id]->xsk_umem)
+ if (!vsi->xdp_rings[queue_id]->xsk_pool)
return -ENXIO;
ring = vsi->xdp_rings[queue_id];
@@ -565,7 +569,7 @@ void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring)
void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring)
{
u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
- struct xdp_umem *umem = tx_ring->xsk_umem;
+ struct xsk_buff_pool *bp = tx_ring->xsk_pool;
struct i40e_tx_buffer *tx_bi;
u32 xsk_frames = 0;
@@ -585,14 +589,15 @@ void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring)
}
if (xsk_frames)
- xsk_umem_complete_tx(umem, xsk_frames);
+ xsk_tx_completed(bp, xsk_frames);
}
/**
- * i40e_xsk_any_rx_ring_enabled - Checks if Rx rings have AF_XDP UMEM attached
+ * i40e_xsk_any_rx_ring_enabled - Checks if Rx rings have an AF_XDP
+ * buffer pool attached
* @vsi: vsi
*
- * Returns true if any of the Rx rings has an AF_XDP UMEM attached
+ * Returns true if any of the Rx rings has an AF_XDP buffer pool attached
**/
bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi)
{
@@ -600,7 +605,7 @@ bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi)
int i;
for (i = 0; i < vsi->num_queue_pairs; i++) {
- if (xdp_get_umem_from_qid(netdev, i))
+ if (xsk_get_pool_from_qid(netdev, i))
return true;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
index c524c14..7adfd85 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
@@ -5,12 +5,12 @@
#define _I40E_XSK_H_
struct i40e_vsi;
-struct xdp_umem;
+struct xsk_buff_pool;
struct zero_copy_allocator;
int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair);
int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair);
-int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
+int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool,
u16 qid);
bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count);
int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
index ca041b3..256fa07 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -1309,10 +1309,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring,
return NULL;
/* prefetch first cache line of first page */
va = page_address(rx_buffer->page) + rx_buffer->page_offset;
- prefetch(va);
-#if L1_CACHE_BYTES < 128
- prefetch(va + L1_CACHE_BYTES);
-#endif
+ net_prefetch(va);
/* allocate a skb to store the frags */
skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
@@ -1376,10 +1373,8 @@ static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring,
return NULL;
/* prefetch first cache line of first page */
va = page_address(rx_buffer->page) + rx_buffer->page_offset;
- prefetch(va);
-#if L1_CACHE_BYTES < 128
- prefetch(va + L1_CACHE_BYTES);
-#endif
+ net_prefetch(va);
+
/* build an skb around the page buffer */
skb = build_skb(va - IAVF_SKB_PAD, truesize);
if (unlikely(!skb))
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index fe140ff..65583f0 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -321,9 +321,9 @@ struct ice_vsi {
struct ice_ring **xdp_rings; /* XDP ring array */
u16 num_xdp_txq; /* Used XDP queues */
u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
- struct xdp_umem **xsk_umems;
- u16 num_xsk_umems_used;
- u16 num_xsk_umems;
+ struct xsk_buff_pool **xsk_pools;
+ u16 num_xsk_pools_used;
+ u16 num_xsk_pools;
} ____cacheline_internodealigned_in_smp;
/* struct that defines an interrupt vector */
@@ -507,25 +507,25 @@ static inline void ice_set_ring_xdp(struct ice_ring *ring)
}
/**
- * ice_xsk_umem - get XDP UMEM bound to a ring
+ * ice_xsk_pool - get XSK buffer pool bound to a ring
* @ring - ring to use
*
- * Returns a pointer to xdp_umem structure if there is an UMEM present,
+ * Returns a pointer to xdp_umem structure if there is a buffer pool present,
* NULL otherwise.
*/
-static inline struct xdp_umem *ice_xsk_umem(struct ice_ring *ring)
+static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_ring *ring)
{
- struct xdp_umem **umems = ring->vsi->xsk_umems;
+ struct xsk_buff_pool **pools = ring->vsi->xsk_pools;
u16 qid = ring->q_index;
if (ice_ring_is_xdp(ring))
qid -= ring->vsi->num_xdp_txq;
- if (qid >= ring->vsi->num_xsk_umems || !umems || !umems[qid] ||
+ if (qid >= ring->vsi->num_xsk_pools || !pools || !pools[qid] ||
!ice_is_xdp_ena_vsi(ring->vsi))
return NULL;
- return umems[qid];
+ return pools[qid];
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 8700847..fe4320e 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -308,12 +308,12 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
ring->q_index);
- ring->xsk_umem = ice_xsk_umem(ring);
- if (ring->xsk_umem) {
+ ring->xsk_pool = ice_xsk_pool(ring);
+ if (ring->xsk_pool) {
xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
ring->rx_buf_len =
- xsk_umem_get_rx_frame_size(ring->xsk_umem);
+ xsk_pool_get_rx_frame_size(ring->xsk_pool);
/* For AF_XDP ZC, we disallow packets to span on
* multiple buffers, thus letting us skip that
* handling in the fast-path.
@@ -324,7 +324,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
NULL);
if (err)
return err;
- xsk_buff_set_rxq_info(ring->xsk_umem, &ring->xdp_rxq);
+ xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
ring->q_index);
@@ -417,9 +417,9 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
writel(0, ring->tail);
- if (ring->xsk_umem) {
- if (!xsk_buff_can_alloc(ring->xsk_umem, num_bufs)) {
- dev_warn(dev, "UMEM does not provide enough addresses to fill %d buffers on Rx ring %d\n",
+ if (ring->xsk_pool) {
+ if (!xsk_buff_can_alloc(ring->xsk_pool, num_bufs)) {
+ dev_warn(dev, "XSK buffer pool does not provide enough addresses to fill %d buffers on Rx ring %d\n",
num_bufs, ring->q_index);
dev_warn(dev, "Change Rx ring/fill queue size to avoid performance issues\n");
@@ -428,7 +428,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
err = ice_alloc_rx_bufs_zc(ring, num_bufs);
if (err)
- dev_info(dev, "Failed to allocate some buffers on UMEM enabled Rx ring %d (pf_q %d)\n",
+ dev_info(dev, "Failed to allocate some buffers on XSK buffer pool enabled Rx ring %d (pf_q %d)\n",
ring->q_index, pf_q);
return 0;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index f268277..feeb5cd 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1743,7 +1743,7 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
return ret;
for (i = 0; i < vsi->num_xdp_txq; i++)
- vsi->xdp_rings[i]->xsk_umem = ice_xsk_umem(vsi->xdp_rings[i]);
+ vsi->xdp_rings[i]->xsk_pool = ice_xsk_pool(vsi->xdp_rings[i]);
return ret;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 4634b48..2297ee7 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2273,7 +2273,7 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
if (ice_setup_tx_ring(xdp_ring))
goto free_xdp_rings;
ice_set_ring_xdp(xdp_ring);
- xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring);
+ xdp_ring->xsk_pool = ice_xsk_pool(xdp_ring);
}
return 0;
@@ -2517,13 +2517,13 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
if (if_running)
ret = ice_up(vsi);
- if (!ret && prog && vsi->xsk_umems) {
+ if (!ret && prog && vsi->xsk_pools) {
int i;
ice_for_each_rxq(vsi, i) {
struct ice_ring *rx_ring = vsi->rx_rings[i];
- if (rx_ring->xsk_umem)
+ if (rx_ring->xsk_pool)
napi_schedule(&rx_ring->q_vector->napi);
}
}
@@ -2549,8 +2549,8 @@ static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
switch (xdp->command) {
case XDP_SETUP_PROG:
return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
- case XDP_SETUP_XSK_UMEM:
- return ice_xsk_umem_setup(vsi, xdp->xsk.umem,
+ case XDP_SETUP_XSK_POOL:
+ return ice_xsk_pool_setup(vsi, xdp->xsk.pool,
xdp->xsk.queue_id);
default:
return -EINVAL;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 9d0d6b0..eae7526 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -145,7 +145,7 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
{
u16 i;
- if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_umem) {
+ if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_pool) {
ice_xsk_clean_xdp_ring(tx_ring);
goto tx_skip_free;
}
@@ -375,7 +375,7 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
if (!rx_ring->rx_buf)
return;
- if (rx_ring->xsk_umem) {
+ if (rx_ring->xsk_pool) {
ice_xsk_clean_rx_ring(rx_ring);
goto rx_skip_free;
}
@@ -919,10 +919,7 @@ ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
* likely have a consumer accessing first few bytes of meta
* data, and then actual data.
*/
- prefetch(xdp->data_meta);
-#if L1_CACHE_BYTES < 128
- prefetch((void *)(xdp->data + L1_CACHE_BYTES));
-#endif
+ net_prefetch(xdp->data_meta);
/* build an skb around the page buffer */
skb = build_skb(xdp->data_hard_start, truesize);
if (unlikely(!skb))
@@ -964,10 +961,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
struct sk_buff *skb;
/* prefetch first cache line of first page */
- prefetch(xdp->data);
-#if L1_CACHE_BYTES < 128
- prefetch((void *)(xdp->data + L1_CACHE_BYTES));
-#endif /* L1_CACHE_BYTES */
+ net_prefetch(xdp->data);
/* allocate a skb to store the frags */
skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE,
@@ -1616,7 +1610,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
* budget and be more aggressive about cleaning up the Tx descriptors.
*/
ice_for_each_ring(ring, q_vector->tx) {
- bool wd = ring->xsk_umem ?
+ bool wd = ring->xsk_pool ?
ice_clean_tx_irq_zc(ring, budget) :
ice_clean_tx_irq(ring, budget);
@@ -1646,7 +1640,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
* comparison in the irq context instead of many inside the
* ice_clean_rx_irq function and makes the codebase cleaner.
*/
- cleaned = ring->xsk_umem ?
+ cleaned = ring->xsk_pool ?
ice_clean_rx_irq_zc(ring, budget_per_ring) :
ice_clean_rx_irq(ring, budget_per_ring);
work_done += cleaned;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 51b4df7..e9f60d5 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -295,7 +295,7 @@ struct ice_ring {
struct rcu_head rcu; /* to avoid race on free */
struct bpf_prog *xdp_prog;
- struct xdp_umem *xsk_umem;
+ struct xsk_buff_pool *xsk_pool;
/* CL3 - 3rd cacheline starts here */
struct xdp_rxq_info xdp_rxq;
/* CLX - the below items are only accessed infrequently and should be
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 20ac5fc..7978865 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -236,7 +236,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
if (err)
goto free_buf;
ice_set_ring_xdp(xdp_ring);
- xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring);
+ xdp_ring->xsk_pool = ice_xsk_pool(xdp_ring);
}
err = ice_setup_rx_ctx(rx_ring);
@@ -260,21 +260,21 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
}
/**
- * ice_xsk_alloc_umems - allocate a UMEM region for an XDP socket
- * @vsi: VSI to allocate the UMEM on
+ * ice_xsk_alloc_pools - allocate a buffer pool for an XDP socket
+ * @vsi: VSI to allocate the buffer pool on
*
* Returns 0 on success, negative on error
*/
-static int ice_xsk_alloc_umems(struct ice_vsi *vsi)
+static int ice_xsk_alloc_pools(struct ice_vsi *vsi)
{
- if (vsi->xsk_umems)
+ if (vsi->xsk_pools)
return 0;
- vsi->xsk_umems = kcalloc(vsi->num_xsk_umems, sizeof(*vsi->xsk_umems),
+ vsi->xsk_pools = kcalloc(vsi->num_xsk_pools, sizeof(*vsi->xsk_pools),
GFP_KERNEL);
- if (!vsi->xsk_umems) {
- vsi->num_xsk_umems = 0;
+ if (!vsi->xsk_pools) {
+ vsi->num_xsk_pools = 0;
return -ENOMEM;
}
@@ -282,73 +282,73 @@ static int ice_xsk_alloc_umems(struct ice_vsi *vsi)
}
/**
- * ice_xsk_remove_umem - Remove an UMEM for a certain ring/qid
+ * ice_xsk_remove_pool - Remove an buffer pool for a certain ring/qid
* @vsi: VSI from which the VSI will be removed
- * @qid: Ring/qid associated with the UMEM
+ * @qid: Ring/qid associated with the buffer pool
*/
-static void ice_xsk_remove_umem(struct ice_vsi *vsi, u16 qid)
+static void ice_xsk_remove_pool(struct ice_vsi *vsi, u16 qid)
{
- vsi->xsk_umems[qid] = NULL;
- vsi->num_xsk_umems_used--;
+ vsi->xsk_pools[qid] = NULL;
+ vsi->num_xsk_pools_used--;
- if (vsi->num_xsk_umems_used == 0) {
- kfree(vsi->xsk_umems);
- vsi->xsk_umems = NULL;
- vsi->num_xsk_umems = 0;
+ if (vsi->num_xsk_pools_used == 0) {
+ kfree(vsi->xsk_pools);
+ vsi->xsk_pools = NULL;
+ vsi->num_xsk_pools = 0;
}
}
/**
- * ice_xsk_umem_disable - disable a UMEM region
+ * ice_xsk_pool_disable - disable a buffer pool region
* @vsi: Current VSI
* @qid: queue ID
*
* Returns 0 on success, negative on failure
*/
-static int ice_xsk_umem_disable(struct ice_vsi *vsi, u16 qid)
+static int ice_xsk_pool_disable(struct ice_vsi *vsi, u16 qid)
{
- if (!vsi->xsk_umems || qid >= vsi->num_xsk_umems ||
- !vsi->xsk_umems[qid])
+ if (!vsi->xsk_pools || qid >= vsi->num_xsk_pools ||
+ !vsi->xsk_pools[qid])
return -EINVAL;
- xsk_buff_dma_unmap(vsi->xsk_umems[qid], ICE_RX_DMA_ATTR);
- ice_xsk_remove_umem(vsi, qid);
+ xsk_pool_dma_unmap(vsi->xsk_pools[qid], ICE_RX_DMA_ATTR);
+ ice_xsk_remove_pool(vsi, qid);
return 0;
}
/**
- * ice_xsk_umem_enable - enable a UMEM region
+ * ice_xsk_pool_enable - enable a buffer pool region
* @vsi: Current VSI
- * @umem: pointer to a requested UMEM region
+ * @pool: pointer to a requested buffer pool region
* @qid: queue ID
*
* Returns 0 on success, negative on failure
*/
static int
-ice_xsk_umem_enable(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
{
int err;
if (vsi->type != ICE_VSI_PF)
return -EINVAL;
- if (!vsi->num_xsk_umems)
- vsi->num_xsk_umems = min_t(u16, vsi->num_rxq, vsi->num_txq);
- if (qid >= vsi->num_xsk_umems)
+ if (!vsi->num_xsk_pools)
+ vsi->num_xsk_pools = min_t(u16, vsi->num_rxq, vsi->num_txq);
+ if (qid >= vsi->num_xsk_pools)
return -EINVAL;
- err = ice_xsk_alloc_umems(vsi);
+ err = ice_xsk_alloc_pools(vsi);
if (err)
return err;
- if (vsi->xsk_umems && vsi->xsk_umems[qid])
+ if (vsi->xsk_pools && vsi->xsk_pools[qid])
return -EBUSY;
- vsi->xsk_umems[qid] = umem;
- vsi->num_xsk_umems_used++;
+ vsi->xsk_pools[qid] = pool;
+ vsi->num_xsk_pools_used++;
- err = xsk_buff_dma_map(vsi->xsk_umems[qid], ice_pf_to_dev(vsi->back),
+ err = xsk_pool_dma_map(vsi->xsk_pools[qid], ice_pf_to_dev(vsi->back),
ICE_RX_DMA_ATTR);
if (err)
return err;
@@ -357,17 +357,17 @@ ice_xsk_umem_enable(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
}
/**
- * ice_xsk_umem_setup - enable/disable a UMEM region depending on its state
+ * ice_xsk_pool_setup - enable/disable a buffer pool region depending on its state
* @vsi: Current VSI
- * @umem: UMEM to enable/associate to a ring, NULL to disable
+ * @pool: buffer pool to enable/associate to a ring, NULL to disable
* @qid: queue ID
*
* Returns 0 on success, negative on failure
*/
-int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
{
- bool if_running, umem_present = !!umem;
- int ret = 0, umem_failure = 0;
+ bool if_running, pool_present = !!pool;
+ int ret = 0, pool_failure = 0;
if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
@@ -375,26 +375,26 @@ int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
ret = ice_qp_dis(vsi, qid);
if (ret) {
netdev_err(vsi->netdev, "ice_qp_dis error = %d\n", ret);
- goto xsk_umem_if_up;
+ goto xsk_pool_if_up;
}
}
- umem_failure = umem_present ? ice_xsk_umem_enable(vsi, umem, qid) :
- ice_xsk_umem_disable(vsi, qid);
+ pool_failure = pool_present ? ice_xsk_pool_enable(vsi, pool, qid) :
+ ice_xsk_pool_disable(vsi, qid);
-xsk_umem_if_up:
+xsk_pool_if_up:
if (if_running) {
ret = ice_qp_ena(vsi, qid);
- if (!ret && umem_present)
+ if (!ret && pool_present)
napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi);
else if (ret)
netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret);
}
- if (umem_failure) {
- netdev_err(vsi->netdev, "Could not %sable UMEM, error = %d\n",
- umem_present ? "en" : "dis", umem_failure);
- return umem_failure;
+ if (pool_failure) {
+ netdev_err(vsi->netdev, "Could not %sable buffer pool, error = %d\n",
+ pool_present ? "en" : "dis", pool_failure);
+ return pool_failure;
}
return ret;
@@ -425,7 +425,7 @@ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count)
rx_buf = &rx_ring->rx_buf[ntu];
do {
- rx_buf->xdp = xsk_buff_alloc(rx_ring->xsk_umem);
+ rx_buf->xdp = xsk_buff_alloc(rx_ring->xsk_pool);
if (!rx_buf->xdp) {
ret = true;
break;
@@ -595,7 +595,7 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
rx_buf->xdp->data_end = rx_buf->xdp->data + size;
- xsk_buff_dma_sync_for_cpu(rx_buf->xdp);
+ xsk_buff_dma_sync_for_cpu(rx_buf->xdp, rx_ring->xsk_pool);
xdp_res = ice_run_xdp_zc(rx_ring, rx_buf->xdp);
if (xdp_res) {
@@ -645,11 +645,11 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
ice_finalize_xdp_rx(rx_ring, xdp_xmit);
ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes);
- if (xsk_umem_uses_need_wakeup(rx_ring->xsk_umem)) {
+ if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
- xsk_set_rx_need_wakeup(rx_ring->xsk_umem);
+ xsk_set_rx_need_wakeup(rx_ring->xsk_pool);
else
- xsk_clear_rx_need_wakeup(rx_ring->xsk_umem);
+ xsk_clear_rx_need_wakeup(rx_ring->xsk_pool);
return (int)total_rx_packets;
}
@@ -682,11 +682,11 @@ static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget)
tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use];
- if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
+ if (!xsk_tx_peek_desc(xdp_ring->xsk_pool, &desc))
break;
- dma = xsk_buff_raw_get_dma(xdp_ring->xsk_umem, desc.addr);
- xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_umem, dma,
+ dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc.addr);
+ xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma,
desc.len);
tx_buf->bytecount = desc.len;
@@ -703,7 +703,7 @@ static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget)
if (tx_desc) {
ice_xdp_ring_update_tail(xdp_ring);
- xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
+ xsk_tx_release(xdp_ring->xsk_pool);
}
return budget > 0 && work_done;
@@ -777,10 +777,10 @@ bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget)
xdp_ring->next_to_clean = ntc;
if (xsk_frames)
- xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames);
+ xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
- if (xsk_umem_uses_need_wakeup(xdp_ring->xsk_umem))
- xsk_set_tx_need_wakeup(xdp_ring->xsk_umem);
+ if (xsk_uses_need_wakeup(xdp_ring->xsk_pool))
+ xsk_set_tx_need_wakeup(xdp_ring->xsk_pool);
ice_update_tx_ring_stats(xdp_ring, total_packets, total_bytes);
xmit_done = ice_xmit_zc(xdp_ring, ICE_DFLT_IRQ_WORK);
@@ -814,7 +814,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
if (queue_id >= vsi->num_txq)
return -ENXIO;
- if (!vsi->xdp_rings[queue_id]->xsk_umem)
+ if (!vsi->xdp_rings[queue_id]->xsk_pool)
return -ENXIO;
ring = vsi->xdp_rings[queue_id];
@@ -833,20 +833,20 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
}
/**
- * ice_xsk_any_rx_ring_ena - Checks if Rx rings have AF_XDP UMEM attached
+ * ice_xsk_any_rx_ring_ena - Checks if Rx rings have AF_XDP buff pool attached
* @vsi: VSI to be checked
*
- * Returns true if any of the Rx rings has an AF_XDP UMEM attached
+ * Returns true if any of the Rx rings has an AF_XDP buff pool attached
*/
bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
{
int i;
- if (!vsi->xsk_umems)
+ if (!vsi->xsk_pools)
return false;
- for (i = 0; i < vsi->num_xsk_umems; i++) {
- if (vsi->xsk_umems[i])
+ for (i = 0; i < vsi->num_xsk_pools; i++) {
+ if (vsi->xsk_pools[i])
return true;
}
@@ -854,7 +854,7 @@ bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
}
/**
- * ice_xsk_clean_rx_ring - clean UMEM queues connected to a given Rx ring
+ * ice_xsk_clean_rx_ring - clean buffer pool queues connected to a given Rx ring
* @rx_ring: ring to be cleaned
*/
void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring)
@@ -872,7 +872,7 @@ void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring)
}
/**
- * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its UMEM queues
+ * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its buffer pool queues
* @xdp_ring: XDP_Tx ring
*/
void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring)
@@ -896,5 +896,5 @@ void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring)
}
if (xsk_frames)
- xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames);
+ xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h
index fc1a06b..fad7836 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.h
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.h
@@ -9,7 +9,8 @@
struct ice_vsi;
#ifdef CONFIG_XDP_SOCKETS
-int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid);
+int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool,
+ u16 qid);
int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget);
bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget);
int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags);
@@ -19,8 +20,8 @@ void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring);
void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring);
#else
static inline int
-ice_xsk_umem_setup(struct ice_vsi __always_unused *vsi,
- struct xdp_umem __always_unused *umem,
+ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi,
+ struct xsk_buff_pool __always_unused *pool,
u16 __always_unused qid)
{
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index d9c3a6b..e1e37d0 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -8046,10 +8046,7 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
struct sk_buff *skb;
/* prefetch first cache line of first page */
- prefetch(va);
-#if L1_CACHE_BYTES < 128
- prefetch(va + L1_CACHE_BYTES);
-#endif
+ net_prefetch(va);
/* allocate a skb to store the frags */
skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN);
@@ -8103,10 +8100,7 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
struct sk_buff *skb;
/* prefetch first cache line of first page */
- prefetch(va);
-#if L1_CACHE_BYTES < 128
- prefetch(va + L1_CACHE_BYTES);
-#endif
+ net_prefetch(va);
/* build an skb around the page buffer */
skb = build_skb(va - IGB_SKB_PAD, truesize);
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 9593aa4..c6968fd 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -1550,10 +1550,7 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
struct sk_buff *skb;
/* prefetch first cache line of first page */
- prefetch(va);
-#if L1_CACHE_BYTES < 128
- prefetch(va + L1_CACHE_BYTES);
-#endif
+ net_prefetch(va);
/* build an skb around the page buffer */
skb = build_skb(va - IGC_SKB_PAD, truesize);
@@ -1589,10 +1586,7 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
struct sk_buff *skb;
/* prefetch first cache line of first page */
- prefetch(va);
-#if L1_CACHE_BYTES < 128
- prefetch(va + L1_CACHE_BYTES);
-#endif
+ net_prefetch(va);
/* allocate a skb to store the frags */
skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGC_RX_HDR_LEN);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 1e8a809..de0fc6e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -350,7 +350,7 @@ struct ixgbe_ring {
struct ixgbe_rx_queue_stats rx_stats;
};
struct xdp_rxq_info xdp_rxq;
- struct xdp_umem *xsk_umem;
+ struct xsk_buff_pool *xsk_pool;
u16 ring_idx; /* {rx,tx,xdp}_ring back reference idx */
u16 rx_buf_len;
} ____cacheline_internodealigned_in_smp;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 2f8a4cf..0b675c3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2095,10 +2095,8 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
struct sk_buff *skb;
/* prefetch first cache line of first page */
- prefetch(xdp->data);
-#if L1_CACHE_BYTES < 128
- prefetch(xdp->data + L1_CACHE_BYTES);
-#endif
+ net_prefetch(xdp->data);
+
/* Note, we get here by enabling legacy-rx via:
*
* ethtool --set-priv-flags <dev> legacy-rx on
@@ -2161,10 +2159,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
* likely have a consumer accessing first few bytes of meta
* data, and then actual data.
*/
- prefetch(xdp->data_meta);
-#if L1_CACHE_BYTES < 128
- prefetch(xdp->data_meta + L1_CACHE_BYTES);
-#endif
+ net_prefetch(xdp->data_meta);
/* build an skb to around the page buffer */
skb = build_skb(xdp->data_hard_start, truesize);
@@ -3156,7 +3151,7 @@ int ixgbe_poll(struct napi_struct *napi, int budget)
#endif
ixgbe_for_each_ring(ring, q_vector->tx) {
- bool wd = ring->xsk_umem ?
+ bool wd = ring->xsk_pool ?
ixgbe_clean_xdp_tx_irq(q_vector, ring, budget) :
ixgbe_clean_tx_irq(q_vector, ring, budget);
@@ -3176,7 +3171,7 @@ int ixgbe_poll(struct napi_struct *napi, int budget)
per_ring_budget = budget;
ixgbe_for_each_ring(ring, q_vector->rx) {
- int cleaned = ring->xsk_umem ?
+ int cleaned = ring->xsk_pool ?
ixgbe_clean_rx_irq_zc(q_vector, ring,
per_ring_budget) :
ixgbe_clean_rx_irq(q_vector, ring,
@@ -3471,9 +3466,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter,
u32 txdctl = IXGBE_TXDCTL_ENABLE;
u8 reg_idx = ring->reg_idx;
- ring->xsk_umem = NULL;
+ ring->xsk_pool = NULL;
if (ring_is_xdp(ring))
- ring->xsk_umem = ixgbe_xsk_umem(adapter, ring);
+ ring->xsk_pool = ixgbe_xsk_pool(adapter, ring);
/* disable queue to avoid issues while updating state */
IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), 0);
@@ -3713,8 +3708,8 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter,
srrctl = IXGBE_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT;
/* configure the packet buffer length */
- if (rx_ring->xsk_umem) {
- u32 xsk_buf_len = xsk_umem_get_rx_frame_size(rx_ring->xsk_umem);
+ if (rx_ring->xsk_pool) {
+ u32 xsk_buf_len = xsk_pool_get_rx_frame_size(rx_ring->xsk_pool);
/* If the MAC support setting RXDCTL.RLPML, the
* SRRCTL[n].BSIZEPKT is set to PAGE_SIZE and
@@ -4059,12 +4054,12 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
u8 reg_idx = ring->reg_idx;
xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
- ring->xsk_umem = ixgbe_xsk_umem(adapter, ring);
- if (ring->xsk_umem) {
+ ring->xsk_pool = ixgbe_xsk_pool(adapter, ring);
+ if (ring->xsk_pool) {
WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_XSK_BUFF_POOL,
NULL));
- xsk_buff_set_rxq_info(ring->xsk_umem, &ring->xdp_rxq);
+ xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
} else {
WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_PAGE_SHARED, NULL));
@@ -4119,8 +4114,8 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
#endif
}
- if (ring->xsk_umem && hw->mac.type != ixgbe_mac_82599EB) {
- u32 xsk_buf_len = xsk_umem_get_rx_frame_size(ring->xsk_umem);
+ if (ring->xsk_pool && hw->mac.type != ixgbe_mac_82599EB) {
+ u32 xsk_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool);
rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
IXGBE_RXDCTL_RLPML_EN);
@@ -4142,7 +4137,7 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
ixgbe_rx_desc_queue_enable(adapter, ring);
- if (ring->xsk_umem)
+ if (ring->xsk_pool)
ixgbe_alloc_rx_buffers_zc(ring, ixgbe_desc_unused(ring));
else
ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring));
@@ -5292,7 +5287,7 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring)
u16 i = rx_ring->next_to_clean;
struct ixgbe_rx_buffer *rx_buffer = &rx_ring->rx_buffer_info[i];
- if (rx_ring->xsk_umem) {
+ if (rx_ring->xsk_pool) {
ixgbe_xsk_clean_rx_ring(rx_ring);
goto skip_free;
}
@@ -5984,7 +5979,7 @@ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring)
u16 i = tx_ring->next_to_clean;
struct ixgbe_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
- if (tx_ring->xsk_umem) {
+ if (tx_ring->xsk_pool) {
ixgbe_xsk_clean_tx_ring(tx_ring);
goto out;
}
@@ -10146,7 +10141,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
*/
if (need_reset && prog)
for (i = 0; i < adapter->num_rx_queues; i++)
- if (adapter->xdp_ring[i]->xsk_umem)
+ if (adapter->xdp_ring[i]->xsk_pool)
(void)ixgbe_xsk_wakeup(adapter->netdev, i,
XDP_WAKEUP_RX);
@@ -10160,8 +10155,8 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
switch (xdp->command) {
case XDP_SETUP_PROG:
return ixgbe_xdp_setup(dev, xdp->prog);
- case XDP_SETUP_XSK_UMEM:
- return ixgbe_xsk_umem_setup(adapter, xdp->xsk.umem,
+ case XDP_SETUP_XSK_POOL:
+ return ixgbe_xsk_pool_setup(adapter, xdp->xsk.pool,
xdp->xsk.queue_id);
default:
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
index 7887ae4..2aeec78 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
@@ -28,9 +28,10 @@ void ixgbe_irq_rearm_queues(struct ixgbe_adapter *adapter, u64 qmask);
void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring);
void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring);
-struct xdp_umem *ixgbe_xsk_umem(struct ixgbe_adapter *adapter,
- struct ixgbe_ring *ring);
-int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem,
+struct xsk_buff_pool *ixgbe_xsk_pool(struct ixgbe_adapter *adapter,
+ struct ixgbe_ring *ring);
+int ixgbe_xsk_pool_setup(struct ixgbe_adapter *adapter,
+ struct xsk_buff_pool *pool,
u16 qid);
void ixgbe_zca_free(struct zero_copy_allocator *alloc, unsigned long handle);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index ec7121f..3771857 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -8,8 +8,8 @@
#include "ixgbe.h"
#include "ixgbe_txrx_common.h"
-struct xdp_umem *ixgbe_xsk_umem(struct ixgbe_adapter *adapter,
- struct ixgbe_ring *ring)
+struct xsk_buff_pool *ixgbe_xsk_pool(struct ixgbe_adapter *adapter,
+ struct ixgbe_ring *ring)
{
bool xdp_on = READ_ONCE(adapter->xdp_prog);
int qid = ring->ring_idx;
@@ -17,11 +17,11 @@ struct xdp_umem *ixgbe_xsk_umem(struct ixgbe_adapter *adapter,
if (!xdp_on || !test_bit(qid, adapter->af_xdp_zc_qps))
return NULL;
- return xdp_get_umem_from_qid(adapter->netdev, qid);
+ return xsk_get_pool_from_qid(adapter->netdev, qid);
}
-static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter,
- struct xdp_umem *umem,
+static int ixgbe_xsk_pool_enable(struct ixgbe_adapter *adapter,
+ struct xsk_buff_pool *pool,
u16 qid)
{
struct net_device *netdev = adapter->netdev;
@@ -35,7 +35,7 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter,
qid >= netdev->real_num_tx_queues)
return -EINVAL;
- err = xsk_buff_dma_map(umem, &adapter->pdev->dev, IXGBE_RX_DMA_ATTR);
+ err = xsk_pool_dma_map(pool, &adapter->pdev->dev, IXGBE_RX_DMA_ATTR);
if (err)
return err;
@@ -59,13 +59,13 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter,
return 0;
}
-static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid)
+static int ixgbe_xsk_pool_disable(struct ixgbe_adapter *adapter, u16 qid)
{
- struct xdp_umem *umem;
+ struct xsk_buff_pool *pool;
bool if_running;
- umem = xdp_get_umem_from_qid(adapter->netdev, qid);
- if (!umem)
+ pool = xsk_get_pool_from_qid(adapter->netdev, qid);
+ if (!pool)
return -EINVAL;
if_running = netif_running(adapter->netdev) &&
@@ -75,7 +75,7 @@ static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid)
ixgbe_txrx_ring_disable(adapter, qid);
clear_bit(qid, adapter->af_xdp_zc_qps);
- xsk_buff_dma_unmap(umem, IXGBE_RX_DMA_ATTR);
+ xsk_pool_dma_unmap(pool, IXGBE_RX_DMA_ATTR);
if (if_running)
ixgbe_txrx_ring_enable(adapter, qid);
@@ -83,11 +83,12 @@ static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid)
return 0;
}
-int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem,
+int ixgbe_xsk_pool_setup(struct ixgbe_adapter *adapter,
+ struct xsk_buff_pool *pool,
u16 qid)
{
- return umem ? ixgbe_xsk_umem_enable(adapter, umem, qid) :
- ixgbe_xsk_umem_disable(adapter, qid);
+ return pool ? ixgbe_xsk_pool_enable(adapter, pool, qid) :
+ ixgbe_xsk_pool_disable(adapter, qid);
}
static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
@@ -149,7 +150,7 @@ bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count)
i -= rx_ring->count;
do {
- bi->xdp = xsk_buff_alloc(rx_ring->xsk_umem);
+ bi->xdp = xsk_buff_alloc(rx_ring->xsk_pool);
if (!bi->xdp) {
ok = false;
break;
@@ -286,7 +287,7 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
}
bi->xdp->data_end = bi->xdp->data + size;
- xsk_buff_dma_sync_for_cpu(bi->xdp);
+ xsk_buff_dma_sync_for_cpu(bi->xdp, rx_ring->xsk_pool);
xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, bi->xdp);
if (xdp_res) {
@@ -344,11 +345,11 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
q_vector->rx.total_packets += total_rx_packets;
q_vector->rx.total_bytes += total_rx_bytes;
- if (xsk_umem_uses_need_wakeup(rx_ring->xsk_umem)) {
+ if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
- xsk_set_rx_need_wakeup(rx_ring->xsk_umem);
+ xsk_set_rx_need_wakeup(rx_ring->xsk_pool);
else
- xsk_clear_rx_need_wakeup(rx_ring->xsk_umem);
+ xsk_clear_rx_need_wakeup(rx_ring->xsk_pool);
return (int)total_rx_packets;
}
@@ -373,6 +374,7 @@ void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring)
static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
{
+ struct xsk_buff_pool *pool = xdp_ring->xsk_pool;
union ixgbe_adv_tx_desc *tx_desc = NULL;
struct ixgbe_tx_buffer *tx_bi;
bool work_done = true;
@@ -387,12 +389,11 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
break;
}
- if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
+ if (!xsk_tx_peek_desc(pool, &desc))
break;
- dma = xsk_buff_raw_get_dma(xdp_ring->xsk_umem, desc.addr);
- xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_umem, dma,
- desc.len);
+ dma = xsk_buff_raw_get_dma(pool, desc.addr);
+ xsk_buff_raw_dma_sync_for_device(pool, dma, desc.len);
tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use];
tx_bi->bytecount = desc.len;
@@ -418,7 +419,7 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
if (tx_desc) {
ixgbe_xdp_ring_update_tail(xdp_ring);
- xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
+ xsk_tx_release(pool);
}
return !!budget && work_done;
@@ -439,7 +440,7 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
{
u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
unsigned int total_packets = 0, total_bytes = 0;
- struct xdp_umem *umem = tx_ring->xsk_umem;
+ struct xsk_buff_pool *pool = tx_ring->xsk_pool;
union ixgbe_adv_tx_desc *tx_desc;
struct ixgbe_tx_buffer *tx_bi;
u32 xsk_frames = 0;
@@ -484,10 +485,10 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
q_vector->tx.total_packets += total_packets;
if (xsk_frames)
- xsk_umem_complete_tx(umem, xsk_frames);
+ xsk_tx_completed(pool, xsk_frames);
- if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem))
- xsk_set_tx_need_wakeup(tx_ring->xsk_umem);
+ if (xsk_uses_need_wakeup(pool))
+ xsk_set_tx_need_wakeup(pool);
return ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit);
}
@@ -511,7 +512,7 @@ int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
if (test_bit(__IXGBE_TX_DISABLED, &ring->state))
return -ENETDOWN;
- if (!ring->xsk_umem)
+ if (!ring->xsk_pool)
return -ENXIO;
if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
@@ -526,7 +527,7 @@ int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring)
{
u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
- struct xdp_umem *umem = tx_ring->xsk_umem;
+ struct xsk_buff_pool *pool = tx_ring->xsk_pool;
struct ixgbe_tx_buffer *tx_bi;
u32 xsk_frames = 0;
@@ -546,5 +547,5 @@ void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring)
}
if (xsk_frames)
- xsk_umem_complete_tx(umem, xsk_frames);
+ xsk_tx_completed(pool, xsk_frames);
}
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index a428113..50afec4 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -866,10 +866,8 @@ struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
struct sk_buff *skb;
/* prefetch first cache line of first page */
- prefetch(xdp->data);
-#if L1_CACHE_BYTES < 128
- prefetch(xdp->data + L1_CACHE_BYTES);
-#endif
+ net_prefetch(xdp->data);
+
/* Note, we get here by enabling legacy-rx via:
*
* ethtool --set-priv-flags <dev> legacy-rx on
@@ -947,10 +945,7 @@ static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
* have a consumer accessing first few bytes of meta data,
* and then actual data.
*/
- prefetch(xdp->data_meta);
-#if L1_CACHE_BYTES < 128
- prefetch(xdp->data_meta + L1_CACHE_BYTES);
-#endif
+ net_prefetch(xdp->data_meta);
/* build an skb around the page buffer */
skb = build_skb(xdp->data_hard_start, truesize);
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index 32753cc..ecb5f46 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -967,6 +967,7 @@ struct mvpp2_port {
phy_interface_t phy_interface;
struct phylink *phylink;
struct phylink_config phylink_config;
+ struct phylink_pcs phylink_pcs;
struct phy *comphy;
struct mvpp2_bm_pool *pool_long;
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 6e140d1b..ee8b6a9 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -57,13 +57,7 @@ static struct {
/* The prototype is added here to be used in start_dev when using ACPI. This
* will be removed once phylink is used for all modes (dt+ACPI).
*/
-static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode,
- const struct phylink_link_state *state);
-static void mvpp2_mac_link_up(struct phylink_config *config,
- struct phy_device *phy,
- unsigned int mode, phy_interface_t interface,
- int speed, int duplex,
- bool tx_pause, bool rx_pause);
+static void mvpp2_acpi_start(struct mvpp2_port *port);
/* Queue modes */
#define MVPP2_QDIST_SINGLE_MODE 0
@@ -1485,8 +1479,8 @@ static void mvpp2_port_loopback_set(struct mvpp2_port *port,
else
val &= ~MVPP2_GMAC_GMII_LB_EN_MASK;
- if (phy_interface_mode_is_8023z(port->phy_interface) ||
- port->phy_interface == PHY_INTERFACE_MODE_SGMII)
+ if (phy_interface_mode_is_8023z(state->interface) ||
+ state->interface == PHY_INTERFACE_MODE_SGMII)
val |= MVPP2_GMAC_PCS_LB_EN_MASK;
else
val &= ~MVPP2_GMAC_PCS_LB_EN_MASK;
@@ -4007,17 +4001,7 @@ static void mvpp2_start_dev(struct mvpp2_port *port)
if (port->phylink) {
phylink_start(port->phylink);
} else {
- /* Phylink isn't used as of now for ACPI, so the MAC has to be
- * configured manually when the interface is started. This will
- * be removed as soon as the phylink ACPI support lands in.
- */
- struct phylink_link_state state = {
- .interface = port->phy_interface,
- };
- mvpp2_mac_config(&port->phylink_config, MLO_AN_INBAND, &state);
- mvpp2_mac_link_up(&port->phylink_config, NULL,
- MLO_AN_INBAND, port->phy_interface,
- SPEED_UNKNOWN, DUPLEX_UNKNOWN, false, false);
+ mvpp2_acpi_start(port);
}
netif_tx_start_all_queues(port->dev);
@@ -5392,6 +5376,155 @@ static struct mvpp2_port *mvpp2_phylink_to_port(struct phylink_config *config)
return container_of(config, struct mvpp2_port, phylink_config);
}
+static struct mvpp2_port *mvpp2_pcs_to_port(struct phylink_pcs *pcs)
+{
+ return container_of(pcs, struct mvpp2_port, phylink_pcs);
+}
+
+static void mvpp2_xlg_pcs_get_state(struct phylink_pcs *pcs,
+ struct phylink_link_state *state)
+{
+ struct mvpp2_port *port = mvpp2_pcs_to_port(pcs);
+ u32 val;
+
+ state->speed = SPEED_10000;
+ state->duplex = 1;
+ state->an_complete = 1;
+
+ val = readl(port->base + MVPP22_XLG_STATUS);
+ state->link = !!(val & MVPP22_XLG_STATUS_LINK_UP);
+
+ state->pause = 0;
+ val = readl(port->base + MVPP22_XLG_CTRL0_REG);
+ if (val & MVPP22_XLG_CTRL0_TX_FLOW_CTRL_EN)
+ state->pause |= MLO_PAUSE_TX;
+ if (val & MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN)
+ state->pause |= MLO_PAUSE_RX;
+}
+
+static int mvpp2_xlg_pcs_config(struct phylink_pcs *pcs,
+ unsigned int mode,
+ phy_interface_t interface,
+ const unsigned long *advertising,
+ bool permit_pause_to_mac)
+{
+ return 0;
+}
+
+static const struct phylink_pcs_ops mvpp2_phylink_xlg_pcs_ops = {
+ .pcs_get_state = mvpp2_xlg_pcs_get_state,
+ .pcs_config = mvpp2_xlg_pcs_config,
+};
+
+static void mvpp2_gmac_pcs_get_state(struct phylink_pcs *pcs,
+ struct phylink_link_state *state)
+{
+ struct mvpp2_port *port = mvpp2_pcs_to_port(pcs);
+ u32 val;
+
+ val = readl(port->base + MVPP2_GMAC_STATUS0);
+
+ state->an_complete = !!(val & MVPP2_GMAC_STATUS0_AN_COMPLETE);
+ state->link = !!(val & MVPP2_GMAC_STATUS0_LINK_UP);
+ state->duplex = !!(val & MVPP2_GMAC_STATUS0_FULL_DUPLEX);
+
+ switch (port->phy_interface) {
+ case PHY_INTERFACE_MODE_1000BASEX:
+ state->speed = SPEED_1000;
+ break;
+ case PHY_INTERFACE_MODE_2500BASEX:
+ state->speed = SPEED_2500;
+ break;
+ default:
+ if (val & MVPP2_GMAC_STATUS0_GMII_SPEED)
+ state->speed = SPEED_1000;
+ else if (val & MVPP2_GMAC_STATUS0_MII_SPEED)
+ state->speed = SPEED_100;
+ else
+ state->speed = SPEED_10;
+ }
+
+ state->pause = 0;
+ if (val & MVPP2_GMAC_STATUS0_RX_PAUSE)
+ state->pause |= MLO_PAUSE_RX;
+ if (val & MVPP2_GMAC_STATUS0_TX_PAUSE)
+ state->pause |= MLO_PAUSE_TX;
+}
+
+static int mvpp2_gmac_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
+ phy_interface_t interface,
+ const unsigned long *advertising,
+ bool permit_pause_to_mac)
+{
+ struct mvpp2_port *port = mvpp2_pcs_to_port(pcs);
+ u32 mask, val, an, old_an, changed;
+
+ mask = MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS |
+ MVPP2_GMAC_IN_BAND_AUTONEG |
+ MVPP2_GMAC_AN_SPEED_EN |
+ MVPP2_GMAC_FLOW_CTRL_AUTONEG |
+ MVPP2_GMAC_AN_DUPLEX_EN;
+
+ if (phylink_autoneg_inband(mode)) {
+ mask |= MVPP2_GMAC_CONFIG_MII_SPEED |
+ MVPP2_GMAC_CONFIG_GMII_SPEED |
+ MVPP2_GMAC_CONFIG_FULL_DUPLEX;
+ val = MVPP2_GMAC_IN_BAND_AUTONEG;
+
+ if (interface == PHY_INTERFACE_MODE_SGMII) {
+ /* SGMII mode receives the speed and duplex from PHY */
+ val |= MVPP2_GMAC_AN_SPEED_EN |
+ MVPP2_GMAC_AN_DUPLEX_EN;
+ } else {
+ /* 802.3z mode has fixed speed and duplex */
+ val |= MVPP2_GMAC_CONFIG_GMII_SPEED |
+ MVPP2_GMAC_CONFIG_FULL_DUPLEX;
+
+ /* The FLOW_CTRL_AUTONEG bit selects either the hardware
+ * automatically or the bits in MVPP22_GMAC_CTRL_4_REG
+ * manually controls the GMAC pause modes.
+ */
+ if (permit_pause_to_mac)
+ val |= MVPP2_GMAC_FLOW_CTRL_AUTONEG;
+
+ /* Configure advertisement bits */
+ mask |= MVPP2_GMAC_FC_ADV_EN | MVPP2_GMAC_FC_ADV_ASM_EN;
+ if (phylink_test(advertising, Pause))
+ val |= MVPP2_GMAC_FC_ADV_EN;
+ if (phylink_test(advertising, Asym_Pause))
+ val |= MVPP2_GMAC_FC_ADV_ASM_EN;
+ }
+ } else {
+ val = 0;
+ }
+
+ old_an = an = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+ an = (an & ~mask) | val;
+ changed = an ^ old_an;
+ if (changed)
+ writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+
+ /* We are only interested in the advertisement bits changing */
+ return changed & (MVPP2_GMAC_FC_ADV_EN | MVPP2_GMAC_FC_ADV_ASM_EN);
+}
+
+static void mvpp2_gmac_pcs_an_restart(struct phylink_pcs *pcs)
+{
+ struct mvpp2_port *port = mvpp2_pcs_to_port(pcs);
+ u32 val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+
+ writel(val | MVPP2_GMAC_IN_BAND_RESTART_AN,
+ port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+ writel(val & ~MVPP2_GMAC_IN_BAND_RESTART_AN,
+ port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+}
+
+static const struct phylink_pcs_ops mvpp2_phylink_gmac_pcs_ops = {
+ .pcs_get_state = mvpp2_gmac_pcs_get_state,
+ .pcs_config = mvpp2_gmac_pcs_config,
+ .pcs_an_restart = mvpp2_gmac_pcs_an_restart,
+};
+
static void mvpp2_phylink_validate(struct phylink_config *config,
unsigned long *supported,
struct phylink_link_state *state)
@@ -5480,89 +5613,6 @@ static void mvpp2_phylink_validate(struct phylink_config *config,
bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
}
-static void mvpp22_xlg_pcs_get_state(struct mvpp2_port *port,
- struct phylink_link_state *state)
-{
- u32 val;
-
- state->speed = SPEED_10000;
- state->duplex = 1;
- state->an_complete = 1;
-
- val = readl(port->base + MVPP22_XLG_STATUS);
- state->link = !!(val & MVPP22_XLG_STATUS_LINK_UP);
-
- state->pause = 0;
- val = readl(port->base + MVPP22_XLG_CTRL0_REG);
- if (val & MVPP22_XLG_CTRL0_TX_FLOW_CTRL_EN)
- state->pause |= MLO_PAUSE_TX;
- if (val & MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN)
- state->pause |= MLO_PAUSE_RX;
-}
-
-static void mvpp2_gmac_pcs_get_state(struct mvpp2_port *port,
- struct phylink_link_state *state)
-{
- u32 val;
-
- val = readl(port->base + MVPP2_GMAC_STATUS0);
-
- state->an_complete = !!(val & MVPP2_GMAC_STATUS0_AN_COMPLETE);
- state->link = !!(val & MVPP2_GMAC_STATUS0_LINK_UP);
- state->duplex = !!(val & MVPP2_GMAC_STATUS0_FULL_DUPLEX);
-
- switch (port->phy_interface) {
- case PHY_INTERFACE_MODE_1000BASEX:
- state->speed = SPEED_1000;
- break;
- case PHY_INTERFACE_MODE_2500BASEX:
- state->speed = SPEED_2500;
- break;
- default:
- if (val & MVPP2_GMAC_STATUS0_GMII_SPEED)
- state->speed = SPEED_1000;
- else if (val & MVPP2_GMAC_STATUS0_MII_SPEED)
- state->speed = SPEED_100;
- else
- state->speed = SPEED_10;
- }
-
- state->pause = 0;
- if (val & MVPP2_GMAC_STATUS0_RX_PAUSE)
- state->pause |= MLO_PAUSE_RX;
- if (val & MVPP2_GMAC_STATUS0_TX_PAUSE)
- state->pause |= MLO_PAUSE_TX;
-}
-
-static void mvpp2_phylink_mac_pcs_get_state(struct phylink_config *config,
- struct phylink_link_state *state)
-{
- struct mvpp2_port *port = mvpp2_phylink_to_port(config);
-
- if (port->priv->hw_version == MVPP22 && port->gop_id == 0) {
- u32 mode = readl(port->base + MVPP22_XLG_CTRL3_REG);
- mode &= MVPP22_XLG_CTRL3_MACMODESELECT_MASK;
-
- if (mode == MVPP22_XLG_CTRL3_MACMODESELECT_10G) {
- mvpp22_xlg_pcs_get_state(port, state);
- return;
- }
- }
-
- mvpp2_gmac_pcs_get_state(port, state);
-}
-
-static void mvpp2_mac_an_restart(struct phylink_config *config)
-{
- struct mvpp2_port *port = mvpp2_phylink_to_port(config);
- u32 val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-
- writel(val | MVPP2_GMAC_IN_BAND_RESTART_AN,
- port->base + MVPP2_GMAC_AUTONEG_CONFIG);
- writel(val & ~MVPP2_GMAC_IN_BAND_RESTART_AN,
- port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-}
-
static void mvpp2_xlg_config(struct mvpp2_port *port, unsigned int mode,
const struct phylink_link_state *state)
{
@@ -5586,23 +5636,16 @@ static void mvpp2_xlg_config(struct mvpp2_port *port, unsigned int mode,
static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode,
const struct phylink_link_state *state)
{
- u32 old_an, an;
u32 old_ctrl0, ctrl0;
u32 old_ctrl2, ctrl2;
u32 old_ctrl4, ctrl4;
- old_an = an = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
old_ctrl0 = ctrl0 = readl(port->base + MVPP2_GMAC_CTRL_0_REG);
old_ctrl2 = ctrl2 = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
old_ctrl4 = ctrl4 = readl(port->base + MVPP22_GMAC_CTRL_4_REG);
- an &= ~(MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FC_ADV_EN |
- MVPP2_GMAC_FC_ADV_ASM_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG |
- MVPP2_GMAC_AN_DUPLEX_EN | MVPP2_GMAC_IN_BAND_AUTONEG |
- MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS);
ctrl0 &= ~MVPP2_GMAC_PORT_TYPE_MASK;
- ctrl2 &= ~(MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PORT_RESET_MASK |
- MVPP2_GMAC_PCS_ENABLE_MASK);
+ ctrl2 &= ~(MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PCS_ENABLE_MASK);
/* Configure port type */
if (phy_interface_mode_is_8023z(state->interface)) {
@@ -5624,12 +5667,6 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode,
MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
}
- /* Configure advertisement bits */
- if (phylink_test(state->advertising, Pause))
- an |= MVPP2_GMAC_FC_ADV_EN;
- if (phylink_test(state->advertising, Asym_Pause))
- an |= MVPP2_GMAC_FC_ADV_ASM_EN;
-
/* Configure negotiation style */
if (!phylink_autoneg_inband(mode)) {
/* Phy or fixed speed - no in-band AN, nothing to do, leave the
@@ -5638,14 +5675,6 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode,
} else if (state->interface == PHY_INTERFACE_MODE_SGMII) {
/* SGMII in-band mode receives the speed and duplex from
* the PHY. Flow control information is not received. */
- an &= ~(MVPP2_GMAC_FORCE_LINK_DOWN |
- MVPP2_GMAC_FORCE_LINK_PASS |
- MVPP2_GMAC_CONFIG_MII_SPEED |
- MVPP2_GMAC_CONFIG_GMII_SPEED |
- MVPP2_GMAC_CONFIG_FULL_DUPLEX);
- an |= MVPP2_GMAC_IN_BAND_AUTONEG |
- MVPP2_GMAC_AN_SPEED_EN |
- MVPP2_GMAC_AN_DUPLEX_EN;
} else if (phy_interface_mode_is_8023z(state->interface)) {
/* 1000BaseX and 2500BaseX ports cannot negotiate speed nor can
* they negotiate duplex: they are always operating with a fixed
@@ -5653,42 +5682,6 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode,
* speed and full duplex here.
*/
ctrl0 |= MVPP2_GMAC_PORT_TYPE_MASK;
- an &= ~(MVPP2_GMAC_FORCE_LINK_DOWN |
- MVPP2_GMAC_FORCE_LINK_PASS |
- MVPP2_GMAC_CONFIG_MII_SPEED |
- MVPP2_GMAC_CONFIG_GMII_SPEED |
- MVPP2_GMAC_CONFIG_FULL_DUPLEX);
- an |= MVPP2_GMAC_IN_BAND_AUTONEG |
- MVPP2_GMAC_CONFIG_GMII_SPEED |
- MVPP2_GMAC_CONFIG_FULL_DUPLEX;
-
- if (state->pause & MLO_PAUSE_AN && state->an_enabled)
- an |= MVPP2_GMAC_FLOW_CTRL_AUTONEG;
- }
-
-/* Some fields of the auto-negotiation register require the port to be down when
- * their value is updated.
- */
-#define MVPP2_GMAC_AN_PORT_DOWN_MASK \
- (MVPP2_GMAC_IN_BAND_AUTONEG | \
- MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS | \
- MVPP2_GMAC_CONFIG_MII_SPEED | MVPP2_GMAC_CONFIG_GMII_SPEED | \
- MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_CONFIG_FULL_DUPLEX | \
- MVPP2_GMAC_AN_DUPLEX_EN)
-
- if ((old_ctrl0 ^ ctrl0) & MVPP2_GMAC_PORT_TYPE_MASK ||
- (old_ctrl2 ^ ctrl2) & MVPP2_GMAC_INBAND_AN_MASK ||
- (old_an ^ an) & MVPP2_GMAC_AN_PORT_DOWN_MASK) {
- /* Force link down */
- old_an &= ~MVPP2_GMAC_FORCE_LINK_PASS;
- old_an |= MVPP2_GMAC_FORCE_LINK_DOWN;
- writel(old_an, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-
- /* Set the GMAC in a reset state - do this in a way that
- * ensures we clear it below.
- */
- old_ctrl2 |= MVPP2_GMAC_PORT_RESET_MASK;
- writel(old_ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG);
}
if (old_ctrl0 != ctrl0)
@@ -5697,40 +5690,84 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode,
writel(ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG);
if (old_ctrl4 != ctrl4)
writel(ctrl4, port->base + MVPP22_GMAC_CTRL_4_REG);
- if (old_an != an)
- writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+}
- if (old_ctrl2 & MVPP2_GMAC_PORT_RESET_MASK) {
- while (readl(port->base + MVPP2_GMAC_CTRL_2_REG) &
- MVPP2_GMAC_PORT_RESET_MASK)
- continue;
+static int mvpp2__mac_prepare(struct phylink_config *config, unsigned int mode,
+ phy_interface_t interface)
+{
+ struct mvpp2_port *port = mvpp2_phylink_to_port(config);
+
+ /* Check for invalid configuration */
+ if (mvpp2_is_xlg(interface) && port->gop_id != 0) {
+ netdev_err(port->dev, "Invalid mode on %s\n", port->dev->name);
+ return -EINVAL;
}
+
+ if (port->phy_interface != interface ||
+ phylink_autoneg_inband(mode)) {
+ /* Force the link down when changing the interface or if in
+ * in-band mode to ensure we do not change the configuration
+ * while the hardware is indicating link is up. We force both
+ * XLG and GMAC down to ensure that they're both in a known
+ * state.
+ */
+ mvpp2_modify(port->base + MVPP2_GMAC_AUTONEG_CONFIG,
+ MVPP2_GMAC_FORCE_LINK_PASS |
+ MVPP2_GMAC_FORCE_LINK_DOWN,
+ MVPP2_GMAC_FORCE_LINK_DOWN);
+
+ if (mvpp2_port_supports_xlg(port))
+ mvpp2_modify(port->base + MVPP22_XLG_CTRL0_REG,
+ MVPP22_XLG_CTRL0_FORCE_LINK_PASS |
+ MVPP22_XLG_CTRL0_FORCE_LINK_DOWN,
+ MVPP22_XLG_CTRL0_FORCE_LINK_DOWN);
+ }
+
+ /* Make sure the port is disabled when reconfiguring the mode */
+ mvpp2_port_disable(port);
+
+ if (port->phy_interface != interface) {
+ /* Place GMAC into reset */
+ mvpp2_modify(port->base + MVPP2_GMAC_CTRL_2_REG,
+ MVPP2_GMAC_PORT_RESET_MASK,
+ MVPP2_GMAC_PORT_RESET_MASK);
+
+ if (port->priv->hw_version == MVPP22) {
+ mvpp22_gop_mask_irq(port);
+
+ phy_power_off(port->comphy);
+ }
+ }
+
+ /* Select the appropriate PCS operations depending on the
+ * configured interface mode. We will only switch to a mode
+ * that the validate() checks have already passed.
+ */
+ if (mvpp2_is_xlg(interface))
+ port->phylink_pcs.ops = &mvpp2_phylink_xlg_pcs_ops;
+ else
+ port->phylink_pcs.ops = &mvpp2_phylink_gmac_pcs_ops;
+
+ return 0;
+}
+
+static int mvpp2_mac_prepare(struct phylink_config *config, unsigned int mode,
+ phy_interface_t interface)
+{
+ struct mvpp2_port *port = mvpp2_phylink_to_port(config);
+ int ret;
+
+ ret = mvpp2__mac_prepare(config, mode, interface);
+ if (ret == 0)
+ phylink_set_pcs(port->phylink, &port->phylink_pcs);
+
+ return ret;
}
static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode,
const struct phylink_link_state *state)
{
struct mvpp2_port *port = mvpp2_phylink_to_port(config);
- bool change_interface = port->phy_interface != state->interface;
-
- /* Check for invalid configuration */
- if (mvpp2_is_xlg(state->interface) && port->gop_id != 0) {
- netdev_err(port->dev, "Invalid mode on %s\n", port->dev->name);
- return;
- }
-
- /* Make sure the port is disabled when reconfiguring the mode */
- mvpp2_port_disable(port);
-
- if (port->priv->hw_version == MVPP22 && change_interface) {
- mvpp22_gop_mask_irq(port);
-
- port->phy_interface = state->interface;
-
- /* Reconfigure the serdes lanes */
- phy_power_off(port->comphy);
- mvpp22_mode_reconfigure(port);
- }
/* mac (re)configuration */
if (mvpp2_is_xlg(state->interface))
@@ -5742,11 +5779,51 @@ static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode,
if (port->priv->hw_version == MVPP21 && port->flags & MVPP2_F_LOOPBACK)
mvpp2_port_loopback_set(port, state);
+}
- if (port->priv->hw_version == MVPP22 && change_interface)
+static int mvpp2_mac_finish(struct phylink_config *config, unsigned int mode,
+ phy_interface_t interface)
+{
+ struct mvpp2_port *port = mvpp2_phylink_to_port(config);
+
+ if (port->priv->hw_version == MVPP22 &&
+ port->phy_interface != interface) {
+ port->phy_interface = interface;
+
+ /* Reconfigure the serdes lanes */
+ mvpp22_mode_reconfigure(port);
+
+ /* Unmask interrupts */
mvpp22_gop_unmask_irq(port);
+ }
+
+ if (!mvpp2_is_xlg(interface)) {
+ /* Release GMAC reset and wait */
+ mvpp2_modify(port->base + MVPP2_GMAC_CTRL_2_REG,
+ MVPP2_GMAC_PORT_RESET_MASK, 0);
+
+ while (readl(port->base + MVPP2_GMAC_CTRL_2_REG) &
+ MVPP2_GMAC_PORT_RESET_MASK)
+ continue;
+ }
mvpp2_port_enable(port);
+
+ /* Allow the link to come up if in in-band mode, otherwise the
+ * link is forced via mac_link_down()/mac_link_up()
+ */
+ if (phylink_autoneg_inband(mode)) {
+ if (mvpp2_is_xlg(interface))
+ mvpp2_modify(port->base + MVPP22_XLG_CTRL0_REG,
+ MVPP22_XLG_CTRL0_FORCE_LINK_PASS |
+ MVPP22_XLG_CTRL0_FORCE_LINK_DOWN, 0);
+ else
+ mvpp2_modify(port->base + MVPP2_GMAC_AUTONEG_CONFIG,
+ MVPP2_GMAC_FORCE_LINK_PASS |
+ MVPP2_GMAC_FORCE_LINK_DOWN, 0);
+ }
+
+ return 0;
}
static void mvpp2_mac_link_up(struct phylink_config *config,
@@ -5843,13 +5920,36 @@ static void mvpp2_mac_link_down(struct phylink_config *config,
static const struct phylink_mac_ops mvpp2_phylink_ops = {
.validate = mvpp2_phylink_validate,
- .mac_pcs_get_state = mvpp2_phylink_mac_pcs_get_state,
- .mac_an_restart = mvpp2_mac_an_restart,
+ .mac_prepare = mvpp2_mac_prepare,
.mac_config = mvpp2_mac_config,
+ .mac_finish = mvpp2_mac_finish,
.mac_link_up = mvpp2_mac_link_up,
.mac_link_down = mvpp2_mac_link_down,
};
+/* Work-around for ACPI */
+static void mvpp2_acpi_start(struct mvpp2_port *port)
+{
+ /* Phylink isn't used as of now for ACPI, so the MAC has to be
+ * configured manually when the interface is started. This will
+ * be removed as soon as the phylink ACPI support lands in.
+ */
+ struct phylink_link_state state = {
+ .interface = port->phy_interface,
+ };
+ mvpp2__mac_prepare(&port->phylink_config, MLO_AN_INBAND,
+ port->phy_interface);
+ mvpp2_mac_config(&port->phylink_config, MLO_AN_INBAND, &state);
+ port->phylink_pcs.ops->pcs_config(&port->phylink_pcs, MLO_AN_INBAND,
+ port->phy_interface,
+ state.advertising, false);
+ mvpp2_mac_finish(&port->phylink_config, MLO_AN_INBAND,
+ port->phy_interface);
+ mvpp2_mac_link_up(&port->phylink_config, NULL,
+ MLO_AN_INBAND, port->phy_interface,
+ SPEED_UNKNOWN, DUPLEX_UNKNOWN, false, false);
+}
+
/* Ports initialization */
static int mvpp2_port_probe(struct platform_device *pdev,
struct fwnode_handle *port_fwnode,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile
index 1b25948..0bc2410 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile
+++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile
@@ -8,4 +8,4 @@
octeontx2_mbox-y := mbox.o
octeontx2_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \
- rvu_reg.o rvu_npc.o rvu_debugfs.o
+ rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index a4e65da..8f17e26 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -468,6 +468,35 @@ static void cgx_lmac_pause_frm_config(struct cgx *cgx, int lmac_id, bool enable)
}
}
+void cgx_lmac_ptp_config(void *cgxd, int lmac_id, bool enable)
+{
+ struct cgx *cgx = cgxd;
+ u64 cfg;
+
+ if (!cgx)
+ return;
+
+ if (enable) {
+ /* Enable inbound PTP timestamping */
+ cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL);
+ cfg |= CGX_GMP_GMI_RXX_FRM_CTL_PTP_MODE;
+ cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg);
+
+ cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL);
+ cfg |= CGX_SMUX_RX_FRM_CTL_PTP_MODE;
+ cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg);
+ } else {
+ /* Disable inbound PTP stamping */
+ cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL);
+ cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_PTP_MODE;
+ cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg);
+
+ cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL);
+ cfg &= ~CGX_SMUX_RX_FRM_CTL_PTP_MODE;
+ cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg);
+ }
+}
+
/* CGX Firmware interface low level support */
static int cgx_fwi_cmd_send(u64 req, u64 *resp, struct lmac *lmac)
{
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
index 394f965..27ca329 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
@@ -58,8 +58,10 @@
#define CGXX_SMUX_RX_FRM_CTL 0x20020
#define CGX_SMUX_RX_FRM_CTL_CTL_BCK BIT_ULL(3)
+#define CGX_SMUX_RX_FRM_CTL_PTP_MODE BIT_ULL(12)
#define CGXX_GMP_GMI_RXX_FRM_CTL 0x38028
#define CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK BIT_ULL(3)
+#define CGX_GMP_GMI_RXX_FRM_CTL_PTP_MODE BIT_ULL(12)
#define CGXX_SMUX_TX_CTL 0x20178
#define CGXX_SMUX_TX_PAUSE_PKT_TIME 0x20110
#define CGXX_SMUX_TX_PAUSE_PKT_INTERVAL 0x20120
@@ -139,4 +141,6 @@ int cgx_lmac_get_pause_frm(void *cgxd, int lmac_id,
u8 *tx_pause, u8 *rx_pause);
int cgx_lmac_set_pause_frm(void *cgxd, int lmac_id,
u8 tx_pause, u8 rx_pause);
+void cgx_lmac_ptp_config(void *cgxd, int lmac_id, bool enable);
+
#endif /* CGX_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 6dfd0f9..4aaef0a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -127,6 +127,7 @@ M(ATTACH_RESOURCES, 0x002, attach_resources, rsrc_attach, msg_rsp) \
M(DETACH_RESOURCES, 0x003, detach_resources, rsrc_detach, msg_rsp) \
M(MSIX_OFFSET, 0x005, msix_offset, msg_req, msix_offset_rsp) \
M(VF_FLR, 0x006, vf_flr, msg_req, msg_rsp) \
+M(PTP_OP, 0x007, ptp_op, ptp_req, ptp_rsp) \
M(GET_HW_CAP, 0x008, get_hw_cap, msg_req, get_hw_cap_rsp) \
/* CGX mbox IDs (range 0x200 - 0x3FF) */ \
M(CGX_START_RXTX, 0x200, cgx_start_rxtx, msg_req, msg_rsp) \
@@ -143,6 +144,8 @@ M(CGX_STOP_LINKEVENTS, 0x208, cgx_stop_linkevents, msg_req, msg_rsp) \
M(CGX_GET_LINKINFO, 0x209, cgx_get_linkinfo, msg_req, cgx_link_info_msg) \
M(CGX_INTLBK_ENABLE, 0x20A, cgx_intlbk_enable, msg_req, msg_rsp) \
M(CGX_INTLBK_DISABLE, 0x20B, cgx_intlbk_disable, msg_req, msg_rsp) \
+M(CGX_PTP_RX_ENABLE, 0x20C, cgx_ptp_rx_enable, msg_req, msg_rsp) \
+M(CGX_PTP_RX_DISABLE, 0x20D, cgx_ptp_rx_disable, msg_req, msg_rsp) \
M(CGX_CFG_PAUSE_FRM, 0x20E, cgx_cfg_pause_frm, cgx_pause_frm_cfg, \
cgx_pause_frm_cfg) \
/* NPA mbox IDs (range 0x400 - 0x5FF) */ \
@@ -213,6 +216,8 @@ M(NIX_LSO_FORMAT_CFG, 0x8011, nix_lso_format_cfg, \
nix_lso_format_cfg, \
nix_lso_format_cfg_rsp) \
M(NIX_RXVLAN_ALLOC, 0x8012, nix_rxvlan_alloc, msg_req, msg_rsp) \
+M(NIX_LF_PTP_TX_ENABLE, 0x8013, nix_lf_ptp_tx_enable, msg_req, msg_rsp) \
+M(NIX_LF_PTP_TX_DISABLE, 0x8014, nix_lf_ptp_tx_disable, msg_req, msg_rsp) \
M(NIX_BP_ENABLE, 0x8016, nix_bp_enable, nix_bp_cfg_req, \
nix_bp_cfg_rsp) \
M(NIX_BP_DISABLE, 0x8017, nix_bp_disable, nix_bp_cfg_req, msg_rsp) \
@@ -858,4 +863,20 @@ struct npc_get_kex_cfg_rsp {
u8 mkex_pfl_name[MKEX_NAME_LEN];
};
+enum ptp_op {
+ PTP_OP_ADJFINE = 0,
+ PTP_OP_GET_CLOCK = 1,
+};
+
+struct ptp_req {
+ struct mbox_msghdr hdr;
+ u8 op;
+ s64 scaled_ppm;
+};
+
+struct ptp_rsp {
+ struct mbox_msghdr hdr;
+ u64 clk;
+};
+
#endif /* MBOX_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c
new file mode 100644
index 0000000..f69f4f3
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell PTP driver
+ *
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "ptp.h"
+#include "mbox.h"
+#include "rvu.h"
+
+#define DRV_NAME "Marvell PTP Driver"
+
+#define PCI_DEVID_OCTEONTX2_PTP 0xA00C
+#define PCI_SUBSYS_DEVID_OCTX2_98xx_PTP 0xB100
+#define PCI_SUBSYS_DEVID_OCTX2_96XX_PTP 0xB200
+#define PCI_SUBSYS_DEVID_OCTX2_95XX_PTP 0xB300
+#define PCI_SUBSYS_DEVID_OCTX2_LOKI_PTP 0xB400
+#define PCI_SUBSYS_DEVID_OCTX2_95MM_PTP 0xB500
+#define PCI_DEVID_OCTEONTX2_RST 0xA085
+
+#define PCI_PTP_BAR_NO 0
+#define PCI_RST_BAR_NO 0
+
+#define PTP_CLOCK_CFG 0xF00ULL
+#define PTP_CLOCK_CFG_PTP_EN BIT_ULL(0)
+#define PTP_CLOCK_LO 0xF08ULL
+#define PTP_CLOCK_HI 0xF10ULL
+#define PTP_CLOCK_COMP 0xF18ULL
+
+#define RST_BOOT 0x1600ULL
+#define RST_MUL_BITS GENMASK_ULL(38, 33)
+#define CLOCK_BASE_RATE 50000000ULL
+
+static u64 get_clock_rate(void)
+{
+ u64 cfg, ret = CLOCK_BASE_RATE * 16;
+ struct pci_dev *pdev;
+ void __iomem *base;
+
+ /* To get the input clock frequency with which PTP co-processor
+ * block is running the base frequency(50 MHz) needs to be multiplied
+ * with multiplier bits present in RST_BOOT register of RESET block.
+ * Hence below code gets the multiplier bits from the RESET PCI
+ * device present in the system.
+ */
+ pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
+ PCI_DEVID_OCTEONTX2_RST, NULL);
+ if (!pdev)
+ goto error;
+
+ base = pci_ioremap_bar(pdev, PCI_RST_BAR_NO);
+ if (!base)
+ goto error_put_pdev;
+
+ cfg = readq(base + RST_BOOT);
+ ret = CLOCK_BASE_RATE * FIELD_GET(RST_MUL_BITS, cfg);
+
+ iounmap(base);
+
+error_put_pdev:
+ pci_dev_put(pdev);
+
+error:
+ return ret;
+}
+
+struct ptp *ptp_get(void)
+{
+ struct pci_dev *pdev;
+ struct ptp *ptp;
+
+ /* If the PTP pci device is found on the system and ptp
+ * driver is bound to it then the PTP pci device is returned
+ * to the caller(rvu driver).
+ */
+ pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
+ PCI_DEVID_OCTEONTX2_PTP, NULL);
+ if (!pdev)
+ return ERR_PTR(-ENODEV);
+
+ ptp = pci_get_drvdata(pdev);
+ if (!ptp)
+ ptp = ERR_PTR(-EPROBE_DEFER);
+ if (IS_ERR(ptp))
+ pci_dev_put(pdev);
+
+ return ptp;
+}
+
+void ptp_put(struct ptp *ptp)
+{
+ if (!ptp)
+ return;
+
+ pci_dev_put(ptp->pdev);
+}
+
+static int ptp_adjfine(struct ptp *ptp, long scaled_ppm)
+{
+ bool neg_adj = false;
+ u64 comp;
+ u64 adj;
+ s64 ppb;
+
+ if (scaled_ppm < 0) {
+ neg_adj = true;
+ scaled_ppm = -scaled_ppm;
+ }
+
+ /* The hardware adds the clock compensation value to the PTP clock
+ * on every coprocessor clock cycle. Typical convention is that it
+ * represent number of nanosecond betwen each cycle. In this
+ * convention compensation value is in 64 bit fixed-point
+ * representation where upper 32 bits are number of nanoseconds
+ * and lower is fractions of nanosecond.
+ * The scaled_ppm represent the ratio in "parts per million" by which
+ * the compensation value should be corrected.
+ * To calculate new compenstation value we use 64bit fixed point
+ * arithmetic on following formula
+ * comp = tbase + tbase * scaled_ppm / (1M * 2^16)
+ * where tbase is the basic compensation value calculated
+ * initialy in the probe function.
+ */
+ comp = ((u64)1000000000ull << 32) / ptp->clock_rate;
+ /* convert scaled_ppm to ppb */
+ ppb = 1 + scaled_ppm;
+ ppb *= 125;
+ ppb >>= 13;
+ adj = comp * ppb;
+ adj = div_u64(adj, 1000000000ull);
+ comp = neg_adj ? comp - adj : comp + adj;
+
+ writeq(comp, ptp->reg_base + PTP_CLOCK_COMP);
+
+ return 0;
+}
+
+static int ptp_get_clock(struct ptp *ptp, u64 *clk)
+{
+ /* Return the current PTP clock */
+ *clk = readq(ptp->reg_base + PTP_CLOCK_HI);
+
+ return 0;
+}
+
+static int ptp_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct device *dev = &pdev->dev;
+ struct ptp *ptp;
+ u64 clock_comp;
+ u64 clock_cfg;
+ int err;
+
+ ptp = devm_kzalloc(dev, sizeof(*ptp), GFP_KERNEL);
+ if (!ptp) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ ptp->pdev = pdev;
+
+ err = pcim_enable_device(pdev);
+ if (err)
+ goto error_free;
+
+ err = pcim_iomap_regions(pdev, 1 << PCI_PTP_BAR_NO, pci_name(pdev));
+ if (err)
+ goto error_free;
+
+ ptp->reg_base = pcim_iomap_table(pdev)[PCI_PTP_BAR_NO];
+
+ ptp->clock_rate = get_clock_rate();
+
+ /* Enable PTP clock */
+ clock_cfg = readq(ptp->reg_base + PTP_CLOCK_CFG);
+ clock_cfg |= PTP_CLOCK_CFG_PTP_EN;
+ writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG);
+
+ clock_comp = ((u64)1000000000ull << 32) / ptp->clock_rate;
+ /* Initial compensation value to start the nanosecs counter */
+ writeq(clock_comp, ptp->reg_base + PTP_CLOCK_COMP);
+
+ pci_set_drvdata(pdev, ptp);
+
+ return 0;
+
+error_free:
+ devm_kfree(dev, ptp);
+
+error:
+ /* For `ptp_get()` we need to differentiate between the case
+ * when the core has not tried to probe this device and the case when
+ * the probe failed. In the later case we pretend that the
+ * initialization was successful and keep the error in
+ * `dev->driver_data`.
+ */
+ pci_set_drvdata(pdev, ERR_PTR(err));
+ return 0;
+}
+
+static void ptp_remove(struct pci_dev *pdev)
+{
+ struct ptp *ptp = pci_get_drvdata(pdev);
+ u64 clock_cfg;
+
+ if (IS_ERR_OR_NULL(ptp))
+ return;
+
+ /* Disable PTP clock */
+ clock_cfg = readq(ptp->reg_base + PTP_CLOCK_CFG);
+ clock_cfg &= ~PTP_CLOCK_CFG_PTP_EN;
+ writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG);
+}
+
+static const struct pci_device_id ptp_id_table[] = {
+ { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
+ PCI_VENDOR_ID_CAVIUM,
+ PCI_SUBSYS_DEVID_OCTX2_98xx_PTP) },
+ { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
+ PCI_VENDOR_ID_CAVIUM,
+ PCI_SUBSYS_DEVID_OCTX2_96XX_PTP) },
+ { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
+ PCI_VENDOR_ID_CAVIUM,
+ PCI_SUBSYS_DEVID_OCTX2_95XX_PTP) },
+ { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
+ PCI_VENDOR_ID_CAVIUM,
+ PCI_SUBSYS_DEVID_OCTX2_LOKI_PTP) },
+ { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
+ PCI_VENDOR_ID_CAVIUM,
+ PCI_SUBSYS_DEVID_OCTX2_95MM_PTP) },
+ { 0, }
+};
+
+struct pci_driver ptp_driver = {
+ .name = DRV_NAME,
+ .id_table = ptp_id_table,
+ .probe = ptp_probe,
+ .remove = ptp_remove,
+};
+
+int rvu_mbox_handler_ptp_op(struct rvu *rvu, struct ptp_req *req,
+ struct ptp_rsp *rsp)
+{
+ int err = 0;
+
+ /* This function is the PTP mailbox handler invoked when
+ * called by AF consumers/netdev drivers via mailbox mechanism.
+ * It is used by netdev driver to get the PTP clock and to set
+ * frequency adjustments. Since mailbox can be called without
+ * notion of whether the driver is bound to ptp device below
+ * validation is needed as first step.
+ */
+ if (!rvu->ptp)
+ return -ENODEV;
+
+ switch (req->op) {
+ case PTP_OP_ADJFINE:
+ err = ptp_adjfine(rvu->ptp, req->scaled_ppm);
+ break;
+ case PTP_OP_GET_CLOCK:
+ err = ptp_get_clock(rvu->ptp, &rsp->clk);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.h b/drivers/net/ethernet/marvell/octeontx2/af/ptp.h
new file mode 100644
index 0000000..878bc39
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell PTP driver
+ *
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#ifndef PTP_H
+#define PTP_H
+
+#include <linux/timecounter.h>
+#include <linux/time64.h>
+#include <linux/spinlock.h>
+
+struct ptp {
+ struct pci_dev *pdev;
+ void __iomem *reg_base;
+ u32 clock_rate;
+};
+
+struct ptp *ptp_get(void);
+void ptp_put(struct ptp *ptp);
+
+extern struct pci_driver ptp_driver;
+
+#endif
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index 557e429..c3ef73a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -18,6 +18,7 @@
#include "cgx.h"
#include "rvu.h"
#include "rvu_reg.h"
+#include "ptp.h"
#define DRV_NAME "octeontx2-af"
#define DRV_STRING "Marvell OcteonTX2 RVU Admin Function Driver"
@@ -2565,13 +2566,21 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pci_set_master(pdev);
+ rvu->ptp = ptp_get();
+ if (IS_ERR(rvu->ptp)) {
+ err = PTR_ERR(rvu->ptp);
+ if (err == -EPROBE_DEFER)
+ goto err_release_regions;
+ rvu->ptp = NULL;
+ }
+
/* Map Admin function CSRs */
rvu->afreg_base = pcim_iomap(pdev, PCI_AF_REG_BAR_NUM, 0);
rvu->pfreg_base = pcim_iomap(pdev, PCI_PF_REG_BAR_NUM, 0);
if (!rvu->afreg_base || !rvu->pfreg_base) {
dev_err(dev, "Unable to map admin function CSRs, aborting\n");
err = -ENOMEM;
- goto err_release_regions;
+ goto err_put_ptp;
}
/* Store module params in rvu structure */
@@ -2586,7 +2595,7 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
err = rvu_setup_hw_resources(rvu);
if (err)
- goto err_release_regions;
+ goto err_put_ptp;
/* Init mailbox btw AF and PFs */
err = rvu_mbox_init(rvu, &rvu->afpf_wq_info, TYPE_AFPF,
@@ -2626,6 +2635,8 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
rvu_reset_all_blocks(rvu);
rvu_free_hw_resources(rvu);
rvu_clear_rvum_blk_revid(rvu);
+err_put_ptp:
+ ptp_put(rvu->ptp);
err_release_regions:
pci_release_regions(pdev);
err_disable_device:
@@ -2651,6 +2662,7 @@ static void rvu_remove(struct pci_dev *pdev)
rvu_reset_all_blocks(rvu);
rvu_free_hw_resources(rvu);
rvu_clear_rvum_blk_revid(rvu);
+ ptp_put(rvu->ptp);
pci_release_regions(pdev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
@@ -2676,9 +2688,19 @@ static int __init rvu_init_module(void)
if (err < 0)
return err;
+ err = pci_register_driver(&ptp_driver);
+ if (err < 0)
+ goto ptp_err;
+
err = pci_register_driver(&rvu_driver);
if (err < 0)
- pci_unregister_driver(&cgx_driver);
+ goto rvu_err;
+
+ return 0;
+rvu_err:
+ pci_unregister_driver(&ptp_driver);
+ptp_err:
+ pci_unregister_driver(&cgx_driver);
return err;
}
@@ -2686,6 +2708,7 @@ static int __init rvu_init_module(void)
static void __exit rvu_cleanup_module(void)
{
pci_unregister_driver(&rvu_driver);
+ pci_unregister_driver(&ptp_driver);
pci_unregister_driver(&cgx_driver);
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index dcf25a0..05da7a9 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -289,6 +289,8 @@ struct rvu_fwdata {
u64 reserved[FWDATA_RESERVED_MEM];
};
+struct ptp;
+
struct rvu {
void __iomem *afreg_base;
void __iomem *pfreg_base;
@@ -337,6 +339,8 @@ struct rvu {
/* Firmware data */
struct rvu_fwdata *fwdata;
+ struct ptp *ptp;
+
#ifdef CONFIG_DEBUG_FS
struct rvu_debugfs rvu_dbg;
#endif
@@ -469,6 +473,7 @@ int rvu_npc_init(struct rvu *rvu);
void rvu_npc_freemem(struct rvu *rvu);
int rvu_npc_get_pkind(struct rvu *rvu, u16 pf);
void rvu_npc_set_pkind(struct rvu *rvu, int pkind, struct rvu_pfvf *pfvf);
+int npc_config_ts_kpuaction(struct rvu *rvu, int pf, u16 pcifunc, bool en);
void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
int nixlf, u64 chan, u8 *mac_addr);
void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
index f3c82e4..fe3389c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
@@ -509,6 +509,45 @@ int rvu_mbox_handler_cgx_promisc_disable(struct rvu *rvu, struct msg_req *req,
return 0;
}
+static int rvu_cgx_ptp_rx_cfg(struct rvu *rvu, u16 pcifunc, bool enable)
+{
+ int pf = rvu_get_pf(pcifunc);
+ u8 cgx_id, lmac_id;
+ void *cgxd;
+
+ /* This msg is expected only from PFs that are mapped to CGX LMACs,
+ * if received from other PF/VF simply ACK, nothing to do.
+ */
+ if ((pcifunc & RVU_PFVF_FUNC_MASK) ||
+ !is_pf_cgxmapped(rvu, pf))
+ return -ENODEV;
+
+ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+ cgxd = rvu_cgx_pdata(cgx_id, rvu);
+
+ cgx_lmac_ptp_config(cgxd, lmac_id, enable);
+ /* If PTP is enabled then inform NPC that packets to be
+ * parsed by this PF will have their data shifted by 8 bytes
+ * and if PTP is disabled then no shift is required
+ */
+ if (npc_config_ts_kpuaction(rvu, pf, pcifunc, enable))
+ return -EINVAL;
+
+ return 0;
+}
+
+int rvu_mbox_handler_cgx_ptp_rx_enable(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp)
+{
+ return rvu_cgx_ptp_rx_cfg(rvu, req->hdr.pcifunc, true);
+}
+
+int rvu_mbox_handler_cgx_ptp_rx_disable(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp)
+{
+ return rvu_cgx_ptp_rx_cfg(rvu, req->hdr.pcifunc, false);
+}
+
static int rvu_cgx_config_linkevents(struct rvu *rvu, u16 pcifunc, bool en)
{
int pf = rvu_get_pf(pcifunc);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 01a7931..08181fc 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -3319,6 +3319,49 @@ void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf)
nix_ctx_free(rvu, pfvf);
}
+#define NIX_AF_LFX_TX_CFG_PTP_EN BIT_ULL(32)
+
+static int rvu_nix_lf_ptp_tx_cfg(struct rvu *rvu, u16 pcifunc, bool enable)
+{
+ struct rvu_hwinfo *hw = rvu->hw;
+ struct rvu_block *block;
+ int blkaddr;
+ int nixlf;
+ u64 cfg;
+
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+ if (blkaddr < 0)
+ return NIX_AF_ERR_AF_LF_INVALID;
+
+ block = &hw->block[blkaddr];
+ nixlf = rvu_get_lf(rvu, block, pcifunc, 0);
+ if (nixlf < 0)
+ return NIX_AF_ERR_AF_LF_INVALID;
+
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_LFX_TX_CFG(nixlf));
+
+ if (enable)
+ cfg |= NIX_AF_LFX_TX_CFG_PTP_EN;
+ else
+ cfg &= ~NIX_AF_LFX_TX_CFG_PTP_EN;
+
+ rvu_write64(rvu, blkaddr, NIX_AF_LFX_TX_CFG(nixlf), cfg);
+
+ return 0;
+}
+
+int rvu_mbox_handler_nix_lf_ptp_tx_enable(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp)
+{
+ return rvu_nix_lf_ptp_tx_cfg(rvu, req->hdr.pcifunc, true);
+}
+
+int rvu_mbox_handler_nix_lf_ptp_tx_disable(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp)
+{
+ return rvu_nix_lf_ptp_tx_cfg(rvu, req->hdr.pcifunc, false);
+}
+
int rvu_mbox_handler_nix_lso_format_cfg(struct rvu *rvu,
struct nix_lso_format_cfg *req,
struct nix_lso_format_cfg_rsp *rsp)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 0a21408..e2e585d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -27,6 +27,7 @@
#define NIXLF_PROMISC_ENTRY 2
#define NPC_PARSE_RESULT_DMAC_OFFSET 8
+#define NPC_HW_TSTAMP_OFFSET 8
static void npc_mcam_free_all_entries(struct rvu *rvu, struct npc_mcam *mcam,
int blkaddr, u16 pcifunc);
@@ -61,6 +62,36 @@ int rvu_npc_get_pkind(struct rvu *rvu, u16 pf)
return -1;
}
+#define NPC_AF_ACTION0_PTR_ADVANCE GENMASK_ULL(27, 20)
+
+int npc_config_ts_kpuaction(struct rvu *rvu, int pf, u16 pcifunc, bool enable)
+{
+ int pkind, blkaddr;
+ u64 val;
+
+ pkind = rvu_npc_get_pkind(rvu, pf);
+ if (pkind < 0) {
+ dev_err(rvu->dev, "%s: pkind not mapped\n", __func__);
+ return -EINVAL;
+ }
+
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, pcifunc);
+ if (blkaddr < 0) {
+ dev_err(rvu->dev, "%s: NPC block not implemented\n", __func__);
+ return -EINVAL;
+ }
+
+ val = rvu_read64(rvu, blkaddr, NPC_AF_PKINDX_ACTION0(pkind));
+ val &= ~NPC_AF_ACTION0_PTR_ADVANCE;
+ /* If timestamp is enabled then configure NPC to shift 8 bytes */
+ if (enable)
+ val |= FIELD_PREP(NPC_AF_ACTION0_PTR_ADVANCE,
+ NPC_HW_TSTAMP_OFFSET);
+ rvu_write64(rvu, blkaddr, NPC_AF_PKINDX_ACTION0(pkind), val);
+
+ return 0;
+}
+
static int npc_get_nixlf_mcam_index(struct npc_mcam *mcam,
u16 pcifunc, int nixlf, int type)
{
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
index 778df33..b2c6385 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
@@ -6,7 +6,8 @@
obj-$(CONFIG_OCTEONTX2_PF) += octeontx2_nicpf.o
obj-$(CONFIG_OCTEONTX2_VF) += octeontx2_nicvf.o
-octeontx2_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o
+octeontx2_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \
+ otx2_ptp.o
octeontx2_nicvf-y := otx2_vf.o
ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 93c4cf7..820fc66 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -365,6 +365,95 @@ int otx2_rss_init(struct otx2_nic *pfvf)
return 0;
}
+/* Setup UDP segmentation algorithm in HW */
+static void otx2_setup_udp_segmentation(struct nix_lso_format_cfg *lso, bool v4)
+{
+ struct nix_lso_format *field;
+
+ field = (struct nix_lso_format *)&lso->fields[0];
+ lso->field_mask = GENMASK(18, 0);
+
+ /* IP's Length field */
+ field->layer = NIX_TXLAYER_OL3;
+ /* In ipv4, length field is at offset 2 bytes, for ipv6 it's 4 */
+ field->offset = v4 ? 2 : 4;
+ field->sizem1 = 1; /* i.e 2 bytes */
+ field->alg = NIX_LSOALG_ADD_PAYLEN;
+ field++;
+
+ /* No ID field in IPv6 header */
+ if (v4) {
+ /* Increment IPID */
+ field->layer = NIX_TXLAYER_OL3;
+ field->offset = 4;
+ field->sizem1 = 1; /* i.e 2 bytes */
+ field->alg = NIX_LSOALG_ADD_SEGNUM;
+ field++;
+ }
+
+ /* Update length in UDP header */
+ field->layer = NIX_TXLAYER_OL4;
+ field->offset = 4;
+ field->sizem1 = 1;
+ field->alg = NIX_LSOALG_ADD_PAYLEN;
+}
+
+/* Setup segmentation algorithms in HW and retrieve algorithm index */
+void otx2_setup_segmentation(struct otx2_nic *pfvf)
+{
+ struct nix_lso_format_cfg_rsp *rsp;
+ struct nix_lso_format_cfg *lso;
+ struct otx2_hw *hw = &pfvf->hw;
+ int err;
+
+ mutex_lock(&pfvf->mbox.lock);
+
+ /* UDPv4 segmentation */
+ lso = otx2_mbox_alloc_msg_nix_lso_format_cfg(&pfvf->mbox);
+ if (!lso)
+ goto fail;
+
+ /* Setup UDP/IP header fields that HW should update per segment */
+ otx2_setup_udp_segmentation(lso, true);
+
+ err = otx2_sync_mbox_msg(&pfvf->mbox);
+ if (err)
+ goto fail;
+
+ rsp = (struct nix_lso_format_cfg_rsp *)
+ otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &lso->hdr);
+ if (IS_ERR(rsp))
+ goto fail;
+
+ hw->lso_udpv4_idx = rsp->lso_format_idx;
+
+ /* UDPv6 segmentation */
+ lso = otx2_mbox_alloc_msg_nix_lso_format_cfg(&pfvf->mbox);
+ if (!lso)
+ goto fail;
+
+ /* Setup UDP/IP header fields that HW should update per segment */
+ otx2_setup_udp_segmentation(lso, false);
+
+ err = otx2_sync_mbox_msg(&pfvf->mbox);
+ if (err)
+ goto fail;
+
+ rsp = (struct nix_lso_format_cfg_rsp *)
+ otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &lso->hdr);
+ if (IS_ERR(rsp))
+ goto fail;
+
+ hw->lso_udpv6_idx = rsp->lso_format_idx;
+ mutex_unlock(&pfvf->mbox.lock);
+ return;
+fail:
+ mutex_unlock(&pfvf->mbox.lock);
+ netdev_info(pfvf->netdev,
+ "Failed to get LSO index for UDP GSO offload, disabling\n");
+ pfvf->netdev->hw_features &= ~NETIF_F_GSO_UDP_L4;
+}
+
void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx)
{
/* Configure CQE interrupt coalescing parameters
@@ -671,6 +760,13 @@ static int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
if (!sq->sg)
return -ENOMEM;
+ if (pfvf->ptp) {
+ err = qmem_alloc(pfvf->dev, &sq->timestamps, qset->sqe_cnt,
+ sizeof(*sq->timestamps));
+ if (err)
+ return err;
+ }
+
sq->head = 0;
sq->sqe_per_sqb = (pfvf->hw.sqb_size / sq->sqe_size) - 1;
sq->num_sqbs = (qset->sqe_cnt + sq->sqe_per_sqb) / sq->sqe_per_sqb;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 2fa2988..ac47762 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -13,6 +13,9 @@
#include <linux/pci.h>
#include <linux/iommu.h>
+#include <linux/net_tstamp.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/timecounter.h>
#include <mbox.h>
#include "otx2_reg.h"
@@ -174,9 +177,11 @@ struct otx2_hw {
u16 rq_skid;
u8 cq_time_wait;
- /* For TSO segmentation */
+ /* Segmentation */
u8 lso_tsov4_idx;
u8 lso_tsov6_idx;
+ u8 lso_udpv4_idx;
+ u8 lso_udpv6_idx;
u8 hw_tso;
/* MSI-X */
@@ -209,6 +214,17 @@ struct refill_work {
struct otx2_nic *pf;
};
+struct otx2_ptp {
+ struct ptp_clock_info ptp_info;
+ struct ptp_clock *ptp_clock;
+ struct otx2_nic *nic;
+
+ struct cyclecounter cycle_counter;
+ struct timecounter time_counter;
+};
+
+#define OTX2_HW_TIMESTAMP_LEN 8
+
struct otx2_nic {
void __iomem *reg_base;
struct net_device *netdev;
@@ -216,6 +232,8 @@ struct otx2_nic {
u16 max_frs;
u16 rbsize; /* Receive buffer size */
+#define OTX2_FLAG_RX_TSTAMP_ENABLED BIT_ULL(0)
+#define OTX2_FLAG_TX_TSTAMP_ENABLED BIT_ULL(1)
#define OTX2_FLAG_INTF_DOWN BIT_ULL(2)
#define OTX2_FLAG_RX_PAUSE_ENABLED BIT_ULL(9)
#define OTX2_FLAG_TX_PAUSE_ENABLED BIT_ULL(10)
@@ -251,6 +269,9 @@ struct otx2_nic {
/* Block address of NIX either BLKADDR_NIX0 or BLKADDR_NIX1 */
int nix_blkaddr;
+
+ struct otx2_ptp *ptp;
+ struct hwtstamp_config tstamp;
};
static inline bool is_otx2_lbkvf(struct pci_dev *pdev)
@@ -561,6 +582,7 @@ void otx2_tx_timeout(struct net_device *netdev, unsigned int txq);
void otx2_get_mac_from_af(struct net_device *netdev);
void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx);
int otx2_config_pause_frm(struct otx2_nic *pfvf);
+void otx2_setup_segmentation(struct otx2_nic *pfvf);
/* RVU block related APIs */
int otx2_attach_npa_nix(struct otx2_nic *pfvf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
index d59f5a9..0341d969 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
@@ -13,8 +13,10 @@
#include <linux/stddef.h>
#include <linux/etherdevice.h>
#include <linux/log2.h>
+#include <linux/net_tstamp.h>
#include "otx2_common.h"
+#include "otx2_ptp.h"
#define DRV_NAME "octeontx2-nicpf"
#define DRV_VF_NAME "octeontx2-nicvf"
@@ -663,6 +665,31 @@ static u32 otx2_get_link(struct net_device *netdev)
return pfvf->linfo.link_up;
}
+static int otx2_get_ts_info(struct net_device *netdev,
+ struct ethtool_ts_info *info)
+{
+ struct otx2_nic *pfvf = netdev_priv(netdev);
+
+ if (!pfvf->ptp)
+ return ethtool_op_get_ts_info(netdev, info);
+
+ info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+
+ info->phc_index = otx2_ptp_clock_index(pfvf);
+
+ info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON);
+
+ info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
+ (1 << HWTSTAMP_FILTER_ALL);
+
+ return 0;
+}
+
static const struct ethtool_ops otx2_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
ETHTOOL_COALESCE_MAX_FRAMES,
@@ -687,6 +714,7 @@ static const struct ethtool_ops otx2_ethtool_ops = {
.set_msglevel = otx2_set_msglevel,
.get_pauseparam = otx2_get_pauseparam,
.set_pauseparam = otx2_set_pauseparam,
+ .get_ts_info = otx2_get_ts_info,
};
void otx2_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index 75a8c40..aac2845 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -21,6 +21,7 @@
#include "otx2_common.h"
#include "otx2_txrx.h"
#include "otx2_struct.h"
+#include "otx2_ptp.h"
#define DRV_NAME "octeontx2-nicpf"
#define DRV_STRING "Marvell OcteonTX2 NIC Physical Function Driver"
@@ -41,6 +42,9 @@ enum {
TYPE_PFVF,
};
+static int otx2_config_hw_tx_tstamp(struct otx2_nic *pfvf, bool enable);
+static int otx2_config_hw_rx_tstamp(struct otx2_nic *pfvf, bool enable);
+
static int otx2_change_mtu(struct net_device *netdev, int new_mtu)
{
bool if_up = netif_running(netdev);
@@ -1281,7 +1285,8 @@ static int otx2_init_hw_resources(struct otx2_nic *pf)
hw->pool_cnt = hw->rqpool_cnt + hw->sqpool_cnt;
/* Get the size of receive buffers to allocate */
- pf->rbsize = RCV_FRAG_LEN(pf->netdev->mtu + OTX2_ETH_HLEN);
+ pf->rbsize = RCV_FRAG_LEN(OTX2_HW_TIMESTAMP_LEN + pf->netdev->mtu +
+ OTX2_ETH_HLEN);
mutex_lock(&mbox->lock);
/* NPA init */
@@ -1496,6 +1501,9 @@ int otx2_open(struct net_device *netdev)
if (err)
goto err_disable_napi;
+ /* Setup segmentation algorithms, if failed, clear offload capability */
+ otx2_setup_segmentation(pf);
+
/* Initialize RSS */
err = otx2_rss_init(pf);
if (err)
@@ -1547,6 +1555,16 @@ int otx2_open(struct net_device *netdev)
otx2_set_cints_affinity(pf);
+ /* When reinitializing enable time stamping if it is enabled before */
+ if (pf->flags & OTX2_FLAG_TX_TSTAMP_ENABLED) {
+ pf->flags &= ~OTX2_FLAG_TX_TSTAMP_ENABLED;
+ otx2_config_hw_tx_tstamp(pf, true);
+ }
+ if (pf->flags & OTX2_FLAG_RX_TSTAMP_ENABLED) {
+ pf->flags &= ~OTX2_FLAG_RX_TSTAMP_ENABLED;
+ otx2_config_hw_rx_tstamp(pf, true);
+ }
+
pf->flags &= ~OTX2_FLAG_INTF_DOWN;
/* 'intf_down' may be checked on any cpu */
smp_wmb();
@@ -1738,6 +1756,143 @@ static void otx2_reset_task(struct work_struct *work)
rtnl_unlock();
}
+static int otx2_config_hw_rx_tstamp(struct otx2_nic *pfvf, bool enable)
+{
+ struct msg_req *req;
+ int err;
+
+ if (pfvf->flags & OTX2_FLAG_RX_TSTAMP_ENABLED && enable)
+ return 0;
+
+ mutex_lock(&pfvf->mbox.lock);
+ if (enable)
+ req = otx2_mbox_alloc_msg_cgx_ptp_rx_enable(&pfvf->mbox);
+ else
+ req = otx2_mbox_alloc_msg_cgx_ptp_rx_disable(&pfvf->mbox);
+ if (!req) {
+ mutex_unlock(&pfvf->mbox.lock);
+ return -ENOMEM;
+ }
+
+ err = otx2_sync_mbox_msg(&pfvf->mbox);
+ if (err) {
+ mutex_unlock(&pfvf->mbox.lock);
+ return err;
+ }
+
+ mutex_unlock(&pfvf->mbox.lock);
+ if (enable)
+ pfvf->flags |= OTX2_FLAG_RX_TSTAMP_ENABLED;
+ else
+ pfvf->flags &= ~OTX2_FLAG_RX_TSTAMP_ENABLED;
+ return 0;
+}
+
+static int otx2_config_hw_tx_tstamp(struct otx2_nic *pfvf, bool enable)
+{
+ struct msg_req *req;
+ int err;
+
+ if (pfvf->flags & OTX2_FLAG_TX_TSTAMP_ENABLED && enable)
+ return 0;
+
+ mutex_lock(&pfvf->mbox.lock);
+ if (enable)
+ req = otx2_mbox_alloc_msg_nix_lf_ptp_tx_enable(&pfvf->mbox);
+ else
+ req = otx2_mbox_alloc_msg_nix_lf_ptp_tx_disable(&pfvf->mbox);
+ if (!req) {
+ mutex_unlock(&pfvf->mbox.lock);
+ return -ENOMEM;
+ }
+
+ err = otx2_sync_mbox_msg(&pfvf->mbox);
+ if (err) {
+ mutex_unlock(&pfvf->mbox.lock);
+ return err;
+ }
+
+ mutex_unlock(&pfvf->mbox.lock);
+ if (enable)
+ pfvf->flags |= OTX2_FLAG_TX_TSTAMP_ENABLED;
+ else
+ pfvf->flags &= ~OTX2_FLAG_TX_TSTAMP_ENABLED;
+ return 0;
+}
+
+static int otx2_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr)
+{
+ struct otx2_nic *pfvf = netdev_priv(netdev);
+ struct hwtstamp_config config;
+
+ if (!pfvf->ptp)
+ return -ENODEV;
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+
+ /* reserved for future extensions */
+ if (config.flags)
+ return -EINVAL;
+
+ switch (config.tx_type) {
+ case HWTSTAMP_TX_OFF:
+ otx2_config_hw_tx_tstamp(pfvf, false);
+ break;
+ case HWTSTAMP_TX_ON:
+ otx2_config_hw_tx_tstamp(pfvf, true);
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ switch (config.rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ otx2_config_hw_rx_tstamp(pfvf, false);
+ break;
+ case HWTSTAMP_FILTER_ALL:
+ case HWTSTAMP_FILTER_SOME:
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ otx2_config_hw_rx_tstamp(pfvf, true);
+ config.rx_filter = HWTSTAMP_FILTER_ALL;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ memcpy(&pfvf->tstamp, &config, sizeof(config));
+
+ return copy_to_user(ifr->ifr_data, &config,
+ sizeof(config)) ? -EFAULT : 0;
+}
+
+static int otx2_ioctl(struct net_device *netdev, struct ifreq *req, int cmd)
+{
+ struct otx2_nic *pfvf = netdev_priv(netdev);
+ struct hwtstamp_config *cfg = &pfvf->tstamp;
+
+ switch (cmd) {
+ case SIOCSHWTSTAMP:
+ return otx2_config_hwtstamp(netdev, req);
+ case SIOCGHWTSTAMP:
+ return copy_to_user(req->ifr_data, cfg,
+ sizeof(*cfg)) ? -EFAULT : 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static const struct net_device_ops otx2_netdev_ops = {
.ndo_open = otx2_open,
.ndo_stop = otx2_stop,
@@ -1748,6 +1903,7 @@ static const struct net_device_ops otx2_netdev_ops = {
.ndo_set_features = otx2_set_features,
.ndo_tx_timeout = otx2_tx_timeout,
.ndo_get_stats64 = otx2_get_stats64,
+ .ndo_do_ioctl = otx2_ioctl,
};
static int otx2_wq_init(struct otx2_nic *pf)
@@ -1920,6 +2076,9 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
/* Assign default mac address */
otx2_get_mac_from_af(netdev);
+ /* Don't check for error. Proceed without ptp */
+ otx2_ptp_init(pf);
+
/* NPA's pool is a stack to which SW frees buffer pointers via Aura.
* HW allocates buffer pointer from stack and uses it for DMA'ing
* ingress packet. In some scenarios HW can free back allocated buffer
@@ -1935,7 +2094,8 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
NETIF_F_IPV6_CSUM | NETIF_F_RXHASH |
- NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6);
+ NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
+ NETIF_F_GSO_UDP_L4);
netdev->features |= netdev->hw_features;
netdev->hw_features |= NETIF_F_LOOPBACK | NETIF_F_RXALL;
@@ -1952,7 +2112,7 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
err = register_netdev(netdev);
if (err) {
dev_err(dev, "Failed to register netdevice\n");
- goto err_detach_rsrc;
+ goto err_ptp_destroy;
}
err = otx2_wq_init(pf);
@@ -1972,6 +2132,8 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
err_unreg_netdev:
unregister_netdev(netdev);
+err_ptp_destroy:
+ otx2_ptp_destroy(pf);
err_detach_rsrc:
otx2_detach_resources(&pf->mbox);
err_disable_mbox_intr:
@@ -2113,6 +2275,11 @@ static void otx2_remove(struct pci_dev *pdev)
pf = netdev_priv(netdev);
+ if (pf->flags & OTX2_FLAG_TX_TSTAMP_ENABLED)
+ otx2_config_hw_tx_tstamp(pf, false);
+ if (pf->flags & OTX2_FLAG_RX_TSTAMP_ENABLED)
+ otx2_config_hw_rx_tstamp(pf, false);
+
cancel_work_sync(&pf->reset_task);
/* Disable link notifications */
otx2_cgx_config_linkevents(pf, false);
@@ -2122,6 +2289,7 @@ static void otx2_remove(struct pci_dev *pdev)
if (pf->otx2_wq)
destroy_workqueue(pf->otx2_wq);
+ otx2_ptp_destroy(pf);
otx2_detach_resources(&pf->mbox);
otx2_disable_mbox_intr(pf);
otx2_pfaf_mbox_destroy(pf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
new file mode 100644
index 0000000..7bcf524
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTx2 PTP support for ethernet driver
+ *
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#include "otx2_common.h"
+#include "otx2_ptp.h"
+
+static int otx2_ptp_adjfine(struct ptp_clock_info *ptp_info, long scaled_ppm)
+{
+ struct otx2_ptp *ptp = container_of(ptp_info, struct otx2_ptp,
+ ptp_info);
+ struct ptp_req *req;
+ int err;
+
+ if (!ptp->nic)
+ return -ENODEV;
+
+ req = otx2_mbox_alloc_msg_ptp_op(&ptp->nic->mbox);
+ if (!req)
+ return -ENOMEM;
+
+ req->op = PTP_OP_ADJFINE;
+ req->scaled_ppm = scaled_ppm;
+
+ err = otx2_sync_mbox_msg(&ptp->nic->mbox);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static u64 ptp_cc_read(const struct cyclecounter *cc)
+{
+ struct otx2_ptp *ptp = container_of(cc, struct otx2_ptp, cycle_counter);
+ struct ptp_req *req;
+ struct ptp_rsp *rsp;
+ int err;
+
+ if (!ptp->nic)
+ return 0;
+
+ req = otx2_mbox_alloc_msg_ptp_op(&ptp->nic->mbox);
+ if (!req)
+ return 0;
+
+ req->op = PTP_OP_GET_CLOCK;
+
+ err = otx2_sync_mbox_msg(&ptp->nic->mbox);
+ if (err)
+ return 0;
+
+ rsp = (struct ptp_rsp *)otx2_mbox_get_rsp(&ptp->nic->mbox.mbox, 0,
+ &req->hdr);
+ if (IS_ERR(rsp))
+ return 0;
+
+ return rsp->clk;
+}
+
+static int otx2_ptp_adjtime(struct ptp_clock_info *ptp_info, s64 delta)
+{
+ struct otx2_ptp *ptp = container_of(ptp_info, struct otx2_ptp,
+ ptp_info);
+ struct otx2_nic *pfvf = ptp->nic;
+
+ mutex_lock(&pfvf->mbox.lock);
+ timecounter_adjtime(&ptp->time_counter, delta);
+ mutex_unlock(&pfvf->mbox.lock);
+
+ return 0;
+}
+
+static int otx2_ptp_gettime(struct ptp_clock_info *ptp_info,
+ struct timespec64 *ts)
+{
+ struct otx2_ptp *ptp = container_of(ptp_info, struct otx2_ptp,
+ ptp_info);
+ struct otx2_nic *pfvf = ptp->nic;
+ u64 nsec;
+
+ mutex_lock(&pfvf->mbox.lock);
+ nsec = timecounter_read(&ptp->time_counter);
+ mutex_unlock(&pfvf->mbox.lock);
+
+ *ts = ns_to_timespec64(nsec);
+
+ return 0;
+}
+
+static int otx2_ptp_settime(struct ptp_clock_info *ptp_info,
+ const struct timespec64 *ts)
+{
+ struct otx2_ptp *ptp = container_of(ptp_info, struct otx2_ptp,
+ ptp_info);
+ struct otx2_nic *pfvf = ptp->nic;
+ u64 nsec;
+
+ nsec = timespec64_to_ns(ts);
+
+ mutex_lock(&pfvf->mbox.lock);
+ timecounter_init(&ptp->time_counter, &ptp->cycle_counter, nsec);
+ mutex_unlock(&pfvf->mbox.lock);
+
+ return 0;
+}
+
+static int otx2_ptp_enable(struct ptp_clock_info *ptp_info,
+ struct ptp_clock_request *rq, int on)
+{
+ return -EOPNOTSUPP;
+}
+
+int otx2_ptp_init(struct otx2_nic *pfvf)
+{
+ struct otx2_ptp *ptp_ptr;
+ struct cyclecounter *cc;
+ struct ptp_req *req;
+ int err;
+
+ mutex_lock(&pfvf->mbox.lock);
+ /* check if PTP block is available */
+ req = otx2_mbox_alloc_msg_ptp_op(&pfvf->mbox);
+ if (!req) {
+ mutex_unlock(&pfvf->mbox.lock);
+ return -ENOMEM;
+ }
+
+ req->op = PTP_OP_GET_CLOCK;
+
+ err = otx2_sync_mbox_msg(&pfvf->mbox);
+ if (err) {
+ mutex_unlock(&pfvf->mbox.lock);
+ return err;
+ }
+ mutex_unlock(&pfvf->mbox.lock);
+
+ ptp_ptr = kzalloc(sizeof(*ptp_ptr), GFP_KERNEL);
+ if (!ptp_ptr) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ ptp_ptr->nic = pfvf;
+
+ cc = &ptp_ptr->cycle_counter;
+ cc->read = ptp_cc_read;
+ cc->mask = CYCLECOUNTER_MASK(64);
+ cc->mult = 1;
+ cc->shift = 0;
+
+ timecounter_init(&ptp_ptr->time_counter, &ptp_ptr->cycle_counter,
+ ktime_to_ns(ktime_get_real()));
+
+ ptp_ptr->ptp_info = (struct ptp_clock_info) {
+ .owner = THIS_MODULE,
+ .name = "OcteonTX2 PTP",
+ .max_adj = 1000000000ull,
+ .n_ext_ts = 0,
+ .n_pins = 0,
+ .pps = 0,
+ .adjfine = otx2_ptp_adjfine,
+ .adjtime = otx2_ptp_adjtime,
+ .gettime64 = otx2_ptp_gettime,
+ .settime64 = otx2_ptp_settime,
+ .enable = otx2_ptp_enable,
+ };
+
+ ptp_ptr->ptp_clock = ptp_clock_register(&ptp_ptr->ptp_info, pfvf->dev);
+ if (IS_ERR_OR_NULL(ptp_ptr->ptp_clock)) {
+ err = ptp_ptr->ptp_clock ?
+ PTR_ERR(ptp_ptr->ptp_clock) : -ENODEV;
+ kfree(ptp_ptr);
+ goto error;
+ }
+
+ pfvf->ptp = ptp_ptr;
+
+error:
+ return err;
+}
+
+void otx2_ptp_destroy(struct otx2_nic *pfvf)
+{
+ struct otx2_ptp *ptp = pfvf->ptp;
+
+ if (!ptp)
+ return;
+
+ ptp_clock_unregister(ptp->ptp_clock);
+ kfree(ptp);
+ pfvf->ptp = NULL;
+}
+
+int otx2_ptp_clock_index(struct otx2_nic *pfvf)
+{
+ if (!pfvf->ptp)
+ return -ENODEV;
+
+ return ptp_clock_index(pfvf->ptp->ptp_clock);
+}
+
+int otx2_ptp_tstamp2time(struct otx2_nic *pfvf, u64 tstamp, u64 *tsns)
+{
+ if (!pfvf->ptp)
+ return -ENODEV;
+
+ *tsns = timecounter_cyc2time(&pfvf->ptp->time_counter, tstamp);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h
new file mode 100644
index 0000000..706d63a
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 PTP support for ethernet driver */
+
+#ifndef OTX2_PTP_H
+#define OTX2_PTP_H
+
+int otx2_ptp_init(struct otx2_nic *pfvf);
+void otx2_ptp_destroy(struct otx2_nic *pfvf);
+
+int otx2_ptp_clock_index(struct otx2_nic *pfvf);
+int otx2_ptp_tstamp2time(struct otx2_nic *pfvf, u64 tstamp, u64 *tsns);
+
+#endif
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index 3a5b34a..faaa322 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -16,6 +16,7 @@
#include "otx2_common.h"
#include "otx2_struct.h"
#include "otx2_txrx.h"
+#include "otx2_ptp.h"
#define CQE_ADDR(CQ, idx) ((CQ)->cqe_base + ((CQ)->cqe_size * (idx)))
@@ -81,8 +82,11 @@ static void otx2_snd_pkt_handler(struct otx2_nic *pfvf,
int budget, int *tx_pkts, int *tx_bytes)
{
struct nix_send_comp_s *snd_comp = &cqe->comp;
+ struct skb_shared_hwtstamps ts;
struct sk_buff *skb = NULL;
+ u64 timestamp, tsns;
struct sg_list *sg;
+ int err;
if (unlikely(snd_comp->status) && netif_msg_tx_err(pfvf))
net_err_ratelimited("%s: TX%d: Error in send CQ status:%x\n",
@@ -94,6 +98,18 @@ static void otx2_snd_pkt_handler(struct otx2_nic *pfvf,
if (unlikely(!skb))
return;
+ if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) {
+ timestamp = ((u64 *)sq->timestamps->base)[snd_comp->sqe_id];
+ if (timestamp != 1) {
+ err = otx2_ptp_tstamp2time(pfvf, timestamp, &tsns);
+ if (!err) {
+ memset(&ts, 0, sizeof(ts));
+ ts.hwtstamp = ns_to_ktime(tsns);
+ skb_tstamp_tx(skb, &ts);
+ }
+ }
+ }
+
*tx_bytes += skb->len;
(*tx_pkts)++;
otx2_dma_unmap_skb_frags(pfvf, sg);
@@ -101,16 +117,47 @@ static void otx2_snd_pkt_handler(struct otx2_nic *pfvf,
sg->skb = (u64)NULL;
}
+static void otx2_set_rxtstamp(struct otx2_nic *pfvf,
+ struct sk_buff *skb, void *data)
+{
+ u64 tsns;
+ int err;
+
+ if (!(pfvf->flags & OTX2_FLAG_RX_TSTAMP_ENABLED))
+ return;
+
+ /* The first 8 bytes is the timestamp */
+ err = otx2_ptp_tstamp2time(pfvf, be64_to_cpu(*(__be64 *)data), &tsns);
+ if (err)
+ return;
+
+ skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(tsns);
+}
+
static void otx2_skb_add_frag(struct otx2_nic *pfvf, struct sk_buff *skb,
- u64 iova, int len)
+ u64 iova, int len, struct nix_rx_parse_s *parse)
{
struct page *page;
+ int off = 0;
void *va;
va = phys_to_virt(otx2_iova_to_phys(pfvf->iommu_domain, iova));
+
+ if (likely(!skb_shinfo(skb)->nr_frags)) {
+ /* Check if data starts at some nonzero offset
+ * from the start of the buffer. For now the
+ * only possible offset is 8 bytes in the case
+ * where packet is prepended by a timestamp.
+ */
+ if (parse->laptr) {
+ otx2_set_rxtstamp(pfvf, skb, va);
+ off = OTX2_HW_TIMESTAMP_LEN;
+ }
+ }
+
page = virt_to_page(va);
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
- va - page_address(page), len, pfvf->rbsize);
+ va - page_address(page) + off, len - off, pfvf->rbsize);
otx2_dma_unmap_page(pfvf, iova - OTX2_HEAD_ROOM,
pfvf->rbsize, DMA_FROM_DEVICE);
@@ -239,7 +286,7 @@ static void otx2_rcv_pkt_handler(struct otx2_nic *pfvf,
if (unlikely(!skb))
return;
- otx2_skb_add_frag(pfvf, skb, cqe->sg.seg_addr, cqe->sg.seg_size);
+ otx2_skb_add_frag(pfvf, skb, cqe->sg.seg_addr, cqe->sg.seg_size, parse);
cq->pool_ptrs++;
otx2_set_rxhash(pfvf, cqe, skb);
@@ -477,15 +524,55 @@ static void otx2_sqe_add_ext(struct otx2_nic *pfvf, struct otx2_snd_queue *sq,
*/
ip_hdr(skb)->tot_len =
htons(ext->lso_sb - skb_network_offset(skb));
- } else {
+ } else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
ext->lso_format = pfvf->hw.lso_tsov6_idx;
+
ipv6_hdr(skb)->payload_len =
htons(ext->lso_sb - skb_network_offset(skb));
+ } else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+ __be16 l3_proto = vlan_get_protocol(skb);
+ struct udphdr *udph = udp_hdr(skb);
+ u16 iplen;
+
+ ext->lso_sb = skb_transport_offset(skb) +
+ sizeof(struct udphdr);
+
+ /* HW adds payload size to length fields in IP and
+ * UDP headers while segmentation, hence adjust the
+ * lengths to just header sizes.
+ */
+ iplen = htons(ext->lso_sb - skb_network_offset(skb));
+ if (l3_proto == htons(ETH_P_IP)) {
+ ip_hdr(skb)->tot_len = iplen;
+ ext->lso_format = pfvf->hw.lso_udpv4_idx;
+ } else {
+ ipv6_hdr(skb)->payload_len = iplen;
+ ext->lso_format = pfvf->hw.lso_udpv6_idx;
+ }
+
+ udph->len = htons(sizeof(struct udphdr));
}
+ } else if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
+ ext->tstmp = 1;
}
+
*offset += sizeof(*ext);
}
+static void otx2_sqe_add_mem(struct otx2_snd_queue *sq, int *offset,
+ int alg, u64 iova)
+{
+ struct nix_sqe_mem_s *mem;
+
+ mem = (struct nix_sqe_mem_s *)(sq->sqe_base + *offset);
+ mem->subdc = NIX_SUBDC_MEM;
+ mem->alg = alg;
+ mem->wmem = 1; /* wait for the memory operation */
+ mem->addr = iova;
+
+ *offset += sizeof(*mem);
+}
+
/* Add SQE header subdescriptor structure */
static void otx2_sqe_add_hdr(struct otx2_nic *pfvf, struct otx2_snd_queue *sq,
struct nix_sqe_hdr_s *sqe_hdr,
@@ -736,6 +823,21 @@ static int otx2_get_sqe_count(struct otx2_nic *pfvf, struct sk_buff *skb)
return skb_shinfo(skb)->gso_segs;
}
+static void otx2_set_txtstamp(struct otx2_nic *pfvf, struct sk_buff *skb,
+ struct otx2_snd_queue *sq, int *offset)
+{
+ u64 iova;
+
+ if (!skb_shinfo(skb)->gso_size &&
+ skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+ iova = sq->timestamps->iova + (sq->head * sizeof(u64));
+ otx2_sqe_add_mem(sq, offset, NIX_SENDMEMALG_E_SETTSTMP, iova);
+ } else {
+ skb_tx_timestamp(skb);
+ }
+}
+
bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq,
struct sk_buff *skb, u16 qidx)
{
@@ -789,6 +891,8 @@ bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq,
return false;
}
+ otx2_set_txtstamp(pfvf, skb, sq, &offset);
+
sqe_hdr->sizem1 = (offset / 16) - 1;
netdev_tx_sent_queue(txq, skb->len);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
index da97f2d4..73af156 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
@@ -91,6 +91,7 @@ struct otx2_snd_queue {
struct qmem *sqe;
struct qmem *tso_hdrs;
struct sg_list *sg;
+ struct qmem *timestamps;
struct queue_stats stats;
u16 sqb_count;
u64 *sqb_ptrs;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
index 92a3db6..70e0d4c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
@@ -553,7 +553,8 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
netdev->hw_features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
NETIF_F_IPV6_CSUM | NETIF_F_RXHASH |
- NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6;
+ NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
+ NETIF_F_GSO_UDP_L4;
netdev->features = netdev->hw_features;
netdev->gso_max_segs = OTX2_MAX_GSO_SEGS;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index b50c567..99d7737 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -705,7 +705,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
frags = ring->rx_info + (index << priv->log_rx_info);
va = page_address(frags[0].page) + frags[0].page_offset;
- prefetchw(va);
+ net_prefetchw(va);
/*
* make sure we read the CQE after we read the ownership bit
*/
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 10e6886..0b3eaa1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -24,7 +24,7 @@
mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
en_selftest.o en/port.o en/monitor_stats.o en/health.o \
- en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/umem.o \
+ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o
#
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 0cc2080..4f33658 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -442,7 +442,7 @@ struct mlx5e_xdpsq {
struct mlx5e_cq cq;
/* read only */
- struct xdp_umem *umem;
+ struct xsk_buff_pool *xsk_pool;
struct mlx5_wq_cyc wq;
struct mlx5e_xdpsq_stats *stats;
mlx5e_fp_xmit_xdp_frame_check xmit_xdp_frame_check;
@@ -606,7 +606,7 @@ struct mlx5e_rq {
struct page_pool *page_pool;
/* AF_XDP zero-copy */
- struct xdp_umem *umem;
+ struct xsk_buff_pool *xsk_pool;
struct work_struct recover_work;
@@ -729,12 +729,13 @@ struct mlx5e_hv_vhca_stats_agent {
#endif
struct mlx5e_xsk {
- /* UMEMs are stored separately from channels, because we don't want to
- * lose them when channels are recreated. The kernel also stores UMEMs,
- * but it doesn't distinguish between zero-copy and non-zero-copy UMEMs,
- * so rely on our mechanism.
+ /* XSK buffer pools are stored separately from channels,
+ * because we don't want to lose them when channels are
+ * recreated. The kernel also stores buffer pool, but it doesn't
+ * distinguish between zero-copy and non-zero-copy UMEMs, so
+ * rely on our mechanism.
*/
- struct xdp_umem **umems;
+ struct xsk_buff_pool **pools;
u16 refcnt;
bool ever_used;
};
@@ -893,7 +894,7 @@ struct mlx5e_xsk_param;
struct mlx5e_rq_param;
int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk,
- struct xdp_umem *umem, struct mlx5e_rq *rq);
+ struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq);
int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time);
void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
void mlx5e_close_rq(struct mlx5e_rq *rq);
@@ -903,7 +904,7 @@ int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
struct mlx5e_sq_param *param, struct mlx5e_icosq *sq);
void mlx5e_close_icosq(struct mlx5e_icosq *sq);
int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_sq_param *param, struct xdp_umem *umem,
+ struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool,
struct mlx5e_xdpsq *sq, bool is_redirect);
void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 0e6946f..1455927 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -201,7 +201,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
pi = mlx5e_xdpsq_get_next_pi(sq, MLX5_SEND_WQE_MAX_WQEBBS);
session->wqe = MLX5E_TX_FETCH_WQE(sq, pi);
- prefetchw(session->wqe->data);
+ net_prefetchw(session->wqe->data);
session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
session->pkt_count = 0;
@@ -322,7 +322,7 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd,
struct mlx5e_xdpsq_stats *stats = sq->stats;
- prefetchw(wqe);
+ net_prefetchw(wqe);
if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
stats->err++;
@@ -445,7 +445,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
if (xsk_frames)
- xsk_umem_complete_tx(sq->umem, xsk_frames);
+ xsk_tx_completed(sq->xsk_pool, xsk_frames);
sq->stats->cqes += i;
@@ -475,7 +475,7 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
}
if (xsk_frames)
- xsk_umem_complete_tx(sq->umem, xsk_frames);
+ xsk_tx_completed(sq->xsk_pool, xsk_frames);
}
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
@@ -563,4 +563,3 @@ void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
sq->xmit_xdp_frame = is_mpw ?
mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
}
-
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
new file mode 100644
index 0000000..3503e77
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. */
+
+#include <net/xdp_sock_drv.h>
+#include "pool.h"
+#include "setup.h"
+#include "en/params.h"
+
+static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv,
+ struct xsk_buff_pool *pool)
+{
+ struct device *dev = priv->mdev->device;
+
+ return xsk_pool_dma_map(pool, dev, 0);
+}
+
+static void mlx5e_xsk_unmap_pool(struct mlx5e_priv *priv,
+ struct xsk_buff_pool *pool)
+{
+ return xsk_pool_dma_unmap(pool, 0);
+}
+
+static int mlx5e_xsk_get_pools(struct mlx5e_xsk *xsk)
+{
+ if (!xsk->pools) {
+ xsk->pools = kcalloc(MLX5E_MAX_NUM_CHANNELS,
+ sizeof(*xsk->pools), GFP_KERNEL);
+ if (unlikely(!xsk->pools))
+ return -ENOMEM;
+ }
+
+ xsk->refcnt++;
+ xsk->ever_used = true;
+
+ return 0;
+}
+
+static void mlx5e_xsk_put_pools(struct mlx5e_xsk *xsk)
+{
+ if (!--xsk->refcnt) {
+ kfree(xsk->pools);
+ xsk->pools = NULL;
+ }
+}
+
+static int mlx5e_xsk_add_pool(struct mlx5e_xsk *xsk, struct xsk_buff_pool *pool, u16 ix)
+{
+ int err;
+
+ err = mlx5e_xsk_get_pools(xsk);
+ if (unlikely(err))
+ return err;
+
+ xsk->pools[ix] = pool;
+ return 0;
+}
+
+static void mlx5e_xsk_remove_pool(struct mlx5e_xsk *xsk, u16 ix)
+{
+ xsk->pools[ix] = NULL;
+
+ mlx5e_xsk_put_pools(xsk);
+}
+
+static bool mlx5e_xsk_is_pool_sane(struct xsk_buff_pool *pool)
+{
+ return xsk_pool_get_headroom(pool) <= 0xffff &&
+ xsk_pool_get_chunk_size(pool) <= 0xffff;
+}
+
+void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk)
+{
+ xsk->headroom = xsk_pool_get_headroom(pool);
+ xsk->chunk_size = xsk_pool_get_chunk_size(pool);
+}
+
+static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
+ struct xsk_buff_pool *pool, u16 ix)
+{
+ struct mlx5e_params *params = &priv->channels.params;
+ struct mlx5e_xsk_param xsk;
+ struct mlx5e_channel *c;
+ int err;
+
+ if (unlikely(mlx5e_xsk_get_pool(&priv->channels.params, &priv->xsk, ix)))
+ return -EBUSY;
+
+ if (unlikely(!mlx5e_xsk_is_pool_sane(pool)))
+ return -EINVAL;
+
+ err = mlx5e_xsk_map_pool(priv, pool);
+ if (unlikely(err))
+ return err;
+
+ err = mlx5e_xsk_add_pool(&priv->xsk, pool, ix);
+ if (unlikely(err))
+ goto err_unmap_pool;
+
+ mlx5e_build_xsk_param(pool, &xsk);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ /* XSK objects will be created on open. */
+ goto validate_closed;
+ }
+
+ if (!params->xdp_prog) {
+ /* XSK objects will be created when an XDP program is set,
+ * and the channels are reopened.
+ */
+ goto validate_closed;
+ }
+
+ c = priv->channels.c[ix];
+
+ err = mlx5e_open_xsk(priv, params, &xsk, pool, c);
+ if (unlikely(err))
+ goto err_remove_pool;
+
+ mlx5e_activate_xsk(c);
+
+ /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide
+ * any Fill Ring entries at the setup stage.
+ */
+
+ err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]);
+ if (unlikely(err))
+ goto err_deactivate;
+
+ return 0;
+
+err_deactivate:
+ mlx5e_deactivate_xsk(c);
+ mlx5e_close_xsk(c);
+
+err_remove_pool:
+ mlx5e_xsk_remove_pool(&priv->xsk, ix);
+
+err_unmap_pool:
+ mlx5e_xsk_unmap_pool(priv, pool);
+
+ return err;
+
+validate_closed:
+ /* Check the configuration in advance, rather than fail at a later stage
+ * (in mlx5e_xdp_set or on open) and end up with no channels.
+ */
+ if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) {
+ err = -EINVAL;
+ goto err_remove_pool;
+ }
+
+ return 0;
+}
+
+static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix)
+{
+ struct xsk_buff_pool *pool = mlx5e_xsk_get_pool(&priv->channels.params,
+ &priv->xsk, ix);
+ struct mlx5e_channel *c;
+
+ if (unlikely(!pool))
+ return -EINVAL;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto remove_pool;
+
+ /* XSK RQ and SQ are only created if XDP program is set. */
+ if (!priv->channels.params.xdp_prog)
+ goto remove_pool;
+
+ c = priv->channels.c[ix];
+ mlx5e_xsk_redirect_rqt_to_drop(priv, ix);
+ mlx5e_deactivate_xsk(c);
+ mlx5e_close_xsk(c);
+
+remove_pool:
+ mlx5e_xsk_remove_pool(&priv->xsk, ix);
+ mlx5e_xsk_unmap_pool(priv, pool);
+
+ return 0;
+}
+
+static int mlx5e_xsk_enable_pool(struct mlx5e_priv *priv, struct xsk_buff_pool *pool,
+ u16 ix)
+{
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_xsk_enable_locked(priv, pool, ix);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_xsk_disable_pool(struct mlx5e_priv *priv, u16 ix)
+{
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_xsk_disable_locked(priv, ix);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_params *params = &priv->channels.params;
+ u16 ix;
+
+ if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
+ return -EINVAL;
+
+ return pool ? mlx5e_xsk_enable_pool(priv, pool, ix) :
+ mlx5e_xsk_disable_pool(priv, ix);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h
new file mode 100644
index 0000000..dca0010
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_XSK_POOL_H__
+#define __MLX5_EN_XSK_POOL_H__
+
+#include "en.h"
+
+static inline struct xsk_buff_pool *mlx5e_xsk_get_pool(struct mlx5e_params *params,
+ struct mlx5e_xsk *xsk, u16 ix)
+{
+ if (!xsk || !xsk->pools)
+ return NULL;
+
+ if (unlikely(ix >= params->num_channels))
+ return NULL;
+
+ return xsk->pools[ix];
+}
+
+struct mlx5e_xsk_param;
+void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk);
+
+/* .ndo_bpf callback. */
+int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid);
+
+#endif /* __MLX5_EN_XSK_POOL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
index a33a1f7..bb6669d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -48,8 +48,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
xdp->data_end = xdp->data + cqe_bcnt32;
xdp_set_data_meta_invalid(xdp);
- xsk_buff_dma_sync_for_cpu(xdp);
- prefetch(xdp->data);
+ xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
+ net_prefetch(xdp->data);
rcu_read_lock();
consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp);
@@ -99,8 +99,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
xdp->data_end = xdp->data + cqe_bcnt;
xdp_set_data_meta_invalid(xdp);
- xsk_buff_dma_sync_for_cpu(xdp);
- prefetch(xdp->data);
+ xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
+ net_prefetch(xdp->data);
if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
rq->stats->wqe_err++;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
index d147b2f..7f88ccf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -19,10 +19,10 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
struct mlx5e_wqe_frag_info *wi,
u32 cqe_bcnt);
-static inline int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
+static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq,
struct mlx5e_dma_info *dma_info)
{
- dma_info->xsk = xsk_buff_alloc(rq->umem);
+ dma_info->xsk = xsk_buff_alloc(rq->xsk_pool);
if (!dma_info->xsk)
return -ENOMEM;
@@ -38,13 +38,13 @@ static inline int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err)
{
- if (!xsk_umem_uses_need_wakeup(rq->umem))
+ if (!xsk_uses_need_wakeup(rq->xsk_pool))
return alloc_err;
if (unlikely(alloc_err))
- xsk_set_rx_need_wakeup(rq->umem);
+ xsk_set_rx_need_wakeup(rq->xsk_pool);
else
- xsk_clear_rx_need_wakeup(rq->umem);
+ xsk_clear_rx_need_wakeup(rq->xsk_pool);
return false;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index dd9df51..662a1da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -45,7 +45,7 @@ static void mlx5e_build_xsk_cparam(struct mlx5e_priv *priv,
}
int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk, struct xdp_umem *umem,
+ struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool,
struct mlx5e_channel *c)
{
struct mlx5e_channel_param *cparam;
@@ -64,7 +64,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
if (unlikely(err))
goto err_free_cparam;
- err = mlx5e_open_rq(c, params, &cparam->rq, xsk, umem, &c->xskrq);
+ err = mlx5e_open_rq(c, params, &cparam->rq, xsk, pool, &c->xskrq);
if (unlikely(err))
goto err_close_rx_cq;
@@ -72,13 +72,13 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
if (unlikely(err))
goto err_close_rq;
- /* Create a separate SQ, so that when the UMEM is disabled, we could
+ /* Create a separate SQ, so that when the buff pool is disabled, we could
* close this SQ safely and stop receiving CQEs. In other case, e.g., if
- * the XDPSQ was used instead, we might run into trouble when the UMEM
+ * the XDPSQ was used instead, we might run into trouble when the buff pool
* is disabled and then reenabled, but the SQ continues receiving CQEs
- * from the old UMEM.
+ * from the old buff pool.
*/
- err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, umem, &c->xsksq, true);
+ err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, pool, &c->xsksq, true);
if (unlikely(err))
goto err_close_tx_cq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
index 0dd11b8..ca20f1f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
@@ -12,7 +12,7 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
struct mlx5_core_dev *mdev);
int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk, struct xdp_umem *umem,
+ struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool,
struct mlx5e_channel *c);
void mlx5e_close_xsk(struct mlx5e_channel *c);
void mlx5e_activate_xsk(struct mlx5e_channel *c);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
index 4d892f6..aa91cbd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2019 Mellanox Technologies. */
#include "tx.h"
-#include "umem.h"
+#include "pool.h"
#include "en/xdp.h"
#include "en/params.h"
#include <net/xdp_sock_drv.h>
@@ -66,7 +66,7 @@ static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,
bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
{
- struct xdp_umem *umem = sq->umem;
+ struct xsk_buff_pool *pool = sq->xsk_pool;
struct mlx5e_xdp_info xdpi;
struct mlx5e_xdp_xmit_data xdptxd;
bool work_done = true;
@@ -87,7 +87,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
break;
}
- if (!xsk_umem_consume_tx(umem, &desc)) {
+ if (!xsk_tx_peek_desc(pool, &desc)) {
/* TX will get stuck until something wakes it up by
* triggering NAPI. Currently it's expected that the
* application calls sendto() if there are consumed, but
@@ -96,11 +96,11 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
break;
}
- xdptxd.dma_addr = xsk_buff_raw_get_dma(umem, desc.addr);
- xdptxd.data = xsk_buff_raw_get_data(umem, desc.addr);
+ xdptxd.dma_addr = xsk_buff_raw_get_dma(pool, desc.addr);
+ xdptxd.data = xsk_buff_raw_get_data(pool, desc.addr);
xdptxd.len = desc.len;
- xsk_buff_raw_dma_sync_for_device(umem, xdptxd.dma_addr, xdptxd.len);
+ xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len);
ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
mlx5e_xmit_xdp_frame, sq, &xdptxd, &xdpi, check_result);
@@ -119,7 +119,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
mlx5e_xdp_mpwqe_complete(sq);
mlx5e_xmit_xdp_doorbell(sq);
- xsk_umem_consume_tx_done(umem);
+ xsk_tx_release(pool);
}
return !(budget && work_done);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
index 39fa0a7..a050850 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
@@ -15,13 +15,13 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget);
static inline void mlx5e_xsk_update_tx_wakeup(struct mlx5e_xdpsq *sq)
{
- if (!xsk_umem_uses_need_wakeup(sq->umem))
+ if (!xsk_uses_need_wakeup(sq->xsk_pool))
return;
if (sq->pc != sq->cc)
- xsk_clear_tx_need_wakeup(sq->umem);
+ xsk_clear_tx_need_wakeup(sq->xsk_pool);
else
- xsk_set_tx_need_wakeup(sq->umem);
+ xsk_set_tx_need_wakeup(sq->xsk_pool);
}
#endif /* __MLX5_EN_XSK_TX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
deleted file mode 100644
index 331ca2b..0000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
+++ /dev/null
@@ -1,217 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-/* Copyright (c) 2019 Mellanox Technologies. */
-
-#include <net/xdp_sock_drv.h>
-#include "umem.h"
-#include "setup.h"
-#include "en/params.h"
-
-static int mlx5e_xsk_map_umem(struct mlx5e_priv *priv,
- struct xdp_umem *umem)
-{
- struct device *dev = priv->mdev->device;
-
- return xsk_buff_dma_map(umem, dev, 0);
-}
-
-static void mlx5e_xsk_unmap_umem(struct mlx5e_priv *priv,
- struct xdp_umem *umem)
-{
- return xsk_buff_dma_unmap(umem, 0);
-}
-
-static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk)
-{
- if (!xsk->umems) {
- xsk->umems = kcalloc(MLX5E_MAX_NUM_CHANNELS,
- sizeof(*xsk->umems), GFP_KERNEL);
- if (unlikely(!xsk->umems))
- return -ENOMEM;
- }
-
- xsk->refcnt++;
- xsk->ever_used = true;
-
- return 0;
-}
-
-static void mlx5e_xsk_put_umems(struct mlx5e_xsk *xsk)
-{
- if (!--xsk->refcnt) {
- kfree(xsk->umems);
- xsk->umems = NULL;
- }
-}
-
-static int mlx5e_xsk_add_umem(struct mlx5e_xsk *xsk, struct xdp_umem *umem, u16 ix)
-{
- int err;
-
- err = mlx5e_xsk_get_umems(xsk);
- if (unlikely(err))
- return err;
-
- xsk->umems[ix] = umem;
- return 0;
-}
-
-static void mlx5e_xsk_remove_umem(struct mlx5e_xsk *xsk, u16 ix)
-{
- xsk->umems[ix] = NULL;
-
- mlx5e_xsk_put_umems(xsk);
-}
-
-static bool mlx5e_xsk_is_umem_sane(struct xdp_umem *umem)
-{
- return xsk_umem_get_headroom(umem) <= 0xffff &&
- xsk_umem_get_chunk_size(umem) <= 0xffff;
-}
-
-void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk)
-{
- xsk->headroom = xsk_umem_get_headroom(umem);
- xsk->chunk_size = xsk_umem_get_chunk_size(umem);
-}
-
-static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
- struct xdp_umem *umem, u16 ix)
-{
- struct mlx5e_params *params = &priv->channels.params;
- struct mlx5e_xsk_param xsk;
- struct mlx5e_channel *c;
- int err;
-
- if (unlikely(mlx5e_xsk_get_umem(&priv->channels.params, &priv->xsk, ix)))
- return -EBUSY;
-
- if (unlikely(!mlx5e_xsk_is_umem_sane(umem)))
- return -EINVAL;
-
- err = mlx5e_xsk_map_umem(priv, umem);
- if (unlikely(err))
- return err;
-
- err = mlx5e_xsk_add_umem(&priv->xsk, umem, ix);
- if (unlikely(err))
- goto err_unmap_umem;
-
- mlx5e_build_xsk_param(umem, &xsk);
-
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- /* XSK objects will be created on open. */
- goto validate_closed;
- }
-
- if (!params->xdp_prog) {
- /* XSK objects will be created when an XDP program is set,
- * and the channels are reopened.
- */
- goto validate_closed;
- }
-
- c = priv->channels.c[ix];
-
- err = mlx5e_open_xsk(priv, params, &xsk, umem, c);
- if (unlikely(err))
- goto err_remove_umem;
-
- mlx5e_activate_xsk(c);
-
- /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide
- * any Fill Ring entries at the setup stage.
- */
-
- err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]);
- if (unlikely(err))
- goto err_deactivate;
-
- return 0;
-
-err_deactivate:
- mlx5e_deactivate_xsk(c);
- mlx5e_close_xsk(c);
-
-err_remove_umem:
- mlx5e_xsk_remove_umem(&priv->xsk, ix);
-
-err_unmap_umem:
- mlx5e_xsk_unmap_umem(priv, umem);
-
- return err;
-
-validate_closed:
- /* Check the configuration in advance, rather than fail at a later stage
- * (in mlx5e_xdp_set or on open) and end up with no channels.
- */
- if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) {
- err = -EINVAL;
- goto err_remove_umem;
- }
-
- return 0;
-}
-
-static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix)
-{
- struct xdp_umem *umem = mlx5e_xsk_get_umem(&priv->channels.params,
- &priv->xsk, ix);
- struct mlx5e_channel *c;
-
- if (unlikely(!umem))
- return -EINVAL;
-
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
- goto remove_umem;
-
- /* XSK RQ and SQ are only created if XDP program is set. */
- if (!priv->channels.params.xdp_prog)
- goto remove_umem;
-
- c = priv->channels.c[ix];
- mlx5e_xsk_redirect_rqt_to_drop(priv, ix);
- mlx5e_deactivate_xsk(c);
- mlx5e_close_xsk(c);
-
-remove_umem:
- mlx5e_xsk_remove_umem(&priv->xsk, ix);
- mlx5e_xsk_unmap_umem(priv, umem);
-
- return 0;
-}
-
-static int mlx5e_xsk_enable_umem(struct mlx5e_priv *priv, struct xdp_umem *umem,
- u16 ix)
-{
- int err;
-
- mutex_lock(&priv->state_lock);
- err = mlx5e_xsk_enable_locked(priv, umem, ix);
- mutex_unlock(&priv->state_lock);
-
- return err;
-}
-
-static int mlx5e_xsk_disable_umem(struct mlx5e_priv *priv, u16 ix)
-{
- int err;
-
- mutex_lock(&priv->state_lock);
- err = mlx5e_xsk_disable_locked(priv, ix);
- mutex_unlock(&priv->state_lock);
-
- return err;
-}
-
-int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid)
-{
- struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5e_params *params = &priv->channels.params;
- u16 ix;
-
- if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
- return -EINVAL;
-
- return umem ? mlx5e_xsk_enable_umem(priv, umem, ix) :
- mlx5e_xsk_disable_umem(priv, ix);
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h
deleted file mode 100644
index bada949..0000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2019 Mellanox Technologies. */
-
-#ifndef __MLX5_EN_XSK_UMEM_H__
-#define __MLX5_EN_XSK_UMEM_H__
-
-#include "en.h"
-
-static inline struct xdp_umem *mlx5e_xsk_get_umem(struct mlx5e_params *params,
- struct mlx5e_xsk *xsk, u16 ix)
-{
- if (!xsk || !xsk->umems)
- return NULL;
-
- if (unlikely(ix >= params->num_channels))
- return NULL;
-
- return xsk->umems[ix];
-}
-
-struct mlx5e_xsk_param;
-void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk);
-
-/* .ndo_bpf callback. */
-int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid);
-
-int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries);
-
-#endif /* __MLX5_EN_XSK_UMEM_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 0827098..5cb1e48 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -32,7 +32,7 @@
#include "en.h"
#include "en/port.h"
-#include "en/xsk/umem.h"
+#include "en/xsk/pool.h"
#include "lib/clock.h"
void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index 83c9b2b..b416a8e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -33,7 +33,7 @@
#include <linux/mlx5/fs.h>
#include "en.h"
#include "en/params.h"
-#include "en/xsk/umem.h"
+#include "en/xsk/pool.h"
struct mlx5e_ethtool_rule {
struct list_head list;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index aebcf73..2683462 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -57,7 +57,7 @@
#include "en/monitor_stats.h"
#include "en/health.h"
#include "en/params.h"
-#include "en/xsk/umem.h"
+#include "en/xsk/pool.h"
#include "en/xsk/setup.h"
#include "en/xsk/rx.h"
#include "en/xsk/tx.h"
@@ -363,7 +363,7 @@ static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work)
static int mlx5e_alloc_rq(struct mlx5e_channel *c,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
- struct xdp_umem *umem,
+ struct xsk_buff_pool *xsk_pool,
struct mlx5e_rq_param *rqp,
struct mlx5e_rq *rq)
{
@@ -389,9 +389,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
rq->mdev = mdev;
rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
rq->xdpsq = &c->rq_xdpsq;
- rq->umem = umem;
+ rq->xsk_pool = xsk_pool;
- if (rq->umem)
+ if (rq->xsk_pool)
rq->stats = &c->priv->channel_stats[c->ix].xskrq;
else
rq->stats = &c->priv->channel_stats[c->ix].rq;
@@ -477,7 +477,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
if (xsk) {
err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
MEM_TYPE_XSK_BUFF_POOL, NULL);
- xsk_buff_set_rxq_info(rq->umem, &rq->xdp_rxq);
+ xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq);
} else {
/* Create a page_pool and register it with rxq */
pp_params.order = 0;
@@ -816,11 +816,11 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk,
- struct xdp_umem *umem, struct mlx5e_rq *rq)
+ struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq)
{
int err;
- err = mlx5e_alloc_rq(c, params, xsk, umem, param, rq);
+ err = mlx5e_alloc_rq(c, params, xsk, xsk_pool, param, rq);
if (err)
return err;
@@ -925,7 +925,7 @@ static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
struct mlx5e_params *params,
- struct xdp_umem *umem,
+ struct xsk_buff_pool *xsk_pool,
struct mlx5e_sq_param *param,
struct mlx5e_xdpsq *sq,
bool is_redirect)
@@ -941,9 +941,9 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
sq->uar_map = mdev->mlx5e_res.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;
sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- sq->umem = umem;
+ sq->xsk_pool = xsk_pool;
- sq->stats = sq->umem ?
+ sq->stats = sq->xsk_pool ?
&c->priv->channel_stats[c->ix].xsksq :
is_redirect ?
&c->priv->channel_stats[c->ix].xdpsq :
@@ -1408,13 +1408,13 @@ void mlx5e_close_icosq(struct mlx5e_icosq *sq)
}
int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_sq_param *param, struct xdp_umem *umem,
+ struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool,
struct mlx5e_xdpsq *sq, bool is_redirect)
{
struct mlx5e_create_sq_param csp = {};
int err;
- err = mlx5e_alloc_xdpsq(c, params, umem, param, sq, is_redirect);
+ err = mlx5e_alloc_xdpsq(c, params, xsk_pool, param, sq, is_redirect);
if (err)
return err;
@@ -1907,7 +1907,7 @@ static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix)
static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
struct mlx5e_params *params,
struct mlx5e_channel_param *cparam,
- struct xdp_umem *umem,
+ struct xsk_buff_pool *xsk_pool,
struct mlx5e_channel **cp)
{
int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
@@ -1946,9 +1946,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
if (unlikely(err))
goto err_napi_del;
- if (umem) {
- mlx5e_build_xsk_param(umem, &xsk);
- err = mlx5e_open_xsk(priv, params, &xsk, umem, c);
+ if (xsk_pool) {
+ mlx5e_build_xsk_param(xsk_pool, &xsk);
+ err = mlx5e_open_xsk(priv, params, &xsk, xsk_pool, c);
if (unlikely(err))
goto err_close_queues;
}
@@ -2309,12 +2309,12 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
mlx5e_build_channel_param(priv, &chs->params, cparam);
for (i = 0; i < chs->num; i++) {
- struct xdp_umem *umem = NULL;
+ struct xsk_buff_pool *xsk_pool = NULL;
if (chs->params.xdp_prog)
- umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, i);
+ xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, i);
- err = mlx5e_open_channel(priv, i, &chs->params, cparam, umem, &chs->c[i]);
+ err = mlx5e_open_channel(priv, i, &chs->params, cparam, xsk_pool, &chs->c[i]);
if (err)
goto err_close_channels;
}
@@ -3892,13 +3892,14 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
u16 ix;
for (ix = 0; ix < chs->params.num_channels; ix++) {
- struct xdp_umem *umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, ix);
+ struct xsk_buff_pool *xsk_pool =
+ mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix);
struct mlx5e_xsk_param xsk;
- if (!umem)
+ if (!xsk_pool)
continue;
- mlx5e_build_xsk_param(umem, &xsk);
+ mlx5e_build_xsk_param(xsk_pool, &xsk);
if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) {
u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk);
@@ -4423,8 +4424,8 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
switch (xdp->command) {
case XDP_SETUP_PROG:
return mlx5e_xdp_set(dev, xdp->prog);
- case XDP_SETUP_XSK_UMEM:
- return mlx5e_xsk_setup_umem(dev, xdp->xsk.umem,
+ case XDP_SETUP_XSK_POOL:
+ return mlx5e_xsk_setup_pool(dev, xdp->xsk.pool,
xdp->xsk.queue_id);
default:
return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 65828af..7aab69e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/prefetch.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
@@ -281,8 +280,8 @@ static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq,
static inline int mlx5e_page_alloc(struct mlx5e_rq *rq,
struct mlx5e_dma_info *dma_info)
{
- if (rq->umem)
- return mlx5e_xsk_page_alloc_umem(rq, dma_info);
+ if (rq->xsk_pool)
+ return mlx5e_xsk_page_alloc_pool(rq, dma_info);
else
return mlx5e_page_alloc_pool(rq, dma_info);
}
@@ -313,7 +312,7 @@ static inline void mlx5e_page_release(struct mlx5e_rq *rq,
struct mlx5e_dma_info *dma_info,
bool recycle)
{
- if (rq->umem)
+ if (rq->xsk_pool)
/* The `recycle` parameter is ignored, and the page is always
* put into the Reuse Ring, because there is no way to return
* the page to the userspace when the interface goes down.
@@ -400,14 +399,14 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk)
int err;
int i;
- if (rq->umem) {
+ if (rq->xsk_pool) {
int pages_desired = wqe_bulk << rq->wqe.info.log_num_frags;
/* Check in advance that we have enough frames, instead of
* allocating one-by-one, failing and moving frames to the
* Reuse Ring.
*/
- if (unlikely(!xsk_buff_can_alloc(rq->umem, pages_desired)))
+ if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, pages_desired)))
return -ENOMEM;
}
@@ -505,8 +504,8 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
/* Check in advance that we have enough frames, instead of allocating
* one-by-one, failing and moving frames to the Reuse Ring.
*/
- if (rq->umem &&
- unlikely(!xsk_buff_can_alloc(rq->umem, MLX5_MPWRQ_PAGES_PER_WQE))) {
+ if (rq->xsk_pool &&
+ unlikely(!xsk_buff_can_alloc(rq->xsk_pool, MLX5_MPWRQ_PAGES_PER_WQE))) {
err = -ENOMEM;
goto err;
}
@@ -754,7 +753,7 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
* the driver when it refills the Fill Ring.
* 2. Otherwise, busy poll by rescheduling the NAPI poll.
*/
- if (unlikely(alloc_err == -ENOMEM && rq->umem))
+ if (unlikely(alloc_err == -ENOMEM && rq->xsk_pool))
return true;
return false;
@@ -1141,8 +1140,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
frag_size, DMA_FROM_DEVICE);
- prefetchw(va); /* xdp_frame data area */
- prefetch(data);
+ net_prefetchw(va); /* xdp_frame data area */
+ net_prefetch(data);
rcu_read_lock();
mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
@@ -1184,7 +1183,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
return NULL;
}
- prefetchw(skb->data);
+ net_prefetchw(skb->data);
while (byte_cnt) {
u16 frag_consumed_bytes =
@@ -1399,7 +1398,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
return NULL;
}
- prefetchw(skb->data);
+ net_prefetchw(skb->data);
if (unlikely(frag_offset >= PAGE_SIZE)) {
di++;
@@ -1452,8 +1451,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset,
frag_size, DMA_FROM_DEVICE);
- prefetchw(va); /* xdp_frame data area */
- prefetch(data);
+ net_prefetchw(va); /* xdp_frame data area */
+ net_prefetch(data);
rcu_read_lock();
mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index 4679021..ce8ab1f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/prefetch.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <net/udp.h>
@@ -115,7 +114,7 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv)
return NULL;
}
- prefetchw(skb->data);
+ net_prefetchw(skb->data);
skb_reserve(skb, NET_IP_ALIGN);
/* Reserve for ethernet and IP header */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
index 61719ec..252e910 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
@@ -12,8 +12,17 @@
#include "core.h"
#include "core_env.h"
-#define MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT 127
-#define MLXSW_HWMON_ATTR_COUNT (MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT * 4 + \
+#define MLXSW_HWMON_SENSORS_MAX_COUNT 64
+#define MLXSW_HWMON_MODULES_MAX_COUNT 64
+#define MLXSW_HWMON_GEARBOXES_MAX_COUNT 32
+
+#define MLXSW_HWMON_ATTR_PER_SENSOR 3
+#define MLXSW_HWMON_ATTR_PER_MODULE 7
+#define MLXSW_HWMON_ATTR_PER_GEARBOX 4
+
+#define MLXSW_HWMON_ATTR_COUNT (MLXSW_HWMON_SENSORS_MAX_COUNT * MLXSW_HWMON_ATTR_PER_SENSOR + \
+ MLXSW_HWMON_MODULES_MAX_COUNT * MLXSW_HWMON_ATTR_PER_MODULE + \
+ MLXSW_HWMON_GEARBOXES_MAX_COUNT * MLXSW_HWMON_ATTR_PER_GEARBOX + \
MLXSW_MFCR_TACHOS_MAX + MLXSW_MFCR_PWMS_MAX)
struct mlxsw_hwmon_attr {
@@ -205,25 +214,39 @@ static ssize_t mlxsw_hwmon_pwm_store(struct device *dev,
return len;
}
-static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static int mlxsw_hwmon_module_temp_get(struct device *dev,
+ struct device_attribute *attr,
+ int *p_temp)
{
struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
char mtmp_pl[MLXSW_REG_MTMP_LEN];
u8 module;
- int temp;
int err;
module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count;
mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module,
false, false);
err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
+ if (err) {
+ dev_err(dev, "Failed to query module temperature\n");
+ return err;
+ }
+ mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, NULL);
+
+ return 0;
+}
+
+static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int err, temp;
+
+ err = mlxsw_hwmon_module_temp_get(dev, attr, &temp);
if (err)
return err;
- mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
return sprintf(buf, "%d\n", temp);
}
@@ -270,47 +293,71 @@ static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev,
return sprintf(buf, "%u\n", fault);
}
-static ssize_t
-mlxsw_hwmon_module_temp_critical_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static int mlxsw_hwmon_module_temp_critical_get(struct device *dev,
+ struct device_attribute *attr,
+ int *p_temp)
{
struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
- int temp;
u8 module;
int err;
module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count;
err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module,
- SFP_TEMP_HIGH_WARN, &temp);
+ SFP_TEMP_HIGH_WARN, p_temp);
if (err) {
dev_err(dev, "Failed to query module temperature thresholds\n");
return err;
}
+ return 0;
+}
+
+static ssize_t
+mlxsw_hwmon_module_temp_critical_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int err, temp;
+
+ err = mlxsw_hwmon_module_temp_critical_get(dev, attr, &temp);
+ if (err)
+ return err;
+
return sprintf(buf, "%u\n", temp);
}
+static int mlxsw_hwmon_module_temp_emergency_get(struct device *dev,
+ struct device_attribute *attr,
+ int *p_temp)
+{
+ struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+ container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+ struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+ u8 module;
+ int err;
+
+ module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count;
+ err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module,
+ SFP_TEMP_HIGH_ALARM, p_temp);
+ if (err) {
+ dev_err(dev, "Failed to query module temperature thresholds\n");
+ return err;
+ }
+
+ return 0;
+}
+
static ssize_t
mlxsw_hwmon_module_temp_emergency_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
- container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
- struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
- u8 module;
- int temp;
- int err;
+ int err, temp;
- module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count;
- err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module,
- SFP_TEMP_HIGH_ALARM, &temp);
- if (err) {
- dev_err(dev, "Failed to query module temperature thresholds\n");
+ err = mlxsw_hwmon_module_temp_emergency_get(dev, attr, &temp);
+ if (err)
return err;
- }
return sprintf(buf, "%u\n", temp);
}
@@ -341,6 +388,53 @@ mlxsw_hwmon_gbox_temp_label_show(struct device *dev,
return sprintf(buf, "gearbox %03u\n", index);
}
+static ssize_t mlxsw_hwmon_temp_critical_alarm_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int err, temp, emergency_temp, critic_temp;
+
+ err = mlxsw_hwmon_module_temp_get(dev, attr, &temp);
+ if (err)
+ return err;
+
+ if (temp <= 0)
+ return sprintf(buf, "%d\n", false);
+
+ err = mlxsw_hwmon_module_temp_emergency_get(dev, attr, &emergency_temp);
+ if (err)
+ return err;
+
+ if (temp >= emergency_temp)
+ return sprintf(buf, "%d\n", false);
+
+ err = mlxsw_hwmon_module_temp_critical_get(dev, attr, &critic_temp);
+ if (err)
+ return err;
+
+ return sprintf(buf, "%d\n", temp >= critic_temp);
+}
+
+static ssize_t mlxsw_hwmon_temp_emergency_alarm_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int err, temp, emergency_temp;
+
+ err = mlxsw_hwmon_module_temp_get(dev, attr, &temp);
+ if (err)
+ return err;
+
+ if (temp <= 0)
+ return sprintf(buf, "%d\n", false);
+
+ err = mlxsw_hwmon_module_temp_emergency_get(dev, attr, &emergency_temp);
+ if (err)
+ return err;
+
+ return sprintf(buf, "%d\n", temp >= emergency_temp);
+}
+
enum mlxsw_hwmon_attr_type {
MLXSW_HWMON_ATTR_TYPE_TEMP,
MLXSW_HWMON_ATTR_TYPE_TEMP_MAX,
@@ -354,6 +448,8 @@ enum mlxsw_hwmon_attr_type {
MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG,
MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL,
MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL,
+ MLXSW_HWMON_ATTR_TYPE_TEMP_CRIT_ALARM,
+ MLXSW_HWMON_ATTR_TYPE_TEMP_EMERGENCY_ALARM,
};
static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon,
@@ -444,6 +540,20 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon,
snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
"temp%u_label", num + 1);
break;
+ case MLXSW_HWMON_ATTR_TYPE_TEMP_CRIT_ALARM:
+ mlxsw_hwmon_attr->dev_attr.show =
+ mlxsw_hwmon_temp_critical_alarm_show;
+ mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+ snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+ "temp%u_crit_alarm", num + 1);
+ break;
+ case MLXSW_HWMON_ATTR_TYPE_TEMP_EMERGENCY_ALARM:
+ mlxsw_hwmon_attr->dev_attr.show =
+ mlxsw_hwmon_temp_emergency_alarm_show;
+ mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+ snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+ "temp%u_emergency_alarm", num + 1);
+ break;
default:
WARN_ON(1);
}
@@ -566,6 +676,12 @@ static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon)
mlxsw_hwmon_attr_add(mlxsw_hwmon,
MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL,
i, i);
+ mlxsw_hwmon_attr_add(mlxsw_hwmon,
+ MLXSW_HWMON_ATTR_TYPE_TEMP_CRIT_ALARM,
+ i, i);
+ mlxsw_hwmon_attr_add(mlxsw_hwmon,
+ MLXSW_HWMON_ATTR_TYPE_TEMP_EMERGENCY_ALARM,
+ i, i);
}
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 079b080..485e3e02 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -4174,7 +4174,6 @@ MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4);
#define MLXSW_REG_PTYS_EXT_ETH_SPEED_SGMII_100M BIT(0)
#define MLXSW_REG_PTYS_EXT_ETH_SPEED_1000BASE_X_SGMII BIT(1)
-#define MLXSW_REG_PTYS_EXT_ETH_SPEED_2_5GBASE_X_2_5GMII BIT(2)
#define MLXSW_REG_PTYS_EXT_ETH_SPEED_5GBASE_R BIT(3)
#define MLXSW_REG_PTYS_EXT_ETH_SPEED_XFI_XAUI_1_10G BIT(4)
#define MLXSW_REG_PTYS_EXT_ETH_SPEED_XLAUI_4_XLPPI_4_40G BIT(5)
@@ -4197,7 +4196,6 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32);
#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 BIT(2)
#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 BIT(3)
#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR BIT(4)
-#define MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2 BIT(5)
#define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 BIT(6)
#define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 BIT(7)
#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR BIT(12)
@@ -4210,10 +4208,6 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32);
#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 BIT(20)
#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 BIT(21)
#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 BIT(22)
-#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4 BIT(23)
-#define MLXSW_REG_PTYS_ETH_SPEED_100BASE_TX BIT(24)
-#define MLXSW_REG_PTYS_ETH_SPEED_100BASE_T BIT(25)
-#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T BIT(26)
#define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR BIT(27)
#define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR BIT(28)
#define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR BIT(29)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
index 14c78f7..f08cad5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
@@ -993,22 +993,12 @@ struct mlxsw_sp1_port_link_mode {
static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = {
{
- .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T,
- .mask_ethtool = ETHTOOL_LINK_MODE_100baseT_Full_BIT,
- .speed = SPEED_100,
- },
- {
.mask = MLXSW_REG_PTYS_ETH_SPEED_SGMII |
MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX,
.mask_ethtool = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
.speed = SPEED_1000,
},
{
- .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T,
- .mask_ethtool = ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
- .speed = SPEED_10000,
- },
- {
.mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 |
MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4,
.mask_ethtool = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
@@ -1023,11 +1013,6 @@ static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = {
.speed = SPEED_10000,
},
{
- .mask = MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2,
- .mask_ethtool = ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT,
- .speed = SPEED_20000,
- },
- {
.mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4,
.mask_ethtool = ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT,
.speed = SPEED_40000,
@@ -1092,11 +1077,6 @@ static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = {
.mask_ethtool = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT,
.speed = SPEED_100000,
},
- {
- .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4,
- .mask_ethtool = ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT,
- .speed = SPEED_100000,
- },
};
#define MLXSW_SP1_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp1_port_link_mode)
@@ -1237,14 +1217,6 @@ mlxsw_sp2_mask_ethtool_1000base_x_sgmii[] = {
ARRAY_SIZE(mlxsw_sp2_mask_ethtool_1000base_x_sgmii)
static const enum ethtool_link_mode_bit_indices
-mlxsw_sp2_mask_ethtool_2_5gbase_x_2_5gmii[] = {
- ETHTOOL_LINK_MODE_2500baseX_Full_BIT,
-};
-
-#define MLXSW_SP2_MASK_ETHTOOL_2_5GBASE_X_2_5GMII_LEN \
- ARRAY_SIZE(mlxsw_sp2_mask_ethtool_2_5gbase_x_2_5gmii)
-
-static const enum ethtool_link_mode_bit_indices
mlxsw_sp2_mask_ethtool_5gbase_r[] = {
ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
};
@@ -1408,16 +1380,6 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
.speed = SPEED_1000,
},
{
- .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_2_5GBASE_X_2_5GMII,
- .mask_ethtool = mlxsw_sp2_mask_ethtool_2_5gbase_x_2_5gmii,
- .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_2_5GBASE_X_2_5GMII_LEN,
- .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X |
- MLXSW_SP_PORT_MASK_WIDTH_2X |
- MLXSW_SP_PORT_MASK_WIDTH_4X |
- MLXSW_SP_PORT_MASK_WIDTH_8X,
- .speed = SPEED_2500,
- },
- {
.mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_5GBASE_R,
.mask_ethtool = mlxsw_sp2_mask_ethtool_5gbase_r,
.m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_5GBASE_R_LEN,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
index 9650562..ca8090a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
@@ -314,11 +314,9 @@ static int mlxsw_sp_ptp_parse(struct sk_buff *skb,
u8 *p_message_type,
u16 *p_sequence_id)
{
- unsigned int offset = 0;
unsigned int ptp_class;
- u8 *data;
+ struct ptp_header *hdr;
- data = skb_mac_header(skb);
ptp_class = ptp_classify_raw(skb);
switch (ptp_class & PTP_CLASS_VMASK) {
@@ -329,30 +327,14 @@ static int mlxsw_sp_ptp_parse(struct sk_buff *skb,
return -ERANGE;
}
- if (ptp_class & PTP_CLASS_VLAN)
- offset += VLAN_HLEN;
-
- switch (ptp_class & PTP_CLASS_PMASK) {
- case PTP_CLASS_IPV4:
- offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
- break;
- case PTP_CLASS_IPV6:
- offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
- break;
- case PTP_CLASS_L2:
- offset += ETH_HLEN;
- break;
- default:
- return -ERANGE;
- }
-
- /* PTP header is 34 bytes. */
- if (skb->len < offset + 34)
+ hdr = ptp_parse_header(skb, ptp_class);
+ if (!hdr)
return -EINVAL;
- *p_message_type = data[offset] & 0x0f;
- *p_domain_number = data[offset + 4];
- *p_sequence_id = (u16)(data[offset + 30]) << 8 | data[offset + 31];
+ *p_message_type = ptp_get_msgtype(hdr, ptp_class);
+ *p_domain_number = hdr->domain_number;
+ *p_sequence_id = be16_to_cpu(hdr->sequence_id);
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 2e41c55..433f14a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -291,7 +291,7 @@ static void mlxsw_sp_rx_sample_listener(struct sk_buff *skb, u8 local_port,
static const struct mlxsw_sp_trap_policer_item
mlxsw_sp_trap_policer_items_arr[] = {
{
- .policer = MLXSW_SP_TRAP_POLICER(1, 10 * 1024, 128),
+ .policer = MLXSW_SP_TRAP_POLICER(1, 10 * 1024, 4096),
},
{
.policer = MLXSW_SP_TRAP_POLICER(2, 128, 128),
@@ -303,25 +303,25 @@ mlxsw_sp_trap_policer_items_arr[] = {
.policer = MLXSW_SP_TRAP_POLICER(4, 128, 128),
},
{
- .policer = MLXSW_SP_TRAP_POLICER(5, 16 * 1024, 128),
+ .policer = MLXSW_SP_TRAP_POLICER(5, 16 * 1024, 8192),
},
{
.policer = MLXSW_SP_TRAP_POLICER(6, 128, 128),
},
{
- .policer = MLXSW_SP_TRAP_POLICER(7, 1024, 128),
+ .policer = MLXSW_SP_TRAP_POLICER(7, 1024, 512),
},
{
- .policer = MLXSW_SP_TRAP_POLICER(8, 20 * 1024, 1024),
+ .policer = MLXSW_SP_TRAP_POLICER(8, 20 * 1024, 8192),
},
{
.policer = MLXSW_SP_TRAP_POLICER(9, 128, 128),
},
{
- .policer = MLXSW_SP_TRAP_POLICER(10, 1024, 128),
+ .policer = MLXSW_SP_TRAP_POLICER(10, 1024, 512),
},
{
- .policer = MLXSW_SP_TRAP_POLICER(11, 360, 128),
+ .policer = MLXSW_SP_TRAP_POLICER(11, 256, 128),
},
{
.policer = MLXSW_SP_TRAP_POLICER(12, 128, 128),
@@ -330,19 +330,19 @@ mlxsw_sp_trap_policer_items_arr[] = {
.policer = MLXSW_SP_TRAP_POLICER(13, 128, 128),
},
{
- .policer = MLXSW_SP_TRAP_POLICER(14, 1024, 128),
+ .policer = MLXSW_SP_TRAP_POLICER(14, 1024, 512),
},
{
- .policer = MLXSW_SP_TRAP_POLICER(15, 1024, 128),
+ .policer = MLXSW_SP_TRAP_POLICER(15, 1024, 512),
},
{
- .policer = MLXSW_SP_TRAP_POLICER(16, 24 * 1024, 4096),
+ .policer = MLXSW_SP_TRAP_POLICER(16, 24 * 1024, 16384),
},
{
- .policer = MLXSW_SP_TRAP_POLICER(17, 19 * 1024, 4096),
+ .policer = MLXSW_SP_TRAP_POLICER(17, 19 * 1024, 8192),
},
{
- .policer = MLXSW_SP_TRAP_POLICER(18, 1024, 128),
+ .policer = MLXSW_SP_TRAP_POLICER(18, 1024, 512),
},
{
.policer = MLXSW_SP_TRAP_POLICER(19, 1024, 512),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index 6f9a725..5023d91 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -551,16 +551,6 @@ struct mlxsw_sx_port_link_mode {
static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = {
{
- .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T,
- .supported = SUPPORTED_100baseT_Full,
- .advertised = ADVERTISED_100baseT_Full,
- .speed = 100,
- },
- {
- .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_TX,
- .speed = 100,
- },
- {
.mask = MLXSW_REG_PTYS_ETH_SPEED_SGMII |
MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX,
.supported = SUPPORTED_1000baseKX_Full,
@@ -568,12 +558,6 @@ static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = {
.speed = 1000,
},
{
- .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T,
- .supported = SUPPORTED_10000baseT_Full,
- .advertised = ADVERTISED_10000baseT_Full,
- .speed = 10000,
- },
- {
.mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 |
MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4,
.supported = SUPPORTED_10000baseKX4_Full,
@@ -590,12 +574,6 @@ static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = {
.speed = 10000,
},
{
- .mask = MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2,
- .supported = SUPPORTED_20000baseKR2_Full,
- .advertised = ADVERTISED_20000baseKR2_Full,
- .speed = 20000,
- },
- {
.mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4,
.supported = SUPPORTED_40000baseCR4_Full,
.advertised = ADVERTISED_40000baseCR4_Full,
@@ -634,8 +612,7 @@ static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = {
{
.mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 |
MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 |
- MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 |
- MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4,
+ MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4,
.speed = 100000,
},
};
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index bf51628..a2926b1b 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -24,6 +24,7 @@
#define NFP_FLOWER_LAYER_VXLAN BIT(7)
#define NFP_FLOWER_LAYER2_GRE BIT(0)
+#define NFP_FLOWER_LAYER2_QINQ BIT(4)
#define NFP_FLOWER_LAYER2_GENEVE BIT(5)
#define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6)
#define NFP_FLOWER_LAYER2_TUN_IPV6 BIT(7)
@@ -319,6 +320,22 @@ struct nfp_flower_mac_mpls {
__be32 mpls_lse;
};
+/* VLAN details (2W/8B)
+ * 3 2 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | outer_tpid | outer_tci |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | inner_tpid | inner_tci |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_vlan {
+ __be16 outer_tpid;
+ __be16 outer_tci;
+ __be16 inner_tpid;
+ __be16 inner_tci;
+};
+
/* L4 ports (for UDP, TCP, SCTP) (1W/4B)
* 3 2 1
* 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 3bf9c1a..caf12ee 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -30,6 +30,8 @@ struct nfp_app;
#define NFP_FLOWER_MASK_ELEMENT_RS 1
#define NFP_FLOWER_MASK_HASH_BITS 10
+#define NFP_FLOWER_KEY_MAX_LW 32
+
#define NFP_FL_META_FLAG_MANAGE_MASK BIT(7)
#define NFP_FL_MASK_REUSE_TIME_NS 40000
@@ -44,6 +46,7 @@ struct nfp_app;
#define NFP_FL_FEATS_FLOW_MOD BIT(5)
#define NFP_FL_FEATS_PRE_TUN_RULES BIT(6)
#define NFP_FL_FEATS_IPV6_TUN BIT(7)
+#define NFP_FL_FEATS_VLAN_QINQ BIT(8)
#define NFP_FL_FEATS_HOST_ACK BIT(31)
#define NFP_FL_ENABLE_FLOW_MERGE BIT(0)
@@ -57,7 +60,8 @@ struct nfp_app;
NFP_FL_FEATS_VF_RLIM | \
NFP_FL_FEATS_FLOW_MOD | \
NFP_FL_FEATS_PRE_TUN_RULES | \
- NFP_FL_FEATS_IPV6_TUN)
+ NFP_FL_FEATS_IPV6_TUN | \
+ NFP_FL_FEATS_VLAN_QINQ)
struct nfp_fl_mask_id {
struct circ_buf mask_id_free_list;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index f7f01e2..255a4df 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -10,7 +10,7 @@
static void
nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext,
struct nfp_flower_meta_tci *msk,
- struct flow_rule *rule, u8 key_type)
+ struct flow_rule *rule, u8 key_type, bool qinq_sup)
{
u16 tmp_tci;
@@ -24,7 +24,7 @@ nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext,
msk->nfp_flow_key_layer = key_type;
msk->mask_id = ~0;
- if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+ if (!qinq_sup && flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
struct flow_match_vlan match;
flow_rule_match_vlan(rule, &match);
@@ -231,6 +231,50 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext,
}
static void
+nfp_flower_fill_vlan(struct flow_dissector_key_vlan *key,
+ struct nfp_flower_vlan *frame,
+ bool outer_vlan)
+{
+ u16 tci;
+
+ tci = NFP_FLOWER_MASK_VLAN_PRESENT;
+ tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
+ key->vlan_priority) |
+ FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
+ key->vlan_id);
+
+ if (outer_vlan) {
+ frame->outer_tci = cpu_to_be16(tci);
+ frame->outer_tpid = key->vlan_tpid;
+ } else {
+ frame->inner_tci = cpu_to_be16(tci);
+ frame->inner_tpid = key->vlan_tpid;
+ }
+}
+
+static void
+nfp_flower_compile_vlan(struct nfp_flower_vlan *ext,
+ struct nfp_flower_vlan *msk,
+ struct flow_rule *rule)
+{
+ struct flow_match_vlan match;
+
+ memset(ext, 0, sizeof(struct nfp_flower_vlan));
+ memset(msk, 0, sizeof(struct nfp_flower_vlan));
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+ flow_rule_match_vlan(rule, &match);
+ nfp_flower_fill_vlan(match.key, ext, true);
+ nfp_flower_fill_vlan(match.mask, msk, true);
+ }
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
+ flow_rule_match_cvlan(rule, &match);
+ nfp_flower_fill_vlan(match.key, ext, false);
+ nfp_flower_fill_vlan(match.mask, msk, false);
+ }
+}
+
+static void
nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext,
struct nfp_flower_ipv4 *msk, struct flow_rule *rule)
{
@@ -433,7 +477,10 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
struct netlink_ext_ack *extack)
{
struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
+ struct nfp_flower_priv *priv = app->priv;
+ bool qinq_sup;
u32 port_id;
+ int ext_len;
int err;
u8 *ext;
u8 *msk;
@@ -446,9 +493,11 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
ext = nfp_flow->unmasked_data;
msk = nfp_flow->mask_data;
+ qinq_sup = !!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ);
+
nfp_flower_compile_meta_tci((struct nfp_flower_meta_tci *)ext,
(struct nfp_flower_meta_tci *)msk,
- rule, key_ls->key_layer);
+ rule, key_ls->key_layer, qinq_sup);
ext += sizeof(struct nfp_flower_meta_tci);
msk += sizeof(struct nfp_flower_meta_tci);
@@ -547,6 +596,14 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
}
}
+ if (NFP_FLOWER_LAYER2_QINQ & key_ls->key_layer_two) {
+ nfp_flower_compile_vlan((struct nfp_flower_vlan *)ext,
+ (struct nfp_flower_vlan *)msk,
+ rule);
+ ext += sizeof(struct nfp_flower_vlan);
+ msk += sizeof(struct nfp_flower_vlan);
+ }
+
if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN ||
key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE) {
if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) {
@@ -589,5 +646,15 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
}
}
+ /* Check that the flow key does not exceed the maximum limit.
+ * All structures in the key is multiples of 4 bytes, so use u32.
+ */
+ ext_len = (u32 *)ext - (u32 *)nfp_flow->unmasked_data;
+ if (ext_len > NFP_FLOWER_KEY_MAX_LW) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "unsupported offload: flow key too long");
+ return -EOPNOTSUPP;
+ }
+
return 0;
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 36356f96..1c59aff 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -31,6 +31,7 @@
BIT(FLOW_DISSECTOR_KEY_PORTS) | \
BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_VLAN) | \
+ BIT(FLOW_DISSECTOR_KEY_CVLAN) | \
BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \
BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \
@@ -66,7 +67,8 @@
NFP_FLOWER_LAYER_IPV6)
#define NFP_FLOWER_PRE_TUN_RULE_FIELDS \
- (NFP_FLOWER_LAYER_PORT | \
+ (NFP_FLOWER_LAYER_EXT_META | \
+ NFP_FLOWER_LAYER_PORT | \
NFP_FLOWER_LAYER_MAC | \
NFP_FLOWER_LAYER_IPV4 | \
NFP_FLOWER_LAYER_IPV6)
@@ -285,6 +287,30 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support VLAN PCP offload");
return -EOPNOTSUPP;
}
+ if (priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ &&
+ !(key_layer_two & NFP_FLOWER_LAYER2_QINQ)) {
+ key_layer |= NFP_FLOWER_LAYER_EXT_META;
+ key_size += sizeof(struct nfp_flower_ext_meta);
+ key_size += sizeof(struct nfp_flower_vlan);
+ key_layer_two |= NFP_FLOWER_LAYER2_QINQ;
+ }
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
+ struct flow_match_vlan cvlan;
+
+ if (!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ)) {
+ NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support VLAN QinQ offload");
+ return -EOPNOTSUPP;
+ }
+
+ flow_rule_match_vlan(rule, &cvlan);
+ if (!(key_layer_two & NFP_FLOWER_LAYER2_QINQ)) {
+ key_layer |= NFP_FLOWER_LAYER_EXT_META;
+ key_size += sizeof(struct nfp_flower_ext_meta);
+ key_size += sizeof(struct nfp_flower_vlan);
+ key_layer_two |= NFP_FLOWER_LAYER2_QINQ;
+ }
}
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
@@ -1066,6 +1092,7 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app,
* nfp_flower_validate_pre_tun_rule()
* @app: Pointer to the APP handle
* @flow: Pointer to NFP flow representation of rule
+ * @key_ls: Pointer to NFP key layers structure
* @extack: Netlink extended ACK report
*
* Verifies the flow as a pre-tunnel rule.
@@ -1075,10 +1102,13 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app,
static int
nfp_flower_validate_pre_tun_rule(struct nfp_app *app,
struct nfp_fl_payload *flow,
+ struct nfp_fl_key_ls *key_ls,
struct netlink_ext_ack *extack)
{
+ struct nfp_flower_priv *priv = app->priv;
struct nfp_flower_meta_tci *meta_tci;
struct nfp_flower_mac_mpls *mac;
+ u8 *ext = flow->unmasked_data;
struct nfp_fl_act_head *act;
u8 *mask = flow->mask_data;
bool vlan = false;
@@ -1086,20 +1116,25 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app,
u8 key_layer;
meta_tci = (struct nfp_flower_meta_tci *)flow->unmasked_data;
- if (meta_tci->tci & cpu_to_be16(NFP_FLOWER_MASK_VLAN_PRESENT)) {
- u16 vlan_tci = be16_to_cpu(meta_tci->tci);
+ key_layer = key_ls->key_layer;
+ if (!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ)) {
+ if (meta_tci->tci & cpu_to_be16(NFP_FLOWER_MASK_VLAN_PRESENT)) {
+ u16 vlan_tci = be16_to_cpu(meta_tci->tci);
- vlan_tci &= ~NFP_FLOWER_MASK_VLAN_PRESENT;
- flow->pre_tun_rule.vlan_tci = cpu_to_be16(vlan_tci);
- vlan = true;
- } else {
- flow->pre_tun_rule.vlan_tci = cpu_to_be16(0xffff);
+ vlan_tci &= ~NFP_FLOWER_MASK_VLAN_PRESENT;
+ flow->pre_tun_rule.vlan_tci = cpu_to_be16(vlan_tci);
+ vlan = true;
+ } else {
+ flow->pre_tun_rule.vlan_tci = cpu_to_be16(0xffff);
+ }
}
- key_layer = meta_tci->nfp_flow_key_layer;
if (key_layer & ~NFP_FLOWER_PRE_TUN_RULE_FIELDS) {
NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: too many match fields");
return -EOPNOTSUPP;
+ } else if (key_ls->key_layer_two & ~NFP_FLOWER_LAYER2_QINQ) {
+ NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: non-vlan in extended match fields");
+ return -EOPNOTSUPP;
}
if (!(key_layer & NFP_FLOWER_LAYER_MAC)) {
@@ -1109,7 +1144,13 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app,
/* Skip fields known to exist. */
mask += sizeof(struct nfp_flower_meta_tci);
+ ext += sizeof(struct nfp_flower_meta_tci);
+ if (key_ls->key_layer_two) {
+ mask += sizeof(struct nfp_flower_ext_meta);
+ ext += sizeof(struct nfp_flower_ext_meta);
+ }
mask += sizeof(struct nfp_flower_in_port);
+ ext += sizeof(struct nfp_flower_in_port);
/* Ensure destination MAC address is fully matched. */
mac = (struct nfp_flower_mac_mpls *)mask;
@@ -1118,6 +1159,8 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app,
return -EOPNOTSUPP;
}
+ mask += sizeof(struct nfp_flower_mac_mpls);
+ ext += sizeof(struct nfp_flower_mac_mpls);
if (key_layer & NFP_FLOWER_LAYER_IPV4 ||
key_layer & NFP_FLOWER_LAYER_IPV6) {
/* Flags and proto fields have same offset in IPv4 and IPv6. */
@@ -1130,7 +1173,6 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app,
sizeof(struct nfp_flower_ipv4) :
sizeof(struct nfp_flower_ipv6);
- mask += sizeof(struct nfp_flower_mac_mpls);
/* Ensure proto and flags are the only IP layer fields. */
for (i = 0; i < size; i++)
@@ -1138,6 +1180,25 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app,
NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: only flags and proto can be matched in ip header");
return -EOPNOTSUPP;
}
+ ext += size;
+ mask += size;
+ }
+
+ if ((priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ)) {
+ if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_QINQ) {
+ struct nfp_flower_vlan *vlan_tags;
+ u16 vlan_tci;
+
+ vlan_tags = (struct nfp_flower_vlan *)ext;
+
+ vlan_tci = be16_to_cpu(vlan_tags->outer_tci);
+
+ vlan_tci &= ~NFP_FLOWER_MASK_VLAN_PRESENT;
+ flow->pre_tun_rule.vlan_tci = cpu_to_be16(vlan_tci);
+ vlan = true;
+ } else {
+ flow->pre_tun_rule.vlan_tci = cpu_to_be16(0xffff);
+ }
}
/* Action must be a single egress or pop_vlan and egress. */
@@ -1220,7 +1281,7 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
goto err_destroy_flow;
if (flow_pay->pre_tun_rule.dev) {
- err = nfp_flower_validate_pre_tun_rule(app, flow_pay, extack);
+ err = nfp_flower_validate_pre_tun_rule(app, flow_pay, key_layer, extack);
if (err)
goto err_destroy_flow;
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h
index f5a910c..084a924 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic.h
@@ -42,13 +42,11 @@ struct ionic {
struct ionic_dev_bar bars[IONIC_BARS_MAX];
unsigned int num_bars;
struct ionic_identity ident;
- struct list_head lifs;
- struct ionic_lif *master_lif;
+ struct ionic_lif *lif;
unsigned int nnqs_per_lif;
unsigned int neqs_per_lif;
unsigned int ntxqs_per_lif;
unsigned int nrxqs_per_lif;
- DECLARE_BITMAP(lifbits, IONIC_LIFS_MAX);
unsigned int nintrs;
DECLARE_BITMAP(intrs, IONIC_INTR_CTRL_REGS_MAX);
struct work_struct nb_work;
@@ -66,9 +64,6 @@ struct ionic_admin_ctx {
union ionic_adminq_comp comp;
};
-int ionic_napi(struct napi_struct *napi, int budget, ionic_cq_cb cb,
- ionic_cq_done_cb done_cb, void *done_arg);
-
int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx);
int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_wait);
int ionic_set_dma_mask(struct ionic *ionic);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
index 85c686c..d1d6fb6 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
@@ -294,21 +294,21 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_out_port_reset;
}
- err = ionic_lifs_size(ionic);
+ err = ionic_lif_size(ionic);
if (err) {
- dev_err(dev, "Cannot size LIFs: %d, aborting\n", err);
+ dev_err(dev, "Cannot size LIF: %d, aborting\n", err);
goto err_out_port_reset;
}
- err = ionic_lifs_alloc(ionic);
+ err = ionic_lif_alloc(ionic);
if (err) {
- dev_err(dev, "Cannot allocate LIFs: %d, aborting\n", err);
+ dev_err(dev, "Cannot allocate LIF: %d, aborting\n", err);
goto err_out_free_irqs;
}
- err = ionic_lifs_init(ionic);
+ err = ionic_lif_init(ionic->lif);
if (err) {
- dev_err(dev, "Cannot init LIFs: %d, aborting\n", err);
+ dev_err(dev, "Cannot init LIF: %d, aborting\n", err);
goto err_out_free_lifs;
}
@@ -321,9 +321,9 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
dev_err(dev, "Cannot enable existing VFs: %d\n", err);
}
- err = ionic_lifs_register(ionic);
+ err = ionic_lif_register(ionic->lif);
if (err) {
- dev_err(dev, "Cannot register LIFs: %d, aborting\n", err);
+ dev_err(dev, "Cannot register LIF: %d, aborting\n", err);
goto err_out_deinit_lifs;
}
@@ -336,12 +336,13 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
return 0;
err_out_deregister_lifs:
- ionic_lifs_unregister(ionic);
+ ionic_lif_unregister(ionic->lif);
err_out_deinit_lifs:
ionic_vf_dealloc(ionic);
- ionic_lifs_deinit(ionic);
+ ionic_lif_deinit(ionic->lif);
err_out_free_lifs:
- ionic_lifs_free(ionic);
+ ionic_lif_free(ionic->lif);
+ ionic->lif = NULL;
err_out_free_irqs:
ionic_bus_free_irq_vectors(ionic);
err_out_port_reset:
@@ -377,11 +378,12 @@ static void ionic_remove(struct pci_dev *pdev)
if (!ionic)
return;
- if (ionic->master_lif) {
+ if (ionic->lif) {
ionic_devlink_unregister(ionic);
- ionic_lifs_unregister(ionic);
- ionic_lifs_deinit(ionic);
- ionic_lifs_free(ionic);
+ ionic_lif_unregister(ionic->lif);
+ ionic_lif_deinit(ionic->lif);
+ ionic_lif_free(ionic->lif);
+ ionic->lif = NULL;
ionic_bus_free_irq_vectors(ionic);
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c
index 11621cc..683bbbf 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c
@@ -76,7 +76,7 @@ static int q_tail_show(struct seq_file *seq, void *v)
{
struct ionic_queue *q = seq->private;
- seq_printf(seq, "%d\n", q->tail->index);
+ seq_printf(seq, "%d\n", q->tail_idx);
return 0;
}
@@ -86,7 +86,7 @@ static int q_head_show(struct seq_file *seq, void *v)
{
struct ionic_queue *q = seq->private;
- seq_printf(seq, "%d\n", q->head->index);
+ seq_printf(seq, "%d\n", q->head_idx);
return 0;
}
@@ -96,7 +96,7 @@ static int cq_tail_show(struct seq_file *seq, void *v)
{
struct ionic_cq *cq = seq->private;
- seq_printf(seq, "%d\n", cq->tail->index);
+ seq_printf(seq, "%d\n", cq->tail_idx);
return 0;
}
@@ -112,7 +112,8 @@ static const struct debugfs_reg32 intr_ctrl_regs[] = {
void ionic_debugfs_add_qcq(struct ionic_lif *lif, struct ionic_qcq *qcq)
{
- struct dentry *q_dentry, *cq_dentry, *intr_dentry, *stats_dentry;
+ struct dentry *qcq_dentry, *q_dentry, *cq_dentry;
+ struct dentry *intr_dentry, *stats_dentry;
struct ionic_dev *idev = &lif->ionic->idev;
struct debugfs_regset32 *intr_ctrl_regset;
struct ionic_intr_info *intr = &qcq->intr;
@@ -121,21 +122,21 @@ void ionic_debugfs_add_qcq(struct ionic_lif *lif, struct ionic_qcq *qcq)
struct ionic_queue *q = &qcq->q;
struct ionic_cq *cq = &qcq->cq;
- qcq->dentry = debugfs_create_dir(q->name, lif->dentry);
+ qcq_dentry = debugfs_create_dir(q->name, lif->dentry);
+ if (IS_ERR_OR_NULL(qcq_dentry))
+ return;
+ qcq->dentry = qcq_dentry;
- debugfs_create_x32("total_size", 0400, qcq->dentry, &qcq->total_size);
- debugfs_create_x64("base_pa", 0400, qcq->dentry, &qcq->base_pa);
+ debugfs_create_x64("q_base_pa", 0400, qcq_dentry, &qcq->q_base_pa);
+ debugfs_create_x32("q_size", 0400, qcq_dentry, &qcq->q_size);
+ debugfs_create_x64("cq_base_pa", 0400, qcq_dentry, &qcq->cq_base_pa);
+ debugfs_create_x32("cq_size", 0400, qcq_dentry, &qcq->cq_size);
+ debugfs_create_x64("sg_base_pa", 0400, qcq_dentry, &qcq->sg_base_pa);
+ debugfs_create_x32("sg_size", 0400, qcq_dentry, &qcq->sg_size);
q_dentry = debugfs_create_dir("q", qcq->dentry);
debugfs_create_u32("index", 0400, q_dentry, &q->index);
- debugfs_create_x64("base_pa", 0400, q_dentry, &q->base_pa);
- if (qcq->flags & IONIC_QCQ_F_SG) {
- debugfs_create_x64("sg_base_pa", 0400, q_dentry,
- &q->sg_base_pa);
- debugfs_create_u32("sg_desc_size", 0400, q_dentry,
- &q->sg_desc_size);
- }
debugfs_create_u32("num_descs", 0400, q_dentry, &q->num_descs);
debugfs_create_u32("desc_size", 0400, q_dentry, &q->desc_size);
debugfs_create_u32("pid", 0400, q_dentry, &q->pid);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
index d83eff0..6068f51 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -21,8 +21,8 @@ static void ionic_watchdog_cb(struct timer_list *t)
hb = ionic_heartbeat_check(ionic);
- if (hb >= 0 && ionic->master_lif)
- ionic_link_status_check_request(ionic->master_lif);
+ if (hb >= 0 && ionic->lif)
+ ionic_link_status_check_request(ionic->lif);
}
void ionic_init_devinfo(struct ionic *ionic)
@@ -126,7 +126,7 @@ int ionic_heartbeat_check(struct ionic *ionic)
/* is this a transition? */
if (fw_status != idev->last_fw_status &&
idev->last_fw_status != 0xff) {
- struct ionic_lif *lif = ionic->master_lif;
+ struct ionic_lif *lif = ionic->lif;
bool trigger = false;
if (!fw_status || fw_status == 0xff) {
@@ -467,9 +467,7 @@ int ionic_cq_init(struct ionic_lif *lif, struct ionic_cq *cq,
struct ionic_intr_info *intr,
unsigned int num_descs, size_t desc_size)
{
- struct ionic_cq_info *cur;
unsigned int ring_size;
- unsigned int i;
if (desc_size == 0 || !is_power_of_2(num_descs))
return -EINVAL;
@@ -482,22 +480,9 @@ int ionic_cq_init(struct ionic_lif *lif, struct ionic_cq *cq,
cq->bound_intr = intr;
cq->num_descs = num_descs;
cq->desc_size = desc_size;
- cq->tail = cq->info;
+ cq->tail_idx = 0;
cq->done_color = 1;
- cur = cq->info;
-
- for (i = 0; i < num_descs; i++) {
- if (i + 1 == num_descs) {
- cur->next = cq->info;
- cur->last = true;
- } else {
- cur->next = cur + 1;
- }
- cur->index = i;
- cur++;
- }
-
return 0;
}
@@ -522,15 +507,18 @@ unsigned int ionic_cq_service(struct ionic_cq *cq, unsigned int work_to_do,
ionic_cq_cb cb, ionic_cq_done_cb done_cb,
void *done_arg)
{
+ struct ionic_cq_info *cq_info;
unsigned int work_done = 0;
if (work_to_do == 0)
return 0;
- while (cb(cq, cq->tail)) {
- if (cq->tail->last)
+ cq_info = &cq->info[cq->tail_idx];
+ while (cb(cq, cq_info)) {
+ if (cq->tail_idx == cq->num_descs - 1)
cq->done_color = !cq->done_color;
- cq->tail = cq->tail->next;
+ cq->tail_idx = (cq->tail_idx + 1) & (cq->num_descs - 1);
+ cq_info = &cq->info[cq->tail_idx];
DEBUG_STATS_CQE_CNT(cq);
if (++work_done >= work_to_do)
@@ -548,9 +536,7 @@ int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev,
unsigned int num_descs, size_t desc_size,
size_t sg_desc_size, unsigned int pid)
{
- struct ionic_desc_info *cur;
unsigned int ring_size;
- unsigned int i;
if (desc_size == 0 || !is_power_of_2(num_descs))
return -EINVAL;
@@ -565,24 +551,12 @@ int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev,
q->num_descs = num_descs;
q->desc_size = desc_size;
q->sg_desc_size = sg_desc_size;
- q->tail = q->info;
- q->head = q->tail;
+ q->tail_idx = 0;
+ q->head_idx = 0;
q->pid = pid;
snprintf(q->name, sizeof(q->name), "L%d-%s%u", lif->index, name, index);
- cur = q->info;
-
- for (i = 0; i < num_descs; i++) {
- if (i + 1 == num_descs)
- cur->next = q->info;
- else
- cur->next = cur + 1;
- cur->index = i;
- cur->left = num_descs - i;
- cur++;
- }
-
return 0;
}
@@ -614,19 +588,22 @@ void ionic_q_post(struct ionic_queue *q, bool ring_doorbell, ionic_desc_cb cb,
void *cb_arg)
{
struct device *dev = q->lif->ionic->dev;
+ struct ionic_desc_info *desc_info;
struct ionic_lif *lif = q->lif;
- q->head->cb = cb;
- q->head->cb_arg = cb_arg;
- q->head = q->head->next;
+ desc_info = &q->info[q->head_idx];
+ desc_info->cb = cb;
+ desc_info->cb_arg = cb_arg;
+
+ q->head_idx = (q->head_idx + 1) & (q->num_descs - 1);
dev_dbg(dev, "lif=%d qname=%s qid=%d qtype=%d p_index=%d ringdb=%d\n",
q->lif->index, q->name, q->hw_type, q->hw_index,
- q->head->index, ring_doorbell);
+ q->head_idx, ring_doorbell);
if (ring_doorbell)
ionic_dbell_ring(lif->kern_dbpage, q->hw_type,
- q->dbval | q->head->index);
+ q->dbval | q->head_idx);
}
static bool ionic_q_is_posted(struct ionic_queue *q, unsigned int pos)
@@ -634,8 +611,8 @@ static bool ionic_q_is_posted(struct ionic_queue *q, unsigned int pos)
unsigned int mask, tail, head;
mask = q->num_descs - 1;
- tail = q->tail->index;
- head = q->head->index;
+ tail = q->tail_idx;
+ head = q->head_idx;
return ((pos - tail) & mask) < ((head - tail) & mask);
}
@@ -646,20 +623,22 @@ void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
struct ionic_desc_info *desc_info;
ionic_desc_cb cb;
void *cb_arg;
+ u16 index;
/* check for empty queue */
- if (q->tail->index == q->head->index)
+ if (q->tail_idx == q->head_idx)
return;
/* stop index must be for a descriptor that is not yet completed */
if (unlikely(!ionic_q_is_posted(q, stop_index)))
dev_err(q->lif->ionic->dev,
"ionic stop is not posted %s stop %u tail %u head %u\n",
- q->name, stop_index, q->tail->index, q->head->index);
+ q->name, stop_index, q->tail_idx, q->head_idx);
do {
- desc_info = q->tail;
- q->tail = desc_info->next;
+ desc_info = &q->info[q->tail_idx];
+ index = q->tail_idx;
+ q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
cb = desc_info->cb;
cb_arg = desc_info->cb_arg;
@@ -669,5 +648,5 @@ void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
if (cb)
cb(q, desc_info, cq_info, cb_arg);
- } while (desc_info->index != stop_index);
+ } while (index != stop_index);
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index d5cba50..4a35174 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -149,10 +149,13 @@ struct ionic_dev {
};
struct ionic_cq_info {
- void *cq_desc;
- struct ionic_cq_info *next;
- unsigned int index;
- bool last;
+ union {
+ void *cq_desc;
+ struct ionic_txq_comp *txcq;
+ struct ionic_rxq_comp *rxcq;
+ struct ionic_admin_comp *admincq;
+ struct ionic_notifyq_event *notifyq;
+ };
};
struct ionic_queue;
@@ -169,11 +172,17 @@ struct ionic_page_info {
};
struct ionic_desc_info {
- void *desc;
- void *sg_desc;
- struct ionic_desc_info *next;
- unsigned int index;
- unsigned int left;
+ union {
+ void *desc;
+ struct ionic_txq_desc *txq_desc;
+ struct ionic_rxq_desc *rxq_desc;
+ struct ionic_admin_cmd *adminq_desc;
+ };
+ union {
+ void *sg_desc;
+ struct ionic_txq_sg_desc *txq_sg_desc;
+ struct ionic_rxq_sg_desc *rxq_sgl_desc;
+ };
unsigned int npages;
struct ionic_page_info pages[IONIC_RX_MAX_SG_ELEMS + 1];
ionic_desc_cb cb;
@@ -183,25 +192,35 @@ struct ionic_desc_info {
#define IONIC_QUEUE_NAME_MAX_SZ 32
struct ionic_queue {
- u64 dbell_count;
- u64 drop;
- u64 stop;
- u64 wake;
+ struct device *dev;
struct ionic_lif *lif;
struct ionic_desc_info *info;
- struct ionic_desc_info *tail;
- struct ionic_desc_info *head;
- struct ionic_dev *idev;
+ u16 head_idx;
+ u16 tail_idx;
unsigned int index;
+ unsigned int num_descs;
+ u64 dbell_count;
+ u64 stop;
+ u64 wake;
+ u64 drop;
+ struct ionic_dev *idev;
unsigned int type;
unsigned int hw_index;
unsigned int hw_type;
u64 dbval;
- void *base;
- void *sg_base;
+ union {
+ void *base;
+ struct ionic_txq_desc *txq;
+ struct ionic_rxq_desc *rxq;
+ struct ionic_admin_cmd *adminq;
+ };
+ union {
+ void *sg_base;
+ struct ionic_txq_sg_desc *txq_sgl;
+ struct ionic_rxq_sg_desc *rxq_sgl;
+ };
dma_addr_t base_pa;
dma_addr_t sg_base_pa;
- unsigned int num_descs;
unsigned int desc_size;
unsigned int sg_desc_size;
unsigned int pid;
@@ -221,17 +240,17 @@ struct ionic_intr_info {
};
struct ionic_cq {
- void *base;
- dma_addr_t base_pa;
struct ionic_lif *lif;
struct ionic_cq_info *info;
- struct ionic_cq_info *tail;
struct ionic_queue *bound_q;
struct ionic_intr_info *bound_intr;
+ u16 tail_idx;
bool done_color;
unsigned int num_descs;
- u64 compl_count;
unsigned int desc_size;
+ u64 compl_count;
+ void *base;
+ dma_addr_t base_pa;
};
struct ionic;
@@ -246,12 +265,12 @@ static inline void ionic_intr_init(struct ionic_dev *idev,
static inline unsigned int ionic_q_space_avail(struct ionic_queue *q)
{
- unsigned int avail = q->tail->index;
+ unsigned int avail = q->tail_idx;
- if (q->head->index >= avail)
- avail += q->head->left - 1;
+ if (q->head_idx >= avail)
+ avail += q->num_descs - q->head_idx - 1;
else
- avail -= q->head->index + 1;
+ avail -= q->head_idx + 1;
return avail;
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c
index c4f4fd4..8d9fb2e 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c
@@ -85,7 +85,7 @@ int ionic_devlink_register(struct ionic *ionic)
dev_err(ionic->dev, "devlink_port_register failed: %d\n", err);
else
devlink_port_type_eth_set(&ionic->dl_port,
- ionic->master_lif->netdev);
+ ionic->lif->netdev);
return err;
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
index 3c57c33..0d14659 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
@@ -298,8 +298,8 @@ static void ionic_get_pauseparam(struct net_device *netdev,
pause_type = lif->ionic->idev.port_info->config.pause_type;
if (pause_type) {
- pause->rx_pause = pause_type & IONIC_PAUSE_F_RX ? 1 : 0;
- pause->tx_pause = pause_type & IONIC_PAUSE_F_TX ? 1 : 0;
+ pause->rx_pause = (pause_type & IONIC_PAUSE_F_RX) ? 1 : 0;
+ pause->tx_pause = (pause_type & IONIC_PAUSE_F_TX) ? 1 : 0;
}
}
@@ -454,7 +454,7 @@ static int ionic_set_coalesce(struct net_device *netdev,
if (test_bit(IONIC_LIF_F_UP, lif->state)) {
for (i = 0; i < lif->nxqs; i++) {
- qcq = lif->rxqcqs[i].qcq;
+ qcq = lif->rxqcqs[i];
ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
qcq->intr.index,
lif->rx_coalesce_hw);
@@ -471,7 +471,7 @@ static int ionic_set_coalesce(struct net_device *netdev,
if (test_bit(IONIC_LIF_F_UP, lif->state)) {
for (i = 0; i < lif->nxqs; i++) {
- qcq = lif->txqcqs[i].qcq;
+ qcq = lif->txqcqs[i];
ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
qcq->intr.index,
lif->tx_coalesce_hw);
@@ -493,18 +493,14 @@ static void ionic_get_ringparam(struct net_device *netdev,
ring->rx_pending = lif->nrxq_descs;
}
-static void ionic_set_ringsize(struct ionic_lif *lif, void *arg)
-{
- struct ethtool_ringparam *ring = arg;
-
- lif->ntxq_descs = ring->tx_pending;
- lif->nrxq_descs = ring->rx_pending;
-}
-
static int ionic_set_ringparam(struct net_device *netdev,
struct ethtool_ringparam *ring)
{
struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic_queue_params qparam;
+ int err;
+
+ ionic_init_queue_params(lif, &qparam);
if (ring->rx_mini_pending || ring->rx_jumbo_pending) {
netdev_info(netdev, "Changing jumbo or mini descriptors not supported\n");
@@ -522,7 +518,28 @@ static int ionic_set_ringparam(struct net_device *netdev,
ring->rx_pending == lif->nrxq_descs)
return 0;
- return ionic_reset_queues(lif, ionic_set_ringsize, ring);
+ if (ring->tx_pending != lif->ntxq_descs)
+ netdev_info(netdev, "Changing Tx ring size from %d to %d\n",
+ lif->ntxq_descs, ring->tx_pending);
+
+ if (ring->rx_pending != lif->nrxq_descs)
+ netdev_info(netdev, "Changing Rx ring size from %d to %d\n",
+ lif->nrxq_descs, ring->rx_pending);
+
+ /* if we're not running, just set the values and return */
+ if (!netif_running(lif->netdev)) {
+ lif->ntxq_descs = ring->tx_pending;
+ lif->nrxq_descs = ring->rx_pending;
+ return 0;
+ }
+
+ qparam.ntxq_descs = ring->tx_pending;
+ qparam.nrxq_descs = ring->rx_pending;
+ err = ionic_reconfigure_queues(lif, &qparam);
+ if (err)
+ netdev_info(netdev, "Ring reconfiguration failed, changes canceled: %d\n", err);
+
+ return err;
}
static void ionic_get_channels(struct net_device *netdev,
@@ -544,32 +561,15 @@ static void ionic_get_channels(struct net_device *netdev,
}
}
-static void ionic_set_queuecount(struct ionic_lif *lif, void *arg)
-{
- struct ethtool_channels *ch = arg;
-
- if (ch->combined_count) {
- lif->nxqs = ch->combined_count;
- if (test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state)) {
- clear_bit(IONIC_LIF_F_SPLIT_INTR, lif->state);
- lif->tx_coalesce_usecs = lif->rx_coalesce_usecs;
- lif->tx_coalesce_hw = lif->rx_coalesce_hw;
- netdev_info(lif->netdev, "Sharing queue interrupts\n");
- }
- } else {
- lif->nxqs = ch->rx_count;
- if (!test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state)) {
- set_bit(IONIC_LIF_F_SPLIT_INTR, lif->state);
- netdev_info(lif->netdev, "Splitting queue interrupts\n");
- }
- }
-}
-
static int ionic_set_channels(struct net_device *netdev,
struct ethtool_channels *ch)
{
struct ionic_lif *lif = netdev_priv(netdev);
- int new_cnt;
+ struct ionic_queue_params qparam;
+ int max_cnt;
+ int err;
+
+ ionic_init_queue_params(lif, &qparam);
if (ch->rx_count != ch->tx_count) {
netdev_info(netdev, "The rx and tx count must be equal\n");
@@ -577,20 +577,63 @@ static int ionic_set_channels(struct net_device *netdev,
}
if (ch->combined_count && ch->rx_count) {
- netdev_info(netdev, "Use either combined_count or rx/tx_count, not both\n");
+ netdev_info(netdev, "Use either combined or rx and tx, not both\n");
return -EINVAL;
}
- if (ch->combined_count)
- new_cnt = ch->combined_count;
- else
- new_cnt = ch->rx_count;
+ max_cnt = lif->ionic->ntxqs_per_lif;
+ if (ch->combined_count) {
+ if (ch->combined_count > max_cnt)
+ return -EINVAL;
- if (lif->nxqs != new_cnt)
- netdev_info(netdev, "Changing queue count from %d to %d\n",
- lif->nxqs, new_cnt);
+ if (test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state))
+ netdev_info(lif->netdev, "Sharing queue interrupts\n");
+ else if (ch->combined_count == lif->nxqs)
+ return 0;
- return ionic_reset_queues(lif, ionic_set_queuecount, ch);
+ if (lif->nxqs != ch->combined_count)
+ netdev_info(netdev, "Changing queue count from %d to %d\n",
+ lif->nxqs, ch->combined_count);
+
+ qparam.nxqs = ch->combined_count;
+ qparam.intr_split = 0;
+ } else {
+ max_cnt /= 2;
+ if (ch->rx_count > max_cnt)
+ return -EINVAL;
+
+ if (!test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state))
+ netdev_info(lif->netdev, "Splitting queue interrupts\n");
+ else if (ch->rx_count == lif->nxqs)
+ return 0;
+
+ if (lif->nxqs != ch->rx_count)
+ netdev_info(netdev, "Changing queue count from %d to %d\n",
+ lif->nxqs, ch->rx_count);
+
+ qparam.nxqs = ch->rx_count;
+ qparam.intr_split = 1;
+ }
+
+ /* if we're not running, just set the values and return */
+ if (!netif_running(lif->netdev)) {
+ lif->nxqs = qparam.nxqs;
+
+ if (qparam.intr_split) {
+ set_bit(IONIC_LIF_F_SPLIT_INTR, lif->state);
+ } else {
+ clear_bit(IONIC_LIF_F_SPLIT_INTR, lif->state);
+ lif->tx_coalesce_usecs = lif->rx_coalesce_usecs;
+ lif->tx_coalesce_hw = lif->rx_coalesce_hw;
+ }
+ return 0;
+ }
+
+ err = ionic_reconfigure_queues(lif, &qparam);
+ if (err)
+ netdev_info(netdev, "Queue reconfiguration failed, changes canceled: %d\n", err);
+
+ return err;
}
static u32 ionic_get_priv_flags(struct net_device *netdev)
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 26988ad..ee683cb 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -36,6 +36,8 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif);
static void ionic_lif_handle_fw_up(struct ionic_lif *lif);
static void ionic_lif_set_netdev_info(struct ionic_lif *lif);
+static void ionic_txrx_deinit(struct ionic_lif *lif);
+static int ionic_txrx_init(struct ionic_lif *lif);
static int ionic_start_queues(struct ionic_lif *lif);
static void ionic_stop_queues(struct ionic_lif *lif);
static void ionic_lif_queue_identify(struct ionic_lif *lif);
@@ -297,6 +299,18 @@ static void ionic_lif_qcq_deinit(struct ionic_lif *lif, struct ionic_qcq *qcq)
qcq->flags &= ~IONIC_QCQ_F_INITED;
}
+static void ionic_qcq_intr_free(struct ionic_lif *lif, struct ionic_qcq *qcq)
+{
+ if (!(qcq->flags & IONIC_QCQ_F_INTR) || qcq->intr.vector == 0)
+ return;
+
+ irq_set_affinity_hint(qcq->intr.vector, NULL);
+ devm_free_irq(lif->ionic->dev, qcq->intr.vector, &qcq->napi);
+ qcq->intr.vector = 0;
+ ionic_intr_free(lif->ionic, qcq->intr.index);
+ qcq->intr.index = IONIC_INTR_INDEX_NOT_ASSIGNED;
+}
+
static void ionic_qcq_free(struct ionic_lif *lif, struct ionic_qcq *qcq)
{
struct device *dev = lif->ionic->dev;
@@ -306,51 +320,62 @@ static void ionic_qcq_free(struct ionic_lif *lif, struct ionic_qcq *qcq)
ionic_debugfs_del_qcq(qcq);
- dma_free_coherent(dev, qcq->total_size, qcq->base, qcq->base_pa);
- qcq->base = NULL;
- qcq->base_pa = 0;
-
- if (qcq->flags & IONIC_QCQ_F_INTR) {
- irq_set_affinity_hint(qcq->intr.vector, NULL);
- devm_free_irq(dev, qcq->intr.vector, &qcq->napi);
- qcq->intr.vector = 0;
- ionic_intr_free(lif->ionic, qcq->intr.index);
+ if (qcq->q_base) {
+ dma_free_coherent(dev, qcq->q_size, qcq->q_base, qcq->q_base_pa);
+ qcq->q_base = NULL;
+ qcq->q_base_pa = 0;
}
- devm_kfree(dev, qcq->cq.info);
- qcq->cq.info = NULL;
- devm_kfree(dev, qcq->q.info);
- qcq->q.info = NULL;
- devm_kfree(dev, qcq);
+ if (qcq->cq_base) {
+ dma_free_coherent(dev, qcq->cq_size, qcq->cq_base, qcq->cq_base_pa);
+ qcq->cq_base = NULL;
+ qcq->cq_base_pa = 0;
+ }
+
+ if (qcq->sg_base) {
+ dma_free_coherent(dev, qcq->sg_size, qcq->sg_base, qcq->sg_base_pa);
+ qcq->sg_base = NULL;
+ qcq->sg_base_pa = 0;
+ }
+
+ ionic_qcq_intr_free(lif, qcq);
+
+ if (qcq->cq.info) {
+ devm_kfree(dev, qcq->cq.info);
+ qcq->cq.info = NULL;
+ }
+ if (qcq->q.info) {
+ devm_kfree(dev, qcq->q.info);
+ qcq->q.info = NULL;
+ }
}
static void ionic_qcqs_free(struct ionic_lif *lif)
{
struct device *dev = lif->ionic->dev;
- unsigned int i;
if (lif->notifyqcq) {
ionic_qcq_free(lif, lif->notifyqcq);
+ devm_kfree(dev, lif->notifyqcq);
lif->notifyqcq = NULL;
}
if (lif->adminqcq) {
ionic_qcq_free(lif, lif->adminqcq);
+ devm_kfree(dev, lif->adminqcq);
lif->adminqcq = NULL;
}
if (lif->rxqcqs) {
- for (i = 0; i < lif->nxqs; i++)
- if (lif->rxqcqs[i].stats)
- devm_kfree(dev, lif->rxqcqs[i].stats);
+ devm_kfree(dev, lif->rxqstats);
+ lif->rxqstats = NULL;
devm_kfree(dev, lif->rxqcqs);
lif->rxqcqs = NULL;
}
if (lif->txqcqs) {
- for (i = 0; i < lif->nxqs; i++)
- if (lif->txqcqs[i].stats)
- devm_kfree(dev, lif->txqcqs[i].stats);
+ devm_kfree(dev, lif->txqstats);
+ lif->txqstats = NULL;
devm_kfree(dev, lif->txqcqs);
lif->txqcqs = NULL;
}
@@ -368,6 +393,53 @@ static void ionic_link_qcq_interrupts(struct ionic_qcq *src_qcq,
n_qcq->intr.index = src_qcq->intr.index;
}
+static int ionic_alloc_qcq_interrupt(struct ionic_lif *lif, struct ionic_qcq *qcq)
+{
+ int err;
+
+ if (!(qcq->flags & IONIC_QCQ_F_INTR)) {
+ qcq->intr.index = IONIC_INTR_INDEX_NOT_ASSIGNED;
+ return 0;
+ }
+
+ err = ionic_intr_alloc(lif, &qcq->intr);
+ if (err) {
+ netdev_warn(lif->netdev, "no intr for %s: %d\n",
+ qcq->q.name, err);
+ goto err_out;
+ }
+
+ err = ionic_bus_get_irq(lif->ionic, qcq->intr.index);
+ if (err < 0) {
+ netdev_warn(lif->netdev, "no vector for %s: %d\n",
+ qcq->q.name, err);
+ goto err_out_free_intr;
+ }
+ qcq->intr.vector = err;
+ ionic_intr_mask_assert(lif->ionic->idev.intr_ctrl, qcq->intr.index,
+ IONIC_INTR_MASK_SET);
+
+ err = ionic_request_irq(lif, qcq);
+ if (err) {
+ netdev_warn(lif->netdev, "irq request failed %d\n", err);
+ goto err_out_free_intr;
+ }
+
+ /* try to get the irq on the local numa node first */
+ qcq->intr.cpu = cpumask_local_spread(qcq->intr.index,
+ dev_to_node(lif->ionic->dev));
+ if (qcq->intr.cpu != -1)
+ cpumask_set_cpu(qcq->intr.cpu, &qcq->intr.affinity_mask);
+
+ netdev_dbg(lif->netdev, "%s: Interrupt index %d\n", qcq->q.name, qcq->intr.index);
+ return 0;
+
+err_out_free_intr:
+ ionic_intr_free(lif->ionic, qcq->intr.index);
+err_out:
+ return err;
+}
+
static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
unsigned int index,
const char *name, unsigned int flags,
@@ -377,7 +449,6 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
unsigned int pid, struct ionic_qcq **qcq)
{
struct ionic_dev *idev = &lif->ionic->idev;
- u32 q_size, cq_size, sg_size, total_size;
struct device *dev = lif->ionic->dev;
void *q_base, *cq_base, *sg_base;
dma_addr_t cq_base_pa = 0;
@@ -388,21 +459,6 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
*qcq = NULL;
- q_size = num_descs * desc_size;
- cq_size = num_descs * cq_desc_size;
- sg_size = num_descs * sg_desc_size;
-
- total_size = ALIGN(q_size, PAGE_SIZE) + ALIGN(cq_size, PAGE_SIZE);
- /* Note: aligning q_size/cq_size is not enough due to cq_base
- * address aligning as q_base could be not aligned to the page.
- * Adding PAGE_SIZE.
- */
- total_size += PAGE_SIZE;
- if (flags & IONIC_QCQ_F_SG) {
- total_size += ALIGN(sg_size, PAGE_SIZE);
- total_size += PAGE_SIZE;
- }
-
new = devm_kzalloc(dev, sizeof(*new), GFP_KERNEL);
if (!new) {
netdev_err(lif->netdev, "Cannot allocate queue structure\n");
@@ -417,7 +473,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
if (!new->q.info) {
netdev_err(lif->netdev, "Cannot allocate queue info\n");
err = -ENOMEM;
- goto err_out;
+ goto err_out_free_qcq;
}
new->q.type = type;
@@ -426,41 +482,12 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
desc_size, sg_desc_size, pid);
if (err) {
netdev_err(lif->netdev, "Cannot initialize queue\n");
+ goto err_out_free_q_info;
+ }
+
+ err = ionic_alloc_qcq_interrupt(lif, new);
+ if (err)
goto err_out;
- }
-
- if (flags & IONIC_QCQ_F_INTR) {
- err = ionic_intr_alloc(lif, &new->intr);
- if (err) {
- netdev_warn(lif->netdev, "no intr for %s: %d\n",
- name, err);
- goto err_out;
- }
-
- err = ionic_bus_get_irq(lif->ionic, new->intr.index);
- if (err < 0) {
- netdev_warn(lif->netdev, "no vector for %s: %d\n",
- name, err);
- goto err_out_free_intr;
- }
- new->intr.vector = err;
- ionic_intr_mask_assert(idev->intr_ctrl, new->intr.index,
- IONIC_INTR_MASK_SET);
-
- err = ionic_request_irq(lif, new);
- if (err) {
- netdev_warn(lif->netdev, "irq request failed %d\n", err);
- goto err_out_free_intr;
- }
-
- new->intr.cpu = cpumask_local_spread(new->intr.index,
- dev_to_node(dev));
- if (new->intr.cpu != -1)
- cpumask_set_cpu(new->intr.cpu,
- &new->intr.affinity_mask);
- } else {
- new->intr.index = IONIC_INTR_INDEX_NOT_ASSIGNED;
- }
new->cq.info = devm_kcalloc(dev, num_descs, sizeof(*new->cq.info),
GFP_KERNEL);
@@ -473,46 +500,67 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
err = ionic_cq_init(lif, &new->cq, &new->intr, num_descs, cq_desc_size);
if (err) {
netdev_err(lif->netdev, "Cannot initialize completion queue\n");
- goto err_out_free_irq;
+ goto err_out_free_cq_info;
}
- new->base = dma_alloc_coherent(dev, total_size, &new->base_pa,
- GFP_KERNEL);
- if (!new->base) {
+ new->q_size = PAGE_SIZE + (num_descs * desc_size);
+ new->q_base = dma_alloc_coherent(dev, new->q_size, &new->q_base_pa,
+ GFP_KERNEL);
+ if (!new->q_base) {
netdev_err(lif->netdev, "Cannot allocate queue DMA memory\n");
err = -ENOMEM;
- goto err_out_free_irq;
+ goto err_out_free_cq_info;
}
-
- new->total_size = total_size;
-
- q_base = new->base;
- q_base_pa = new->base_pa;
-
- cq_base = (void *)ALIGN((uintptr_t)q_base + q_size, PAGE_SIZE);
- cq_base_pa = ALIGN(q_base_pa + q_size, PAGE_SIZE);
-
- if (flags & IONIC_QCQ_F_SG) {
- sg_base = (void *)ALIGN((uintptr_t)cq_base + cq_size,
- PAGE_SIZE);
- sg_base_pa = ALIGN(cq_base_pa + cq_size, PAGE_SIZE);
- ionic_q_sg_map(&new->q, sg_base, sg_base_pa);
- }
-
+ q_base = PTR_ALIGN(new->q_base, PAGE_SIZE);
+ q_base_pa = ALIGN(new->q_base_pa, PAGE_SIZE);
ionic_q_map(&new->q, q_base, q_base_pa);
+
+ new->cq_size = PAGE_SIZE + (num_descs * cq_desc_size);
+ new->cq_base = dma_alloc_coherent(dev, new->cq_size, &new->cq_base_pa,
+ GFP_KERNEL);
+ if (!new->cq_base) {
+ netdev_err(lif->netdev, "Cannot allocate cq DMA memory\n");
+ err = -ENOMEM;
+ goto err_out_free_q;
+ }
+ cq_base = PTR_ALIGN(new->cq_base, PAGE_SIZE);
+ cq_base_pa = ALIGN(new->cq_base_pa, PAGE_SIZE);
ionic_cq_map(&new->cq, cq_base, cq_base_pa);
ionic_cq_bind(&new->cq, &new->q);
+ if (flags & IONIC_QCQ_F_SG) {
+ new->sg_size = PAGE_SIZE + (num_descs * sg_desc_size);
+ new->sg_base = dma_alloc_coherent(dev, new->sg_size, &new->sg_base_pa,
+ GFP_KERNEL);
+ if (!new->sg_base) {
+ netdev_err(lif->netdev, "Cannot allocate sg DMA memory\n");
+ err = -ENOMEM;
+ goto err_out_free_cq;
+ }
+ sg_base = PTR_ALIGN(new->sg_base, PAGE_SIZE);
+ sg_base_pa = ALIGN(new->sg_base_pa, PAGE_SIZE);
+ ionic_q_sg_map(&new->q, sg_base, sg_base_pa);
+ }
+
*qcq = new;
return 0;
+err_out_free_cq:
+ dma_free_coherent(dev, new->cq_size, new->cq_base, new->cq_base_pa);
+err_out_free_q:
+ dma_free_coherent(dev, new->q_size, new->q_base, new->q_base_pa);
+err_out_free_cq_info:
+ devm_kfree(dev, new->cq.info);
err_out_free_irq:
- if (flags & IONIC_QCQ_F_INTR)
+ if (flags & IONIC_QCQ_F_INTR) {
devm_free_irq(dev, new->intr.vector, &new->napi);
-err_out_free_intr:
- if (flags & IONIC_QCQ_F_INTR)
ionic_intr_free(lif->ionic, new->intr.index);
+ }
+err_out_free_q_info:
+ devm_kfree(dev, new->q.info);
+err_out_free_qcq:
+ devm_kfree(dev, new);
err_out:
dev_err(dev, "qcq alloc of %s%d failed %d\n", name, index, err);
return err;
@@ -521,10 +569,8 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
static int ionic_qcqs_alloc(struct ionic_lif *lif)
{
struct device *dev = lif->ionic->dev;
- unsigned int q_list_size;
unsigned int flags;
int err;
- int i;
flags = IONIC_QCQ_F_INTR;
err = ionic_qcq_alloc(lif, IONIC_QTYPE_ADMINQ, 0, "admin", flags,
@@ -544,63 +590,50 @@ static int ionic_qcqs_alloc(struct ionic_lif *lif)
sizeof(union ionic_notifyq_comp),
0, lif->kern_pid, &lif->notifyqcq);
if (err)
- goto err_out_free_adminqcq;
+ goto err_out;
ionic_debugfs_add_qcq(lif, lif->notifyqcq);
/* Let the notifyq ride on the adminq interrupt */
ionic_link_qcq_interrupts(lif->adminqcq, lif->notifyqcq);
}
- q_list_size = sizeof(*lif->txqcqs) * lif->nxqs;
err = -ENOMEM;
- lif->txqcqs = devm_kzalloc(dev, q_list_size, GFP_KERNEL);
+ lif->txqcqs = devm_kcalloc(dev, lif->ionic->ntxqs_per_lif,
+ sizeof(struct ionic_qcq *), GFP_KERNEL);
if (!lif->txqcqs)
- goto err_out_free_notifyqcq;
- for (i = 0; i < lif->nxqs; i++) {
- lif->txqcqs[i].stats = devm_kzalloc(dev,
- sizeof(struct ionic_q_stats),
- GFP_KERNEL);
- if (!lif->txqcqs[i].stats)
- goto err_out_free_tx_stats;
- }
-
- lif->rxqcqs = devm_kzalloc(dev, q_list_size, GFP_KERNEL);
+ goto err_out;
+ lif->rxqcqs = devm_kcalloc(dev, lif->ionic->nrxqs_per_lif,
+ sizeof(struct ionic_qcq *), GFP_KERNEL);
if (!lif->rxqcqs)
- goto err_out_free_tx_stats;
- for (i = 0; i < lif->nxqs; i++) {
- lif->rxqcqs[i].stats = devm_kzalloc(dev,
- sizeof(struct ionic_q_stats),
- GFP_KERNEL);
- if (!lif->rxqcqs[i].stats)
- goto err_out_free_rx_stats;
- }
+ goto err_out;
+
+ lif->txqstats = devm_kcalloc(dev, lif->ionic->ntxqs_per_lif,
+ sizeof(struct ionic_tx_stats), GFP_KERNEL);
+ if (!lif->txqstats)
+ goto err_out;
+ lif->rxqstats = devm_kcalloc(dev, lif->ionic->nrxqs_per_lif,
+ sizeof(struct ionic_rx_stats), GFP_KERNEL);
+ if (!lif->rxqstats)
+ goto err_out;
return 0;
-err_out_free_rx_stats:
- for (i = 0; i < lif->nxqs; i++)
- if (lif->rxqcqs[i].stats)
- devm_kfree(dev, lif->rxqcqs[i].stats);
- devm_kfree(dev, lif->rxqcqs);
- lif->rxqcqs = NULL;
-err_out_free_tx_stats:
- for (i = 0; i < lif->nxqs; i++)
- if (lif->txqcqs[i].stats)
- devm_kfree(dev, lif->txqcqs[i].stats);
- devm_kfree(dev, lif->txqcqs);
- lif->txqcqs = NULL;
-err_out_free_notifyqcq:
- if (lif->notifyqcq) {
- ionic_qcq_free(lif, lif->notifyqcq);
- lif->notifyqcq = NULL;
- }
-err_out_free_adminqcq:
- ionic_qcq_free(lif, lif->adminqcq);
- lif->adminqcq = NULL;
-
+err_out:
+ ionic_qcqs_free(lif);
return err;
}
+static void ionic_qcq_sanitize(struct ionic_qcq *qcq)
+{
+ qcq->q.tail_idx = 0;
+ qcq->q.head_idx = 0;
+ qcq->cq.tail_idx = 0;
+ qcq->cq.done_color = 1;
+ memset(qcq->q_base, 0, qcq->q_size);
+ memset(qcq->cq_base, 0, qcq->cq_size);
+ memset(qcq->sg_base, 0, qcq->sg_size);
+}
+
static int ionic_lif_txq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
{
struct device *dev = lif->ionic->dev;
@@ -626,10 +659,10 @@ static int ionic_lif_txq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
unsigned int intr_index;
int err;
- if (test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state))
+ if (qcq->flags & IONIC_QCQ_F_INTR)
intr_index = qcq->intr.index;
else
- intr_index = lif->rxqcqs[q->index].qcq->intr.index;
+ intr_index = lif->rxqcqs[q->index]->intr.index;
ctx.cmd.q_init.intr_index = cpu_to_le16(intr_index);
dev_dbg(dev, "txq_init.pid %d\n", ctx.cmd.q_init.pid);
@@ -640,9 +673,7 @@ static int ionic_lif_txq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
dev_dbg(dev, "txq_init.ver %d\n", ctx.cmd.q_init.ver);
dev_dbg(dev, "txq_init.intr_index %d\n", ctx.cmd.q_init.intr_index);
- q->tail = q->info;
- q->head = q->tail;
- cq->tail = cq->info;
+ ionic_qcq_sanitize(qcq);
err = ionic_adminq_post_wait(lif, &ctx);
if (err)
@@ -697,9 +728,7 @@ static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
dev_dbg(dev, "rxq_init.ver %d\n", ctx.cmd.q_init.ver);
dev_dbg(dev, "rxq_init.intr_index %d\n", ctx.cmd.q_init.intr_index);
- q->tail = q->info;
- q->head = q->tail;
- cq->tail = cq->info;
+ ionic_qcq_sanitize(qcq);
err = ionic_adminq_post_wait(lif, &ctx);
if (err)
@@ -771,21 +800,6 @@ static bool ionic_notifyq_service(struct ionic_cq *cq,
return true;
}
-static int ionic_notifyq_clean(struct ionic_lif *lif, int budget)
-{
- struct ionic_dev *idev = &lif->ionic->idev;
- struct ionic_cq *cq = &lif->notifyqcq->cq;
- u32 work_done;
-
- work_done = ionic_cq_service(cq, budget, ionic_notifyq_service,
- NULL, NULL);
- if (work_done)
- ionic_intr_credits(idev->intr_ctrl, cq->bound_intr->index,
- work_done, IONIC_INTR_CRED_RESET_COALESCE);
-
- return work_done;
-}
-
static bool ionic_adminq_service(struct ionic_cq *cq,
struct ionic_cq_info *cq_info)
{
@@ -801,15 +815,36 @@ static bool ionic_adminq_service(struct ionic_cq *cq,
static int ionic_adminq_napi(struct napi_struct *napi, int budget)
{
+ struct ionic_intr_info *intr = napi_to_cq(napi)->bound_intr;
struct ionic_lif *lif = napi_to_cq(napi)->lif;
+ struct ionic_dev *idev = &lif->ionic->idev;
+ unsigned int flags = 0;
int n_work = 0;
int a_work = 0;
+ int work_done;
- if (likely(lif->notifyqcq && lif->notifyqcq->flags & IONIC_QCQ_F_INITED))
- n_work = ionic_notifyq_clean(lif, budget);
- a_work = ionic_napi(napi, budget, ionic_adminq_service, NULL, NULL);
+ if (lif->notifyqcq && lif->notifyqcq->flags & IONIC_QCQ_F_INITED)
+ n_work = ionic_cq_service(&lif->notifyqcq->cq, budget,
+ ionic_notifyq_service, NULL, NULL);
- return max(n_work, a_work);
+ if (lif->adminqcq && lif->adminqcq->flags & IONIC_QCQ_F_INITED)
+ a_work = ionic_cq_service(&lif->adminqcq->cq, budget,
+ ionic_adminq_service, NULL, NULL);
+
+ work_done = max(n_work, a_work);
+ if (work_done < budget && napi_complete_done(napi, work_done)) {
+ flags |= IONIC_INTR_CRED_UNMASK;
+ DEBUG_STATS_INTR_REARM(intr);
+ }
+
+ if (work_done || flags) {
+ flags |= IONIC_INTR_CRED_RESET_COALESCE;
+ ionic_intr_credits(idev->intr_ctrl,
+ intr->index,
+ n_work + a_work, flags);
+ }
+
+ return work_done;
}
void ionic_get_stats64(struct net_device *netdev,
@@ -1315,6 +1350,35 @@ static int ionic_set_mac_address(struct net_device *netdev, void *sa)
return ionic_addr_add(netdev, mac);
}
+static void ionic_stop_queues_reconfig(struct ionic_lif *lif)
+{
+ /* Stop and clean the queues before reconfiguration */
+ mutex_lock(&lif->queue_lock);
+ netif_device_detach(lif->netdev);
+ ionic_stop_queues(lif);
+ ionic_txrx_deinit(lif);
+}
+
+static int ionic_start_queues_reconfig(struct ionic_lif *lif)
+{
+ int err;
+
+ /* Re-init the queues after reconfiguration */
+
+ /* The only way txrx_init can fail here is if communication
+ * with FW is suddenly broken. There's not much we can do
+ * at this point - error messages have already been printed,
+ * so we can continue on and the user can eventually do a
+ * DOWN and UP to try to reset and clear the issue.
+ */
+ err = ionic_txrx_init(lif);
+ mutex_unlock(&lif->queue_lock);
+ ionic_link_status_check_request(lif);
+ netif_device_attach(lif->netdev);
+
+ return err;
+}
+
static int ionic_change_mtu(struct net_device *netdev, int new_mtu)
{
struct ionic_lif *lif = netdev_priv(netdev);
@@ -1334,9 +1398,12 @@ static int ionic_change_mtu(struct net_device *netdev, int new_mtu)
return err;
netdev->mtu = new_mtu;
- err = ionic_reset_queues(lif, NULL, NULL);
+ /* if we're not running, nothing more to do */
+ if (!netif_running(netdev))
+ return 0;
- return err;
+ ionic_stop_queues_reconfig(lif);
+ return ionic_start_queues_reconfig(lif);
}
static void ionic_tx_timeout_work(struct work_struct *ws)
@@ -1345,9 +1412,14 @@ static void ionic_tx_timeout_work(struct work_struct *ws)
netdev_info(lif->netdev, "Tx Timeout recovery\n");
- rtnl_lock();
- ionic_reset_queues(lif, NULL, NULL);
- rtnl_unlock();
+ /* if we were stopped before this scheduled job was launched,
+ * don't bother the queues as they are already stopped.
+ */
+ if (!netif_running(lif->netdev))
+ return;
+
+ ionic_stop_queues_reconfig(lif);
+ ionic_start_queues_reconfig(lif);
}
static void ionic_tx_timeout(struct net_device *netdev, unsigned int txqueue)
@@ -1482,7 +1554,7 @@ static void ionic_txrx_disable(struct ionic_lif *lif)
if (lif->txqcqs) {
for (i = 0; i < lif->nxqs; i++) {
- err = ionic_qcq_disable(lif->txqcqs[i].qcq);
+ err = ionic_qcq_disable(lif->txqcqs[i]);
if (err == -ETIMEDOUT)
break;
}
@@ -1490,7 +1562,7 @@ static void ionic_txrx_disable(struct ionic_lif *lif)
if (lif->rxqcqs) {
for (i = 0; i < lif->nxqs; i++) {
- err = ionic_qcq_disable(lif->rxqcqs[i].qcq);
+ err = ionic_qcq_disable(lif->rxqcqs[i]);
if (err == -ETIMEDOUT)
break;
}
@@ -1502,18 +1574,18 @@ static void ionic_txrx_deinit(struct ionic_lif *lif)
unsigned int i;
if (lif->txqcqs) {
- for (i = 0; i < lif->nxqs; i++) {
- ionic_lif_qcq_deinit(lif, lif->txqcqs[i].qcq);
- ionic_tx_flush(&lif->txqcqs[i].qcq->cq);
- ionic_tx_empty(&lif->txqcqs[i].qcq->q);
+ for (i = 0; i < lif->nxqs && lif->txqcqs[i]; i++) {
+ ionic_lif_qcq_deinit(lif, lif->txqcqs[i]);
+ ionic_tx_flush(&lif->txqcqs[i]->cq);
+ ionic_tx_empty(&lif->txqcqs[i]->q);
}
}
if (lif->rxqcqs) {
- for (i = 0; i < lif->nxqs; i++) {
- ionic_lif_qcq_deinit(lif, lif->rxqcqs[i].qcq);
- ionic_rx_flush(&lif->rxqcqs[i].qcq->cq);
- ionic_rx_empty(&lif->rxqcqs[i].qcq->q);
+ for (i = 0; i < lif->nxqs && lif->rxqcqs[i]; i++) {
+ ionic_lif_qcq_deinit(lif, lif->rxqcqs[i]);
+ ionic_rx_flush(&lif->rxqcqs[i]->cq);
+ ionic_rx_empty(&lif->rxqcqs[i]->q);
}
}
lif->rx_mode = 0;
@@ -1524,16 +1596,18 @@ static void ionic_txrx_free(struct ionic_lif *lif)
unsigned int i;
if (lif->txqcqs) {
- for (i = 0; i < lif->nxqs; i++) {
- ionic_qcq_free(lif, lif->txqcqs[i].qcq);
- lif->txqcqs[i].qcq = NULL;
+ for (i = 0; i < lif->ionic->ntxqs_per_lif && lif->txqcqs[i]; i++) {
+ ionic_qcq_free(lif, lif->txqcqs[i]);
+ devm_kfree(lif->ionic->dev, lif->txqcqs[i]);
+ lif->txqcqs[i] = NULL;
}
}
if (lif->rxqcqs) {
- for (i = 0; i < lif->nxqs; i++) {
- ionic_qcq_free(lif, lif->rxqcqs[i].qcq);
- lif->rxqcqs[i].qcq = NULL;
+ for (i = 0; i < lif->ionic->nrxqs_per_lif && lif->rxqcqs[i]; i++) {
+ ionic_qcq_free(lif, lif->rxqcqs[i]);
+ devm_kfree(lif->ionic->dev, lif->rxqcqs[i]);
+ lif->rxqcqs[i] = NULL;
}
}
}
@@ -1561,17 +1635,16 @@ static int ionic_txrx_alloc(struct ionic_lif *lif)
sizeof(struct ionic_txq_desc),
sizeof(struct ionic_txq_comp),
sg_desc_sz,
- lif->kern_pid, &lif->txqcqs[i].qcq);
+ lif->kern_pid, &lif->txqcqs[i]);
if (err)
goto err_out;
if (flags & IONIC_QCQ_F_INTR)
ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
- lif->txqcqs[i].qcq->intr.index,
+ lif->txqcqs[i]->intr.index,
lif->tx_coalesce_hw);
- lif->txqcqs[i].qcq->stats = lif->txqcqs[i].stats;
- ionic_debugfs_add_qcq(lif, lif->txqcqs[i].qcq);
+ ionic_debugfs_add_qcq(lif, lif->txqcqs[i]);
}
flags = IONIC_QCQ_F_RX_STATS | IONIC_QCQ_F_SG | IONIC_QCQ_F_INTR;
@@ -1581,20 +1654,19 @@ static int ionic_txrx_alloc(struct ionic_lif *lif)
sizeof(struct ionic_rxq_desc),
sizeof(struct ionic_rxq_comp),
sizeof(struct ionic_rxq_sg_desc),
- lif->kern_pid, &lif->rxqcqs[i].qcq);
+ lif->kern_pid, &lif->rxqcqs[i]);
if (err)
goto err_out;
ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
- lif->rxqcqs[i].qcq->intr.index,
+ lif->rxqcqs[i]->intr.index,
lif->rx_coalesce_hw);
if (!test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state))
- ionic_link_qcq_interrupts(lif->rxqcqs[i].qcq,
- lif->txqcqs[i].qcq);
+ ionic_link_qcq_interrupts(lif->rxqcqs[i],
+ lif->txqcqs[i]);
- lif->rxqcqs[i].qcq->stats = lif->rxqcqs[i].stats;
- ionic_debugfs_add_qcq(lif, lif->rxqcqs[i].qcq);
+ ionic_debugfs_add_qcq(lif, lif->rxqcqs[i]);
}
return 0;
@@ -1611,13 +1683,13 @@ static int ionic_txrx_init(struct ionic_lif *lif)
int err;
for (i = 0; i < lif->nxqs; i++) {
- err = ionic_lif_txq_init(lif, lif->txqcqs[i].qcq);
+ err = ionic_lif_txq_init(lif, lif->txqcqs[i]);
if (err)
goto err_out;
- err = ionic_lif_rxq_init(lif, lif->rxqcqs[i].qcq);
+ err = ionic_lif_rxq_init(lif, lif->rxqcqs[i]);
if (err) {
- ionic_lif_qcq_deinit(lif, lif->txqcqs[i].qcq);
+ ionic_lif_qcq_deinit(lif, lif->txqcqs[i]);
goto err_out;
}
}
@@ -1631,8 +1703,8 @@ static int ionic_txrx_init(struct ionic_lif *lif)
err_out:
while (i--) {
- ionic_lif_qcq_deinit(lif, lif->txqcqs[i].qcq);
- ionic_lif_qcq_deinit(lif, lif->rxqcqs[i].qcq);
+ ionic_lif_qcq_deinit(lif, lif->txqcqs[i]);
+ ionic_lif_qcq_deinit(lif, lif->rxqcqs[i]);
}
return err;
@@ -1643,15 +1715,15 @@ static int ionic_txrx_enable(struct ionic_lif *lif)
int i, err;
for (i = 0; i < lif->nxqs; i++) {
- ionic_rx_fill(&lif->rxqcqs[i].qcq->q);
- err = ionic_qcq_enable(lif->rxqcqs[i].qcq);
+ ionic_rx_fill(&lif->rxqcqs[i]->q);
+ err = ionic_qcq_enable(lif->rxqcqs[i]);
if (err)
goto err_out;
- err = ionic_qcq_enable(lif->txqcqs[i].qcq);
+ err = ionic_qcq_enable(lif->txqcqs[i]);
if (err) {
if (err != -ETIMEDOUT)
- ionic_qcq_disable(lif->rxqcqs[i].qcq);
+ ionic_qcq_disable(lif->rxqcqs[i]);
goto err_out;
}
}
@@ -1660,10 +1732,10 @@ static int ionic_txrx_enable(struct ionic_lif *lif)
err_out:
while (i--) {
- err = ionic_qcq_disable(lif->txqcqs[i].qcq);
+ err = ionic_qcq_disable(lif->txqcqs[i]);
if (err == -ETIMEDOUT)
break;
- err = ionic_qcq_disable(lif->rxqcqs[i].qcq);
+ err = ionic_qcq_disable(lif->rxqcqs[i]);
if (err == -ETIMEDOUT)
break;
}
@@ -1688,7 +1760,7 @@ static int ionic_start_queues(struct ionic_lif *lif)
return 0;
}
-int ionic_open(struct net_device *netdev)
+static int ionic_open(struct net_device *netdev)
{
struct ionic_lif *lif = netdev_priv(netdev);
int err;
@@ -1734,7 +1806,7 @@ static void ionic_stop_queues(struct ionic_lif *lif)
ionic_txrx_disable(lif);
}
-int ionic_stop(struct net_device *netdev)
+static int ionic_stop(struct net_device *netdev)
{
struct ionic_lif *lif = netdev_priv(netdev);
@@ -2016,35 +2088,210 @@ static const struct net_device_ops ionic_netdev_ops = {
.ndo_get_vf_stats = ionic_get_vf_stats,
};
-int ionic_reset_queues(struct ionic_lif *lif, ionic_reset_cb cb, void *arg)
+static void ionic_swap_queues(struct ionic_qcq *a, struct ionic_qcq *b)
{
- bool running;
- int err = 0;
+ /* only swapping the queues, not the napi, flags, or other stuff */
+ swap(a->q.num_descs, b->q.num_descs);
+ swap(a->q.base, b->q.base);
+ swap(a->q.base_pa, b->q.base_pa);
+ swap(a->q.info, b->q.info);
+ swap(a->q_base, b->q_base);
+ swap(a->q_base_pa, b->q_base_pa);
+ swap(a->q_size, b->q_size);
- mutex_lock(&lif->queue_lock);
- running = netif_running(lif->netdev);
- if (running) {
- netif_device_detach(lif->netdev);
- err = ionic_stop(lif->netdev);
+ swap(a->q.sg_base, b->q.sg_base);
+ swap(a->q.sg_base_pa, b->q.sg_base_pa);
+ swap(a->sg_base, b->sg_base);
+ swap(a->sg_base_pa, b->sg_base_pa);
+ swap(a->sg_size, b->sg_size);
+
+ swap(a->cq.num_descs, b->cq.num_descs);
+ swap(a->cq.base, b->cq.base);
+ swap(a->cq.base_pa, b->cq.base_pa);
+ swap(a->cq.info, b->cq.info);
+ swap(a->cq_base, b->cq_base);
+ swap(a->cq_base_pa, b->cq_base_pa);
+ swap(a->cq_size, b->cq_size);
+}
+
+int ionic_reconfigure_queues(struct ionic_lif *lif,
+ struct ionic_queue_params *qparam)
+{
+ struct ionic_qcq **tx_qcqs = NULL;
+ struct ionic_qcq **rx_qcqs = NULL;
+ unsigned int sg_desc_sz;
+ unsigned int flags;
+ int err = -ENOMEM;
+ unsigned int i;
+
+ /* allocate temporary qcq arrays to hold new queue structs */
+ if (qparam->nxqs != lif->nxqs || qparam->ntxq_descs != lif->ntxq_descs) {
+ tx_qcqs = devm_kcalloc(lif->ionic->dev, lif->ionic->ntxqs_per_lif,
+ sizeof(struct ionic_qcq *), GFP_KERNEL);
+ if (!tx_qcqs)
+ goto err_out;
+ }
+ if (qparam->nxqs != lif->nxqs || qparam->nrxq_descs != lif->nrxq_descs) {
+ rx_qcqs = devm_kcalloc(lif->ionic->dev, lif->ionic->nrxqs_per_lif,
+ sizeof(struct ionic_qcq *), GFP_KERNEL);
+ if (!rx_qcqs)
+ goto err_out;
+ }
+
+ /* allocate new desc_info and rings, but leave the interrupt setup
+ * until later so as to not mess with the still-running queues
+ */
+ if (lif->qtype_info[IONIC_QTYPE_TXQ].version >= 1 &&
+ lif->qtype_info[IONIC_QTYPE_TXQ].sg_desc_sz ==
+ sizeof(struct ionic_txq_sg_desc_v1))
+ sg_desc_sz = sizeof(struct ionic_txq_sg_desc_v1);
+ else
+ sg_desc_sz = sizeof(struct ionic_txq_sg_desc);
+
+ if (tx_qcqs) {
+ for (i = 0; i < qparam->nxqs; i++) {
+ flags = lif->txqcqs[i]->flags & ~IONIC_QCQ_F_INTR;
+ err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, i, "tx", flags,
+ qparam->ntxq_descs,
+ sizeof(struct ionic_txq_desc),
+ sizeof(struct ionic_txq_comp),
+ sg_desc_sz,
+ lif->kern_pid, &tx_qcqs[i]);
+ if (err)
+ goto err_out;
+ }
+ }
+
+ if (rx_qcqs) {
+ for (i = 0; i < qparam->nxqs; i++) {
+ flags = lif->rxqcqs[i]->flags & ~IONIC_QCQ_F_INTR;
+ err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags,
+ qparam->nrxq_descs,
+ sizeof(struct ionic_rxq_desc),
+ sizeof(struct ionic_rxq_comp),
+ sizeof(struct ionic_rxq_sg_desc),
+ lif->kern_pid, &rx_qcqs[i]);
+ if (err)
+ goto err_out;
+ }
+ }
+
+ /* stop and clean the queues */
+ ionic_stop_queues_reconfig(lif);
+
+ if (qparam->nxqs != lif->nxqs) {
+ err = netif_set_real_num_tx_queues(lif->netdev, qparam->nxqs);
if (err)
- goto reset_out;
+ goto err_out_reinit_unlock;
+ err = netif_set_real_num_rx_queues(lif->netdev, qparam->nxqs);
+ if (err) {
+ netif_set_real_num_tx_queues(lif->netdev, lif->nxqs);
+ goto err_out_reinit_unlock;
+ }
}
- if (cb)
- cb(lif, arg);
-
- if (running) {
- err = ionic_open(lif->netdev);
- netif_device_attach(lif->netdev);
+ /* swap new desc_info and rings, keeping existing interrupt config */
+ if (tx_qcqs) {
+ lif->ntxq_descs = qparam->ntxq_descs;
+ for (i = 0; i < qparam->nxqs; i++)
+ ionic_swap_queues(lif->txqcqs[i], tx_qcqs[i]);
}
-reset_out:
- mutex_unlock(&lif->queue_lock);
+ if (rx_qcqs) {
+ lif->nrxq_descs = qparam->nrxq_descs;
+ for (i = 0; i < qparam->nxqs; i++)
+ ionic_swap_queues(lif->rxqcqs[i], rx_qcqs[i]);
+ }
+
+ /* if we need to change the interrupt layout, this is the time */
+ if (qparam->intr_split != test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state) ||
+ qparam->nxqs != lif->nxqs) {
+ if (qparam->intr_split) {
+ set_bit(IONIC_LIF_F_SPLIT_INTR, lif->state);
+ } else {
+ clear_bit(IONIC_LIF_F_SPLIT_INTR, lif->state);
+ lif->tx_coalesce_usecs = lif->rx_coalesce_usecs;
+ lif->tx_coalesce_hw = lif->rx_coalesce_hw;
+ }
+
+ /* clear existing interrupt assignments */
+ for (i = 0; i < lif->ionic->ntxqs_per_lif; i++) {
+ ionic_qcq_intr_free(lif, lif->txqcqs[i]);
+ ionic_qcq_intr_free(lif, lif->rxqcqs[i]);
+ }
+
+ /* re-assign the interrupts */
+ for (i = 0; i < qparam->nxqs; i++) {
+ lif->rxqcqs[i]->flags |= IONIC_QCQ_F_INTR;
+ err = ionic_alloc_qcq_interrupt(lif, lif->rxqcqs[i]);
+ ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
+ lif->rxqcqs[i]->intr.index,
+ lif->rx_coalesce_hw);
+
+ if (qparam->intr_split) {
+ lif->txqcqs[i]->flags |= IONIC_QCQ_F_INTR;
+ err = ionic_alloc_qcq_interrupt(lif, lif->txqcqs[i]);
+ ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
+ lif->txqcqs[i]->intr.index,
+ lif->tx_coalesce_hw);
+ } else {
+ lif->txqcqs[i]->flags &= ~IONIC_QCQ_F_INTR;
+ ionic_link_qcq_interrupts(lif->rxqcqs[i], lif->txqcqs[i]);
+ }
+ }
+ }
+
+ swap(lif->nxqs, qparam->nxqs);
+
+err_out_reinit_unlock:
+ /* re-init the queues, but don't loose an error code */
+ if (err)
+ ionic_start_queues_reconfig(lif);
+ else
+ err = ionic_start_queues_reconfig(lif);
+
+err_out:
+ /* free old allocs without cleaning intr */
+ for (i = 0; i < qparam->nxqs; i++) {
+ if (tx_qcqs && tx_qcqs[i]) {
+ tx_qcqs[i]->flags &= ~IONIC_QCQ_F_INTR;
+ ionic_qcq_free(lif, tx_qcqs[i]);
+ devm_kfree(lif->ionic->dev, tx_qcqs[i]);
+ tx_qcqs[i] = NULL;
+ }
+ if (rx_qcqs && rx_qcqs[i]) {
+ rx_qcqs[i]->flags &= ~IONIC_QCQ_F_INTR;
+ ionic_qcq_free(lif, rx_qcqs[i]);
+ devm_kfree(lif->ionic->dev, rx_qcqs[i]);
+ rx_qcqs[i] = NULL;
+ }
+ }
+
+ /* free q array */
+ if (rx_qcqs) {
+ devm_kfree(lif->ionic->dev, rx_qcqs);
+ rx_qcqs = NULL;
+ }
+ if (tx_qcqs) {
+ devm_kfree(lif->ionic->dev, tx_qcqs);
+ tx_qcqs = NULL;
+ }
+
+ /* clean the unused dma and info allocations when new set is smaller
+ * than the full array, but leave the qcq shells in place
+ */
+ for (i = lif->nxqs; i < lif->ionic->ntxqs_per_lif; i++) {
+ lif->txqcqs[i]->flags &= ~IONIC_QCQ_F_INTR;
+ ionic_qcq_free(lif, lif->txqcqs[i]);
+
+ lif->rxqcqs[i]->flags &= ~IONIC_QCQ_F_INTR;
+ ionic_qcq_free(lif, lif->rxqcqs[i]);
+ }
return err;
}
-static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index)
+int ionic_lif_alloc(struct ionic *ionic)
{
struct device *dev = ionic->dev;
union ionic_lif_identity *lid;
@@ -2055,7 +2302,7 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index
lid = kzalloc(sizeof(*lid), GFP_KERNEL);
if (!lid)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
netdev = alloc_etherdev_mqs(sizeof(*lif),
ionic->ntxqs_per_lif, ionic->ntxqs_per_lif);
@@ -2069,7 +2316,7 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index
lif = netdev_priv(netdev);
lif->netdev = netdev;
- ionic->master_lif = lif;
+ ionic->lif = lif;
netdev->netdev_ops = &ionic_netdev_ops;
ionic_ethtool_set_ops(netdev);
@@ -2079,7 +2326,8 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index
lif->identity = lid;
lif->lif_type = IONIC_LIF_TYPE_CLASSIC;
ionic_lif_identify(ionic, lif->lif_type, lif->identity);
- lif->netdev->min_mtu = le32_to_cpu(lif->identity->eth.min_frame_size);
+ lif->netdev->min_mtu = max_t(unsigned int, ETH_MIN_MTU,
+ le32_to_cpu(lif->identity->eth.min_frame_size));
lif->netdev->max_mtu =
le32_to_cpu(lif->identity->eth.max_frame_size) - ETH_HLEN - VLAN_HLEN;
@@ -2087,7 +2335,7 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index
lif->nxqs = ionic->ntxqs_per_lif;
lif->ionic = ionic;
- lif->index = index;
+ lif->index = 0;
lif->ntxq_descs = IONIC_DEF_TXRX_DESC;
lif->nrxq_descs = IONIC_DEF_TXRX_DESC;
lif->tx_budget = IONIC_TX_BUDGET_DEFAULT;
@@ -2099,7 +2347,7 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index
lif->tx_coalesce_usecs = lif->rx_coalesce_usecs;
lif->tx_coalesce_hw = lif->rx_coalesce_hw;
- snprintf(lif->name, sizeof(lif->name), "lif%u", index);
+ snprintf(lif->name, sizeof(lif->name), "lif%u", lif->index);
spin_lock_init(&lif->adminq_lock);
@@ -2119,7 +2367,8 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index
ionic_debugfs_add_lif(lif);
- /* allocate queues */
+ /* allocate control queues and txrx queue arrays */
+ ionic_lif_queue_identify(lif);
err = ionic_qcqs_alloc(lif);
if (err)
goto err_out_free_lif_info;
@@ -2138,9 +2387,7 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index
}
netdev_rss_key_fill(lif->rss_hash_key, IONIC_RSS_HASH_KEY_SIZE);
- list_add_tail(&lif->list, &ionic->lifs);
-
- return lif;
+ return 0;
err_out_free_qcqs:
ionic_qcqs_free(lif);
@@ -2154,27 +2401,7 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index
err_out_free_lid:
kfree(lid);
- return ERR_PTR(err);
-}
-
-int ionic_lifs_alloc(struct ionic *ionic)
-{
- struct ionic_lif *lif;
-
- INIT_LIST_HEAD(&ionic->lifs);
-
- /* only build the first lif, others are for later features */
- set_bit(0, ionic->lifbits);
-
- lif = ionic_lif_alloc(ionic, 0);
- if (IS_ERR_OR_NULL(lif)) {
- clear_bit(0, ionic->lifbits);
- return -ENOMEM;
- }
-
- ionic_lif_queue_identify(lif);
-
- return 0;
+ return err;
}
static void ionic_lif_reset(struct ionic_lif *lif)
@@ -2209,7 +2436,7 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif)
ionic_txrx_deinit(lif);
ionic_txrx_free(lif);
}
- ionic_lifs_deinit(ionic);
+ ionic_lif_deinit(lif);
ionic_reset(ionic);
ionic_qcqs_free(lif);
@@ -2232,7 +2459,7 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif)
if (err)
goto err_out;
- err = ionic_lifs_init(ionic);
+ err = ionic_lif_init(lif);
if (err)
goto err_qcqs_free;
@@ -2261,14 +2488,14 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif)
err_txrx_free:
ionic_txrx_free(lif);
err_lifs_deinit:
- ionic_lifs_deinit(ionic);
+ ionic_lif_deinit(lif);
err_qcqs_free:
ionic_qcqs_free(lif);
err_out:
dev_err(ionic->dev, "FW Up: LIFs restart failed - err %d\n", err);
}
-static void ionic_lif_free(struct ionic_lif *lif)
+void ionic_lif_free(struct ionic_lif *lif)
{
struct device *dev = lif->ionic->dev;
@@ -2297,23 +2524,10 @@ static void ionic_lif_free(struct ionic_lif *lif)
/* free netdev & lif */
ionic_debugfs_del_lif(lif);
- list_del(&lif->list);
free_netdev(lif->netdev);
}
-void ionic_lifs_free(struct ionic *ionic)
-{
- struct list_head *cur, *tmp;
- struct ionic_lif *lif;
-
- list_for_each_safe(cur, tmp, &ionic->lifs) {
- lif = list_entry(cur, struct ionic_lif, list);
-
- ionic_lif_free(lif);
- }
-}
-
-static void ionic_lif_deinit(struct ionic_lif *lif)
+void ionic_lif_deinit(struct ionic_lif *lif)
{
if (!test_and_clear_bit(IONIC_LIF_F_INITED, lif->state))
return;
@@ -2334,17 +2548,6 @@ static void ionic_lif_deinit(struct ionic_lif *lif)
ionic_lif_reset(lif);
}
-void ionic_lifs_deinit(struct ionic *ionic)
-{
- struct list_head *cur, *tmp;
- struct ionic_lif *lif;
-
- list_for_each_safe(cur, tmp, &ionic->lifs) {
- lif = list_entry(cur, struct ionic_lif, list);
- ionic_lif_deinit(lif);
- }
-}
-
static int ionic_lif_adminq_init(struct ionic_lif *lif)
{
struct device *dev = lif->ionic->dev;
@@ -2490,7 +2693,7 @@ static int ionic_station_set(struct ionic_lif *lif)
return 0;
}
-static int ionic_lif_init(struct ionic_lif *lif)
+int ionic_lif_init(struct ionic_lif *lif)
{
struct ionic_dev *idev = &lif->ionic->idev;
struct device *dev = lif->ionic->dev;
@@ -2580,22 +2783,6 @@ static int ionic_lif_init(struct ionic_lif *lif)
return err;
}
-int ionic_lifs_init(struct ionic *ionic)
-{
- struct list_head *cur, *tmp;
- struct ionic_lif *lif;
- int err;
-
- list_for_each_safe(cur, tmp, &ionic->lifs) {
- lif = list_entry(cur, struct ionic_lif, list);
- err = ionic_lif_init(lif);
- if (err)
- return err;
- }
-
- return 0;
-}
-
static void ionic_lif_notify_work(struct work_struct *ws)
{
}
@@ -2644,45 +2831,41 @@ static int ionic_lif_notify(struct notifier_block *nb,
return NOTIFY_DONE;
}
-int ionic_lifs_register(struct ionic *ionic)
+int ionic_lif_register(struct ionic_lif *lif)
{
int err;
- INIT_WORK(&ionic->nb_work, ionic_lif_notify_work);
+ INIT_WORK(&lif->ionic->nb_work, ionic_lif_notify_work);
- ionic->nb.notifier_call = ionic_lif_notify;
+ lif->ionic->nb.notifier_call = ionic_lif_notify;
- err = register_netdevice_notifier(&ionic->nb);
+ err = register_netdevice_notifier(&lif->ionic->nb);
if (err)
- ionic->nb.notifier_call = NULL;
+ lif->ionic->nb.notifier_call = NULL;
/* only register LIF0 for now */
- err = register_netdev(ionic->master_lif->netdev);
+ err = register_netdev(lif->netdev);
if (err) {
- dev_err(ionic->dev, "Cannot register net device, aborting\n");
+ dev_err(lif->ionic->dev, "Cannot register net device, aborting\n");
return err;
}
- ionic->master_lif->registered = true;
- ionic_lif_set_netdev_info(ionic->master_lif);
+ lif->registered = true;
+ ionic_lif_set_netdev_info(lif);
return 0;
}
-void ionic_lifs_unregister(struct ionic *ionic)
+void ionic_lif_unregister(struct ionic_lif *lif)
{
- if (ionic->nb.notifier_call) {
- unregister_netdevice_notifier(&ionic->nb);
- cancel_work_sync(&ionic->nb_work);
- ionic->nb.notifier_call = NULL;
+ if (lif->ionic->nb.notifier_call) {
+ unregister_netdevice_notifier(&lif->ionic->nb);
+ cancel_work_sync(&lif->ionic->nb_work);
+ lif->ionic->nb.notifier_call = NULL;
}
- /* There is only one lif ever registered in the
- * current model, so don't bother searching the
- * ionic->lif for candidates to unregister
- */
- if (ionic->master_lif &&
- ionic->master_lif->netdev->reg_state == NETREG_REGISTERED)
- unregister_netdev(ionic->master_lif->netdev);
+ if (lif->netdev->reg_state == NETREG_REGISTERED)
+ unregister_netdev(lif->netdev);
+ lif->registered = false;
}
static void ionic_lif_queue_identify(struct ionic_lif *lif)
@@ -2801,7 +2984,7 @@ int ionic_lif_identify(struct ionic *ionic, u8 lif_type,
return 0;
}
-int ionic_lifs_size(struct ionic *ionic)
+int ionic_lif_size(struct ionic *ionic)
{
struct ionic_identity *ident = &ionic->ident;
unsigned int nintrs, dev_nintrs;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index 1ee3b14..11ea9e0 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -16,32 +16,32 @@
#define IONIC_TX_BUDGET_DEFAULT 256
struct ionic_tx_stats {
- u64 dma_map_err;
u64 pkts;
u64 bytes;
- u64 clean;
- u64 linearize;
u64 csum_none;
u64 csum;
- u64 crc32_csum;
u64 tso;
u64 tso_bytes;
u64 frags;
u64 vlan_inserted;
+ u64 clean;
+ u64 linearize;
+ u64 crc32_csum;
u64 sg_cntr[IONIC_MAX_NUM_SG_CNTR];
+ u64 dma_map_err;
};
struct ionic_rx_stats {
- u64 dma_map_err;
- u64 alloc_err;
u64 pkts;
u64 bytes;
u64 csum_none;
u64 csum_complete;
- u64 csum_error;
u64 buffers_posted;
u64 dropped;
u64 vlan_stripped;
+ u64 csum_error;
+ u64 dma_map_err;
+ u64 alloc_err;
};
#define IONIC_QCQ_F_INITED BIT(0)
@@ -56,35 +56,28 @@ struct ionic_napi_stats {
u64 work_done_cntr[IONIC_MAX_NUM_NAPI_CNTR];
};
-struct ionic_q_stats {
- union {
- struct ionic_tx_stats tx;
- struct ionic_rx_stats rx;
- };
-};
-
struct ionic_qcq {
- void *base;
- dma_addr_t base_pa;
- unsigned int total_size;
+ void *q_base;
+ dma_addr_t q_base_pa;
+ u32 q_size;
+ void *cq_base;
+ dma_addr_t cq_base_pa;
+ u32 cq_size;
+ void *sg_base;
+ dma_addr_t sg_base_pa;
+ u32 sg_size;
struct ionic_queue q;
struct ionic_cq cq;
struct ionic_intr_info intr;
struct napi_struct napi;
struct ionic_napi_stats napi_stats;
- struct ionic_q_stats *stats;
unsigned int flags;
struct dentry *dentry;
};
-struct ionic_qcqst {
- struct ionic_qcq *qcq;
- struct ionic_q_stats *stats;
-};
-
#define q_to_qcq(q) container_of(q, struct ionic_qcq, q)
-#define q_to_tx_stats(q) (&q_to_qcq(q)->stats->tx)
-#define q_to_rx_stats(q) (&q_to_qcq(q)->stats->rx)
+#define q_to_tx_stats(q) (&(q)->lif->txqstats[(q)->index])
+#define q_to_rx_stats(q) (&(q)->lif->rxqstats[(q)->index])
#define napi_to_qcq(napi) container_of(napi, struct ionic_qcq, napi)
#define napi_to_cq(napi) (&napi_to_qcq(napi)->cq)
@@ -170,8 +163,10 @@ struct ionic_lif {
spinlock_t adminq_lock; /* lock for AdminQ operations */
struct ionic_qcq *adminqcq;
struct ionic_qcq *notifyqcq;
- struct ionic_qcqst *txqcqs;
- struct ionic_qcqst *rxqcqs;
+ struct ionic_qcq **txqcqs;
+ struct ionic_tx_stats *txqstats;
+ struct ionic_qcq **rxqcqs;
+ struct ionic_rx_stats *rxqstats;
u64 last_eid;
unsigned int neqs;
unsigned int nxqs;
@@ -212,12 +207,21 @@ struct ionic_lif {
struct work_struct tx_timeout_work;
};
-#define lif_to_txqcq(lif, i) ((lif)->txqcqs[i].qcq)
-#define lif_to_rxqcq(lif, i) ((lif)->rxqcqs[i].qcq)
-#define lif_to_txstats(lif, i) ((lif)->txqcqs[i].stats->tx)
-#define lif_to_rxstats(lif, i) ((lif)->rxqcqs[i].stats->rx)
-#define lif_to_txq(lif, i) (&lif_to_txqcq((lif), i)->q)
-#define lif_to_rxq(lif, i) (&lif_to_txqcq((lif), i)->q)
+struct ionic_queue_params {
+ unsigned int nxqs;
+ unsigned int ntxq_descs;
+ unsigned int nrxq_descs;
+ unsigned int intr_split;
+};
+
+static inline void ionic_init_queue_params(struct ionic_lif *lif,
+ struct ionic_queue_params *qparam)
+{
+ qparam->nxqs = lif->nxqs;
+ qparam->ntxq_descs = lif->ntxq_descs;
+ qparam->nrxq_descs = lif->nrxq_descs;
+ qparam->intr_split = test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state);
+}
static inline u32 ionic_coal_usec_to_hw(struct ionic *ionic, u32 usecs)
{
@@ -242,34 +246,33 @@ void ionic_get_stats64(struct net_device *netdev,
struct rtnl_link_stats64 *ns);
void ionic_lif_deferred_enqueue(struct ionic_deferred *def,
struct ionic_deferred_work *work);
-int ionic_lifs_alloc(struct ionic *ionic);
-void ionic_lifs_free(struct ionic *ionic);
-void ionic_lifs_deinit(struct ionic *ionic);
-int ionic_lifs_init(struct ionic *ionic);
-int ionic_lifs_register(struct ionic *ionic);
-void ionic_lifs_unregister(struct ionic *ionic);
+int ionic_lif_alloc(struct ionic *ionic);
+int ionic_lif_init(struct ionic_lif *lif);
+void ionic_lif_free(struct ionic_lif *lif);
+void ionic_lif_deinit(struct ionic_lif *lif);
+int ionic_lif_register(struct ionic_lif *lif);
+void ionic_lif_unregister(struct ionic_lif *lif);
int ionic_lif_identify(struct ionic *ionic, u8 lif_type,
union ionic_lif_identity *lif_ident);
-int ionic_lifs_size(struct ionic *ionic);
+int ionic_lif_size(struct ionic *ionic);
int ionic_lif_rss_config(struct ionic_lif *lif, u16 types,
const u8 *key, const u32 *indir);
+int ionic_reconfigure_queues(struct ionic_lif *lif,
+ struct ionic_queue_params *qparam);
-int ionic_open(struct net_device *netdev);
-int ionic_stop(struct net_device *netdev);
-int ionic_reset_queues(struct ionic_lif *lif, ionic_reset_cb cb, void *arg);
-
-static inline void debug_stats_txq_post(struct ionic_qcq *qcq,
- struct ionic_txq_desc *desc, bool dbell)
+static inline void debug_stats_txq_post(struct ionic_queue *q, bool dbell)
{
- u8 num_sg_elems = ((le64_to_cpu(desc->cmd) >> IONIC_TXQ_DESC_NSGE_SHIFT)
+ struct ionic_txq_desc *desc = &q->txq[q->head_idx];
+ u8 num_sg_elems;
+
+ q->dbell_count += dbell;
+
+ num_sg_elems = ((le64_to_cpu(desc->cmd) >> IONIC_TXQ_DESC_NSGE_SHIFT)
& IONIC_TXQ_DESC_NSGE_MASK);
-
- qcq->q.dbell_count += dbell;
-
if (num_sg_elems > (IONIC_MAX_NUM_SG_CNTR - 1))
num_sg_elems = IONIC_MAX_NUM_SG_CNTR - 1;
- qcq->stats->tx.sg_cntr[num_sg_elems]++;
+ q->lif->txqstats[q->index].sg_cntr[num_sg_elems]++;
}
static inline void debug_stats_napi_poll(struct ionic_qcq *qcq,
@@ -284,10 +287,9 @@ static inline void debug_stats_napi_poll(struct ionic_qcq *qcq,
}
#define DEBUG_STATS_CQE_CNT(cq) ((cq)->compl_count++)
-#define DEBUG_STATS_RX_BUFF_CNT(qcq) ((qcq)->stats->rx.buffers_posted++)
+#define DEBUG_STATS_RX_BUFF_CNT(q) ((q)->lif->rxqstats[q->index].buffers_posted++)
#define DEBUG_STATS_INTR_REARM(intr) ((intr)->rearm_count++)
-#define DEBUG_STATS_TXQ_POST(qcq, txdesc, dbell) \
- debug_stats_txq_post(qcq, txdesc, dbell)
+#define DEBUG_STATS_TXQ_POST(q, dbell) debug_stats_txq_post(q, dbell)
#define DEBUG_STATS_NAPI_POLL(qcq, work_done) \
debug_stats_napi_poll(qcq, work_done)
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index df5b9bc..cfb90bf 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -181,15 +181,17 @@ static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode)
static void ionic_adminq_flush(struct ionic_lif *lif)
{
- struct ionic_queue *adminq = &lif->adminqcq->q;
+ struct ionic_queue *q = &lif->adminqcq->q;
+ struct ionic_desc_info *desc_info;
spin_lock(&lif->adminq_lock);
- while (adminq->tail != adminq->head) {
- memset(adminq->tail->desc, 0, sizeof(union ionic_adminq_cmd));
- adminq->tail->cb = NULL;
- adminq->tail->cb_arg = NULL;
- adminq->tail = adminq->tail->next;
+ while (q->tail_idx != q->head_idx) {
+ desc_info = &q->info[q->tail_idx];
+ memset(desc_info->desc, 0, sizeof(union ionic_adminq_cmd));
+ desc_info->cb = NULL;
+ desc_info->cb_arg = NULL;
+ q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
}
spin_unlock(&lif->adminq_lock);
}
@@ -245,7 +247,8 @@ static void ionic_adminq_cb(struct ionic_queue *q,
static int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
{
- struct ionic_queue *adminq;
+ struct ionic_desc_info *desc_info;
+ struct ionic_queue *q;
int err = 0;
WARN_ON(in_interrupt());
@@ -253,10 +256,10 @@ static int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
if (!lif->adminqcq)
return -EIO;
- adminq = &lif->adminqcq->q;
+ q = &lif->adminqcq->q;
spin_lock(&lif->adminq_lock);
- if (!ionic_q_has_space(adminq, 1)) {
+ if (!ionic_q_has_space(q, 1)) {
err = -ENOSPC;
goto err_out;
}
@@ -265,13 +268,14 @@ static int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
if (err)
goto err_out;
- memcpy(adminq->head->desc, &ctx->cmd, sizeof(ctx->cmd));
+ desc_info = &q->info[q->head_idx];
+ memcpy(desc_info->desc, &ctx->cmd, sizeof(ctx->cmd));
dev_dbg(&lif->netdev->dev, "post admin queue command:\n");
dynamic_hex_dump("cmd ", DUMP_PREFIX_OFFSET, 16, 1,
&ctx->cmd, sizeof(ctx->cmd), true);
- ionic_q_post(adminq, true, ionic_adminq_cb, ctx);
+ ionic_q_post(q, true, ionic_adminq_cb, ctx);
err_out:
spin_unlock(&lif->adminq_lock);
@@ -301,32 +305,6 @@ int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
return ionic_adminq_check_err(lif, ctx, (remaining == 0));
}
-int ionic_napi(struct napi_struct *napi, int budget, ionic_cq_cb cb,
- ionic_cq_done_cb done_cb, void *done_arg)
-{
- struct ionic_qcq *qcq = napi_to_qcq(napi);
- struct ionic_cq *cq = &qcq->cq;
- u32 work_done, flags = 0;
-
- work_done = ionic_cq_service(cq, budget, cb, done_cb, done_arg);
-
- if (work_done < budget && napi_complete_done(napi, work_done)) {
- flags |= IONIC_INTR_CRED_UNMASK;
- DEBUG_STATS_INTR_REARM(cq->bound_intr);
- }
-
- if (work_done || flags) {
- flags |= IONIC_INTR_CRED_RESET_COALESCE;
- ionic_intr_credits(cq->lif->ionic->idev.intr_ctrl,
- cq->bound_intr->index,
- work_done, flags);
- }
-
- DEBUG_STATS_NAPI_POLL(qcq, work_done);
-
- return work_done;
-}
-
static void ionic_dev_cmd_clean(struct ionic *ionic)
{
union ionic_dev_cmd_regs *regs = ionic->idev.dev_cmd_regs;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
index 2a1885da..ff20a2a 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
@@ -179,36 +179,28 @@ static const struct ionic_stat_desc ionic_dbg_napi_stats_desc[] = {
static void ionic_get_lif_stats(struct ionic_lif *lif,
struct ionic_lif_sw_stats *stats)
{
- struct ionic_tx_stats *tstats;
- struct ionic_rx_stats *rstats;
+ struct ionic_tx_stats *txstats;
+ struct ionic_rx_stats *rxstats;
struct rtnl_link_stats64 ns;
- struct ionic_qcq *txqcq;
- struct ionic_qcq *rxqcq;
int q_num;
memset(stats, 0, sizeof(*stats));
for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
- txqcq = lif_to_txqcq(lif, q_num);
- if (txqcq && txqcq->stats) {
- tstats = &txqcq->stats->tx;
- stats->tx_packets += tstats->pkts;
- stats->tx_bytes += tstats->bytes;
- stats->tx_tso += tstats->tso;
- stats->tx_tso_bytes += tstats->tso_bytes;
- stats->tx_csum_none += tstats->csum_none;
- stats->tx_csum += tstats->csum;
- }
+ txstats = &lif->txqstats[q_num];
+ stats->tx_packets += txstats->pkts;
+ stats->tx_bytes += txstats->bytes;
+ stats->tx_tso += txstats->tso;
+ stats->tx_tso_bytes += txstats->tso_bytes;
+ stats->tx_csum_none += txstats->csum_none;
+ stats->tx_csum += txstats->csum;
- rxqcq = lif_to_rxqcq(lif, q_num);
- if (rxqcq && rxqcq->stats) {
- rstats = &rxqcq->stats->rx;
- stats->rx_packets += rstats->pkts;
- stats->rx_bytes += rstats->bytes;
- stats->rx_csum_none += rstats->csum_none;
- stats->rx_csum_complete += rstats->csum_complete;
- stats->rx_csum_error += rstats->csum_error;
- }
+ rxstats = &lif->rxqstats[q_num];
+ stats->rx_packets += rxstats->pkts;
+ stats->rx_bytes += rxstats->bytes;
+ stats->rx_csum_none += rxstats->csum_none;
+ stats->rx_csum_complete += rxstats->csum_complete;
+ stats->rx_csum_error += rxstats->csum_error;
}
ionic_get_stats64(lif->netdev, &ns);
@@ -371,7 +363,7 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf)
}
for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
- txstats = &lif_to_txstats(lif, q_num);
+ txstats = &lif->txqstats[q_num];
for (i = 0; i < IONIC_NUM_TX_STATS; i++) {
**buf = IONIC_READ_STAT64(txstats,
@@ -381,7 +373,7 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf)
if (test_bit(IONIC_LIF_F_UP, lif->state) &&
test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) {
- txqcq = lif_to_txqcq(lif, q_num);
+ txqcq = lif->txqcqs[q_num];
for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) {
**buf = IONIC_READ_STAT64(&txqcq->q,
&ionic_txq_stats_desc[i]);
@@ -405,7 +397,7 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf)
}
for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
- rxstats = &lif_to_rxstats(lif, q_num);
+ rxstats = &lif->rxqstats[q_num];
for (i = 0; i < IONIC_NUM_RX_STATS; i++) {
**buf = IONIC_READ_STAT64(rxstats,
@@ -415,7 +407,7 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf)
if (test_bit(IONIC_LIF_F_UP, lif->state) &&
test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) {
- rxqcq = lif_to_rxqcq(lif, q_num);
+ rxqcq = lif->rxqcqs[q_num];
for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
**buf = IONIC_READ_STAT64(&rxqcq->cq,
&ionic_dbg_cq_stats_desc[i]);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index def65fe..7225251 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -22,7 +22,7 @@ static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell,
ionic_desc_cb cb_func, void *cb_arg)
{
- DEBUG_STATS_TXQ_POST(q_to_qcq(q), q->head->desc, ring_dbell);
+ DEBUG_STATS_TXQ_POST(q, ring_dbell);
ionic_q_post(q, ring_dbell, cb_func, cb_arg);
}
@@ -32,7 +32,7 @@ static inline void ionic_rxq_post(struct ionic_queue *q, bool ring_dbell,
{
ionic_q_post(q, ring_dbell, cb_func, cb_arg);
- DEBUG_STATS_RX_BUFF_CNT(q_to_qcq(q));
+ DEBUG_STATS_RX_BUFF_CNT(q);
}
static inline struct netdev_queue *q_to_ndq(struct ionic_queue *q)
@@ -49,7 +49,7 @@ static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q,
struct sk_buff *skb;
netdev = lif->netdev;
- stats = q_to_rx_stats(q);
+ stats = &q->lif->rxqstats[q->index];
if (frags)
skb = napi_get_frags(&q_to_qcq(q)->napi);
@@ -235,14 +235,14 @@ static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
return false;
/* check for empty queue */
- if (q->tail->index == q->head->index)
+ if (q->tail_idx == q->head_idx)
return false;
- desc_info = q->tail;
- if (desc_info->index != le16_to_cpu(comp->comp_index))
+ if (q->tail_idx != le16_to_cpu(comp->comp_index))
return false;
- q->tail = desc_info->next;
+ desc_info = &q->info[q->tail_idx];
+ q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
/* clean the related q entry, only one per qc completion */
ionic_rx_clean(q, desc_info, cq_info, desc_info->cb_arg);
@@ -266,40 +266,49 @@ void ionic_rx_flush(struct ionic_cq *cq)
work_done, IONIC_INTR_CRED_RESET_COALESCE);
}
-static struct page *ionic_rx_page_alloc(struct ionic_queue *q,
- dma_addr_t *dma_addr)
+static int ionic_rx_page_alloc(struct ionic_queue *q,
+ struct ionic_page_info *page_info)
{
struct ionic_lif *lif = q->lif;
struct ionic_rx_stats *stats;
struct net_device *netdev;
struct device *dev;
- struct page *page;
netdev = lif->netdev;
dev = lif->ionic->dev;
stats = q_to_rx_stats(q);
- page = alloc_page(GFP_ATOMIC);
- if (unlikely(!page)) {
- net_err_ratelimited("%s: Page alloc failed on %s!\n",
+
+ if (unlikely(!page_info)) {
+ net_err_ratelimited("%s: %s invalid page_info in alloc\n",
+ netdev->name, q->name);
+ return -EINVAL;
+ }
+
+ page_info->page = dev_alloc_page();
+ if (unlikely(!page_info->page)) {
+ net_err_ratelimited("%s: %s page alloc failed\n",
netdev->name, q->name);
stats->alloc_err++;
- return NULL;
+ return -ENOMEM;
}
- *dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
- if (unlikely(dma_mapping_error(dev, *dma_addr))) {
- __free_page(page);
- net_err_ratelimited("%s: DMA single map failed on %s!\n",
+ page_info->dma_addr = dma_map_page(dev, page_info->page, 0, PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(dev, page_info->dma_addr))) {
+ put_page(page_info->page);
+ page_info->dma_addr = 0;
+ page_info->page = NULL;
+ net_err_ratelimited("%s: %s dma map failed\n",
netdev->name, q->name);
stats->dma_map_err++;
- return NULL;
+ return -EIO;
}
- return page;
+ return 0;
}
-static void ionic_rx_page_free(struct ionic_queue *q, struct page *page,
- dma_addr_t dma_addr)
+static void ionic_rx_page_free(struct ionic_queue *q,
+ struct ionic_page_info *page_info)
{
struct ionic_lif *lif = q->lif;
struct net_device *netdev;
@@ -308,15 +317,23 @@ static void ionic_rx_page_free(struct ionic_queue *q, struct page *page,
netdev = lif->netdev;
dev = lif->ionic->dev;
- if (unlikely(!page)) {
- net_err_ratelimited("%s: Trying to free unallocated buffer on %s!\n",
+ if (unlikely(!page_info)) {
+ net_err_ratelimited("%s: %s invalid page_info in free\n",
netdev->name, q->name);
return;
}
- dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
+ if (unlikely(!page_info->page)) {
+ net_err_ratelimited("%s: %s invalid page in free\n",
+ netdev->name, q->name);
+ return;
+ }
- __free_page(page);
+ dma_unmap_page(dev, page_info->dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
+
+ put_page(page_info->page);
+ page_info->dma_addr = 0;
+ page_info->page = NULL;
}
void ionic_rx_fill(struct ionic_queue *q)
@@ -338,7 +355,7 @@ void ionic_rx_fill(struct ionic_queue *q)
for (i = ionic_q_space_avail(q); i; i--) {
remain_len = len;
- desc_info = q->head;
+ desc_info = &q->info[q->head_idx];
desc = desc_info->desc;
sg_desc = desc_info->sg_desc;
page_info = &desc_info->pages[0];
@@ -352,8 +369,7 @@ void ionic_rx_fill(struct ionic_queue *q)
desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG :
IONIC_RXQ_DESC_OPCODE_SIMPLE;
desc_info->npages = nfrags;
- page_info->page = ionic_rx_page_alloc(q, &page_info->dma_addr);
- if (unlikely(!page_info->page)) {
+ if (unlikely(ionic_rx_page_alloc(q, page_info))) {
desc->addr = 0;
desc->len = 0;
return;
@@ -370,8 +386,7 @@ void ionic_rx_fill(struct ionic_queue *q)
continue;
sg_elem = &sg_desc->elems[j];
- page_info->page = ionic_rx_page_alloc(q, &page_info->dma_addr);
- if (unlikely(!page_info->page)) {
+ if (unlikely(ionic_rx_page_alloc(q, page_info))) {
sg_elem->addr = 0;
sg_elem->len = 0;
return;
@@ -387,7 +402,7 @@ void ionic_rx_fill(struct ionic_queue *q)
}
ionic_dbell_ring(q->lif->kern_dbpage, q->hw_type,
- q->dbval | q->head->index);
+ q->dbval | q->head_idx);
}
static void ionic_rx_fill_cb(void *arg)
@@ -397,25 +412,23 @@ static void ionic_rx_fill_cb(void *arg)
void ionic_rx_empty(struct ionic_queue *q)
{
- struct ionic_desc_info *cur;
+ struct ionic_desc_info *desc_info;
struct ionic_rxq_desc *desc;
unsigned int i;
+ u16 idx;
- for (cur = q->tail; cur != q->head; cur = cur->next) {
- desc = cur->desc;
+ idx = q->tail_idx;
+ while (idx != q->head_idx) {
+ desc_info = &q->info[idx];
+ desc = desc_info->desc;
desc->addr = 0;
desc->len = 0;
- for (i = 0; i < cur->npages; i++) {
- if (likely(cur->pages[i].page)) {
- ionic_rx_page_free(q, cur->pages[i].page,
- cur->pages[i].dma_addr);
- cur->pages[i].page = NULL;
- cur->pages[i].dma_addr = 0;
- }
- }
+ for (i = 0; i < desc_info->npages; i++)
+ ionic_rx_page_free(q, &desc_info->pages[i]);
- cur->cb_arg = NULL;
+ desc_info->cb_arg = NULL;
+ idx = (idx + 1) & (q->num_descs - 1);
}
}
@@ -500,7 +513,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget)
lif = rxcq->bound_q->lif;
idev = &lif->ionic->idev;
- txcq = &lif->txqcqs[qi].qcq->cq;
+ txcq = &lif->txqcqs[qi]->cq;
tx_work_done = ionic_cq_service(txcq, lif->tx_budget,
ionic_tx_service, NULL, NULL);
@@ -615,6 +628,7 @@ static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
struct ionic_txq_comp *comp = cq_info->cq_desc;
struct ionic_queue *q = cq->bound_q;
struct ionic_desc_info *desc_info;
+ u16 index;
if (!color_match(comp->color, cq->done_color))
return false;
@@ -623,12 +637,13 @@ static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
* several q entries completed for each cq completion
*/
do {
- desc_info = q->tail;
- q->tail = desc_info->next;
- ionic_tx_clean(q, desc_info, cq->tail, desc_info->cb_arg);
+ desc_info = &q->info[q->tail_idx];
+ index = q->tail_idx;
+ q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
+ ionic_tx_clean(q, desc_info, cq_info, desc_info->cb_arg);
desc_info->cb = NULL;
desc_info->cb_arg = NULL;
- } while (desc_info->index != le16_to_cpu(comp->comp_index));
+ } while (index != le16_to_cpu(comp->comp_index));
return true;
}
@@ -648,16 +663,14 @@ void ionic_tx_flush(struct ionic_cq *cq)
void ionic_tx_empty(struct ionic_queue *q)
{
struct ionic_desc_info *desc_info;
- int done = 0;
/* walk the not completed tx entries, if any */
- while (q->head != q->tail) {
- desc_info = q->tail;
- q->tail = desc_info->next;
+ while (q->head_idx != q->tail_idx) {
+ desc_info = &q->info[q->tail_idx];
+ q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
ionic_tx_clean(q, desc_info, NULL, desc_info->cb_arg);
desc_info->cb = NULL;
desc_info->cb_arg = NULL;
- done++;
}
}
@@ -741,8 +754,8 @@ static void ionic_tx_tso_post(struct ionic_queue *q, struct ionic_txq_desc *desc
static struct ionic_txq_desc *ionic_tx_tso_next(struct ionic_queue *q,
struct ionic_txq_sg_elem **elem)
{
- struct ionic_txq_sg_desc *sg_desc = q->head->sg_desc;
- struct ionic_txq_desc *desc = q->head->desc;
+ struct ionic_txq_sg_desc *sg_desc = q->info[q->head_idx].txq_sg_desc;
+ struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc;
*elem = sg_desc->elems;
return desc;
@@ -751,13 +764,13 @@ static struct ionic_txq_desc *ionic_tx_tso_next(struct ionic_queue *q,
static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
{
struct ionic_tx_stats *stats = q_to_tx_stats(q);
- struct ionic_desc_info *abort = q->head;
+ struct ionic_desc_info *rewind_desc_info;
struct device *dev = q->lif->ionic->dev;
- struct ionic_desc_info *rewind = abort;
struct ionic_txq_sg_elem *elem;
struct ionic_txq_desc *desc;
unsigned int frag_left = 0;
unsigned int offset = 0;
+ u16 abort = q->head_idx;
unsigned int len_left;
dma_addr_t desc_addr;
unsigned int hdrlen;
@@ -765,6 +778,7 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
unsigned int seglen;
u64 total_bytes = 0;
u64 total_pkts = 0;
+ u16 rewind = abort;
unsigned int left;
unsigned int len;
unsigned int mss;
@@ -909,19 +923,20 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
return 0;
err_out_abort:
- while (rewind->desc != q->head->desc) {
- ionic_tx_clean(q, rewind, NULL, NULL);
- rewind = rewind->next;
+ while (rewind != q->head_idx) {
+ rewind_desc_info = &q->info[rewind];
+ ionic_tx_clean(q, rewind_desc_info, NULL, NULL);
+ rewind = (rewind + 1) & (q->num_descs - 1);
}
- q->head = abort;
+ q->head_idx = abort;
return -ENOMEM;
}
static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb)
{
+ struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc;
struct ionic_tx_stats *stats = q_to_tx_stats(q);
- struct ionic_txq_desc *desc = q->head->desc;
struct device *dev = q->lif->ionic->dev;
dma_addr_t dma_addr;
bool has_vlan;
@@ -960,8 +975,8 @@ static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb)
static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb)
{
+ struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc;
struct ionic_tx_stats *stats = q_to_tx_stats(q);
- struct ionic_txq_desc *desc = q->head->desc;
struct device *dev = q->lif->ionic->dev;
dma_addr_t dma_addr;
bool has_vlan;
@@ -995,7 +1010,7 @@ static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb)
static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb)
{
- struct ionic_txq_sg_desc *sg_desc = q->head->sg_desc;
+ struct ionic_txq_sg_desc *sg_desc = q->info[q->head_idx].txq_sg_desc;
unsigned int len_left = skb->len - skb_headlen(skb);
struct ionic_txq_sg_elem *elem = sg_desc->elems;
struct ionic_tx_stats *stats = q_to_tx_stats(q);
@@ -1104,9 +1119,9 @@ netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev)
return NETDEV_TX_OK;
}
- if (unlikely(!lif_to_txqcq(lif, queue_index)))
+ if (unlikely(queue_index >= lif->nxqs))
queue_index = 0;
- q = lif_to_txq(lif, queue_index);
+ q = &lif->txqcqs[queue_index]->q;
ndescs = ionic_tx_descs_needed(q, skb);
if (ndescs < 0)
diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig
index 8f743d8..4366c7a 100644
--- a/drivers/net/ethernet/qlogic/Kconfig
+++ b/drivers/net/ethernet/qlogic/Kconfig
@@ -80,7 +80,7 @@
select CRC8
select NET_DEVLINK
help
- This enables the support for ...
+ This enables the support for Marvell FastLinQ adapters family.
config QED_LL2
bool
@@ -100,7 +100,8 @@
depends on QED
imply PTP_1588_CLOCK
help
- This enables the support for ...
+ This enables the support for Marvell FastLinQ adapters family,
+ ethernet driver.
config QED_RDMA
bool
diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile
index f947b10..8251755 100644
--- a/drivers/net/ethernet/qlogic/qed/Makefile
+++ b/drivers/net/ethernet/qlogic/qed/Makefile
@@ -9,6 +9,7 @@
qed_dcbx.o \
qed_debug.o \
qed_dev.o \
+ qed_devlink.o \
qed_hw.o \
qed_init_fw_funcs.o \
qed_init_ops.o \
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index b2a7b53..f34b25a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -807,6 +807,7 @@ struct qed_dev {
struct qed_llh_info *p_llh_info;
/* Linux specific here */
+ struct qed_dev_info common_dev_info;
struct qede_dev *edev;
struct pci_dev *pdev;
u32 flags;
@@ -849,7 +850,6 @@ struct qed_dev {
u32 rdma_max_srq_sge;
u16 tunn_feature_mask;
- struct devlink *dl;
bool iwarp_cmt;
};
@@ -981,6 +981,7 @@ void qed_bw_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt);
u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
u32 input_len, u8 *input_buf,
u32 max_size, u8 *unzip_buf);
+int qed_recovery_process(struct qed_dev *cdev);
void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn);
void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
enum qed_hw_err_type err_type);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index b8f076e..f7f08e6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -3973,6 +3973,7 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
struct qed_mcp_link_speed_params *ext_speed;
struct qed_mcp_link_capabilities *p_caps;
struct qed_mcp_link_params *link;
+ int i;
/* Read global nvm_cfg address */
nvm_cfg_addr = qed_rd(p_hwfn, p_ptt, MISC_REG_GEN_PURP_CR0);
@@ -4290,6 +4291,14 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
__set_bit(QED_DEV_CAP_ROCE,
&p_hwfn->hw_info.device_capabilities);
+ /* Read device serial number information from shmem */
+ addr = MCP_REG_SCRATCH + nvm_cfg1_offset +
+ offsetof(struct nvm_cfg1, glob) +
+ offsetof(struct nvm_cfg1_glob, serial_number);
+
+ for (i = 0; i < 4; i++)
+ p_hwfn->hw_info.part_num[i] = qed_rd(p_hwfn, p_ptt, addr + i * 4);
+
return qed_mcp_fill_shmem_func_info(p_hwfn, p_ptt);
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_devlink.c b/drivers/net/ethernet/qlogic/qed/qed_devlink.c
new file mode 100644
index 0000000..cf7f4da
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_devlink.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Marvell/Qlogic FastLinQ NIC driver
+ *
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/qed/qed_if.h>
+#include <linux/vmalloc.h>
+#include "qed.h"
+#include "qed_devlink.h"
+
+enum qed_devlink_param_id {
+ QED_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+ QED_DEVLINK_PARAM_ID_IWARP_CMT,
+};
+
+struct qed_fw_fatal_ctx {
+ enum qed_hw_err_type err_type;
+};
+
+int qed_report_fatal_error(struct devlink *devlink, enum qed_hw_err_type err_type)
+{
+ struct qed_devlink *qdl = devlink_priv(devlink);
+ struct qed_fw_fatal_ctx fw_fatal_ctx = {
+ .err_type = err_type,
+ };
+
+ if (qdl->fw_reporter)
+ devlink_health_report(qdl->fw_reporter,
+ "Fatal error occurred", &fw_fatal_ctx);
+
+ return 0;
+}
+
+static int
+qed_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg, void *priv_ctx,
+ struct netlink_ext_ack *extack)
+{
+ struct qed_devlink *qdl = devlink_health_reporter_priv(reporter);
+ struct qed_fw_fatal_ctx *fw_fatal_ctx = priv_ctx;
+ struct qed_dev *cdev = qdl->cdev;
+ u32 dbg_data_buf_size;
+ u8 *p_dbg_data_buf;
+ int err;
+
+ /* Having context means that was a dump request after fatal,
+ * so we enable extra debugging while gathering the dump,
+ * just in case
+ */
+ cdev->print_dbg_data = fw_fatal_ctx ? true : false;
+
+ dbg_data_buf_size = qed_dbg_all_data_size(cdev);
+ p_dbg_data_buf = vzalloc(dbg_data_buf_size);
+ if (!p_dbg_data_buf) {
+ DP_NOTICE(cdev,
+ "Failed to allocate memory for a debug data buffer\n");
+ return -ENOMEM;
+ }
+
+ err = qed_dbg_all_data(cdev, p_dbg_data_buf);
+ if (err) {
+ DP_NOTICE(cdev, "Failed to obtain debug data\n");
+ vfree(p_dbg_data_buf);
+ return err;
+ }
+
+ err = devlink_fmsg_binary_pair_put(fmsg, "dump_data",
+ p_dbg_data_buf, dbg_data_buf_size);
+
+ vfree(p_dbg_data_buf);
+
+ return err;
+}
+
+static int
+qed_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
+ void *priv_ctx,
+ struct netlink_ext_ack *extack)
+{
+ struct qed_devlink *qdl = devlink_health_reporter_priv(reporter);
+ struct qed_dev *cdev = qdl->cdev;
+
+ qed_recovery_process(cdev);
+
+ return 0;
+}
+
+static const struct devlink_health_reporter_ops qed_fw_fatal_reporter_ops = {
+ .name = "fw_fatal",
+ .recover = qed_fw_fatal_reporter_recover,
+ .dump = qed_fw_fatal_reporter_dump,
+};
+
+#define QED_REPORTER_FW_GRACEFUL_PERIOD 1200000
+
+void qed_fw_reporters_create(struct devlink *devlink)
+{
+ struct qed_devlink *dl = devlink_priv(devlink);
+
+ dl->fw_reporter = devlink_health_reporter_create(devlink, &qed_fw_fatal_reporter_ops,
+ QED_REPORTER_FW_GRACEFUL_PERIOD, dl);
+ if (IS_ERR(dl->fw_reporter)) {
+ DP_NOTICE(dl->cdev, "Failed to create fw reporter, err = %ld\n",
+ PTR_ERR(dl->fw_reporter));
+ dl->fw_reporter = NULL;
+ }
+}
+
+void qed_fw_reporters_destroy(struct devlink *devlink)
+{
+ struct qed_devlink *dl = devlink_priv(devlink);
+ struct devlink_health_reporter *rep;
+
+ rep = dl->fw_reporter;
+
+ if (!IS_ERR_OR_NULL(rep))
+ devlink_health_reporter_destroy(rep);
+}
+
+static int qed_dl_param_get(struct devlink *dl, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct qed_devlink *qed_dl = devlink_priv(dl);
+ struct qed_dev *cdev;
+
+ cdev = qed_dl->cdev;
+ ctx->val.vbool = cdev->iwarp_cmt;
+
+ return 0;
+}
+
+static int qed_dl_param_set(struct devlink *dl, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct qed_devlink *qed_dl = devlink_priv(dl);
+ struct qed_dev *cdev;
+
+ cdev = qed_dl->cdev;
+ cdev->iwarp_cmt = ctx->val.vbool;
+
+ return 0;
+}
+
+static const struct devlink_param qed_devlink_params[] = {
+ DEVLINK_PARAM_DRIVER(QED_DEVLINK_PARAM_ID_IWARP_CMT,
+ "iwarp_cmt", DEVLINK_PARAM_TYPE_BOOL,
+ BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+ qed_dl_param_get, qed_dl_param_set, NULL),
+};
+
+static int qed_devlink_info_get(struct devlink *devlink,
+ struct devlink_info_req *req,
+ struct netlink_ext_ack *extack)
+{
+ struct qed_devlink *qed_dl = devlink_priv(devlink);
+ struct qed_dev *cdev = qed_dl->cdev;
+ struct qed_dev_info *dev_info;
+ char buf[100];
+ int err;
+
+ dev_info = &cdev->common_dev_info;
+
+ err = devlink_info_driver_name_put(req, KBUILD_MODNAME);
+ if (err)
+ return err;
+
+ memcpy(buf, cdev->hwfns[0].hw_info.part_num, sizeof(cdev->hwfns[0].hw_info.part_num));
+ buf[sizeof(cdev->hwfns[0].hw_info.part_num)] = 0;
+
+ if (buf[0]) {
+ err = devlink_info_board_serial_number_put(req, buf);
+ if (err)
+ return err;
+ }
+
+ snprintf(buf, sizeof(buf), "%d.%d.%d.%d",
+ GET_MFW_FIELD(dev_info->mfw_rev, QED_MFW_VERSION_3),
+ GET_MFW_FIELD(dev_info->mfw_rev, QED_MFW_VERSION_2),
+ GET_MFW_FIELD(dev_info->mfw_rev, QED_MFW_VERSION_1),
+ GET_MFW_FIELD(dev_info->mfw_rev, QED_MFW_VERSION_0));
+
+ err = devlink_info_version_stored_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, buf);
+ if (err)
+ return err;
+
+ snprintf(buf, sizeof(buf), "%d.%d.%d.%d",
+ dev_info->fw_major,
+ dev_info->fw_minor,
+ dev_info->fw_rev,
+ dev_info->fw_eng);
+
+ return devlink_info_version_running_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_FW_APP, buf);
+}
+
+static const struct devlink_ops qed_dl_ops = {
+ .info_get = qed_devlink_info_get,
+};
+
+struct devlink *qed_devlink_register(struct qed_dev *cdev)
+{
+ union devlink_param_value value;
+ struct qed_devlink *qdevlink;
+ struct devlink *dl;
+ int rc;
+
+ dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink));
+ if (!dl)
+ return ERR_PTR(-ENOMEM);
+
+ qdevlink = devlink_priv(dl);
+ qdevlink->cdev = cdev;
+
+ rc = devlink_register(dl, &cdev->pdev->dev);
+ if (rc)
+ goto err_free;
+
+ rc = devlink_params_register(dl, qed_devlink_params,
+ ARRAY_SIZE(qed_devlink_params));
+ if (rc)
+ goto err_unregister;
+
+ value.vbool = false;
+ devlink_param_driverinit_value_set(dl,
+ QED_DEVLINK_PARAM_ID_IWARP_CMT,
+ value);
+
+ devlink_params_publish(dl);
+ cdev->iwarp_cmt = false;
+
+ qed_fw_reporters_create(dl);
+
+ return dl;
+
+err_unregister:
+ devlink_unregister(dl);
+
+err_free:
+ devlink_free(dl);
+
+ return ERR_PTR(rc);
+}
+
+void qed_devlink_unregister(struct devlink *devlink)
+{
+ if (!devlink)
+ return;
+
+ qed_fw_reporters_destroy(devlink);
+
+ devlink_params_unregister(devlink, qed_devlink_params,
+ ARRAY_SIZE(qed_devlink_params));
+
+ devlink_unregister(devlink);
+ devlink_free(devlink);
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_devlink.h b/drivers/net/ethernet/qlogic/qed/qed_devlink.h
new file mode 100644
index 0000000..ccc7d1d
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_devlink.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Marvell/Qlogic FastLinQ NIC driver
+ *
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+#ifndef _QED_DEVLINK_H
+#define _QED_DEVLINK_H
+
+#include <linux/qed/qed_if.h>
+#include <net/devlink.h>
+
+struct devlink *qed_devlink_register(struct qed_dev *cdev);
+void qed_devlink_unregister(struct devlink *devlink);
+
+void qed_fw_reporters_create(struct devlink *devlink);
+void qed_fw_reporters_destroy(struct devlink *devlink);
+
+int qed_report_fatal_error(struct devlink *dl, enum qed_hw_err_type err_type);
+
+#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index f39f629..5b149ce 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -39,6 +39,7 @@
#include "qed_hw.h"
#include "qed_selftest.h"
#include "qed_debug.h"
+#include "qed_devlink.h"
#define QED_ROCE_QPS (8192)
#define QED_ROCE_DPIS (8)
@@ -478,6 +479,7 @@ int qed_fill_dev_info(struct qed_dev *cdev,
}
dev_info->mtu = hw_info->mtu;
+ cdev->common_dev_info = *dev_info;
return 0;
}
@@ -510,107 +512,6 @@ static int qed_set_power_state(struct qed_dev *cdev, pci_power_t state)
return 0;
}
-struct qed_devlink {
- struct qed_dev *cdev;
-};
-
-enum qed_devlink_param_id {
- QED_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
- QED_DEVLINK_PARAM_ID_IWARP_CMT,
-};
-
-static int qed_dl_param_get(struct devlink *dl, u32 id,
- struct devlink_param_gset_ctx *ctx)
-{
- struct qed_devlink *qed_dl;
- struct qed_dev *cdev;
-
- qed_dl = devlink_priv(dl);
- cdev = qed_dl->cdev;
- ctx->val.vbool = cdev->iwarp_cmt;
-
- return 0;
-}
-
-static int qed_dl_param_set(struct devlink *dl, u32 id,
- struct devlink_param_gset_ctx *ctx)
-{
- struct qed_devlink *qed_dl;
- struct qed_dev *cdev;
-
- qed_dl = devlink_priv(dl);
- cdev = qed_dl->cdev;
- cdev->iwarp_cmt = ctx->val.vbool;
-
- return 0;
-}
-
-static const struct devlink_param qed_devlink_params[] = {
- DEVLINK_PARAM_DRIVER(QED_DEVLINK_PARAM_ID_IWARP_CMT,
- "iwarp_cmt", DEVLINK_PARAM_TYPE_BOOL,
- BIT(DEVLINK_PARAM_CMODE_RUNTIME),
- qed_dl_param_get, qed_dl_param_set, NULL),
-};
-
-static const struct devlink_ops qed_dl_ops;
-
-static int qed_devlink_register(struct qed_dev *cdev)
-{
- union devlink_param_value value;
- struct qed_devlink *qed_dl;
- struct devlink *dl;
- int rc;
-
- dl = devlink_alloc(&qed_dl_ops, sizeof(*qed_dl));
- if (!dl)
- return -ENOMEM;
-
- qed_dl = devlink_priv(dl);
-
- cdev->dl = dl;
- qed_dl->cdev = cdev;
-
- rc = devlink_register(dl, &cdev->pdev->dev);
- if (rc)
- goto err_free;
-
- rc = devlink_params_register(dl, qed_devlink_params,
- ARRAY_SIZE(qed_devlink_params));
- if (rc)
- goto err_unregister;
-
- value.vbool = false;
- devlink_param_driverinit_value_set(dl,
- QED_DEVLINK_PARAM_ID_IWARP_CMT,
- value);
-
- devlink_params_publish(dl);
- cdev->iwarp_cmt = false;
-
- return 0;
-
-err_unregister:
- devlink_unregister(dl);
-
-err_free:
- cdev->dl = NULL;
- devlink_free(dl);
-
- return rc;
-}
-
-static void qed_devlink_unregister(struct qed_dev *cdev)
-{
- if (!cdev->dl)
- return;
-
- devlink_params_unregister(cdev->dl, qed_devlink_params,
- ARRAY_SIZE(qed_devlink_params));
-
- devlink_unregister(cdev->dl);
- devlink_free(cdev->dl);
-}
-
/* probing */
static struct qed_dev *qed_probe(struct pci_dev *pdev,
struct qed_probe_params *params)
@@ -639,12 +540,6 @@ static struct qed_dev *qed_probe(struct pci_dev *pdev,
}
DP_INFO(cdev, "PCI init completed successfully\n");
- rc = qed_devlink_register(cdev);
- if (rc) {
- DP_INFO(cdev, "Failed to register devlink.\n");
- goto err2;
- }
-
rc = qed_hw_prepare(cdev, QED_PCI_DEFAULT);
if (rc) {
DP_ERR(cdev, "hw prepare failed\n");
@@ -674,8 +569,6 @@ static void qed_remove(struct qed_dev *cdev)
qed_set_power_state(cdev, PCI_D3hot);
- qed_devlink_unregister(cdev);
-
qed_free_cdev(cdev);
}
@@ -2924,7 +2817,7 @@ static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode)
return status;
}
-static int qed_recovery_process(struct qed_dev *cdev)
+int qed_recovery_process(struct qed_dev *cdev)
{
struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
struct qed_ptt *p_ptt;
@@ -3112,6 +3005,9 @@ const struct qed_common_ops qed_common_ops_pass = {
.get_link = &qed_get_current_link,
.drain = &qed_drain,
.update_msglvl = &qed_init_dp,
+ .devlink_register = qed_devlink_register,
+ .devlink_unregister = qed_devlink_unregister,
+ .report_fatal_error = qed_report_fatal_error,
.dbg_all_data = &qed_dbg_all_data,
.dbg_all_data_size = &qed_dbg_all_data_size,
.chain_alloc = &qed_chain_alloc,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index a4bcde5..4394a4d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -1151,7 +1151,6 @@ qed_rdma_destroy_cq(void *rdma_cxt,
DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", in_params->icid);
p_ramrod_res =
- (struct rdma_destroy_cq_output_params *)
dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
sizeof(struct rdma_destroy_cq_output_params),
&ramrod_res_phys, GFP_KERNEL);
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 803c1fc..3efc5899 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -172,6 +172,7 @@ struct qede_dev {
struct qed_dev *cdev;
struct net_device *ndev;
struct pci_dev *pdev;
+ struct devlink *devlink;
u32 dp_module;
u8 dp_level;
@@ -263,6 +264,7 @@ struct qede_dev {
struct bpf_prog *xdp_prog;
+ enum qed_hw_err_type last_err_type;
unsigned long err_flags;
#define QEDE_ERR_IS_HANDLED 31
#define QEDE_ERR_ATTN_CLR_EN 0
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 140a392..20d2296 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -1170,10 +1170,23 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
rc = -ENOMEM;
goto err2;
}
+
+ edev->devlink = qed_ops->common->devlink_register(cdev);
+ if (IS_ERR(edev->devlink)) {
+ DP_NOTICE(edev, "Cannot register devlink\n");
+ edev->devlink = NULL;
+ /* Go on, we can live without devlink */
+ }
} else {
struct net_device *ndev = pci_get_drvdata(pdev);
edev = netdev_priv(ndev);
+
+ if (edev->devlink) {
+ struct qed_devlink *qdl = devlink_priv(edev->devlink);
+
+ qdl->cdev = cdev;
+ }
edev->cdev = cdev;
memset(&edev->stats, 0, sizeof(edev->stats));
memcpy(&edev->dev_info, &dev_info, sizeof(dev_info));
@@ -1225,7 +1238,10 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
err4:
qede_rdma_dev_remove(edev, (mode == QEDE_PROBE_RECOVERY));
err3:
- free_netdev(edev->ndev);
+ if (mode != QEDE_PROBE_RECOVERY)
+ free_netdev(edev->ndev);
+ else
+ edev->cdev = NULL;
err2:
qed_ops->common->slowpath_stop(cdev);
err1:
@@ -1296,6 +1312,11 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
qed_ops->common->slowpath_stop(cdev);
if (system_state == SYSTEM_POWER_OFF)
return;
+
+ if (mode != QEDE_REMOVE_RECOVERY && edev->devlink) {
+ qed_ops->common->devlink_unregister(edev->devlink);
+ edev->devlink = NULL;
+ }
qed_ops->common->remove(cdev);
edev->cdev = NULL;
@@ -2455,7 +2476,8 @@ static int qede_close(struct net_device *ndev)
qede_unload(edev, QEDE_UNLOAD_NORMAL, false);
- edev->ops->common->update_drv_state(edev->cdev, false);
+ if (edev->cdev)
+ edev->ops->common->update_drv_state(edev->cdev, false);
return 0;
}
@@ -2577,19 +2599,12 @@ static void qede_atomic_hw_err_handler(struct qede_dev *edev)
static void qede_generic_hw_err_handler(struct qede_dev *edev)
{
- struct qed_dev *cdev = edev->cdev;
-
DP_NOTICE(edev,
"Generic sleepable HW error handling started - err_flags 0x%lx\n",
edev->err_flags);
- /* Trigger a recovery process.
- * This is placed in the sleep requiring section just to make
- * sure it is the last one, and that all the other operations
- * were completed.
- */
- if (test_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags))
- edev->ops->common->recovery_process(cdev);
+ if (edev->devlink)
+ edev->ops->common->report_fatal_error(edev->devlink, edev->last_err_type);
clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags);
@@ -2643,6 +2658,7 @@ static void qede_schedule_hw_err_handler(void *dev,
return;
}
+ edev->last_err_type = err_type;
qede_set_hw_err_flags(edev, err_type);
qede_atomic_hw_err_handler(edev);
set_bit(QEDE_SP_HW_ERR, &edev->sp_flags);
diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c
index 375a844..362b4f5 100644
--- a/drivers/net/ethernet/qualcomm/qca_uart.c
+++ b/drivers/net/ethernet/qualcomm/qca_uart.c
@@ -167,7 +167,7 @@ static void qca_tty_wakeup(struct serdev_device *serdev)
schedule_work(&qca->tx_work);
}
-static struct serdev_device_ops qca_serdev_ops = {
+static const struct serdev_device_ops qca_serdev_ops = {
.receive_buf = qca_tty_receive,
.write_wakeup = qca_tty_wakeup,
};
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index fc9e662..9e4e6a8 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -617,7 +617,6 @@ struct rtl8169_private {
struct work_struct work;
} wk;
- unsigned irq_enabled:1;
unsigned supports_gmii:1;
unsigned aspm_manageable:1;
dma_addr_t counters_phys_addr;
@@ -1280,12 +1279,10 @@ static void rtl_irq_disable(struct rtl8169_private *tp)
RTL_W32(tp, IntrMask_8125, 0);
else
RTL_W16(tp, IntrMask, 0);
- tp->irq_enabled = 0;
}
static void rtl_irq_enable(struct rtl8169_private *tp)
{
- tp->irq_enabled = 1;
if (rtl_is_8125(tp))
RTL_W32(tp, IntrMask_8125, tp->irq_mask);
else
@@ -4541,8 +4538,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
struct rtl8169_private *tp = dev_instance;
u32 status = rtl_get_events(tp);
- if (!tp->irq_enabled || (status & 0xffff) == 0xffff ||
- !(status & tp->irq_mask))
+ if ((status & 0xffff) == 0xffff || !(status & tp->irq_mask))
return IRQ_NONE;
if (unlikely(status & SYSErr)) {
@@ -4596,10 +4592,8 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
rtl_tx(dev, tp, budget);
- if (work_done < budget) {
- napi_complete_done(napi, work_done);
+ if (work_done < budget && napi_complete_done(napi, work_done))
rtl_irq_enable(tp);
- }
return work_done;
}
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index df89d09..f684296 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -162,7 +162,7 @@ static int ravb_get_mdio_data(struct mdiobb_ctrl *ctrl)
}
/* MDIO bus control struct */
-static struct mdiobb_ops bb_ops = {
+static const struct mdiobb_ops bb_ops = {
.owner = THIS_MODULE,
.set_mdc = ravb_set_mdc,
.set_mdio_dir = ravb_set_mdio_dir,
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index f45331e..586642c 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -1202,7 +1202,7 @@ static void sh_mdc_ctrl(struct mdiobb_ctrl *ctrl, int bit)
}
/* mdio bus control struct */
-static struct mdiobb_ops bb_ops = {
+static const struct mdiobb_ops bb_ops = {
.owner = THIS_MODULE,
.set_mdc = sh_mdc_ctrl,
.set_mdio_dir = sh_mmd_ctrl,
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 4b0b2cf..0b4bcac 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -3955,7 +3955,6 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
.start_stats = efx_port_dummy_op_void,
.pull_stats = efx_port_dummy_op_void,
.stop_stats = efx_port_dummy_op_void,
- .set_id_led = efx_mcdi_set_id_led,
.push_irq_moderation = efx_ef10_push_irq_moderation,
.reconfigure_mac = efx_ef10_mac_reconfigure,
.check_mac_fault = efx_mcdi_mac_check_fault,
@@ -4066,7 +4065,6 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
.start_stats = efx_mcdi_mac_start_stats,
.pull_stats = efx_mcdi_mac_pull_stats,
.stop_stats = efx_mcdi_mac_stop_stats,
- .set_id_led = efx_mcdi_set_id_led,
.push_irq_moderation = efx_ef10_push_irq_moderation,
.reconfigure_mac = efx_ef10_mac_reconfigure,
.check_mac_fault = efx_mcdi_mac_check_fault,
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 4ffda77..12a91c5 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -50,8 +50,7 @@ static int efx_ethtool_phys_id(struct net_device *net_dev,
return 1; /* cycle on/off once per second */
}
- efx->type->set_id_led(efx, mode);
- return 0;
+ return efx_mcdi_set_id_led(efx, mode);
}
static int efx_ethtool_get_regs_len(struct net_device *net_dev)
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index 4002f9a..a48a931a 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -863,13 +863,8 @@ static u16 efx_farch_handle_rx_not_ok(struct efx_rx_queue *rx_queue,
bool rx_ev_tcp_udp_chksum_err, rx_ev_eth_crc_err;
bool rx_ev_frm_trunc, rx_ev_tobe_disc;
bool rx_ev_other_err, rx_ev_pause_frm;
- bool rx_ev_hdr_type, rx_ev_mcast_pkt;
- unsigned rx_ev_pkt_type;
- rx_ev_hdr_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE);
- rx_ev_mcast_pkt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_MCAST_PKT);
rx_ev_tobe_disc = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_TOBE_DISC);
- rx_ev_pkt_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PKT_TYPE);
rx_ev_buf_owner_id_err = EFX_QWORD_FIELD(*event,
FSF_AZ_RX_EV_BUF_OWNER_ID_ERR);
rx_ev_ip_hdr_chksum_err = EFX_QWORD_FIELD(*event,
@@ -918,6 +913,8 @@ static u16 efx_farch_handle_rx_not_ok(struct efx_rx_queue *rx_queue,
rx_ev_tobe_disc ? " [TOBE_DISC]" : "",
rx_ev_pause_frm ? " [PAUSE]" : "");
}
+#else
+ (void) rx_ev_other_err;
#endif
if (efx->net_dev->features & NETIF_F_RXALL)
@@ -2592,7 +2589,6 @@ int efx_farch_filter_remove_safe(struct efx_nic *efx,
enum efx_farch_filter_table_id table_id;
struct efx_farch_filter_table *table;
unsigned int filter_idx;
- struct efx_farch_filter_spec *spec;
int rc;
table_id = efx_farch_filter_id_table_id(filter_id);
@@ -2604,7 +2600,6 @@ int efx_farch_filter_remove_safe(struct efx_nic *efx,
if (filter_idx >= table->size)
return -ENOENT;
down_write(&state->lock);
- spec = &table->spec[filter_idx];
rc = efx_farch_filter_remove(efx, table, filter_idx, priority);
up_write(&state->lock);
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index 5467819..be6bfd6 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -1868,10 +1868,9 @@ int efx_mcdi_handle_assertion(struct efx_nic *efx)
return efx_mcdi_exit_assertion(efx);
}
-void efx_mcdi_set_id_led(struct efx_nic *efx, enum efx_led_mode mode)
+int efx_mcdi_set_id_led(struct efx_nic *efx, enum efx_led_mode mode)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_ID_LED_IN_LEN);
- int rc;
BUILD_BUG_ON(EFX_LED_OFF != MC_CMD_LED_OFF);
BUILD_BUG_ON(EFX_LED_ON != MC_CMD_LED_ON);
@@ -1881,8 +1880,7 @@ void efx_mcdi_set_id_led(struct efx_nic *efx, enum efx_led_mode mode)
MCDI_SET_DWORD(inbuf, SET_ID_LED_IN_STATE, mode);
- rc = efx_mcdi_rpc(efx, MC_CMD_SET_ID_LED, inbuf, sizeof(inbuf),
- NULL, 0, NULL);
+ return efx_mcdi_rpc(efx, MC_CMD_SET_ID_LED, inbuf, sizeof(inbuf), NULL, 0, NULL);
}
static int efx_mcdi_reset_func(struct efx_nic *efx)
diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h
index 658cf34..8aed650 100644
--- a/drivers/net/ethernet/sfc/mcdi.h
+++ b/drivers/net/ethernet/sfc/mcdi.h
@@ -348,7 +348,7 @@ int efx_mcdi_nvram_info(struct efx_nic *efx, unsigned int type,
int efx_new_mcdi_nvram_test_all(struct efx_nic *efx);
int efx_mcdi_nvram_test_all(struct efx_nic *efx);
int efx_mcdi_handle_assertion(struct efx_nic *efx);
-void efx_mcdi_set_id_led(struct efx_nic *efx, enum efx_led_mode mode);
+int efx_mcdi_set_id_led(struct efx_nic *efx, enum efx_led_mode mode);
int efx_mcdi_wol_filter_set_magic(struct efx_nic *efx, const u8 *mac,
int *id_out);
int efx_mcdi_wol_filter_get_magic(struct efx_nic *efx, int *id_out);
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 062462a..338ebb0 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -1217,7 +1217,6 @@ struct efx_udp_tunnel {
* @start_stats: Start the regular fetching of statistics
* @pull_stats: Pull stats from the NIC and wait until they arrive.
* @stop_stats: Stop the regular fetching of statistics
- * @set_id_led: Set state of identifying LED or revert to automatic function
* @push_irq_moderation: Apply interrupt moderation value
* @reconfigure_port: Push loopback/power/txdis changes to the MAC and PHY
* @prepare_enable_fc_tx: Prepare MAC to enable pause frame TX (may be %NULL)
@@ -1362,7 +1361,6 @@ struct efx_nic_type {
void (*start_stats)(struct efx_nic *efx);
void (*pull_stats)(struct efx_nic *efx);
void (*stop_stats)(struct efx_nic *efx);
- void (*set_id_led)(struct efx_nic *efx, enum efx_led_mode mode);
void (*push_irq_moderation)(struct efx_channel *channel);
int (*reconfigure_port)(struct efx_nic *efx);
void (*prepare_enable_fc_tx)(struct efx_nic *efx);
diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c
index e71d6d3..34b9c7d 100644
--- a/drivers/net/ethernet/sfc/selftest.c
+++ b/drivers/net/ethernet/sfc/selftest.c
@@ -67,7 +67,7 @@ static const char *const efx_interrupt_mode_names[] = {
STRING_TABLE_LOOKUP(efx->interrupt_mode, efx_interrupt_mode)
/**
- * efx_loopback_state - persistent state during a loopback selftest
+ * struct efx_loopback_state - persistent state during a loopback selftest
* @flush: Drop all packets in efx_loopback_rx_packet
* @packet_count: Number of packets being used in this test
* @skbs: An array of skbs transmitted
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index a7ea630..16347a6 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -994,7 +994,6 @@ const struct efx_nic_type siena_a0_nic_type = {
.start_stats = efx_mcdi_mac_start_stats,
.pull_stats = efx_mcdi_mac_pull_stats,
.stop_stats = efx_mcdi_mac_stop_stats,
- .set_id_led = efx_mcdi_set_id_led,
.push_irq_moderation = siena_push_irq_moderation,
.reconfigure_mac = siena_mac_reconfigure,
.check_mac_fault = efx_mcdi_mac_check_fault,
diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c
index d950b31..51cd7dc 100644
--- a/drivers/net/ethernet/smsc/epic100.c
+++ b/drivers/net/ethernet/smsc/epic100.c
@@ -374,13 +374,15 @@ static int epic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
ep->mii.phy_id_mask = 0x1f;
ep->mii.reg_num_mask = 0x1f;
- ring_space = pci_alloc_consistent(pdev, TX_TOTAL_SIZE, &ring_dma);
+ ring_space = dma_alloc_coherent(&pdev->dev, TX_TOTAL_SIZE, &ring_dma,
+ GFP_KERNEL);
if (!ring_space)
goto err_out_iounmap;
ep->tx_ring = ring_space;
ep->tx_ring_dma = ring_dma;
- ring_space = pci_alloc_consistent(pdev, RX_TOTAL_SIZE, &ring_dma);
+ ring_space = dma_alloc_coherent(&pdev->dev, RX_TOTAL_SIZE, &ring_dma,
+ GFP_KERNEL);
if (!ring_space)
goto err_out_unmap_tx;
ep->rx_ring = ring_space;
@@ -493,9 +495,11 @@ static int epic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
return ret;
err_out_unmap_rx:
- pci_free_consistent(pdev, RX_TOTAL_SIZE, ep->rx_ring, ep->rx_ring_dma);
+ dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, ep->rx_ring,
+ ep->rx_ring_dma);
err_out_unmap_tx:
- pci_free_consistent(pdev, TX_TOTAL_SIZE, ep->tx_ring, ep->tx_ring_dma);
+ dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, ep->tx_ring,
+ ep->tx_ring_dma);
err_out_iounmap:
pci_iounmap(pdev, ioaddr);
err_out_free_netdev:
@@ -918,8 +922,10 @@ static void epic_init_ring(struct net_device *dev)
if (skb == NULL)
break;
skb_reserve(skb, 2); /* 16 byte align the IP header. */
- ep->rx_ring[i].bufaddr = pci_map_single(ep->pci_dev,
- skb->data, ep->rx_buf_sz, PCI_DMA_FROMDEVICE);
+ ep->rx_ring[i].bufaddr = dma_map_single(&ep->pci_dev->dev,
+ skb->data,
+ ep->rx_buf_sz,
+ DMA_FROM_DEVICE);
ep->rx_ring[i].rxstatus = DescOwn;
}
ep->dirty_rx = (unsigned int)(i - RX_RING_SIZE);
@@ -955,8 +961,9 @@ static netdev_tx_t epic_start_xmit(struct sk_buff *skb, struct net_device *dev)
entry = ep->cur_tx % TX_RING_SIZE;
ep->tx_skbuff[entry] = skb;
- ep->tx_ring[entry].bufaddr = pci_map_single(ep->pci_dev, skb->data,
- skb->len, PCI_DMA_TODEVICE);
+ ep->tx_ring[entry].bufaddr = dma_map_single(&ep->pci_dev->dev,
+ skb->data, skb->len,
+ DMA_TO_DEVICE);
if (free_count < TX_QUEUE_LEN/2) {/* Typical path */
ctrl_word = 0x100000; /* No interrupt */
} else if (free_count == TX_QUEUE_LEN/2) {
@@ -1036,8 +1043,9 @@ static void epic_tx(struct net_device *dev, struct epic_private *ep)
/* Free the original skb. */
skb = ep->tx_skbuff[entry];
- pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr,
- skb->len, PCI_DMA_TODEVICE);
+ dma_unmap_single(&ep->pci_dev->dev,
+ ep->tx_ring[entry].bufaddr, skb->len,
+ DMA_TO_DEVICE);
dev_consume_skb_irq(skb);
ep->tx_skbuff[entry] = NULL;
}
@@ -1178,20 +1186,21 @@ static int epic_rx(struct net_device *dev, int budget)
if (pkt_len < rx_copybreak &&
(skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) {
skb_reserve(skb, 2); /* 16 byte align the IP header */
- pci_dma_sync_single_for_cpu(ep->pci_dev,
- ep->rx_ring[entry].bufaddr,
- ep->rx_buf_sz,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_cpu(&ep->pci_dev->dev,
+ ep->rx_ring[entry].bufaddr,
+ ep->rx_buf_sz,
+ DMA_FROM_DEVICE);
skb_copy_to_linear_data(skb, ep->rx_skbuff[entry]->data, pkt_len);
skb_put(skb, pkt_len);
- pci_dma_sync_single_for_device(ep->pci_dev,
- ep->rx_ring[entry].bufaddr,
- ep->rx_buf_sz,
- PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_device(&ep->pci_dev->dev,
+ ep->rx_ring[entry].bufaddr,
+ ep->rx_buf_sz,
+ DMA_FROM_DEVICE);
} else {
- pci_unmap_single(ep->pci_dev,
- ep->rx_ring[entry].bufaddr,
- ep->rx_buf_sz, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&ep->pci_dev->dev,
+ ep->rx_ring[entry].bufaddr,
+ ep->rx_buf_sz,
+ DMA_FROM_DEVICE);
skb_put(skb = ep->rx_skbuff[entry], pkt_len);
ep->rx_skbuff[entry] = NULL;
}
@@ -1213,8 +1222,10 @@ static int epic_rx(struct net_device *dev, int budget)
if (skb == NULL)
break;
skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
- ep->rx_ring[entry].bufaddr = pci_map_single(ep->pci_dev,
- skb->data, ep->rx_buf_sz, PCI_DMA_FROMDEVICE);
+ ep->rx_ring[entry].bufaddr = dma_map_single(&ep->pci_dev->dev,
+ skb->data,
+ ep->rx_buf_sz,
+ DMA_FROM_DEVICE);
work_done++;
}
/* AV: shouldn't we add a barrier here? */
@@ -1294,8 +1305,8 @@ static int epic_close(struct net_device *dev)
ep->rx_ring[i].rxstatus = 0; /* Not owned by Epic chip. */
ep->rx_ring[i].buflength = 0;
if (skb) {
- pci_unmap_single(pdev, ep->rx_ring[i].bufaddr,
- ep->rx_buf_sz, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&pdev->dev, ep->rx_ring[i].bufaddr,
+ ep->rx_buf_sz, DMA_FROM_DEVICE);
dev_kfree_skb(skb);
}
ep->rx_ring[i].bufaddr = 0xBADF00D0; /* An invalid address. */
@@ -1305,8 +1316,8 @@ static int epic_close(struct net_device *dev)
ep->tx_skbuff[i] = NULL;
if (!skb)
continue;
- pci_unmap_single(pdev, ep->tx_ring[i].bufaddr, skb->len,
- PCI_DMA_TODEVICE);
+ dma_unmap_single(&pdev->dev, ep->tx_ring[i].bufaddr, skb->len,
+ DMA_TO_DEVICE);
dev_kfree_skb(skb);
}
@@ -1502,8 +1513,10 @@ static void epic_remove_one(struct pci_dev *pdev)
struct net_device *dev = pci_get_drvdata(pdev);
struct epic_private *ep = netdev_priv(dev);
- pci_free_consistent(pdev, TX_TOTAL_SIZE, ep->tx_ring, ep->tx_ring_dma);
- pci_free_consistent(pdev, RX_TOTAL_SIZE, ep->rx_ring, ep->rx_ring_dma);
+ dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, ep->tx_ring,
+ ep->tx_ring_dma);
+ dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, ep->rx_ring,
+ ep->rx_ring_dma);
unregister_netdev(dev);
pci_iounmap(pdev, ep->ioaddr);
pci_release_regions(pdev);
diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c
index 42bef04..c1dab00 100644
--- a/drivers/net/ethernet/smsc/smsc9420.c
+++ b/drivers/net/ethernet/smsc/smsc9420.c
@@ -497,8 +497,9 @@ static void smsc9420_free_tx_ring(struct smsc9420_pdata *pd)
if (skb) {
BUG_ON(!pd->tx_buffers[i].mapping);
- pci_unmap_single(pd->pdev, pd->tx_buffers[i].mapping,
- skb->len, PCI_DMA_TODEVICE);
+ dma_unmap_single(&pd->pdev->dev,
+ pd->tx_buffers[i].mapping, skb->len,
+ DMA_TO_DEVICE);
dev_kfree_skb_any(skb);
}
@@ -530,8 +531,9 @@ static void smsc9420_free_rx_ring(struct smsc9420_pdata *pd)
dev_kfree_skb_any(pd->rx_buffers[i].skb);
if (pd->rx_buffers[i].mapping)
- pci_unmap_single(pd->pdev, pd->rx_buffers[i].mapping,
- PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&pd->pdev->dev,
+ pd->rx_buffers[i].mapping,
+ PKT_BUF_SZ, DMA_FROM_DEVICE);
pd->rx_ring[i].status = 0;
pd->rx_ring[i].length = 0;
@@ -749,8 +751,8 @@ static void smsc9420_rx_handoff(struct smsc9420_pdata *pd, const int index,
dev->stats.rx_packets++;
dev->stats.rx_bytes += packet_length;
- pci_unmap_single(pd->pdev, pd->rx_buffers[index].mapping,
- PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&pd->pdev->dev, pd->rx_buffers[index].mapping,
+ PKT_BUF_SZ, DMA_FROM_DEVICE);
pd->rx_buffers[index].mapping = 0;
skb = pd->rx_buffers[index].skb;
@@ -782,9 +784,9 @@ static int smsc9420_alloc_rx_buffer(struct smsc9420_pdata *pd, int index)
if (unlikely(!skb))
return -ENOMEM;
- mapping = pci_map_single(pd->pdev, skb_tail_pointer(skb),
- PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(pd->pdev, mapping)) {
+ mapping = dma_map_single(&pd->pdev->dev, skb_tail_pointer(skb),
+ PKT_BUF_SZ, DMA_FROM_DEVICE);
+ if (dma_mapping_error(&pd->pdev->dev, mapping)) {
dev_kfree_skb_any(skb);
netif_warn(pd, rx_err, pd->dev, "pci_map_single failed!\n");
return -ENOMEM;
@@ -901,8 +903,10 @@ static void smsc9420_complete_tx(struct net_device *dev)
BUG_ON(!pd->tx_buffers[index].skb);
BUG_ON(!pd->tx_buffers[index].mapping);
- pci_unmap_single(pd->pdev, pd->tx_buffers[index].mapping,
- pd->tx_buffers[index].skb->len, PCI_DMA_TODEVICE);
+ dma_unmap_single(&pd->pdev->dev,
+ pd->tx_buffers[index].mapping,
+ pd->tx_buffers[index].skb->len,
+ DMA_TO_DEVICE);
pd->tx_buffers[index].mapping = 0;
dev_kfree_skb_any(pd->tx_buffers[index].skb);
@@ -932,9 +936,9 @@ static netdev_tx_t smsc9420_hard_start_xmit(struct sk_buff *skb,
BUG_ON(pd->tx_buffers[index].skb);
BUG_ON(pd->tx_buffers[index].mapping);
- mapping = pci_map_single(pd->pdev, skb->data,
- skb->len, PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(pd->pdev, mapping)) {
+ mapping = dma_map_single(&pd->pdev->dev, skb->data, skb->len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&pd->pdev->dev, mapping)) {
netif_warn(pd, tx_err, pd->dev,
"pci_map_single failed, dropping packet\n");
return NETDEV_TX_BUSY;
@@ -1522,7 +1526,7 @@ smsc9420_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto out_free_netdev_2;
}
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
+ if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) {
netdev_err(dev, "No usable DMA configuration, aborting\n");
goto out_free_regions_3;
}
@@ -1540,10 +1544,9 @@ smsc9420_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pd = netdev_priv(dev);
/* pci descriptors are created in the PCI consistent area */
- pd->rx_ring = pci_alloc_consistent(pdev,
- sizeof(struct smsc9420_dma_desc) * RX_RING_SIZE +
- sizeof(struct smsc9420_dma_desc) * TX_RING_SIZE,
- &pd->rx_dma_addr);
+ pd->rx_ring = dma_alloc_coherent(&pdev->dev,
+ sizeof(struct smsc9420_dma_desc) * (RX_RING_SIZE + TX_RING_SIZE),
+ &pd->rx_dma_addr, GFP_KERNEL);
if (!pd->rx_ring)
goto out_free_io_4;
@@ -1599,8 +1602,9 @@ smsc9420_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return 0;
out_free_dmadesc_5:
- pci_free_consistent(pdev, sizeof(struct smsc9420_dma_desc) *
- (RX_RING_SIZE + TX_RING_SIZE), pd->rx_ring, pd->rx_dma_addr);
+ dma_free_coherent(&pdev->dev,
+ sizeof(struct smsc9420_dma_desc) * (RX_RING_SIZE + TX_RING_SIZE),
+ pd->rx_ring, pd->rx_dma_addr);
out_free_io_4:
iounmap(virt_addr - LAN9420_CPSR_ENDIAN_OFFSET);
out_free_regions_3:
@@ -1632,8 +1636,9 @@ static void smsc9420_remove(struct pci_dev *pdev)
BUG_ON(!pd->tx_ring);
BUG_ON(!pd->rx_ring);
- pci_free_consistent(pdev, sizeof(struct smsc9420_dma_desc) *
- (RX_RING_SIZE + TX_RING_SIZE), pd->rx_ring, pd->rx_dma_addr);
+ dma_free_coherent(&pdev->dev,
+ sizeof(struct smsc9420_dma_desc) * (RX_RING_SIZE + TX_RING_SIZE),
+ pd->rx_ring, pd->rx_dma_addr);
iounmap(pd->ioaddr - LAN9420_CPSR_ENDIAN_OFFSET);
pci_release_regions(pdev);
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 9a47c5a..53f14c5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -3,7 +3,7 @@
tristate "STMicroelectronics Multi-Gigabit Ethernet driver"
depends on HAS_IOMEM && HAS_DMA
select MII
- select MDIO_XPCS
+ select PCS_XPCS
select PAGE_POOL
select PHYLINK
select CRC32
@@ -209,6 +209,16 @@
device driver. This driver is used for i.MX8 series like
iMX8MP/iMX8DXL GMAC ethernet controller.
+config DWMAC_INTEL_PLAT
+ tristate "Intel dwmac support"
+ depends on OF && COMMON_CLK
+ depends on STMMAC_ETH
+ help
+ Support for ethernet controllers on Intel SoCs
+
+ This selects the Intel platform specific glue layer support for
+ the stmmac device driver. This driver is used for the Intel Keem Bay
+ SoC.
endif
config DWMAC_INTEL
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index 295615a..24e6145 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -26,6 +26,7 @@
obj-$(CONFIG_DWMAC_SUNXI) += dwmac-sunxi.o
obj-$(CONFIG_DWMAC_SUN8I) += dwmac-sun8i.o
obj-$(CONFIG_DWMAC_DWC_QOS_ETH) += dwmac-dwc-qos-eth.o
+obj-$(CONFIG_DWMAC_INTEL_PLAT) += dwmac-intel-plat.o
obj-$(CONFIG_DWMAC_GENERIC) += dwmac-generic.o
obj-$(CONFIG_DWMAC_IMX8) += dwmac-imx.o
stmmac-platform-objs:= stmmac_platform.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 127f758..acc5e3f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -15,7 +15,7 @@
#include <linux/netdevice.h>
#include <linux/stmmac.h>
#include <linux/phy.h>
-#include <linux/mdio-xpcs.h>
+#include <linux/pcs/pcs-xpcs.h>
#include <linux/module.h>
#if IS_ENABLED(CONFIG_VLAN_8021Q)
#define STMMAC_VLAN_TAG_USED
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
new file mode 100644
index 0000000..ccac7bf
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Intel DWMAC platform driver
+ *
+ * Copyright(C) 2020 Intel Corporation
+ */
+
+#include <linux/ethtool.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/stmmac.h>
+
+#include "stmmac.h"
+#include "stmmac_platform.h"
+
+struct intel_dwmac {
+ struct device *dev;
+ struct clk *tx_clk;
+ const struct intel_dwmac_data *data;
+};
+
+struct intel_dwmac_data {
+ void (*fix_mac_speed)(void *priv, unsigned int speed);
+ unsigned long ptp_ref_clk_rate;
+ unsigned long tx_clk_rate;
+ bool tx_clk_en;
+};
+
+static void kmb_eth_fix_mac_speed(void *priv, unsigned int speed)
+{
+ struct intel_dwmac *dwmac = priv;
+ unsigned long rate;
+ int ret;
+
+ rate = clk_get_rate(dwmac->tx_clk);
+
+ switch (speed) {
+ case SPEED_1000:
+ rate = 125000000;
+ break;
+
+ case SPEED_100:
+ rate = 25000000;
+ break;
+
+ case SPEED_10:
+ rate = 2500000;
+ break;
+
+ default:
+ dev_err(dwmac->dev, "Invalid speed\n");
+ break;
+ }
+
+ ret = clk_set_rate(dwmac->tx_clk, rate);
+ if (ret)
+ dev_err(dwmac->dev, "Failed to configure tx clock rate\n");
+}
+
+static const struct intel_dwmac_data kmb_data = {
+ .fix_mac_speed = kmb_eth_fix_mac_speed,
+ .ptp_ref_clk_rate = 200000000,
+ .tx_clk_rate = 125000000,
+ .tx_clk_en = true,
+};
+
+static const struct of_device_id intel_eth_plat_match[] = {
+ { .compatible = "intel,keembay-dwmac", .data = &kmb_data },
+ { }
+};
+MODULE_DEVICE_TABLE(of, intel_eth_plat_match);
+
+static int intel_eth_plat_probe(struct platform_device *pdev)
+{
+ struct net_device *ndev = platform_get_drvdata(pdev);
+ struct stmmac_priv *priv = netdev_priv(ndev);
+ struct plat_stmmacenet_data *plat_dat;
+ struct stmmac_resources stmmac_res;
+ const struct of_device_id *match;
+ struct intel_dwmac *dwmac;
+ unsigned long rate;
+ int ret;
+
+ plat_dat = priv->plat;
+ ret = stmmac_get_platform_resources(pdev, &stmmac_res);
+ if (ret)
+ return ret;
+
+ plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+ if (IS_ERR(plat_dat)) {
+ dev_err(&pdev->dev, "dt configuration failed\n");
+ return PTR_ERR(plat_dat);
+ }
+
+ dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
+ if (!dwmac) {
+ ret = -ENOMEM;
+ goto err_remove_config_dt;
+ }
+
+ dwmac->dev = &pdev->dev;
+ dwmac->tx_clk = NULL;
+
+ match = of_match_device(intel_eth_plat_match, &pdev->dev);
+ if (match && match->data) {
+ dwmac->data = (const struct intel_dwmac_data *)match->data;
+
+ if (dwmac->data->fix_mac_speed)
+ plat_dat->fix_mac_speed = dwmac->data->fix_mac_speed;
+
+ /* Enable TX clock */
+ if (dwmac->data->tx_clk_en) {
+ dwmac->tx_clk = devm_clk_get(&pdev->dev, "tx_clk");
+ if (IS_ERR(dwmac->tx_clk))
+ goto err_remove_config_dt;
+
+ clk_prepare_enable(dwmac->tx_clk);
+
+ /* Check and configure TX clock rate */
+ rate = clk_get_rate(dwmac->tx_clk);
+ if (dwmac->data->tx_clk_rate &&
+ rate != dwmac->data->tx_clk_rate) {
+ rate = dwmac->data->tx_clk_rate;
+ ret = clk_set_rate(dwmac->tx_clk, rate);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "Failed to set tx_clk\n");
+ return ret;
+ }
+ }
+ }
+
+ /* Check and configure PTP ref clock rate */
+ rate = clk_get_rate(plat_dat->clk_ptp_ref);
+ if (dwmac->data->ptp_ref_clk_rate &&
+ rate != dwmac->data->ptp_ref_clk_rate) {
+ rate = dwmac->data->ptp_ref_clk_rate;
+ ret = clk_set_rate(plat_dat->clk_ptp_ref, rate);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "Failed to set clk_ptp_ref\n");
+ return ret;
+ }
+ }
+ }
+
+ plat_dat->bsp_priv = dwmac;
+
+ ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+ if (ret) {
+ if (dwmac->tx_clk)
+ clk_disable_unprepare(dwmac->tx_clk);
+
+ goto err_remove_config_dt;
+ }
+
+ return 0;
+
+err_remove_config_dt:
+ stmmac_remove_config_dt(pdev, plat_dat);
+
+ return ret;
+}
+
+static int intel_eth_plat_remove(struct platform_device *pdev)
+{
+ struct intel_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev);
+ int ret;
+
+ ret = stmmac_pltfr_remove(pdev);
+
+ if (dwmac->tx_clk)
+ clk_disable_unprepare(dwmac->tx_clk);
+
+ return ret;
+}
+
+static struct platform_driver intel_eth_plat_driver = {
+ .probe = intel_eth_plat_probe,
+ .remove = intel_eth_plat_remove,
+ .driver = {
+ .name = "intel-eth-plat",
+ .pm = &stmmac_pltfr_pm_ops,
+ .of_match_table = intel_eth_plat_match,
+ },
+};
+module_platform_driver(intel_eth_plat_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel DWMAC platform driver");
diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c
index 8deb943..58f142e 100644
--- a/drivers/net/ethernet/sun/sungem.c
+++ b/drivers/net/ethernet/sun/sungem.c
@@ -2965,9 +2965,8 @@ static int gem_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* It is guaranteed that the returned buffer will be at least
* PAGE_SIZE aligned.
*/
- gp->init_block = (struct gem_init_block *)
- dma_alloc_coherent(&pdev->dev, sizeof(struct gem_init_block),
- &gp->gblock_dvma, GFP_KERNEL);
+ gp->init_block = dma_alloc_coherent(&pdev->dev, sizeof(struct gem_init_block),
+ &gp->gblock_dvma, GFP_KERNEL);
if (!gp->init_block) {
pr_err("Cannot allocate init block, aborting\n");
err = -ENOMEM;
diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c
index c59a289..75056c1 100644
--- a/drivers/net/ethernet/ti/am65-cpts.c
+++ b/drivers/net/ethernet/ti/am65-cpts.c
@@ -83,6 +83,8 @@ struct am65_cpts_regs {
#define AM65_CPTS_CONTROL_HW8_TS_PUSH_EN BIT(15)
#define AM65_CPTS_CONTROL_HW1_TS_PUSH_OFFSET (8)
+#define AM65_CPTS_CONTROL_TX_GENF_CLR_EN BIT(17)
+
#define AM65_CPTS_CONTROL_TS_SYNC_SEL_MASK (0xF)
#define AM65_CPTS_CONTROL_TS_SYNC_SEL_SHIFT (28)
@@ -748,42 +750,23 @@ EXPORT_SYMBOL_GPL(am65_cpts_rx_enable);
static int am65_skb_get_mtype_seqid(struct sk_buff *skb, u32 *mtype_seqid)
{
unsigned int ptp_class = ptp_classify_raw(skb);
- u8 *msgtype, *data = skb->data;
- unsigned int offset = 0;
- __be16 *seqid;
+ struct ptp_header *hdr;
+ u8 msgtype;
+ u16 seqid;
if (ptp_class == PTP_CLASS_NONE)
return 0;
- if (ptp_class & PTP_CLASS_VLAN)
- offset += VLAN_HLEN;
-
- switch (ptp_class & PTP_CLASS_PMASK) {
- case PTP_CLASS_IPV4:
- offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
- break;
- case PTP_CLASS_IPV6:
- offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
- break;
- case PTP_CLASS_L2:
- offset += ETH_HLEN;
- break;
- default:
- return 0;
- }
-
- if (skb->len + ETH_HLEN < offset + OFF_PTP_SEQUENCE_ID + sizeof(*seqid))
+ hdr = ptp_parse_header(skb, ptp_class);
+ if (!hdr)
return 0;
- if (unlikely(ptp_class & PTP_CLASS_V1))
- msgtype = data + offset + OFF_PTP_CONTROL;
- else
- msgtype = data + offset;
+ msgtype = ptp_get_msgtype(hdr, ptp_class);
+ seqid = ntohs(hdr->sequence_id);
- seqid = (__be16 *)(data + offset + OFF_PTP_SEQUENCE_ID);
- *mtype_seqid = (*msgtype << AM65_CPTS_EVENT_1_MESSAGE_TYPE_SHIFT) &
+ *mtype_seqid = (msgtype << AM65_CPTS_EVENT_1_MESSAGE_TYPE_SHIFT) &
AM65_CPTS_EVENT_1_MESSAGE_TYPE_MASK;
- *mtype_seqid |= (ntohs(*seqid) & AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK);
+ *mtype_seqid |= (seqid & AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK);
return 1;
}
@@ -1005,7 +988,9 @@ struct am65_cpts *am65_cpts_create(struct device *dev, void __iomem *regs,
am65_cpts_set_add_val(cpts);
- am65_cpts_write32(cpts, AM65_CPTS_CONTROL_EN | AM65_CPTS_CONTROL_64MODE,
+ am65_cpts_write32(cpts, AM65_CPTS_CONTROL_EN |
+ AM65_CPTS_CONTROL_64MODE |
+ AM65_CPTS_CONTROL_TX_GENF_CLR_EN,
control);
am65_cpts_write32(cpts, AM65_CPTS_INT_ENABLE_TS_PEND_EN, int_enable);
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 7c55d39..d1fc795 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -446,41 +446,22 @@ static const struct ptp_clock_info cpts_info = {
static int cpts_skb_get_mtype_seqid(struct sk_buff *skb, u32 *mtype_seqid)
{
unsigned int ptp_class = ptp_classify_raw(skb);
- u8 *msgtype, *data = skb->data;
- unsigned int offset = 0;
- u16 *seqid;
+ struct ptp_header *hdr;
+ u8 msgtype;
+ u16 seqid;
if (ptp_class == PTP_CLASS_NONE)
return 0;
- if (ptp_class & PTP_CLASS_VLAN)
- offset += VLAN_HLEN;
-
- switch (ptp_class & PTP_CLASS_PMASK) {
- case PTP_CLASS_IPV4:
- offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
- break;
- case PTP_CLASS_IPV6:
- offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
- break;
- case PTP_CLASS_L2:
- offset += ETH_HLEN;
- break;
- default:
- return 0;
- }
-
- if (skb->len + ETH_HLEN < offset + OFF_PTP_SEQUENCE_ID + sizeof(*seqid))
+ hdr = ptp_parse_header(skb, ptp_class);
+ if (!hdr)
return 0;
- if (unlikely(ptp_class & PTP_CLASS_V1))
- msgtype = data + offset + OFF_PTP_CONTROL;
- else
- msgtype = data + offset;
+ msgtype = ptp_get_msgtype(hdr, ptp_class);
+ seqid = ntohs(hdr->sequence_id);
- seqid = (u16 *)(data + offset + OFF_PTP_SEQUENCE_ID);
- *mtype_seqid = (*msgtype & MESSAGE_TYPE_MASK) << MESSAGE_TYPE_SHIFT;
- *mtype_seqid |= (ntohs(*seqid) & SEQUENCE_ID_MASK) << SEQUENCE_ID_SHIFT;
+ *mtype_seqid = (msgtype & MESSAGE_TYPE_MASK) << MESSAGE_TYPE_SHIFT;
+ *mtype_seqid |= (seqid & SEQUENCE_ID_MASK) << SEQUENCE_ID_SHIFT;
return 1;
}
@@ -528,6 +509,11 @@ void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb)
int ret;
u64 ns;
+ /* cpts_rx_timestamp() is called before eth_type_trans(), so
+ * skb MAC Hdr properties are not configured yet. Hence need to
+ * reset skb MAC header here
+ */
+ skb_reset_mac_header(skb);
ret = cpts_skb_get_mtype_seqid(skb, &skb_cb->skb_mtype_seqid);
if (!ret)
return;
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 8e47d01..611722e 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -928,8 +928,8 @@ static void ipv4_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info)
}
}
-static int gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk,
- struct genl_info *info)
+static struct pdp_ctx *gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk,
+ struct genl_info *info)
{
struct pdp_ctx *pctx, *pctx_tid = NULL;
struct net_device *dev = gtp->dev;
@@ -956,12 +956,12 @@ static int gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk,
if (found) {
if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
- return -EEXIST;
+ return ERR_PTR(-EEXIST);
if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE)
- return -EOPNOTSUPP;
+ return ERR_PTR(-EOPNOTSUPP);
if (pctx && pctx_tid)
- return -EEXIST;
+ return ERR_PTR(-EEXIST);
if (!pctx)
pctx = pctx_tid;
@@ -974,13 +974,13 @@ static int gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk,
netdev_dbg(dev, "GTPv1-U: update tunnel id = %x/%x (pdp %p)\n",
pctx->u.v1.i_tei, pctx->u.v1.o_tei, pctx);
- return 0;
+ return pctx;
}
pctx = kmalloc(sizeof(*pctx), GFP_ATOMIC);
if (pctx == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
sock_hold(sk);
pctx->sk = sk;
@@ -1018,7 +1018,7 @@ static int gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk,
break;
}
- return 0;
+ return pctx;
}
static void pdp_context_free(struct rcu_head *head)
@@ -1036,9 +1036,12 @@ static void pdp_context_delete(struct pdp_ctx *pctx)
call_rcu(&pctx->rcu_head, pdp_context_free);
}
+static int gtp_tunnel_notify(struct pdp_ctx *pctx, u8 cmd, gfp_t allocation);
+
static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
{
unsigned int version;
+ struct pdp_ctx *pctx;
struct gtp_dev *gtp;
struct sock *sk;
int err;
@@ -1068,7 +1071,6 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
}
rtnl_lock();
- rcu_read_lock();
gtp = gtp_find_dev(sock_net(skb->sk), info->attrs);
if (!gtp) {
@@ -1088,10 +1090,15 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
goto out_unlock;
}
- err = gtp_pdp_add(gtp, sk, info);
+ pctx = gtp_pdp_add(gtp, sk, info);
+ if (IS_ERR(pctx)) {
+ err = PTR_ERR(pctx);
+ } else {
+ gtp_tunnel_notify(pctx, GTP_CMD_NEWPDP, GFP_KERNEL);
+ err = 0;
+ }
out_unlock:
- rcu_read_unlock();
rtnl_unlock();
return err;
}
@@ -1159,6 +1166,7 @@ static int gtp_genl_del_pdp(struct sk_buff *skb, struct genl_info *info)
netdev_dbg(pctx->dev, "GTPv1-U: deleting tunnel id = %x/%x (pdp %p)\n",
pctx->u.v1.i_tei, pctx->u.v1.o_tei, pctx);
+ gtp_tunnel_notify(pctx, GTP_CMD_DELPDP, GFP_ATOMIC);
pdp_context_delete(pctx);
out_unlock:
@@ -1168,6 +1176,14 @@ static int gtp_genl_del_pdp(struct sk_buff *skb, struct genl_info *info)
static struct genl_family gtp_genl_family;
+enum gtp_multicast_groups {
+ GTP_GENL_MCGRP,
+};
+
+static const struct genl_multicast_group gtp_genl_mcgrps[] = {
+ [GTP_GENL_MCGRP] = { .name = GTP_GENL_MCGRP_NAME },
+};
+
static int gtp_genl_fill_info(struct sk_buff *skb, u32 snd_portid, u32 snd_seq,
int flags, u32 type, struct pdp_ctx *pctx)
{
@@ -1205,6 +1221,26 @@ static int gtp_genl_fill_info(struct sk_buff *skb, u32 snd_portid, u32 snd_seq,
return -EMSGSIZE;
}
+static int gtp_tunnel_notify(struct pdp_ctx *pctx, u8 cmd, gfp_t allocation)
+{
+ struct sk_buff *msg;
+ int ret;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, allocation);
+ if (!msg)
+ return -ENOMEM;
+
+ ret = gtp_genl_fill_info(msg, 0, 0, 0, cmd, pctx);
+ if (ret < 0) {
+ nlmsg_free(msg);
+ return ret;
+ }
+
+ ret = genlmsg_multicast_netns(>p_genl_family, dev_net(pctx->dev), msg,
+ 0, GTP_GENL_MCGRP, GFP_ATOMIC);
+ return ret;
+}
+
static int gtp_genl_get_pdp(struct sk_buff *skb, struct genl_info *info)
{
struct pdp_ctx *pctx = NULL;
@@ -1335,6 +1371,8 @@ static struct genl_family gtp_genl_family __ro_after_init = {
.module = THIS_MODULE,
.ops = gtp_genl_ops,
.n_ops = ARRAY_SIZE(gtp_genl_ops),
+ .mcgrps = gtp_genl_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(gtp_genl_mcgrps),
};
static int __net_init gtp_net_init(struct net *net)
diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h
index 55115cf..407fee8 100644
--- a/drivers/net/ipa/ipa.h
+++ b/drivers/net/ipa/ipa.h
@@ -10,7 +10,6 @@
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pm_wakeup.h>
-#include <linux/notifier.h>
#include "ipa_version.h"
#include "gsi.h"
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 5bca94c..60b7d93 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -684,6 +684,13 @@ static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] =
[IFLA_IPVLAN_FLAGS] = { .type = NLA_U16 },
};
+static struct net *ipvlan_get_link_net(const struct net_device *dev)
+{
+ struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+ return dev_net(ipvlan->phy_dev);
+}
+
static struct rtnl_link_ops ipvlan_link_ops = {
.kind = "ipvlan",
.priv_size = sizeof(struct ipvl_dev),
@@ -691,6 +698,7 @@ static struct rtnl_link_ops ipvlan_link_ops = {
.setup = ipvlan_link_setup,
.newlink = ipvlan_link_new,
.dellink = ipvlan_link_delete,
+ .get_link_net = ipvlan_get_link_net,
};
int ipvlan_link_register(struct rtnl_link_ops *ops)
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 9159846..124045c 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -1611,7 +1611,7 @@ static const struct nla_policy macsec_genl_rxsc_policy[NUM_MACSEC_RXSC_ATTR] = {
static const struct nla_policy macsec_genl_sa_policy[NUM_MACSEC_SA_ATTR] = {
[MACSEC_SA_ATTR_AN] = { .type = NLA_U8 },
[MACSEC_SA_ATTR_ACTIVE] = { .type = NLA_U8 },
- [MACSEC_SA_ATTR_PN] = { .type = NLA_MIN_LEN, .len = 4 },
+ [MACSEC_SA_ATTR_PN] = NLA_POLICY_MIN_LEN(4),
[MACSEC_SA_ATTR_KEYID] = { .type = NLA_BINARY,
.len = MACSEC_KEYID_LEN, },
[MACSEC_SA_ATTR_KEY] = { .type = NLA_BINARY,
diff --git a/drivers/net/mdio/Kconfig b/drivers/net/mdio/Kconfig
new file mode 100644
index 0000000..1299880
--- /dev/null
+++ b/drivers/net/mdio/Kconfig
@@ -0,0 +1,241 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# MDIO Layer Configuration
+#
+
+menuconfig MDIO_DEVICE
+ tristate "MDIO bus device drivers"
+ help
+ MDIO devices and driver infrastructure code.
+
+if MDIO_DEVICE
+
+config MDIO_BUS
+ tristate
+ default m if PHYLIB=m
+ default MDIO_DEVICE
+ help
+ This internal symbol is used for link time dependencies and it
+ reflects whether the mdio_bus/mdio_device code is built as a
+ loadable module or built-in.
+
+if MDIO_BUS
+
+config MDIO_DEVRES
+ tristate
+
+config MDIO_SUN4I
+ tristate "Allwinner sun4i MDIO interface support"
+ depends on ARCH_SUNXI || COMPILE_TEST
+ help
+ This driver supports the MDIO interface found in the network
+ interface units of the Allwinner SoC that have an EMAC (A10,
+ A12, A10s, etc.)
+
+config MDIO_XGENE
+ tristate "APM X-Gene SoC MDIO bus controller"
+ depends on ARCH_XGENE || COMPILE_TEST
+ help
+ This module provides a driver for the MDIO busses found in the
+ APM X-Gene SoC's.
+
+config MDIO_ASPEED
+ tristate "ASPEED MDIO bus controller"
+ depends on ARCH_ASPEED || COMPILE_TEST
+ depends on OF_MDIO && HAS_IOMEM
+ help
+ This module provides a driver for the independent MDIO bus
+ controllers found in the ASPEED AST2600 SoC. This is a driver for the
+ third revision of the ASPEED MDIO register interface - the first two
+ revisions are the "old" and "new" interfaces found in the AST2400 and
+ AST2500, embedded in the MAC. For legacy reasons, FTGMAC100 driver
+ continues to drive the embedded MDIO controller for the AST2400 and
+ AST2500 SoCs, so say N if AST2600 support is not required.
+
+config MDIO_BITBANG
+ tristate "Bitbanged MDIO buses"
+ help
+ This module implements the MDIO bus protocol in software,
+ for use by low level drivers that export the ability to
+ drive the relevant pins.
+
+ If in doubt, say N.
+
+config MDIO_BCM_IPROC
+ tristate "Broadcom iProc MDIO bus controller"
+ depends on ARCH_BCM_IPROC || COMPILE_TEST
+ depends on HAS_IOMEM && OF_MDIO
+ default ARCH_BCM_IPROC
+ help
+ This module provides a driver for the MDIO busses found in the
+ Broadcom iProc SoC's.
+
+config MDIO_BCM_UNIMAC
+ tristate "Broadcom UniMAC MDIO bus controller"
+ depends on HAS_IOMEM
+ help
+ This module provides a driver for the Broadcom UniMAC MDIO busses.
+ This hardware can be found in the Broadcom GENET Ethernet MAC
+ controllers as well as some Broadcom Ethernet switches such as the
+ Starfighter 2 switches.
+
+config MDIO_CAVIUM
+ tristate
+
+config MDIO_GPIO
+ tristate "GPIO lib-based bitbanged MDIO buses"
+ depends on MDIO_BITBANG
+ depends on GPIOLIB || COMPILE_TEST
+ help
+ Supports GPIO lib-based MDIO busses.
+
+ To compile this driver as a module, choose M here: the module
+ will be called mdio-gpio.
+
+config MDIO_HISI_FEMAC
+ tristate "Hisilicon FEMAC MDIO bus controller"
+ depends on HAS_IOMEM && OF_MDIO
+ help
+ This module provides a driver for the MDIO busses found in the
+ Hisilicon SoC that have an Fast Ethernet MAC.
+
+config MDIO_I2C
+ tristate
+ depends on I2C
+ help
+ Support I2C based PHYs. This provides a MDIO bus bridged
+ to I2C to allow PHYs connected in I2C mode to be accessed
+ using the existing infrastructure.
+
+ This is library mode.
+
+config MDIO_MVUSB
+ tristate "Marvell USB to MDIO Adapter"
+ depends on USB
+ select MDIO_DEVRES
+ help
+ A USB to MDIO converter present on development boards for
+ Marvell's Link Street family of Ethernet switches.
+
+config MDIO_MSCC_MIIM
+ tristate "Microsemi MIIM interface support"
+ depends on HAS_IOMEM
+ select MDIO_DEVRES
+ help
+ This driver supports the MIIM (MDIO) interface found in the network
+ switches of the Microsemi SoCs; it is recommended to switch on
+ CONFIG_HIGH_RES_TIMERS
+
+config MDIO_MOXART
+ tristate "MOXA ART MDIO interface support"
+ depends on ARCH_MOXART || COMPILE_TEST
+ help
+ This driver supports the MDIO interface found in the network
+ interface units of the MOXA ART SoC
+
+config MDIO_OCTEON
+ tristate "Octeon and some ThunderX SOCs MDIO buses"
+ depends on (64BIT && OF_MDIO) || COMPILE_TEST
+ depends on HAS_IOMEM
+ select MDIO_CAVIUM
+ help
+ This module provides a driver for the Octeon and ThunderX MDIO
+ buses. It is required by the Octeon and ThunderX ethernet device
+ drivers on some systems.
+
+config MDIO_IPQ4019
+ tristate "Qualcomm IPQ4019 MDIO interface support"
+ depends on HAS_IOMEM && OF_MDIO
+ help
+ This driver supports the MDIO interface found in Qualcomm
+ IPQ40xx series Soc-s.
+
+config MDIO_IPQ8064
+ tristate "Qualcomm IPQ8064 MDIO interface support"
+ depends on HAS_IOMEM && OF_MDIO
+ depends on MFD_SYSCON
+ help
+ This driver supports the MDIO interface found in the network
+ interface units of the IPQ8064 SoC
+
+config MDIO_THUNDER
+ tristate "ThunderX SOCs MDIO buses"
+ depends on 64BIT
+ depends on PCI
+ select MDIO_CAVIUM
+ help
+ This driver supports the MDIO interfaces found on Cavium
+ ThunderX SoCs when the MDIO bus device appears as a PCI
+ device.
+
+comment "MDIO Multiplexers"
+
+config MDIO_BUS_MUX
+ tristate
+ depends on OF_MDIO
+ help
+ This module provides a driver framework for MDIO bus
+ multiplexers which connect one of several child MDIO busses
+ to a parent bus. Switching between child busses is done by
+ device specific drivers.
+
+config MDIO_BUS_MUX_MESON_G12A
+ tristate "Amlogic G12a based MDIO bus multiplexer"
+ depends on ARCH_MESON || COMPILE_TEST
+ depends on OF_MDIO && HAS_IOMEM && COMMON_CLK
+ select MDIO_BUS_MUX
+ default m if ARCH_MESON
+ help
+ This module provides a driver for the MDIO multiplexer/glue of
+ the amlogic g12a SoC. The multiplexers connects either the external
+ or the internal MDIO bus to the parent bus.
+
+config MDIO_BUS_MUX_BCM_IPROC
+ tristate "Broadcom iProc based MDIO bus multiplexers"
+ depends on OF && OF_MDIO && (ARCH_BCM_IPROC || COMPILE_TEST)
+ select MDIO_BUS_MUX
+ default ARCH_BCM_IPROC
+ help
+ This module provides a driver for MDIO bus multiplexers found in
+ iProc based Broadcom SoCs. This multiplexer connects one of several
+ child MDIO bus to a parent bus. Buses could be internal as well as
+ external and selection logic lies inside the same multiplexer.
+
+config MDIO_BUS_MUX_GPIO
+ tristate "GPIO controlled MDIO bus multiplexers"
+ depends on OF_GPIO && OF_MDIO
+ select MDIO_BUS_MUX
+ help
+ This module provides a driver for MDIO bus multiplexers that
+ are controlled via GPIO lines. The multiplexer connects one of
+ several child MDIO busses to a parent bus. Child bus
+ selection is under the control of GPIO lines.
+
+config MDIO_BUS_MUX_MULTIPLEXER
+ tristate "MDIO bus multiplexer using kernel multiplexer subsystem"
+ depends on OF_MDIO
+ select MULTIPLEXER
+ select MDIO_BUS_MUX
+ help
+ This module provides a driver for MDIO bus multiplexer
+ that is controlled via the kernel multiplexer subsystem. The
+ bus multiplexer connects one of several child MDIO busses to
+ a parent bus. Child bus selection is under the control of
+ the kernel multiplexer subsystem.
+
+config MDIO_BUS_MUX_MMIOREG
+ tristate "MMIO device-controlled MDIO bus multiplexers"
+ depends on OF_MDIO && HAS_IOMEM
+ select MDIO_BUS_MUX
+ help
+ This module provides a driver for MDIO bus multiplexers that
+ are controlled via a simple memory-mapped device, like an FPGA.
+ The multiplexer connects one of several child MDIO busses to a
+ parent bus. Child bus selection is under the control of one of
+ the FPGA's registers.
+
+ Currently, only 8/16/32 bits registers are supported.
+
+
+endif
+endif
diff --git a/drivers/net/mdio/Makefile b/drivers/net/mdio/Makefile
new file mode 100644
index 0000000..14d1beb
--- /dev/null
+++ b/drivers/net/mdio/Makefile
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for Linux MDIO bus drivers
+
+obj-$(CONFIG_MDIO_ASPEED) += mdio-aspeed.o
+obj-$(CONFIG_MDIO_BCM_IPROC) += mdio-bcm-iproc.o
+obj-$(CONFIG_MDIO_BCM_UNIMAC) += mdio-bcm-unimac.o
+obj-$(CONFIG_MDIO_BITBANG) += mdio-bitbang.o
+obj-$(CONFIG_MDIO_CAVIUM) += mdio-cavium.o
+obj-$(CONFIG_MDIO_GPIO) += mdio-gpio.o
+obj-$(CONFIG_MDIO_HISI_FEMAC) += mdio-hisi-femac.o
+obj-$(CONFIG_MDIO_I2C) += mdio-i2c.o
+obj-$(CONFIG_MDIO_IPQ4019) += mdio-ipq4019.o
+obj-$(CONFIG_MDIO_IPQ8064) += mdio-ipq8064.o
+obj-$(CONFIG_MDIO_MOXART) += mdio-moxart.o
+obj-$(CONFIG_MDIO_MSCC_MIIM) += mdio-mscc-miim.o
+obj-$(CONFIG_MDIO_MVUSB) += mdio-mvusb.o
+obj-$(CONFIG_MDIO_OCTEON) += mdio-octeon.o
+obj-$(CONFIG_MDIO_SUN4I) += mdio-sun4i.o
+obj-$(CONFIG_MDIO_THUNDER) += mdio-thunder.o
+obj-$(CONFIG_MDIO_XGENE) += mdio-xgene.o
+
+obj-$(CONFIG_MDIO_BUS_MUX) += mdio-mux.o
+obj-$(CONFIG_MDIO_BUS_MUX_BCM_IPROC) += mdio-mux-bcm-iproc.o
+obj-$(CONFIG_MDIO_BUS_MUX_GPIO) += mdio-mux-gpio.o
+obj-$(CONFIG_MDIO_BUS_MUX_MESON_G12A) += mdio-mux-meson-g12a.o
+obj-$(CONFIG_MDIO_BUS_MUX_MMIOREG) += mdio-mux-mmioreg.o
+obj-$(CONFIG_MDIO_BUS_MUX_MULTIPLEXER) += mdio-mux-multiplexer.o
diff --git a/drivers/net/phy/mdio-aspeed.c b/drivers/net/mdio/mdio-aspeed.c
similarity index 100%
rename from drivers/net/phy/mdio-aspeed.c
rename to drivers/net/mdio/mdio-aspeed.c
diff --git a/drivers/net/phy/mdio-bcm-iproc.c b/drivers/net/mdio/mdio-bcm-iproc.c
similarity index 100%
rename from drivers/net/phy/mdio-bcm-iproc.c
rename to drivers/net/mdio/mdio-bcm-iproc.c
diff --git a/drivers/net/phy/mdio-bcm-unimac.c b/drivers/net/mdio/mdio-bcm-unimac.c
similarity index 100%
rename from drivers/net/phy/mdio-bcm-unimac.c
rename to drivers/net/mdio/mdio-bcm-unimac.c
diff --git a/drivers/net/phy/mdio-bitbang.c b/drivers/net/mdio/mdio-bitbang.c
similarity index 100%
rename from drivers/net/phy/mdio-bitbang.c
rename to drivers/net/mdio/mdio-bitbang.c
diff --git a/drivers/net/phy/mdio-cavium.c b/drivers/net/mdio/mdio-cavium.c
similarity index 100%
rename from drivers/net/phy/mdio-cavium.c
rename to drivers/net/mdio/mdio-cavium.c
diff --git a/drivers/net/phy/mdio-cavium.h b/drivers/net/mdio/mdio-cavium.h
similarity index 100%
rename from drivers/net/phy/mdio-cavium.h
rename to drivers/net/mdio/mdio-cavium.h
diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/mdio/mdio-gpio.c
similarity index 100%
rename from drivers/net/phy/mdio-gpio.c
rename to drivers/net/mdio/mdio-gpio.c
diff --git a/drivers/net/phy/mdio-hisi-femac.c b/drivers/net/mdio/mdio-hisi-femac.c
similarity index 100%
rename from drivers/net/phy/mdio-hisi-femac.c
rename to drivers/net/mdio/mdio-hisi-femac.c
diff --git a/drivers/net/phy/mdio-i2c.c b/drivers/net/mdio/mdio-i2c.c
similarity index 98%
rename from drivers/net/phy/mdio-i2c.c
rename to drivers/net/mdio/mdio-i2c.c
index 0746e2c..09200a7 100644
--- a/drivers/net/phy/mdio-i2c.c
+++ b/drivers/net/mdio/mdio-i2c.c
@@ -10,10 +10,9 @@
* of their settings.
*/
#include <linux/i2c.h>
+#include <linux/mdio/mdio-i2c.h>
#include <linux/phy.h>
-#include "mdio-i2c.h"
-
/*
* I2C bus addresses 0x50 and 0x51 are normally an EEPROM, which is
* specified to be present in SFP modules. These correspond with PHY
diff --git a/drivers/net/phy/mdio-ipq4019.c b/drivers/net/mdio/mdio-ipq4019.c
similarity index 100%
rename from drivers/net/phy/mdio-ipq4019.c
rename to drivers/net/mdio/mdio-ipq4019.c
diff --git a/drivers/net/phy/mdio-ipq8064.c b/drivers/net/mdio/mdio-ipq8064.c
similarity index 100%
rename from drivers/net/phy/mdio-ipq8064.c
rename to drivers/net/mdio/mdio-ipq8064.c
diff --git a/drivers/net/phy/mdio-moxart.c b/drivers/net/mdio/mdio-moxart.c
similarity index 100%
rename from drivers/net/phy/mdio-moxart.c
rename to drivers/net/mdio/mdio-moxart.c
diff --git a/drivers/net/phy/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c
similarity index 100%
rename from drivers/net/phy/mdio-mscc-miim.c
rename to drivers/net/mdio/mdio-mscc-miim.c
diff --git a/drivers/net/phy/mdio-mux-bcm-iproc.c b/drivers/net/mdio/mdio-mux-bcm-iproc.c
similarity index 100%
rename from drivers/net/phy/mdio-mux-bcm-iproc.c
rename to drivers/net/mdio/mdio-mux-bcm-iproc.c
diff --git a/drivers/net/phy/mdio-mux-gpio.c b/drivers/net/mdio/mdio-mux-gpio.c
similarity index 100%
rename from drivers/net/phy/mdio-mux-gpio.c
rename to drivers/net/mdio/mdio-mux-gpio.c
diff --git a/drivers/net/phy/mdio-mux-meson-g12a.c b/drivers/net/mdio/mdio-mux-meson-g12a.c
similarity index 100%
rename from drivers/net/phy/mdio-mux-meson-g12a.c
rename to drivers/net/mdio/mdio-mux-meson-g12a.c
diff --git a/drivers/net/phy/mdio-mux-mmioreg.c b/drivers/net/mdio/mdio-mux-mmioreg.c
similarity index 100%
rename from drivers/net/phy/mdio-mux-mmioreg.c
rename to drivers/net/mdio/mdio-mux-mmioreg.c
diff --git a/drivers/net/phy/mdio-mux-multiplexer.c b/drivers/net/mdio/mdio-mux-multiplexer.c
similarity index 100%
rename from drivers/net/phy/mdio-mux-multiplexer.c
rename to drivers/net/mdio/mdio-mux-multiplexer.c
diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/mdio/mdio-mux.c
similarity index 100%
rename from drivers/net/phy/mdio-mux.c
rename to drivers/net/mdio/mdio-mux.c
diff --git a/drivers/net/phy/mdio-mvusb.c b/drivers/net/mdio/mdio-mvusb.c
similarity index 100%
rename from drivers/net/phy/mdio-mvusb.c
rename to drivers/net/mdio/mdio-mvusb.c
diff --git a/drivers/net/phy/mdio-octeon.c b/drivers/net/mdio/mdio-octeon.c
similarity index 100%
rename from drivers/net/phy/mdio-octeon.c
rename to drivers/net/mdio/mdio-octeon.c
diff --git a/drivers/net/phy/mdio-sun4i.c b/drivers/net/mdio/mdio-sun4i.c
similarity index 100%
rename from drivers/net/phy/mdio-sun4i.c
rename to drivers/net/mdio/mdio-sun4i.c
diff --git a/drivers/net/phy/mdio-thunder.c b/drivers/net/mdio/mdio-thunder.c
similarity index 100%
rename from drivers/net/phy/mdio-thunder.c
rename to drivers/net/mdio/mdio-thunder.c
diff --git a/drivers/net/phy/mdio-xgene.c b/drivers/net/mdio/mdio-xgene.c
similarity index 99%
rename from drivers/net/phy/mdio-xgene.c
rename to drivers/net/mdio/mdio-xgene.c
index 34990ea..461207c 100644
--- a/drivers/net/phy/mdio-xgene.c
+++ b/drivers/net/mdio/mdio-xgene.c
@@ -11,6 +11,7 @@
#include <linux/efi.h>
#include <linux/if_vlan.h>
#include <linux/io.h>
+#include <linux/mdio/mdio-xgene.h>
#include <linux/module.h>
#include <linux/of_platform.h>
#include <linux/of_net.h>
@@ -18,7 +19,6 @@
#include <linux/prefetch.h>
#include <linux/phy.h>
#include <net/ip.h>
-#include "mdio-xgene.h"
static bool xgene_mdio_status;
diff --git a/drivers/net/pcs/Kconfig b/drivers/net/pcs/Kconfig
new file mode 100644
index 0000000..074fb3f
--- /dev/null
+++ b/drivers/net/pcs/Kconfig
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# PCS Layer Configuration
+#
+
+menu "PCS device drivers"
+
+config PCS_XPCS
+ tristate "Synopsys DesignWare XPCS controller"
+ select MDIO_BUS
+ depends on MDIO_DEVICE
+ help
+ This module provides helper functions for Synopsys DesignWare XPCS
+ controllers.
+
+config PCS_LYNX
+ tristate
+ help
+ This module provides helpers to phylink for managing the Lynx PCS
+ which is part of the Layerscape and QorIQ Ethernet SERDES.
+
+endmenu
diff --git a/drivers/net/pcs/Makefile b/drivers/net/pcs/Makefile
new file mode 100644
index 0000000..c231467
--- /dev/null
+++ b/drivers/net/pcs/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for Linux PCS drivers
+
+obj-$(CONFIG_PCS_XPCS) += pcs-xpcs.o
+obj-$(CONFIG_PCS_LYNX) += pcs-lynx.o
diff --git a/drivers/net/pcs/pcs-lynx.c b/drivers/net/pcs/pcs-lynx.c
new file mode 100644
index 0000000..c43d976
--- /dev/null
+++ b/drivers/net/pcs/pcs-lynx.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2020 NXP
+ * Lynx PCS MDIO helpers
+ */
+
+#include <linux/mdio.h>
+#include <linux/phylink.h>
+#include <linux/pcs-lynx.h>
+
+#define SGMII_CLOCK_PERIOD_NS 8 /* PCS is clocked at 125 MHz */
+#define LINK_TIMER_VAL(ns) ((u32)((ns) / SGMII_CLOCK_PERIOD_NS))
+
+#define SGMII_AN_LINK_TIMER_NS 1600000 /* defined by SGMII spec */
+
+#define LINK_TIMER_LO 0x12
+#define LINK_TIMER_HI 0x13
+#define IF_MODE 0x14
+#define IF_MODE_SGMII_EN BIT(0)
+#define IF_MODE_USE_SGMII_AN BIT(1)
+#define IF_MODE_SPEED(x) (((x) << 2) & GENMASK(3, 2))
+#define IF_MODE_SPEED_MSK GENMASK(3, 2)
+#define IF_MODE_HALF_DUPLEX BIT(4)
+
+enum sgmii_speed {
+ SGMII_SPEED_10 = 0,
+ SGMII_SPEED_100 = 1,
+ SGMII_SPEED_1000 = 2,
+ SGMII_SPEED_2500 = 2,
+};
+
+#define phylink_pcs_to_lynx(pl_pcs) container_of((pl_pcs), struct lynx_pcs, pcs)
+
+static void lynx_pcs_get_state_usxgmii(struct mdio_device *pcs,
+ struct phylink_link_state *state)
+{
+ struct mii_bus *bus = pcs->bus;
+ int addr = pcs->addr;
+ int status, lpa;
+
+ status = mdiobus_c45_read(bus, addr, MDIO_MMD_VEND2, MII_BMSR);
+ if (status < 0)
+ return;
+
+ state->link = !!(status & MDIO_STAT1_LSTATUS);
+ state->an_complete = !!(status & MDIO_AN_STAT1_COMPLETE);
+ if (!state->link || !state->an_complete)
+ return;
+
+ lpa = mdiobus_c45_read(bus, addr, MDIO_MMD_VEND2, MII_LPA);
+ if (lpa < 0)
+ return;
+
+ phylink_decode_usxgmii_word(state, lpa);
+}
+
+static void lynx_pcs_get_state_2500basex(struct mdio_device *pcs,
+ struct phylink_link_state *state)
+{
+ struct mii_bus *bus = pcs->bus;
+ int addr = pcs->addr;
+ int bmsr, lpa;
+
+ bmsr = mdiobus_read(bus, addr, MII_BMSR);
+ lpa = mdiobus_read(bus, addr, MII_LPA);
+ if (bmsr < 0 || lpa < 0) {
+ state->link = false;
+ return;
+ }
+
+ state->link = !!(bmsr & BMSR_LSTATUS);
+ state->an_complete = !!(bmsr & BMSR_ANEGCOMPLETE);
+ if (!state->link)
+ return;
+
+ state->speed = SPEED_2500;
+ state->pause |= MLO_PAUSE_TX | MLO_PAUSE_RX;
+ state->duplex = DUPLEX_FULL;
+}
+
+static void lynx_pcs_get_state(struct phylink_pcs *pcs,
+ struct phylink_link_state *state)
+{
+ struct lynx_pcs *lynx = phylink_pcs_to_lynx(pcs);
+
+ switch (state->interface) {
+ case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_QSGMII:
+ phylink_mii_c22_pcs_get_state(lynx->mdio, state);
+ break;
+ case PHY_INTERFACE_MODE_2500BASEX:
+ lynx_pcs_get_state_2500basex(lynx->mdio, state);
+ break;
+ case PHY_INTERFACE_MODE_USXGMII:
+ lynx_pcs_get_state_usxgmii(lynx->mdio, state);
+ break;
+ default:
+ break;
+ }
+
+ dev_dbg(&lynx->mdio->dev,
+ "mode=%s/%s/%s link=%u an_enabled=%u an_complete=%u\n",
+ phy_modes(state->interface),
+ phy_speed_to_str(state->speed),
+ phy_duplex_to_str(state->duplex),
+ state->link, state->an_enabled, state->an_complete);
+}
+
+static int lynx_pcs_config_sgmii(struct mdio_device *pcs, unsigned int mode,
+ const unsigned long *advertising)
+{
+ struct mii_bus *bus = pcs->bus;
+ int addr = pcs->addr;
+ u16 if_mode;
+ int err;
+
+ if_mode = IF_MODE_SGMII_EN;
+ if (mode == MLO_AN_INBAND) {
+ u32 link_timer;
+
+ if_mode |= IF_MODE_USE_SGMII_AN;
+
+ /* Adjust link timer for SGMII */
+ link_timer = LINK_TIMER_VAL(SGMII_AN_LINK_TIMER_NS);
+ mdiobus_write(bus, addr, LINK_TIMER_LO, link_timer & 0xffff);
+ mdiobus_write(bus, addr, LINK_TIMER_HI, link_timer >> 16);
+ }
+ err = mdiobus_modify(bus, addr, IF_MODE,
+ IF_MODE_SGMII_EN | IF_MODE_USE_SGMII_AN,
+ if_mode);
+ if (err)
+ return err;
+
+ return phylink_mii_c22_pcs_config(pcs, mode, PHY_INTERFACE_MODE_SGMII,
+ advertising);
+}
+
+static int lynx_pcs_config_usxgmii(struct mdio_device *pcs, unsigned int mode,
+ const unsigned long *advertising)
+{
+ struct mii_bus *bus = pcs->bus;
+ int addr = pcs->addr;
+
+ if (!phylink_autoneg_inband(mode)) {
+ dev_err(&pcs->dev, "USXGMII only supports in-band AN for now\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* Configure device ability for the USXGMII Replicator */
+ return mdiobus_c45_write(bus, addr, MDIO_MMD_VEND2, MII_ADVERTISE,
+ MDIO_USXGMII_10G | MDIO_USXGMII_LINK |
+ MDIO_USXGMII_FULL_DUPLEX |
+ ADVERTISE_SGMII | ADVERTISE_LPACK);
+}
+
+static int lynx_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
+ phy_interface_t ifmode,
+ const unsigned long *advertising,
+ bool permit)
+{
+ struct lynx_pcs *lynx = phylink_pcs_to_lynx(pcs);
+
+ switch (ifmode) {
+ case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_QSGMII:
+ return lynx_pcs_config_sgmii(lynx->mdio, mode, advertising);
+ case PHY_INTERFACE_MODE_2500BASEX:
+ if (phylink_autoneg_inband(mode)) {
+ dev_err(&lynx->mdio->dev,
+ "AN not supported on 3.125GHz SerDes lane\n");
+ return -EOPNOTSUPP;
+ }
+ break;
+ case PHY_INTERFACE_MODE_USXGMII:
+ return lynx_pcs_config_usxgmii(lynx->mdio, mode, advertising);
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void lynx_pcs_link_up_sgmii(struct mdio_device *pcs, unsigned int mode,
+ int speed, int duplex)
+{
+ struct mii_bus *bus = pcs->bus;
+ u16 if_mode = 0, sgmii_speed;
+ int addr = pcs->addr;
+
+ /* The PCS needs to be configured manually only
+ * when not operating on in-band mode
+ */
+ if (mode == MLO_AN_INBAND)
+ return;
+
+ if (duplex == DUPLEX_HALF)
+ if_mode |= IF_MODE_HALF_DUPLEX;
+
+ switch (speed) {
+ case SPEED_1000:
+ sgmii_speed = SGMII_SPEED_1000;
+ break;
+ case SPEED_100:
+ sgmii_speed = SGMII_SPEED_100;
+ break;
+ case SPEED_10:
+ sgmii_speed = SGMII_SPEED_10;
+ break;
+ case SPEED_UNKNOWN:
+ /* Silently don't do anything */
+ return;
+ default:
+ dev_err(&pcs->dev, "Invalid PCS speed %d\n", speed);
+ return;
+ }
+ if_mode |= IF_MODE_SPEED(sgmii_speed);
+
+ mdiobus_modify(bus, addr, IF_MODE,
+ IF_MODE_HALF_DUPLEX | IF_MODE_SPEED_MSK,
+ if_mode);
+}
+
+/* 2500Base-X is SerDes protocol 7 on Felix and 6 on ENETC. It is a SerDes lane
+ * clocked at 3.125 GHz which encodes symbols with 8b/10b and does not have
+ * auto-negotiation of any link parameters. Electrically it is compatible with
+ * a single lane of XAUI.
+ * The hardware reference manual wants to call this mode SGMII, but it isn't
+ * really, since the fundamental features of SGMII:
+ * - Downgrading the link speed by duplicating symbols
+ * - Auto-negotiation
+ * are not there.
+ * The speed is configured at 1000 in the IF_MODE because the clock frequency
+ * is actually given by a PLL configured in the Reset Configuration Word (RCW).
+ * Since there is no difference between fixed speed SGMII w/o AN and 802.3z w/o
+ * AN, we call this PHY interface type 2500Base-X. In case a PHY negotiates a
+ * lower link speed on line side, the system-side interface remains fixed at
+ * 2500 Mbps and we do rate adaptation through pause frames.
+ */
+static void lynx_pcs_link_up_2500basex(struct mdio_device *pcs,
+ unsigned int mode,
+ int speed, int duplex)
+{
+ struct mii_bus *bus = pcs->bus;
+ int addr = pcs->addr;
+ u16 if_mode = 0;
+
+ if (mode == MLO_AN_INBAND) {
+ dev_err(&pcs->dev, "AN not supported for 2500BaseX\n");
+ return;
+ }
+
+ if (duplex == DUPLEX_HALF)
+ if_mode |= IF_MODE_HALF_DUPLEX;
+ if_mode |= IF_MODE_SPEED(SGMII_SPEED_2500);
+
+ mdiobus_modify(bus, addr, IF_MODE,
+ IF_MODE_HALF_DUPLEX | IF_MODE_SPEED_MSK,
+ if_mode);
+}
+
+static void lynx_pcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
+ phy_interface_t interface,
+ int speed, int duplex)
+{
+ struct lynx_pcs *lynx = phylink_pcs_to_lynx(pcs);
+
+ switch (interface) {
+ case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_QSGMII:
+ lynx_pcs_link_up_sgmii(lynx->mdio, mode, speed, duplex);
+ break;
+ case PHY_INTERFACE_MODE_2500BASEX:
+ lynx_pcs_link_up_2500basex(lynx->mdio, mode, speed, duplex);
+ break;
+ case PHY_INTERFACE_MODE_USXGMII:
+ /* At the moment, only in-band AN is supported for USXGMII
+ * so nothing to do in link_up
+ */
+ break;
+ default:
+ break;
+ }
+}
+
+static const struct phylink_pcs_ops lynx_pcs_phylink_ops = {
+ .pcs_get_state = lynx_pcs_get_state,
+ .pcs_config = lynx_pcs_config,
+ .pcs_link_up = lynx_pcs_link_up,
+};
+
+struct lynx_pcs *lynx_pcs_create(struct mdio_device *mdio)
+{
+ struct lynx_pcs *lynx_pcs;
+
+ lynx_pcs = kzalloc(sizeof(*lynx_pcs), GFP_KERNEL);
+ if (!lynx_pcs)
+ return NULL;
+
+ lynx_pcs->mdio = mdio;
+ lynx_pcs->pcs.ops = &lynx_pcs_phylink_ops;
+ lynx_pcs->pcs.poll = true;
+
+ return lynx_pcs;
+}
+EXPORT_SYMBOL(lynx_pcs_create);
+
+void lynx_pcs_destroy(struct lynx_pcs *pcs)
+{
+ kfree(pcs);
+}
+EXPORT_SYMBOL(lynx_pcs_destroy);
+
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/phy/mdio-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
similarity index 99%
rename from drivers/net/phy/mdio-xpcs.c
rename to drivers/net/pcs/pcs-xpcs.c
index 0d66a8b..1aa9903 100644
--- a/drivers/net/phy/mdio-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -7,8 +7,8 @@
*/
#include <linux/delay.h>
+#include <linux/pcs/pcs-xpcs.h>
#include <linux/mdio.h>
-#include <linux/mdio-xpcs.h>
#include <linux/phylink.h>
#include <linux/workqueue.h>
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 726e4b2..698bea3 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -3,246 +3,6 @@
# PHY Layer Configuration
#
-menuconfig MDIO_DEVICE
- tristate "MDIO bus device drivers"
- help
- MDIO devices and driver infrastructure code.
-
-if MDIO_DEVICE
-
-config MDIO_BUS
- tristate
- default m if PHYLIB=m
- default MDIO_DEVICE
- help
- This internal symbol is used for link time dependencies and it
- reflects whether the mdio_bus/mdio_device code is built as a
- loadable module or built-in.
-
-if MDIO_BUS
-
-config MDIO_DEVRES
- tristate
-
-config MDIO_ASPEED
- tristate "ASPEED MDIO bus controller"
- depends on ARCH_ASPEED || COMPILE_TEST
- depends on OF_MDIO && HAS_IOMEM
- help
- This module provides a driver for the independent MDIO bus
- controllers found in the ASPEED AST2600 SoC. This is a driver for the
- third revision of the ASPEED MDIO register interface - the first two
- revisions are the "old" and "new" interfaces found in the AST2400 and
- AST2500, embedded in the MAC. For legacy reasons, FTGMAC100 driver
- continues to drive the embedded MDIO controller for the AST2400 and
- AST2500 SoCs, so say N if AST2600 support is not required.
-
-config MDIO_BCM_IPROC
- tristate "Broadcom iProc MDIO bus controller"
- depends on ARCH_BCM_IPROC || COMPILE_TEST
- depends on HAS_IOMEM && OF_MDIO
- default ARCH_BCM_IPROC
- help
- This module provides a driver for the MDIO busses found in the
- Broadcom iProc SoC's.
-
-config MDIO_BCM_UNIMAC
- tristate "Broadcom UniMAC MDIO bus controller"
- depends on HAS_IOMEM
- help
- This module provides a driver for the Broadcom UniMAC MDIO busses.
- This hardware can be found in the Broadcom GENET Ethernet MAC
- controllers as well as some Broadcom Ethernet switches such as the
- Starfighter 2 switches.
-
-config MDIO_BITBANG
- tristate "Bitbanged MDIO buses"
- help
- This module implements the MDIO bus protocol in software,
- for use by low level drivers that export the ability to
- drive the relevant pins.
-
- If in doubt, say N.
-
-config MDIO_BUS_MUX
- tristate
- depends on OF_MDIO
- help
- This module provides a driver framework for MDIO bus
- multiplexers which connect one of several child MDIO busses
- to a parent bus. Switching between child busses is done by
- device specific drivers.
-
-config MDIO_BUS_MUX_BCM_IPROC
- tristate "Broadcom iProc based MDIO bus multiplexers"
- depends on OF && OF_MDIO && (ARCH_BCM_IPROC || COMPILE_TEST)
- select MDIO_BUS_MUX
- default ARCH_BCM_IPROC
- help
- This module provides a driver for MDIO bus multiplexers found in
- iProc based Broadcom SoCs. This multiplexer connects one of several
- child MDIO bus to a parent bus. Buses could be internal as well as
- external and selection logic lies inside the same multiplexer.
-
-config MDIO_BUS_MUX_GPIO
- tristate "GPIO controlled MDIO bus multiplexers"
- depends on OF_GPIO && OF_MDIO
- select MDIO_BUS_MUX
- help
- This module provides a driver for MDIO bus multiplexers that
- are controlled via GPIO lines. The multiplexer connects one of
- several child MDIO busses to a parent bus. Child bus
- selection is under the control of GPIO lines.
-
-config MDIO_BUS_MUX_MESON_G12A
- tristate "Amlogic G12a based MDIO bus multiplexer"
- depends on ARCH_MESON || COMPILE_TEST
- depends on OF_MDIO && HAS_IOMEM && COMMON_CLK
- select MDIO_BUS_MUX
- default m if ARCH_MESON
- help
- This module provides a driver for the MDIO multiplexer/glue of
- the amlogic g12a SoC. The multiplexers connects either the external
- or the internal MDIO bus to the parent bus.
-
-config MDIO_BUS_MUX_MMIOREG
- tristate "MMIO device-controlled MDIO bus multiplexers"
- depends on OF_MDIO && HAS_IOMEM
- select MDIO_BUS_MUX
- help
- This module provides a driver for MDIO bus multiplexers that
- are controlled via a simple memory-mapped device, like an FPGA.
- The multiplexer connects one of several child MDIO busses to a
- parent bus. Child bus selection is under the control of one of
- the FPGA's registers.
-
- Currently, only 8/16/32 bits registers are supported.
-
-config MDIO_BUS_MUX_MULTIPLEXER
- tristate "MDIO bus multiplexer using kernel multiplexer subsystem"
- depends on OF_MDIO
- select MULTIPLEXER
- select MDIO_BUS_MUX
- help
- This module provides a driver for MDIO bus multiplexer
- that is controlled via the kernel multiplexer subsystem. The
- bus multiplexer connects one of several child MDIO busses to
- a parent bus. Child bus selection is under the control of
- the kernel multiplexer subsystem.
-
-config MDIO_CAVIUM
- tristate
-
-config MDIO_GPIO
- tristate "GPIO lib-based bitbanged MDIO buses"
- depends on MDIO_BITBANG
- depends on GPIOLIB || COMPILE_TEST
- help
- Supports GPIO lib-based MDIO busses.
-
- To compile this driver as a module, choose M here: the module
- will be called mdio-gpio.
-
-config MDIO_HISI_FEMAC
- tristate "Hisilicon FEMAC MDIO bus controller"
- depends on HAS_IOMEM && OF_MDIO
- help
- This module provides a driver for the MDIO busses found in the
- Hisilicon SoC that have an Fast Ethernet MAC.
-
-config MDIO_I2C
- tristate
- depends on I2C
- help
- Support I2C based PHYs. This provides a MDIO bus bridged
- to I2C to allow PHYs connected in I2C mode to be accessed
- using the existing infrastructure.
-
- This is library mode.
-
-config MDIO_IPQ4019
- tristate "Qualcomm IPQ4019 MDIO interface support"
- depends on HAS_IOMEM && OF_MDIO
- help
- This driver supports the MDIO interface found in Qualcomm
- IPQ40xx series Soc-s.
-
-config MDIO_IPQ8064
- tristate "Qualcomm IPQ8064 MDIO interface support"
- depends on HAS_IOMEM && OF_MDIO
- depends on MFD_SYSCON
- help
- This driver supports the MDIO interface found in the network
- interface units of the IPQ8064 SoC
-
-config MDIO_MOXART
- tristate "MOXA ART MDIO interface support"
- depends on ARCH_MOXART || COMPILE_TEST
- help
- This driver supports the MDIO interface found in the network
- interface units of the MOXA ART SoC
-
-config MDIO_MSCC_MIIM
- tristate "Microsemi MIIM interface support"
- depends on HAS_IOMEM
- select MDIO_DEVRES
- help
- This driver supports the MIIM (MDIO) interface found in the network
- switches of the Microsemi SoCs; it is recommended to switch on
- CONFIG_HIGH_RES_TIMERS
-
-config MDIO_MVUSB
- tristate "Marvell USB to MDIO Adapter"
- depends on USB
- select MDIO_DEVRES
- help
- A USB to MDIO converter present on development boards for
- Marvell's Link Street family of Ethernet switches.
-
-config MDIO_OCTEON
- tristate "Octeon and some ThunderX SOCs MDIO buses"
- depends on (64BIT && OF_MDIO) || COMPILE_TEST
- depends on HAS_IOMEM
- select MDIO_CAVIUM
- help
- This module provides a driver for the Octeon and ThunderX MDIO
- buses. It is required by the Octeon and ThunderX ethernet device
- drivers on some systems.
-
-config MDIO_SUN4I
- tristate "Allwinner sun4i MDIO interface support"
- depends on ARCH_SUNXI || COMPILE_TEST
- help
- This driver supports the MDIO interface found in the network
- interface units of the Allwinner SoC that have an EMAC (A10,
- A12, A10s, etc.)
-
-config MDIO_THUNDER
- tristate "ThunderX SOCs MDIO buses"
- depends on 64BIT
- depends on PCI
- select MDIO_CAVIUM
- help
- This driver supports the MDIO interfaces found on Cavium
- ThunderX SoCs when the MDIO bus device appears as a PCI
- device.
-
-config MDIO_XGENE
- tristate "APM X-Gene SoC MDIO bus controller"
- depends on ARCH_XGENE || COMPILE_TEST
- help
- This module provides a driver for the MDIO busses found in the
- APM X-Gene SoC's.
-
-config MDIO_XPCS
- tristate "Synopsys DesignWare XPCS controller"
- help
- This module provides helper functions for Synopsys DesignWare XPCS
- controllers.
-
-endif
-endif
-
config PHYLINK
tristate
depends on NETDEVICES
@@ -285,7 +45,15 @@
for any speed known to the PHY.
-comment "MII PHY device drivers"
+config FIXED_PHY
+ tristate "MDIO Bus/PHY emulation with fixed speed/link PHYs"
+ depends on PHYLIB
+ select SWPHY
+ help
+ Adds the platform "fixed" MDIO Bus to cover the boards that use
+ PHYs that are not connected to the real MDIO bus.
+
+ Currently tested with mpc866ads and mpc8349e-mitx.
config SFP
tristate "SFP cage support"
@@ -293,6 +61,19 @@
depends on HWMON || HWMON=n
select MDIO_I2C
+comment "MII PHY device drivers"
+
+config AMD_PHY
+ tristate "AMD PHYs"
+ help
+ Currently supports the am79c874
+
+config MESON_GXL_PHY
+ tristate "Amlogic Meson GXL Internal PHY"
+ depends on ARCH_MESON || COMPILE_TEST
+ help
+ Currently has a driver for the Amlogic Meson GXL Internal PHY
+
config ADIN_PHY
tristate "Analog Devices Industrial Ethernet PHYs"
help
@@ -302,11 +83,6 @@
- ADIN1300 - Robust,Industrial, Low Latency 10/100/1000 Gigabit
Ethernet PHY
-config AMD_PHY
- tristate "AMD PHYs"
- help
- Currently supports the am79c874
-
config AQUANTIA_PHY
tristate "Aquantia PHYs"
help
@@ -318,6 +94,24 @@
Currently supports the Asix Electronics PHY found in the X-Surf 100
AX88796B package.
+config BROADCOM_PHY
+ tristate "Broadcom 54XX PHYs"
+ select BCM_NET_PHYLIB
+ help
+ Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464,
+ BCM5481, BCM54810 and BCM5482 PHYs.
+
+config BCM54140_PHY
+ tristate "Broadcom BCM54140 PHY"
+ depends on PHYLIB
+ depends on HWMON || HWMON=n
+ select BCM_NET_PHYLIB
+ help
+ Support the Broadcom BCM54140 Quad SGMII/QSGMII PHY.
+
+ This driver also supports the hardware monitoring of this PHY and
+ exposes voltage and temperature sensors.
+
config BCM63XX_PHY
tristate "Broadcom 63xx SOCs internal PHY"
depends on BCM63XX || COMPILE_TEST
@@ -332,6 +126,12 @@
Currently supports the BCM7366, BCM7439, BCM7445, and
40nm and 65nm generation of BCM7xxx Set Top Box SoCs.
+config BCM84881_PHY
+ tristate "Broadcom BCM84881 PHY"
+ depends on PHYLIB
+ help
+ Support the Broadcom BCM84881 PHY.
+
config BCM87XX_PHY
tristate "Broadcom BCM8706 and BCM8727 PHYs"
help
@@ -353,30 +153,6 @@
config BCM_NET_PHYLIB
tristate
-config BROADCOM_PHY
- tristate "Broadcom PHYs"
- select BCM_NET_PHYLIB
- help
- Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464,
- BCM5481, BCM54810 and BCM5482 PHYs.
-
-config BCM54140_PHY
- tristate "Broadcom BCM54140 PHY"
- depends on PHYLIB
- depends on HWMON || HWMON=n
- select BCM_NET_PHYLIB
- help
- Support the Broadcom BCM54140 Quad SGMII/QSGMII PHY.
-
- This driver also supports the hardware monitoring of this PHY and
- exposes voltage and temperature sensors.
-
-config BCM84881_PHY
- tristate "Broadcom BCM84881 PHY"
- depends on PHYLIB
- help
- Support the Broadcom BCM84881 PHY.
-
config CICADA_PHY
tristate "Cicada PHYs"
help
@@ -392,48 +168,16 @@
help
Currently supports dm9161e and dm9131
-config DP83822_PHY
- tristate "Texas Instruments DP83822/825/826 PHYs"
- help
- Supports the DP83822, DP83825I, DP83825CM, DP83825CS, DP83825S,
- DP83826C and DP83826NC PHYs.
-
-config DP83TC811_PHY
- tristate "Texas Instruments DP83TC811 PHY"
- help
- Supports the DP83TC811 PHY.
-
-config DP83848_PHY
- tristate "Texas Instruments DP83848 PHY"
- help
- Supports the DP83848 PHY.
-
-config DP83867_PHY
- tristate "Texas Instruments DP83867 Gigabit PHY"
- help
- Currently supports the DP83867 PHY.
-
-config DP83869_PHY
- tristate "Texas Instruments DP83869 Gigabit PHY"
- help
- Currently supports the DP83869 PHY. This PHY supports copper and
- fiber connections.
-
-config FIXED_PHY
- tristate "MDIO Bus/PHY emulation with fixed speed/link PHYs"
- depends on PHYLIB
- select SWPHY
- help
- Adds the platform "fixed" MDIO Bus to cover the boards that use
- PHYs that are not connected to the real MDIO bus.
-
- Currently tested with mpc866ads and mpc8349e-mitx.
-
config ICPLUS_PHY
tristate "ICPlus PHYs"
help
Currently supports the IP175C and IP1001 PHYs.
+config LXT_PHY
+ tristate "Intel LXT PHYs"
+ help
+ Currently supports the lxt970, lxt971
+
config INTEL_XWAY_PHY
tristate "Intel XWAY PHYs"
help
@@ -447,27 +191,16 @@
help
Supports the LSI ET1011C PHY.
-config LXT_PHY
- tristate "Intel LXT PHYs"
- help
- Currently supports the lxt970, lxt971
-
config MARVELL_PHY
- tristate "Marvell PHYs"
+ tristate "Marvell Alaska PHYs"
help
- Currently has a driver for the 88E1011S
+ Currently has a driver for the 88E1XXX
config MARVELL_10G_PHY
tristate "Marvell Alaska 10Gbit PHYs"
help
Support for the Marvell Alaska MV88X3310 and compatible PHYs.
-config MESON_GXL_PHY
- tristate "Amlogic Meson GXL Internal PHY"
- depends on ARCH_MESON || COMPILE_TEST
- help
- Currently has a driver for the Amlogic Meson GXL Internal PHY
-
config MICREL_PHY
tristate "Micrel PHYs"
help
@@ -518,12 +251,12 @@
Supports the Realtek 821x PHY.
config RENESAS_PHY
- tristate "Driver for Renesas PHYs"
+ tristate "Renesas PHYs"
help
Supports the Renesas PHYs uPD60620 and uPD60620A.
config ROCKCHIP_PHY
- tristate "Driver for Rockchip Ethernet PHYs"
+ tristate "Rockchip Ethernet PHYs"
help
Currently supports the integrated Ethernet PHY.
@@ -542,6 +275,33 @@
help
Currently supports the Teranetics TN2020
+config DP83822_PHY
+ tristate "Texas Instruments DP83822/825/826 PHYs"
+ help
+ Supports the DP83822, DP83825I, DP83825CM, DP83825CS, DP83825S,
+ DP83826C and DP83826NC PHYs.
+
+config DP83TC811_PHY
+ tristate "Texas Instruments DP83TC811 PHY"
+ help
+ Supports the DP83TC811 PHY.
+
+config DP83848_PHY
+ tristate "Texas Instruments DP83848 PHY"
+ help
+ Supports the DP83848 PHY.
+
+config DP83867_PHY
+ tristate "Texas Instruments DP83867 Gigabit PHY"
+ help
+ Currently supports the DP83867 PHY.
+
+config DP83869_PHY
+ tristate "Texas Instruments DP83869 Gigabit PHY"
+ help
+ Currently supports the DP83869 PHY. This PHY supports copper and
+ fiber connections.
+
config VITESSE_PHY
tristate "Vitesse PHYs"
help
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index d84bab4..a13e402 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-# Makefile for Linux PHY drivers and MDIO bus drivers
+# Makefile for Linux PHY drivers
libphy-y := phy.o phy-c45.o phy-core.o phy_device.o \
linkmode.o
@@ -24,31 +24,6 @@
obj-$(CONFIG_PHYLINK) += phylink.o
obj-$(CONFIG_PHYLIB) += libphy.o
-obj-$(CONFIG_MDIO_ASPEED) += mdio-aspeed.o
-obj-$(CONFIG_MDIO_BCM_IPROC) += mdio-bcm-iproc.o
-obj-$(CONFIG_MDIO_BCM_UNIMAC) += mdio-bcm-unimac.o
-obj-$(CONFIG_MDIO_BITBANG) += mdio-bitbang.o
-obj-$(CONFIG_MDIO_BUS_MUX) += mdio-mux.o
-obj-$(CONFIG_MDIO_BUS_MUX_BCM_IPROC) += mdio-mux-bcm-iproc.o
-obj-$(CONFIG_MDIO_BUS_MUX_GPIO) += mdio-mux-gpio.o
-obj-$(CONFIG_MDIO_BUS_MUX_MESON_G12A) += mdio-mux-meson-g12a.o
-obj-$(CONFIG_MDIO_BUS_MUX_MMIOREG) += mdio-mux-mmioreg.o
-obj-$(CONFIG_MDIO_BUS_MUX_MULTIPLEXER) += mdio-mux-multiplexer.o
-obj-$(CONFIG_MDIO_CAVIUM) += mdio-cavium.o
-obj-$(CONFIG_MDIO_GPIO) += mdio-gpio.o
-obj-$(CONFIG_MDIO_HISI_FEMAC) += mdio-hisi-femac.o
-obj-$(CONFIG_MDIO_I2C) += mdio-i2c.o
-obj-$(CONFIG_MDIO_IPQ4019) += mdio-ipq4019.o
-obj-$(CONFIG_MDIO_IPQ8064) += mdio-ipq8064.o
-obj-$(CONFIG_MDIO_MOXART) += mdio-moxart.o
-obj-$(CONFIG_MDIO_MSCC_MIIM) += mdio-mscc-miim.o
-obj-$(CONFIG_MDIO_MVUSB) += mdio-mvusb.o
-obj-$(CONFIG_MDIO_OCTEON) += mdio-octeon.o
-obj-$(CONFIG_MDIO_SUN4I) += mdio-sun4i.o
-obj-$(CONFIG_MDIO_THUNDER) += mdio-thunder.o
-obj-$(CONFIG_MDIO_XGENE) += mdio-xgene.o
-obj-$(CONFIG_MDIO_XPCS) += mdio-xpcs.o
-
obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += mii_timestamper.o
obj-$(CONFIG_SFP) += sfp.o
@@ -62,32 +37,32 @@
aquantia-objs += aquantia_hwmon.o
endif
obj-$(CONFIG_AQUANTIA_PHY) += aquantia.o
-obj-$(CONFIG_AX88796B_PHY) += ax88796b.o
obj-$(CONFIG_AT803X_PHY) += at803x.o
+obj-$(CONFIG_AX88796B_PHY) += ax88796b.o
+obj-$(CONFIG_BCM54140_PHY) += bcm54140.o
obj-$(CONFIG_BCM63XX_PHY) += bcm63xx.o
obj-$(CONFIG_BCM7XXX_PHY) += bcm7xxx.o
+obj-$(CONFIG_BCM84881_PHY) += bcm84881.o
obj-$(CONFIG_BCM87XX_PHY) += bcm87xx.o
obj-$(CONFIG_BCM_CYGNUS_PHY) += bcm-cygnus.o
obj-$(CONFIG_BCM_NET_PHYLIB) += bcm-phy-lib.o
obj-$(CONFIG_BROADCOM_PHY) += broadcom.o
-obj-$(CONFIG_BCM54140_PHY) += bcm54140.o
-obj-$(CONFIG_BCM84881_PHY) += bcm84881.o
obj-$(CONFIG_CICADA_PHY) += cicada.o
obj-$(CONFIG_CORTINA_PHY) += cortina.o
obj-$(CONFIG_DAVICOM_PHY) += davicom.o
obj-$(CONFIG_DP83640_PHY) += dp83640.o
obj-$(CONFIG_DP83822_PHY) += dp83822.o
-obj-$(CONFIG_DP83TC811_PHY) += dp83tc811.o
obj-$(CONFIG_DP83848_PHY) += dp83848.o
obj-$(CONFIG_DP83867_PHY) += dp83867.o
obj-$(CONFIG_DP83869_PHY) += dp83869.o
+obj-$(CONFIG_DP83TC811_PHY) += dp83tc811.o
obj-$(CONFIG_FIXED_PHY) += fixed_phy.o
obj-$(CONFIG_ICPLUS_PHY) += icplus.o
obj-$(CONFIG_INTEL_XWAY_PHY) += intel-xway.o
obj-$(CONFIG_LSI_ET1011C_PHY) += et1011c.o
obj-$(CONFIG_LXT_PHY) += lxt.o
-obj-$(CONFIG_MARVELL_PHY) += marvell.o
obj-$(CONFIG_MARVELL_10G_PHY) += marvell10g.o
+obj-$(CONFIG_MARVELL_PHY) += marvell.o
obj-$(CONFIG_MESON_GXL_PHY) += meson-gxl.o
obj-$(CONFIG_MICREL_KS8995MA) += spi_ks8995.o
obj-$(CONFIG_MICREL_PHY) += micrel.o
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 101651b..ed601a7 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -343,7 +343,7 @@ static int at803x_rgmii_reg_get_voltage_sel(struct regulator_dev *rdev)
return (val & AT803X_DEBUG_RGMII_1V8) ? 1 : 0;
}
-static struct regulator_ops vddio_regulator_ops = {
+static const struct regulator_ops vddio_regulator_ops = {
.list_voltage = regulator_list_voltage_table,
.set_voltage_sel = at803x_rgmii_reg_set_voltage_sel,
.get_voltage_sel = at803x_rgmii_reg_get_voltage_sel,
@@ -364,7 +364,7 @@ static const struct regulator_desc vddio_desc = {
.owner = THIS_MODULE,
};
-static struct regulator_ops vddh_regulator_ops = {
+static const struct regulator_ops vddh_regulator_ops = {
};
static const struct regulator_desc vddh_desc = {
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index 79e67f2..f2cacca 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -798,51 +798,32 @@ static int decode_evnt(struct dp83640_private *dp83640,
return parsed;
}
-#define DP83640_PACKET_HASH_OFFSET 20
#define DP83640_PACKET_HASH_LEN 10
static int match(struct sk_buff *skb, unsigned int type, struct rxts *rxts)
{
- unsigned int offset = 0;
- u8 *msgtype, *data = skb_mac_header(skb);
- __be16 *seqid;
+ struct ptp_header *hdr;
+ u8 msgtype;
+ u16 seqid;
u16 hash;
/* check sequenceID, messageType, 12 bit hash of offset 20-29 */
- if (type & PTP_CLASS_VLAN)
- offset += VLAN_HLEN;
-
- switch (type & PTP_CLASS_PMASK) {
- case PTP_CLASS_IPV4:
- offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
- break;
- case PTP_CLASS_IPV6:
- offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
- break;
- case PTP_CLASS_L2:
- offset += ETH_HLEN;
- break;
- default:
- return 0;
- }
-
- if (skb->len + ETH_HLEN < offset + OFF_PTP_SEQUENCE_ID + sizeof(*seqid))
+ hdr = ptp_parse_header(skb, type);
+ if (!hdr)
return 0;
- if (unlikely(type & PTP_CLASS_V1))
- msgtype = data + offset + OFF_PTP_CONTROL;
- else
- msgtype = data + offset;
- if (rxts->msgtype != (*msgtype & 0xf))
+ msgtype = ptp_get_msgtype(hdr, type);
+
+ if (rxts->msgtype != (msgtype & 0xf))
return 0;
- seqid = (__be16 *)(data + offset + OFF_PTP_SEQUENCE_ID);
- if (rxts->seqid != ntohs(*seqid))
+ seqid = be16_to_cpu(hdr->sequence_id);
+ if (rxts->seqid != seqid)
return 0;
hash = ether_crc(DP83640_PACKET_HASH_LEN,
- data + offset + DP83640_PACKET_HASH_OFFSET) >> 20;
+ (unsigned char *)&hdr->source_port_identity) >> 20;
if (rxts->hash != hash)
return 0;
@@ -982,35 +963,16 @@ static void decode_status_frame(struct dp83640_private *dp83640,
static int is_sync(struct sk_buff *skb, int type)
{
- u8 *data = skb->data, *msgtype;
- unsigned int offset = 0;
+ struct ptp_header *hdr;
+ u8 msgtype;
- if (type & PTP_CLASS_VLAN)
- offset += VLAN_HLEN;
-
- switch (type & PTP_CLASS_PMASK) {
- case PTP_CLASS_IPV4:
- offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
- break;
- case PTP_CLASS_IPV6:
- offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
- break;
- case PTP_CLASS_L2:
- offset += ETH_HLEN;
- break;
- default:
- return 0;
- }
-
- if (type & PTP_CLASS_V1)
- offset += OFF_PTP_CONTROL;
-
- if (skb->len < offset + 1)
+ hdr = ptp_parse_header(skb, type);
+ if (!hdr)
return 0;
- msgtype = data + offset;
+ msgtype = ptp_get_msgtype(hdr, type);
- return (*msgtype & 0xf) == 0;
+ return (msgtype & 0xf) == 0;
}
static void dp83640_free_clocks(void)
diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
index 37643c4..732c8be 100644
--- a/drivers/net/phy/dp83822.c
+++ b/drivers/net/phy/dp83822.c
@@ -23,16 +23,31 @@
#define DP83822_DEVADDR 0x1f
+#define MII_DP83822_CTRL_2 0x0a
+#define MII_DP83822_PHYSTS 0x10
#define MII_DP83822_PHYSCR 0x11
#define MII_DP83822_MISR1 0x12
#define MII_DP83822_MISR2 0x13
+#define MII_DP83822_FCSCR 0x14
#define MII_DP83822_RCSR 0x17
#define MII_DP83822_RESET_CTRL 0x1f
#define MII_DP83822_GENCFG 0x465
+#define MII_DP83822_SOR1 0x467
+
+/* GENCFG */
+#define DP83822_SIG_DET_LOW BIT(0)
+
+/* Control Register 2 bits */
+#define DP83822_FX_ENABLE BIT(14)
#define DP83822_HW_RESET BIT(15)
#define DP83822_SW_RESET BIT(14)
+/* PHY STS bits */
+#define DP83822_PHYSTS_DUPLEX BIT(2)
+#define DP83822_PHYSTS_10 BIT(1)
+#define DP83822_PHYSTS_LINK BIT(0)
+
/* PHYSCR Register Fields */
#define DP83822_PHYSCR_INT_OE BIT(0) /* Interrupt Output Enable */
#define DP83822_PHYSCR_INTEN BIT(1) /* Interrupt Enable */
@@ -83,6 +98,28 @@
#define DP83822_RX_CLK_SHIFT BIT(12)
#define DP83822_TX_CLK_SHIFT BIT(11)
+/* SOR1 mode */
+#define DP83822_STRAP_MODE1 0
+#define DP83822_STRAP_MODE2 BIT(0)
+#define DP83822_STRAP_MODE3 BIT(1)
+#define DP83822_STRAP_MODE4 GENMASK(1, 0)
+
+#define DP83822_COL_STRAP_MASK GENMASK(11, 10)
+#define DP83822_COL_SHIFT 10
+#define DP83822_RX_ER_STR_MASK GENMASK(9, 8)
+#define DP83822_RX_ER_SHIFT 8
+
+#define MII_DP83822_FIBER_ADVERTISE (ADVERTISED_TP | ADVERTISED_MII | \
+ ADVERTISED_FIBRE | ADVERTISED_BNC | \
+ ADVERTISED_Pause | ADVERTISED_Asym_Pause | \
+ ADVERTISED_100baseT_Full)
+
+struct dp83822_private {
+ bool fx_signal_det_low;
+ int fx_enabled;
+ u16 fx_sd_enable;
+};
+
static int dp83822_ack_interrupt(struct phy_device *phydev)
{
int err;
@@ -197,6 +234,7 @@ static void dp83822_get_wol(struct phy_device *phydev,
static int dp83822_config_intr(struct phy_device *phydev)
{
+ struct dp83822_private *dp83822 = phydev->priv;
int misr_status;
int physcr_status;
int err;
@@ -208,13 +246,16 @@ static int dp83822_config_intr(struct phy_device *phydev)
misr_status |= (DP83822_RX_ERR_HF_INT_EN |
DP83822_FALSE_CARRIER_HF_INT_EN |
- DP83822_ANEG_COMPLETE_INT_EN |
- DP83822_DUP_MODE_CHANGE_INT_EN |
- DP83822_SPEED_CHANGED_INT_EN |
DP83822_LINK_STAT_INT_EN |
DP83822_ENERGY_DET_INT_EN |
DP83822_LINK_QUAL_INT_EN);
+ if (!dp83822->fx_enabled)
+ misr_status |= DP83822_ANEG_COMPLETE_INT_EN |
+ DP83822_DUP_MODE_CHANGE_INT_EN |
+ DP83822_SPEED_CHANGED_INT_EN;
+
+
err = phy_write(phydev, MII_DP83822_MISR1, misr_status);
if (err < 0)
return err;
@@ -224,14 +265,16 @@ static int dp83822_config_intr(struct phy_device *phydev)
return misr_status;
misr_status |= (DP83822_JABBER_DET_INT_EN |
- DP83822_WOL_PKT_INT_EN |
DP83822_SLEEP_MODE_INT_EN |
- DP83822_MDI_XOVER_INT_EN |
DP83822_LB_FIFO_INT_EN |
DP83822_PAGE_RX_INT_EN |
- DP83822_ANEG_ERR_INT_EN |
DP83822_EEE_ERROR_CHANGE_INT_EN);
+ if (!dp83822->fx_enabled)
+ misr_status |= DP83822_MDI_XOVER_INT_EN |
+ DP83822_ANEG_ERR_INT_EN |
+ DP83822_WOL_PKT_INT_EN;
+
err = phy_write(phydev, MII_DP83822_MISR2, misr_status);
if (err < 0)
return err;
@@ -270,13 +313,60 @@ static int dp8382x_disable_wol(struct phy_device *phydev)
MII_DP83822_WOL_CFG, value);
}
+static int dp83822_read_status(struct phy_device *phydev)
+{
+ struct dp83822_private *dp83822 = phydev->priv;
+ int status = phy_read(phydev, MII_DP83822_PHYSTS);
+ int ctrl2;
+ int ret;
+
+ if (dp83822->fx_enabled) {
+ if (status & DP83822_PHYSTS_LINK) {
+ phydev->speed = SPEED_UNKNOWN;
+ phydev->duplex = DUPLEX_UNKNOWN;
+ } else {
+ ctrl2 = phy_read(phydev, MII_DP83822_CTRL_2);
+ if (ctrl2 < 0)
+ return ctrl2;
+
+ if (!(ctrl2 & DP83822_FX_ENABLE)) {
+ ret = phy_write(phydev, MII_DP83822_CTRL_2,
+ DP83822_FX_ENABLE | ctrl2);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ }
+
+ ret = genphy_read_status(phydev);
+ if (ret)
+ return ret;
+
+ if (status < 0)
+ return status;
+
+ if (status & DP83822_PHYSTS_DUPLEX)
+ phydev->duplex = DUPLEX_FULL;
+ else
+ phydev->duplex = DUPLEX_HALF;
+
+ if (status & DP83822_PHYSTS_10)
+ phydev->speed = SPEED_10;
+ else
+ phydev->speed = SPEED_100;
+
+ return 0;
+}
+
static int dp83822_config_init(struct phy_device *phydev)
{
+ struct dp83822_private *dp83822 = phydev->priv;
struct device *dev = &phydev->mdio.dev;
int rgmii_delay;
s32 rx_int_delay;
s32 tx_int_delay;
int err = 0;
+ int bmcr;
if (phy_interface_is_rgmii(phydev)) {
rx_int_delay = phy_get_internal_delay(phydev, dev, NULL, 0,
@@ -302,6 +392,53 @@ static int dp83822_config_init(struct phy_device *phydev)
}
}
+ if (dp83822->fx_enabled) {
+ err = phy_modify(phydev, MII_DP83822_CTRL_2,
+ DP83822_FX_ENABLE, 1);
+ if (err < 0)
+ return err;
+
+ /* Only allow advertising what this PHY supports */
+ linkmode_and(phydev->advertising, phydev->advertising,
+ phydev->supported);
+
+ linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT,
+ phydev->supported);
+ linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT,
+ phydev->advertising);
+
+ /* Auto neg is not supported in fiber mode */
+ bmcr = phy_read(phydev, MII_BMCR);
+ if (bmcr < 0)
+ return bmcr;
+
+ if (bmcr & BMCR_ANENABLE) {
+ err = phy_modify(phydev, MII_BMCR, BMCR_ANENABLE, 0);
+ if (err < 0)
+ return err;
+ }
+ phydev->autoneg = AUTONEG_DISABLE;
+ linkmode_clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
+ phydev->supported);
+ linkmode_clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
+ phydev->advertising);
+
+ /* Setup fiber advertisement */
+ err = phy_modify_changed(phydev, MII_ADVERTISE,
+ MII_DP83822_FIBER_ADVERTISE,
+ MII_DP83822_FIBER_ADVERTISE);
+
+ if (err < 0)
+ return err;
+
+ if (dp83822->fx_signal_det_low) {
+ err = phy_set_bits_mmd(phydev, DP83822_DEVADDR,
+ MII_DP83822_GENCFG,
+ DP83822_SIG_DET_LOW);
+ if (err)
+ return err;
+ }
+ }
return dp8382x_disable_wol(phydev);
}
@@ -314,13 +451,85 @@ static int dp83822_phy_reset(struct phy_device *phydev)
{
int err;
- err = phy_write(phydev, MII_DP83822_RESET_CTRL, DP83822_HW_RESET);
+ err = phy_write(phydev, MII_DP83822_RESET_CTRL, DP83822_SW_RESET);
if (err < 0)
return err;
return phydev->drv->config_init(phydev);
}
+#ifdef CONFIG_OF_MDIO
+static int dp83822_of_init(struct phy_device *phydev)
+{
+ struct dp83822_private *dp83822 = phydev->priv;
+ struct device *dev = &phydev->mdio.dev;
+
+ /* Signal detection for the PHY is only enabled if the FX_EN and the
+ * SD_EN pins are strapped. Signal detection can only enabled if FX_EN
+ * is strapped otherwise signal detection is disabled for the PHY.
+ */
+ if (dp83822->fx_enabled && dp83822->fx_sd_enable)
+ dp83822->fx_signal_det_low = device_property_present(dev,
+ "ti,link-loss-low");
+ if (!dp83822->fx_enabled)
+ dp83822->fx_enabled = device_property_present(dev,
+ "ti,fiber-mode");
+
+ return 0;
+}
+#else
+static int dp83822_of_init(struct phy_device *phydev)
+{
+ return 0;
+}
+#endif /* CONFIG_OF_MDIO */
+
+static int dp83822_read_straps(struct phy_device *phydev)
+{
+ struct dp83822_private *dp83822 = phydev->priv;
+ int fx_enabled, fx_sd_enable;
+ int val;
+
+ val = phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_SOR1);
+ if (val < 0)
+ return val;
+
+ fx_enabled = (val & DP83822_COL_STRAP_MASK) >> DP83822_COL_SHIFT;
+ if (fx_enabled == DP83822_STRAP_MODE2 ||
+ fx_enabled == DP83822_STRAP_MODE3)
+ dp83822->fx_enabled = 1;
+
+ if (dp83822->fx_enabled) {
+ fx_sd_enable = (val & DP83822_RX_ER_STR_MASK) >> DP83822_RX_ER_SHIFT;
+ if (fx_sd_enable == DP83822_STRAP_MODE3 ||
+ fx_sd_enable == DP83822_STRAP_MODE4)
+ dp83822->fx_sd_enable = 1;
+ }
+
+ return 0;
+}
+
+static int dp83822_probe(struct phy_device *phydev)
+{
+ struct dp83822_private *dp83822;
+ int ret;
+
+ dp83822 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83822),
+ GFP_KERNEL);
+ if (!dp83822)
+ return -ENOMEM;
+
+ phydev->priv = dp83822;
+
+ ret = dp83822_read_straps(phydev);
+ if (ret)
+ return ret;
+
+ dp83822_of_init(phydev);
+
+ return 0;
+}
+
static int dp83822_suspend(struct phy_device *phydev)
{
int value;
@@ -352,8 +561,10 @@ static int dp83822_resume(struct phy_device *phydev)
PHY_ID_MATCH_MODEL(_id), \
.name = (_name), \
/* PHY_BASIC_FEATURES */ \
+ .probe = dp83822_probe, \
.soft_reset = dp83822_phy_reset, \
.config_init = dp83822_config_init, \
+ .read_status = dp83822_read_status, \
.get_wol = dp83822_get_wol, \
.set_wol = dp83822_set_wol, \
.ack_interrupt = dp83822_ack_interrupt, \
diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index cd70326..69d3eac 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*
- * Driver for the Texas Instruments DP83867 PHY
+/* Driver for the Texas Instruments DP83867 PHY
*
* Copyright (C) 2015 Texas Instruments Inc.
*/
@@ -113,7 +112,6 @@
#define DP83867_RGMII_RX_CLK_DELAY_SHIFT 0
#define DP83867_RGMII_RX_CLK_DELAY_INV (DP83867_RGMII_RX_CLK_DELAY_MAX + 1)
-
/* IO_MUX_CFG bits */
#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MASK 0x1f
#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX 0x0
@@ -384,22 +382,22 @@ static int dp83867_set_downshift(struct phy_device *phydev, u8 cnt)
DP83867_DOWNSHIFT_EN);
switch (cnt) {
- case DP83867_DOWNSHIFT_1_COUNT:
- count = DP83867_DOWNSHIFT_1_COUNT_VAL;
- break;
- case DP83867_DOWNSHIFT_2_COUNT:
- count = DP83867_DOWNSHIFT_2_COUNT_VAL;
- break;
- case DP83867_DOWNSHIFT_4_COUNT:
- count = DP83867_DOWNSHIFT_4_COUNT_VAL;
- break;
- case DP83867_DOWNSHIFT_8_COUNT:
- count = DP83867_DOWNSHIFT_8_COUNT_VAL;
- break;
- default:
- phydev_err(phydev,
- "Downshift count must be 1, 2, 4 or 8\n");
- return -EINVAL;
+ case DP83867_DOWNSHIFT_1_COUNT:
+ count = DP83867_DOWNSHIFT_1_COUNT_VAL;
+ break;
+ case DP83867_DOWNSHIFT_2_COUNT:
+ count = DP83867_DOWNSHIFT_2_COUNT_VAL;
+ break;
+ case DP83867_DOWNSHIFT_4_COUNT:
+ count = DP83867_DOWNSHIFT_4_COUNT_VAL;
+ break;
+ case DP83867_DOWNSHIFT_8_COUNT:
+ count = DP83867_DOWNSHIFT_8_COUNT_VAL;
+ break;
+ default:
+ phydev_err(phydev,
+ "Downshift count must be 1, 2, 4 or 8\n");
+ return -EINVAL;
}
val = DP83867_DOWNSHIFT_EN;
@@ -411,7 +409,7 @@ static int dp83867_set_downshift(struct phy_device *phydev, u8 cnt)
}
static int dp83867_get_tunable(struct phy_device *phydev,
- struct ethtool_tunable *tuna, void *data)
+ struct ethtool_tunable *tuna, void *data)
{
switch (tuna->id) {
case ETHTOOL_PHY_DOWNSHIFT:
@@ -422,7 +420,7 @@ static int dp83867_get_tunable(struct phy_device *phydev,
}
static int dp83867_set_tunable(struct phy_device *phydev,
- struct ethtool_tunable *tuna, const void *data)
+ struct ethtool_tunable *tuna, const void *data)
{
switch (tuna->id) {
case ETHTOOL_PHY_DOWNSHIFT:
@@ -524,11 +522,10 @@ static int dp83867_of_init(struct phy_device *phydev)
dp83867->io_impedance = -1; /* leave at default */
dp83867->rxctrl_strap_quirk = of_property_read_bool(of_node,
- "ti,dp83867-rxctrl-strap-quirk");
+ "ti,dp83867-rxctrl-strap-quirk");
dp83867->sgmii_ref_clk_en = of_property_read_bool(of_node,
- "ti,sgmii-ref-clock-output-enable");
-
+ "ti,sgmii-ref-clock-output-enable");
dp83867->rx_id_delay = DP83867_RGMII_RX_CLK_DELAY_INV;
ret = of_property_read_u32(of_node, "ti,rx-internal-delay",
diff --git a/drivers/net/phy/mscc/mscc_macsec.c b/drivers/net/phy/mscc/mscc_macsec.c
index 1d4c012..6cf9b79 100644
--- a/drivers/net/phy/mscc/mscc_macsec.c
+++ b/drivers/net/phy/mscc/mscc_macsec.c
@@ -958,7 +958,7 @@ static int vsc8584_macsec_del_txsa(struct macsec_context *ctx)
return 0;
}
-static struct macsec_ops vsc8584_macsec_ops = {
+static const struct macsec_ops vsc8584_macsec_ops = {
.mdo_dev_open = vsc8584_macsec_dev_open,
.mdo_dev_stop = vsc8584_macsec_dev_stop,
.mdo_add_secy = vsc8584_macsec_add_secy,
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 32f4e8e..fe2296f 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -535,8 +535,10 @@ static void phylink_mac_pcs_get_state(struct phylink *pl,
if (pl->pcs_ops)
pl->pcs_ops->pcs_get_state(pl->pcs, state);
- else
+ else if (pl->mac_ops->mac_pcs_get_state)
pl->mac_ops->mac_pcs_get_state(pl->config, state);
+ else
+ state->link = 0;
}
/* The fixed state is... fixed except for the link state,
@@ -2319,6 +2321,49 @@ static void phylink_decode_sgmii_word(struct phylink_link_state *state,
}
/**
+ * phylink_decode_usxgmii_word() - decode the USXGMII word from a MAC PCS
+ * @state: a pointer to a struct phylink_link_state.
+ * @lpa: a 16 bit value which stores the USXGMII auto-negotiation word
+ *
+ * Helper for MAC PCS supporting the USXGMII protocol and the auto-negotiation
+ * code word. Decode the USXGMII code word and populate the corresponding fields
+ * (speed, duplex) into the phylink_link_state structure.
+ */
+void phylink_decode_usxgmii_word(struct phylink_link_state *state,
+ uint16_t lpa)
+{
+ switch (lpa & MDIO_USXGMII_SPD_MASK) {
+ case MDIO_USXGMII_10:
+ state->speed = SPEED_10;
+ break;
+ case MDIO_USXGMII_100:
+ state->speed = SPEED_100;
+ break;
+ case MDIO_USXGMII_1000:
+ state->speed = SPEED_1000;
+ break;
+ case MDIO_USXGMII_2500:
+ state->speed = SPEED_2500;
+ break;
+ case MDIO_USXGMII_5000:
+ state->speed = SPEED_5000;
+ break;
+ case MDIO_USXGMII_10G:
+ state->speed = SPEED_10000;
+ break;
+ default:
+ state->link = false;
+ return;
+ }
+
+ if (lpa & MDIO_USXGMII_FULL_DUPLEX)
+ state->duplex = DUPLEX_FULL;
+ else
+ state->duplex = DUPLEX_HALF;
+}
+EXPORT_SYMBOL_GPL(phylink_decode_usxgmii_word);
+
+/**
* phylink_mii_c22_pcs_get_state() - read the MAC PCS state
* @pcs: a pointer to a &struct mdio_device.
* @state: a pointer to a &struct phylink_link_state.
@@ -2361,6 +2406,7 @@ void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs,
break;
case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_QSGMII:
phylink_decode_sgmii_word(state, lpa);
break;
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index cf83314..1d18c10 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -7,6 +7,7 @@
#include <linux/i2c.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>
+#include <linux/mdio/mdio-i2c.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/of.h>
@@ -16,7 +17,6 @@
#include <linux/slab.h>
#include <linux/workqueue.h>
-#include "mdio-i2c.h"
#include "sfp.h"
#include "swphy.h"
diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index 74568ae..638e8c3 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -21,6 +21,17 @@
#include <linux/netdevice.h>
#include <linux/smscphy.h>
+/* Vendor-specific PHY Definitions */
+/* EDPD NLP / crossover time configuration */
+#define PHY_EDPD_CONFIG 16
+#define PHY_EDPD_CONFIG_EXT_CROSSOVER_ 0x0001
+
+/* Control/Status Indication Register */
+#define SPECIAL_CTRL_STS 27
+#define SPECIAL_CTRL_STS_OVRRD_AMDIX_ 0x8000
+#define SPECIAL_CTRL_STS_AMDIX_ENABLE_ 0x4000
+#define SPECIAL_CTRL_STS_AMDIX_STATE_ 0x2000
+
struct smsc_hw_stat {
const char *string;
u8 reg;
@@ -96,6 +107,54 @@ static int lan911x_config_init(struct phy_device *phydev)
return smsc_phy_ack_interrupt(phydev);
}
+static int lan87xx_config_aneg(struct phy_device *phydev)
+{
+ int rc;
+ int val;
+
+ switch (phydev->mdix_ctrl) {
+ case ETH_TP_MDI:
+ val = SPECIAL_CTRL_STS_OVRRD_AMDIX_;
+ break;
+ case ETH_TP_MDI_X:
+ val = SPECIAL_CTRL_STS_OVRRD_AMDIX_ |
+ SPECIAL_CTRL_STS_AMDIX_STATE_;
+ break;
+ case ETH_TP_MDI_AUTO:
+ val = SPECIAL_CTRL_STS_AMDIX_ENABLE_;
+ break;
+ default:
+ return genphy_config_aneg(phydev);
+ }
+
+ rc = phy_read(phydev, SPECIAL_CTRL_STS);
+ if (rc < 0)
+ return rc;
+
+ rc &= ~(SPECIAL_CTRL_STS_OVRRD_AMDIX_ |
+ SPECIAL_CTRL_STS_AMDIX_ENABLE_ |
+ SPECIAL_CTRL_STS_AMDIX_STATE_);
+ rc |= val;
+ phy_write(phydev, SPECIAL_CTRL_STS, rc);
+
+ phydev->mdix = phydev->mdix_ctrl;
+ return genphy_config_aneg(phydev);
+}
+
+static int lan87xx_config_aneg_ext(struct phy_device *phydev)
+{
+ int rc;
+
+ /* Extend Manual AutoMDIX timer */
+ rc = phy_read(phydev, PHY_EDPD_CONFIG);
+ if (rc < 0)
+ return rc;
+
+ rc |= PHY_EDPD_CONFIG_EXT_CROSSOVER_;
+ phy_write(phydev, PHY_EDPD_CONFIG, rc);
+ return lan87xx_config_aneg(phydev);
+}
+
/*
* The LAN87xx suffers from rare absence of the ENERGYON-bit when Ethernet cable
* plugs in while LAN87xx is in Energy Detect Power-Down mode. This leads to
@@ -250,6 +309,9 @@ static struct phy_driver smsc_phy_driver[] = {
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
+ /* This covers internal PHY (phy_id: 0x0007C0C3) for
+ * LAN9500 (PID: 0x9500), LAN9514 (PID: 0xec00), LAN9505 (PID: 0x9505)
+ */
.phy_id = 0x0007c0c0, /* OUI=0x00800f, Model#=0x0c */
.phy_id_mask = 0xfffffff0,
.name = "SMSC LAN8700",
@@ -262,6 +324,7 @@ static struct phy_driver smsc_phy_driver[] = {
.read_status = lan87xx_read_status,
.config_init = smsc_phy_config_init,
.soft_reset = smsc_phy_reset,
+ .config_aneg = lan87xx_config_aneg,
/* IRQ related */
.ack_interrupt = smsc_phy_ack_interrupt,
@@ -293,6 +356,9 @@ static struct phy_driver smsc_phy_driver[] = {
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
+ /* This covers internal PHY (phy_id: 0x0007C0F0) for
+ * LAN9500A (PID: 0x9E00), LAN9505A (PID: 0x9E01)
+ */
.phy_id = 0x0007c0f0, /* OUI=0x00800f, Model#=0x0f */
.phy_id_mask = 0xfffffff0,
.name = "SMSC LAN8710/LAN8720",
@@ -306,6 +372,7 @@ static struct phy_driver smsc_phy_driver[] = {
.read_status = lan87xx_read_status,
.config_init = smsc_phy_config_init,
.soft_reset = smsc_phy_reset,
+ .config_aneg = lan87xx_config_aneg_ext,
/* IRQ related */
.ack_interrupt = smsc_phy_ack_interrupt,
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 7959b5c..be69d27 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -219,24 +219,6 @@ struct veth {
__be16 h_vlan_TCI;
};
-bool tun_is_xdp_frame(void *ptr)
-{
- return (unsigned long)ptr & TUN_XDP_FLAG;
-}
-EXPORT_SYMBOL(tun_is_xdp_frame);
-
-void *tun_xdp_to_ptr(void *ptr)
-{
- return (void *)((unsigned long)ptr | TUN_XDP_FLAG);
-}
-EXPORT_SYMBOL(tun_xdp_to_ptr);
-
-void *tun_ptr_to_xdp(void *ptr)
-{
- return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG);
-}
-EXPORT_SYMBOL(tun_ptr_to_xdp);
-
static int tun_napi_receive(struct napi_struct *napi, int budget)
{
struct tun_file *tfile = container_of(napi, struct tun_file, napi);
diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
index c7bcfca..b46993d 100644
--- a/drivers/net/usb/Kconfig
+++ b/drivers/net/usb/Kconfig
@@ -346,6 +346,8 @@
config USB_NET_SMSC95XX
tristate "SMSC LAN95XX based USB 2.0 10/100 ethernet devices"
depends on USB_USBNET
+ select PHYLIB
+ select SMSC_PHY
select BITREVERSE
select CRC16
select CRC32
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index bb4ccbd..601fb40 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -18,10 +18,12 @@
#include <linux/usb/usbnet.h>
#include <linux/slab.h>
#include <linux/of_net.h>
+#include <linux/mdio.h>
+#include <linux/phy.h>
#include "smsc95xx.h"
#define SMSC_CHIPNAME "smsc95xx"
-#define SMSC_DRIVER_VERSION "1.0.6"
+#define SMSC_DRIVER_VERSION "2.0.0"
#define HS_USB_PKT_SIZE (512)
#define FS_USB_PKT_SIZE (64)
#define DEFAULT_HS_BURST_CAP_SIZE (16 * 1024 + 5 * HS_USB_PKT_SIZE)
@@ -49,10 +51,7 @@
#define SUSPEND_ALLMODES (SUSPEND_SUSPEND0 | SUSPEND_SUSPEND1 | \
SUSPEND_SUSPEND2 | SUSPEND_SUSPEND3)
-#define CARRIER_CHECK_DELAY (2 * HZ)
-
struct smsc95xx_priv {
- u32 chip_id;
u32 mac_cr;
u32 hash_hi;
u32 hash_lo;
@@ -60,10 +59,8 @@ struct smsc95xx_priv {
spinlock_t mac_cr_lock;
u8 features;
u8 suspend_flags;
- u8 mdix_ctrl;
- bool link_ok;
- struct delayed_work carrier_check;
- struct usbnet *dev;
+ struct mii_bus *mdiobus;
+ struct phy_device *phydev;
};
static bool turbo_mode = true;
@@ -173,10 +170,14 @@ static int __must_check __smsc95xx_phy_wait_not_busy(struct usbnet *dev,
return -EIO;
}
-static int __smsc95xx_mdio_read(struct net_device *netdev, int phy_id, int idx,
+static u32 mii_address_cmd(int phy_id, int idx, u16 op)
+{
+ return (phy_id & 0x1f) << 11 | (idx & 0x1f) << 6 | op;
+}
+
+static int __smsc95xx_mdio_read(struct usbnet *dev, int phy_id, int idx,
int in_pm)
{
- struct usbnet *dev = netdev_priv(netdev);
u32 val, addr;
int ret;
@@ -185,14 +186,12 @@ static int __smsc95xx_mdio_read(struct net_device *netdev, int phy_id, int idx,
/* confirm MII not busy */
ret = __smsc95xx_phy_wait_not_busy(dev, in_pm);
if (ret < 0) {
- netdev_warn(dev->net, "MII is busy in smsc95xx_mdio_read\n");
+ netdev_warn(dev->net, "%s: MII is busy\n", __func__);
goto done;
}
/* set the address, index & direction (read from PHY) */
- phy_id &= dev->mii.phy_id_mask;
- idx &= dev->mii.reg_num_mask;
- addr = (phy_id << 11) | (idx << 6) | MII_READ_ | MII_BUSY_;
+ addr = mii_address_cmd(phy_id, idx, MII_READ_ | MII_BUSY_);
ret = __smsc95xx_write_reg(dev, MII_ADDR, addr, in_pm);
if (ret < 0) {
netdev_warn(dev->net, "Error writing MII_ADDR\n");
@@ -218,10 +217,9 @@ static int __smsc95xx_mdio_read(struct net_device *netdev, int phy_id, int idx,
return ret;
}
-static void __smsc95xx_mdio_write(struct net_device *netdev, int phy_id,
+static void __smsc95xx_mdio_write(struct usbnet *dev, int phy_id,
int idx, int regval, int in_pm)
{
- struct usbnet *dev = netdev_priv(netdev);
u32 val, addr;
int ret;
@@ -230,7 +228,7 @@ static void __smsc95xx_mdio_write(struct net_device *netdev, int phy_id,
/* confirm MII not busy */
ret = __smsc95xx_phy_wait_not_busy(dev, in_pm);
if (ret < 0) {
- netdev_warn(dev->net, "MII is busy in smsc95xx_mdio_write\n");
+ netdev_warn(dev->net, "%s: MII is busy\n", __func__);
goto done;
}
@@ -242,9 +240,7 @@ static void __smsc95xx_mdio_write(struct net_device *netdev, int phy_id,
}
/* set the address, index & direction (write to PHY) */
- phy_id &= dev->mii.phy_id_mask;
- idx &= dev->mii.reg_num_mask;
- addr = (phy_id << 11) | (idx << 6) | MII_WRITE_ | MII_BUSY_;
+ addr = mii_address_cmd(phy_id, idx, MII_WRITE_ | MII_BUSY_);
ret = __smsc95xx_write_reg(dev, MII_ADDR, addr, in_pm);
if (ret < 0) {
netdev_warn(dev->net, "Error writing MII_ADDR\n");
@@ -261,27 +257,34 @@ static void __smsc95xx_mdio_write(struct net_device *netdev, int phy_id,
mutex_unlock(&dev->phy_mutex);
}
-static int smsc95xx_mdio_read_nopm(struct net_device *netdev, int phy_id,
- int idx)
+static int smsc95xx_mdio_read_nopm(struct usbnet *dev, int idx)
{
- return __smsc95xx_mdio_read(netdev, phy_id, idx, 1);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
+
+ return __smsc95xx_mdio_read(dev, pdata->phydev->mdio.addr, idx, 1);
}
-static void smsc95xx_mdio_write_nopm(struct net_device *netdev, int phy_id,
- int idx, int regval)
+static void smsc95xx_mdio_write_nopm(struct usbnet *dev, int idx, int regval)
{
- __smsc95xx_mdio_write(netdev, phy_id, idx, regval, 1);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
+
+ __smsc95xx_mdio_write(dev, pdata->phydev->mdio.addr, idx, regval, 1);
}
-static int smsc95xx_mdio_read(struct net_device *netdev, int phy_id, int idx)
+static int smsc95xx_mdiobus_read(struct mii_bus *bus, int phy_id, int idx)
{
- return __smsc95xx_mdio_read(netdev, phy_id, idx, 0);
+ struct usbnet *dev = bus->priv;
+
+ return __smsc95xx_mdio_read(dev, phy_id, idx, 0);
}
-static void smsc95xx_mdio_write(struct net_device *netdev, int phy_id, int idx,
- int regval)
+static int smsc95xx_mdiobus_write(struct mii_bus *bus, int phy_id, int idx,
+ u16 regval)
{
- __smsc95xx_mdio_write(netdev, phy_id, idx, regval, 0);
+ struct usbnet *dev = bus->priv;
+
+ __smsc95xx_mdio_write(dev, phy_id, idx, regval, 0);
+ return 0;
}
static int __must_check smsc95xx_wait_eeprom(struct usbnet *dev)
@@ -455,7 +458,7 @@ static unsigned int smsc95xx_hash(char addr[ETH_ALEN])
static void smsc95xx_set_multicast(struct net_device *netdev)
{
struct usbnet *dev = netdev_priv(netdev);
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
unsigned long flags;
int ret;
@@ -511,22 +514,23 @@ static void smsc95xx_set_multicast(struct net_device *netdev)
netdev_warn(dev->net, "failed to initiate async write to MAC_CR\n");
}
-static int smsc95xx_phy_update_flowcontrol(struct usbnet *dev, u8 duplex,
- u16 lcladv, u16 rmtadv)
+static int smsc95xx_phy_update_flowcontrol(struct usbnet *dev)
{
u32 flow = 0, afc_cfg;
+ struct smsc95xx_priv *pdata = dev->driver_priv;
+ bool tx_pause, rx_pause;
int ret = smsc95xx_read_reg(dev, AFC_CFG, &afc_cfg);
if (ret < 0)
return ret;
- if (duplex == DUPLEX_FULL) {
- u8 cap = mii_resolve_flowctrl_fdx(lcladv, rmtadv);
+ if (pdata->phydev->duplex == DUPLEX_FULL) {
+ phy_get_pause(pdata->phydev, &tx_pause, &rx_pause);
- if (cap & FLOW_CTRL_RX)
+ if (rx_pause)
flow = 0xFFFF0002;
- if (cap & FLOW_CTRL_TX) {
+ if (tx_pause) {
afc_cfg |= 0xF;
flow |= 0xFFFF0000;
} else {
@@ -534,8 +538,8 @@ static int smsc95xx_phy_update_flowcontrol(struct usbnet *dev, u8 duplex,
}
netif_dbg(dev, link, dev->net, "rx pause %s, tx pause %s\n",
- cap & FLOW_CTRL_RX ? "enabled" : "disabled",
- cap & FLOW_CTRL_TX ? "enabled" : "disabled");
+ rx_pause ? "enabled" : "disabled",
+ tx_pause ? "enabled" : "disabled");
} else {
netif_dbg(dev, link, dev->net, "half duplex\n");
afc_cfg |= 0xF;
@@ -550,33 +554,16 @@ static int smsc95xx_phy_update_flowcontrol(struct usbnet *dev, u8 duplex,
static int smsc95xx_link_reset(struct usbnet *dev)
{
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
- struct mii_if_info *mii = &dev->mii;
- struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };
+ struct smsc95xx_priv *pdata = dev->driver_priv;
unsigned long flags;
- u16 lcladv, rmtadv;
int ret;
- /* clear interrupt status */
- ret = smsc95xx_mdio_read(dev->net, mii->phy_id, PHY_INT_SRC);
- if (ret < 0)
- return ret;
-
ret = smsc95xx_write_reg(dev, INT_STS, INT_STS_CLEAR_ALL_);
if (ret < 0)
return ret;
- mii_check_media(mii, 1, 1);
- mii_ethtool_gset(&dev->mii, &ecmd);
- lcladv = smsc95xx_mdio_read(dev->net, mii->phy_id, MII_ADVERTISE);
- rmtadv = smsc95xx_mdio_read(dev->net, mii->phy_id, MII_LPA);
-
- netif_dbg(dev, link, dev->net,
- "speed: %u duplex: %d lcladv: %04x rmtadv: %04x\n",
- ethtool_cmd_speed(&ecmd), ecmd.duplex, lcladv, rmtadv);
-
spin_lock_irqsave(&pdata->mac_cr_lock, flags);
- if (ecmd.duplex != DUPLEX_FULL) {
+ if (pdata->phydev->duplex != DUPLEX_FULL) {
pdata->mac_cr &= ~MAC_CR_FDPX_;
pdata->mac_cr |= MAC_CR_RCVOWN_;
} else {
@@ -589,7 +576,7 @@ static int smsc95xx_link_reset(struct usbnet *dev)
if (ret < 0)
return ret;
- ret = smsc95xx_phy_update_flowcontrol(dev, ecmd.duplex, lcladv, rmtadv);
+ ret = smsc95xx_phy_update_flowcontrol(dev);
if (ret < 0)
netdev_warn(dev->net, "Error updating PHY flow control\n");
@@ -616,44 +603,6 @@ static void smsc95xx_status(struct usbnet *dev, struct urb *urb)
intdata);
}
-static void set_carrier(struct usbnet *dev, bool link)
-{
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
-
- if (pdata->link_ok == link)
- return;
-
- pdata->link_ok = link;
-
- if (link)
- usbnet_link_change(dev, 1, 0);
- else
- usbnet_link_change(dev, 0, 0);
-}
-
-static void check_carrier(struct work_struct *work)
-{
- struct smsc95xx_priv *pdata = container_of(work, struct smsc95xx_priv,
- carrier_check.work);
- struct usbnet *dev = pdata->dev;
- int ret;
-
- if (pdata->suspend_flags != 0)
- return;
-
- ret = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, MII_BMSR);
- if (ret < 0) {
- netdev_warn(dev->net, "Failed to read MII_BMSR\n");
- return;
- }
- if (ret & BMSR_LSTATUS)
- set_carrier(dev, 1);
- else
- set_carrier(dev, 0);
-
- schedule_delayed_work(&pdata->carrier_check, CARRIER_CHECK_DELAY);
-}
-
/* Enable or disable Tx & Rx checksum offload engines */
static int smsc95xx_set_features(struct net_device *netdev,
netdev_features_t features)
@@ -747,7 +696,7 @@ static void smsc95xx_ethtool_get_wol(struct net_device *net,
struct ethtool_wolinfo *wolinfo)
{
struct usbnet *dev = netdev_priv(net);
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
wolinfo->supported = SUPPORTED_WAKE;
wolinfo->wolopts = pdata->wolopts;
@@ -757,7 +706,7 @@ static int smsc95xx_ethtool_set_wol(struct net_device *net,
struct ethtool_wolinfo *wolinfo)
{
struct usbnet *dev = netdev_priv(net);
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
int ret;
if (wolinfo->wolopts & ~SUPPORTED_WAKE)
@@ -772,108 +721,15 @@ static int smsc95xx_ethtool_set_wol(struct net_device *net,
return ret;
}
-static int get_mdix_status(struct net_device *net)
+static u32 smsc95xx_get_link(struct net_device *net)
{
- struct usbnet *dev = netdev_priv(net);
- u32 val;
- int buf;
-
- buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, SPECIAL_CTRL_STS);
- if (buf & SPECIAL_CTRL_STS_OVRRD_AMDIX_) {
- if (buf & SPECIAL_CTRL_STS_AMDIX_ENABLE_)
- return ETH_TP_MDI_AUTO;
- else if (buf & SPECIAL_CTRL_STS_AMDIX_STATE_)
- return ETH_TP_MDI_X;
- } else {
- buf = smsc95xx_read_reg(dev, STRAP_STATUS, &val);
- if (val & STRAP_STATUS_AMDIX_EN_)
- return ETH_TP_MDI_AUTO;
- }
-
- return ETH_TP_MDI;
-}
-
-static void set_mdix_status(struct net_device *net, __u8 mdix_ctrl)
-{
- struct usbnet *dev = netdev_priv(net);
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
- int buf;
-
- if ((pdata->chip_id == ID_REV_CHIP_ID_9500A_) ||
- (pdata->chip_id == ID_REV_CHIP_ID_9530_) ||
- (pdata->chip_id == ID_REV_CHIP_ID_89530_) ||
- (pdata->chip_id == ID_REV_CHIP_ID_9730_)) {
- /* Extend Manual AutoMDIX timer for 9500A/9500Ai */
- buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id,
- PHY_EDPD_CONFIG);
- buf |= PHY_EDPD_CONFIG_EXT_CROSSOVER_;
- smsc95xx_mdio_write(dev->net, dev->mii.phy_id,
- PHY_EDPD_CONFIG, buf);
- }
-
- if (mdix_ctrl == ETH_TP_MDI) {
- buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id,
- SPECIAL_CTRL_STS);
- buf |= SPECIAL_CTRL_STS_OVRRD_AMDIX_;
- buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ |
- SPECIAL_CTRL_STS_AMDIX_STATE_);
- smsc95xx_mdio_write(dev->net, dev->mii.phy_id,
- SPECIAL_CTRL_STS, buf);
- } else if (mdix_ctrl == ETH_TP_MDI_X) {
- buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id,
- SPECIAL_CTRL_STS);
- buf |= SPECIAL_CTRL_STS_OVRRD_AMDIX_;
- buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ |
- SPECIAL_CTRL_STS_AMDIX_STATE_);
- buf |= SPECIAL_CTRL_STS_AMDIX_STATE_;
- smsc95xx_mdio_write(dev->net, dev->mii.phy_id,
- SPECIAL_CTRL_STS, buf);
- } else if (mdix_ctrl == ETH_TP_MDI_AUTO) {
- buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id,
- SPECIAL_CTRL_STS);
- buf &= ~SPECIAL_CTRL_STS_OVRRD_AMDIX_;
- buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ |
- SPECIAL_CTRL_STS_AMDIX_STATE_);
- buf |= SPECIAL_CTRL_STS_AMDIX_ENABLE_;
- smsc95xx_mdio_write(dev->net, dev->mii.phy_id,
- SPECIAL_CTRL_STS, buf);
- }
- pdata->mdix_ctrl = mdix_ctrl;
-}
-
-static int smsc95xx_get_link_ksettings(struct net_device *net,
- struct ethtool_link_ksettings *cmd)
-{
- struct usbnet *dev = netdev_priv(net);
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
- int retval;
-
- retval = usbnet_get_link_ksettings(net, cmd);
-
- cmd->base.eth_tp_mdix = pdata->mdix_ctrl;
- cmd->base.eth_tp_mdix_ctrl = pdata->mdix_ctrl;
-
- return retval;
-}
-
-static int smsc95xx_set_link_ksettings(struct net_device *net,
- const struct ethtool_link_ksettings *cmd)
-{
- struct usbnet *dev = netdev_priv(net);
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
- int retval;
-
- if (pdata->mdix_ctrl != cmd->base.eth_tp_mdix_ctrl)
- set_mdix_status(net, cmd->base.eth_tp_mdix_ctrl);
-
- retval = usbnet_set_link_ksettings(net, cmd);
-
- return retval;
+ phy_read_status(net->phydev);
+ return net->phydev->link;
}
static const struct ethtool_ops smsc95xx_ethtool_ops = {
- .get_link = usbnet_get_link,
- .nway_reset = usbnet_nway_reset,
+ .get_link = smsc95xx_get_link,
+ .nway_reset = phy_ethtool_nway_reset,
.get_drvinfo = usbnet_get_drvinfo,
.get_msglevel = usbnet_get_msglevel,
.set_msglevel = usbnet_set_msglevel,
@@ -884,19 +740,17 @@ static const struct ethtool_ops smsc95xx_ethtool_ops = {
.get_regs = smsc95xx_ethtool_getregs,
.get_wol = smsc95xx_ethtool_get_wol,
.set_wol = smsc95xx_ethtool_set_wol,
- .get_link_ksettings = smsc95xx_get_link_ksettings,
- .set_link_ksettings = smsc95xx_set_link_ksettings,
+ .get_link_ksettings = phy_ethtool_get_link_ksettings,
+ .set_link_ksettings = phy_ethtool_set_link_ksettings,
.get_ts_info = ethtool_op_get_ts_info,
};
static int smsc95xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
{
- struct usbnet *dev = netdev_priv(netdev);
-
if (!netif_running(netdev))
return -EINVAL;
- return generic_mii_ioctl(&dev->mii, if_mii(rq), cmd, NULL);
+ return phy_mii_ioctl(netdev->phydev, rq, cmd);
}
static void smsc95xx_init_mac_address(struct usbnet *dev)
@@ -942,7 +796,7 @@ static int smsc95xx_set_mac_address(struct usbnet *dev)
/* starts the TX path */
static int smsc95xx_start_tx_path(struct usbnet *dev)
{
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
unsigned long flags;
int ret;
@@ -962,7 +816,7 @@ static int smsc95xx_start_tx_path(struct usbnet *dev)
/* Starts the Receive path */
static int smsc95xx_start_rx_path(struct usbnet *dev, int in_pm)
{
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
unsigned long flags;
spin_lock_irqsave(&pdata->mac_cr_lock, flags);
@@ -972,54 +826,9 @@ static int smsc95xx_start_rx_path(struct usbnet *dev, int in_pm)
return __smsc95xx_write_reg(dev, MAC_CR, pdata->mac_cr, in_pm);
}
-static int smsc95xx_phy_initialize(struct usbnet *dev)
-{
- int bmcr, ret, timeout = 0;
-
- /* Initialize MII structure */
- dev->mii.dev = dev->net;
- dev->mii.mdio_read = smsc95xx_mdio_read;
- dev->mii.mdio_write = smsc95xx_mdio_write;
- dev->mii.phy_id_mask = 0x1f;
- dev->mii.reg_num_mask = 0x1f;
- dev->mii.phy_id = SMSC95XX_INTERNAL_PHY_ID;
-
- /* reset phy and wait for reset to complete */
- smsc95xx_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, BMCR_RESET);
-
- do {
- msleep(10);
- bmcr = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, MII_BMCR);
- timeout++;
- } while ((bmcr & BMCR_RESET) && (timeout < 100));
-
- if (timeout >= 100) {
- netdev_warn(dev->net, "timeout on PHY Reset");
- return -EIO;
- }
-
- smsc95xx_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE,
- ADVERTISE_ALL | ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP |
- ADVERTISE_PAUSE_ASYM);
-
- /* read to clear */
- ret = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, PHY_INT_SRC);
- if (ret < 0) {
- netdev_warn(dev->net, "Failed to read PHY_INT_SRC during init\n");
- return ret;
- }
-
- smsc95xx_mdio_write(dev->net, dev->mii.phy_id, PHY_INT_MASK,
- PHY_INT_MASK_DEFAULT_);
- mii_nway_restart(&dev->mii);
-
- netif_dbg(dev, ifup, dev->net, "phy initialised successfully\n");
- return 0;
-}
-
static int smsc95xx_reset(struct usbnet *dev)
{
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 read_buf, write_buf, burst_cap;
int ret = 0, timeout;
@@ -1198,12 +1007,6 @@ static int smsc95xx_reset(struct usbnet *dev)
smsc95xx_set_multicast(dev->net);
- ret = smsc95xx_phy_initialize(dev);
- if (ret < 0) {
- netdev_warn(dev->net, "Failed to init PHY\n");
- return ret;
- }
-
ret = smsc95xx_read_reg(dev, INT_EP_CTL, &read_buf);
if (ret < 0)
return ret;
@@ -1247,7 +1050,8 @@ static const struct net_device_ops smsc95xx_netdev_ops = {
static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
{
- struct smsc95xx_priv *pdata = NULL;
+ struct smsc95xx_priv *pdata;
+ bool is_internal_phy;
u32 val;
int ret;
@@ -1259,13 +1063,12 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
return ret;
}
- dev->data[0] = (unsigned long)kzalloc(sizeof(struct smsc95xx_priv),
- GFP_KERNEL);
-
- pdata = (struct smsc95xx_priv *)(dev->data[0]);
+ pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
if (!pdata)
return -ENOMEM;
+ dev->driver_priv = pdata;
+
spin_lock_init(&pdata->mac_cr_lock);
/* LAN95xx devices do not alter the computed checksum of 0 to 0xffff.
@@ -1290,15 +1093,50 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
if (ret)
goto free_pdata;
+ pdata->mdiobus = mdiobus_alloc();
+ if (!pdata->mdiobus) {
+ ret = -ENOMEM;
+ goto free_pdata;
+ }
+
+ ret = smsc95xx_read_reg(dev, HW_CFG, &val);
+ if (ret < 0)
+ goto free_mdio;
+
+ is_internal_phy = !(val & HW_CFG_PSEL_);
+ if (is_internal_phy)
+ pdata->mdiobus->phy_mask = ~(1u << SMSC95XX_INTERNAL_PHY_ID);
+
+ pdata->mdiobus->priv = dev;
+ pdata->mdiobus->read = smsc95xx_mdiobus_read;
+ pdata->mdiobus->write = smsc95xx_mdiobus_write;
+ pdata->mdiobus->name = "smsc95xx-mdiobus";
+ pdata->mdiobus->parent = &dev->udev->dev;
+
+ snprintf(pdata->mdiobus->id, ARRAY_SIZE(pdata->mdiobus->id),
+ "usb-%03d:%03d", dev->udev->bus->busnum, dev->udev->devnum);
+
+ ret = mdiobus_register(pdata->mdiobus);
+ if (ret) {
+ netdev_err(dev->net, "Could not register MDIO bus\n");
+ goto free_mdio;
+ }
+
+ pdata->phydev = phy_find_first(pdata->mdiobus);
+ if (!pdata->phydev) {
+ netdev_err(dev->net, "no PHY found\n");
+ ret = -ENODEV;
+ goto unregister_mdio;
+ }
+
+ pdata->phydev->is_internal = is_internal_phy;
+
/* detect device revision as different features may be available */
ret = smsc95xx_read_reg(dev, ID_REV, &val);
if (ret < 0)
- goto free_pdata;
+ goto unregister_mdio;
val >>= 16;
- pdata->chip_id = val;
- pdata->mdix_ctrl = get_mdix_status(dev->net);
-
if ((val == ID_REV_CHIP_ID_9500A_) || (val == ID_REV_CHIP_ID_9530_) ||
(val == ID_REV_CHIP_ID_89530_) || (val == ID_REV_CHIP_ID_9730_))
pdata->features = (FEATURE_8_WAKEUP_FILTERS |
@@ -1314,13 +1152,14 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
dev->net->min_mtu = ETH_MIN_MTU;
dev->net->max_mtu = ETH_DATA_LEN;
dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len;
-
- pdata->dev = dev;
- INIT_DELAYED_WORK(&pdata->carrier_check, check_carrier);
- schedule_delayed_work(&pdata->carrier_check, CARRIER_CHECK_DELAY);
-
return 0;
+unregister_mdio:
+ mdiobus_unregister(pdata->mdiobus);
+
+free_mdio:
+ mdiobus_free(pdata->mdiobus);
+
free_pdata:
kfree(pdata);
return ret;
@@ -1328,15 +1167,47 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
static void smsc95xx_unbind(struct usbnet *dev, struct usb_interface *intf)
{
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
- if (pdata) {
- cancel_delayed_work_sync(&pdata->carrier_check);
- netif_dbg(dev, ifdown, dev->net, "free pdata\n");
- kfree(pdata);
- pdata = NULL;
- dev->data[0] = 0;
+ mdiobus_unregister(pdata->mdiobus);
+ mdiobus_free(pdata->mdiobus);
+ netif_dbg(dev, ifdown, dev->net, "free pdata\n");
+ kfree(pdata);
+}
+
+static void smsc95xx_handle_link_change(struct net_device *net)
+{
+ phy_print_status(net->phydev);
+}
+
+static int smsc95xx_start_phy(struct usbnet *dev)
+{
+ struct smsc95xx_priv *pdata = dev->driver_priv;
+ struct net_device *net = dev->net;
+ int ret;
+
+ ret = smsc95xx_reset(dev);
+ if (ret < 0)
+ return ret;
+
+ ret = phy_connect_direct(net, pdata->phydev,
+ &smsc95xx_handle_link_change,
+ PHY_INTERFACE_MODE_MII);
+ if (ret) {
+ netdev_err(net, "can't attach PHY to %s\n", pdata->mdiobus->id);
+ return ret;
}
+
+ phy_attached_info(net->phydev);
+ phy_start(net->phydev);
+ return 0;
+}
+
+static int smsc95xx_disconnect_phy(struct usbnet *dev)
+{
+ phy_stop(dev->net->phydev);
+ phy_disconnect(dev->net->phydev);
+ return 0;
}
static u32 smsc_crc(const u8 *buffer, size_t len, int filter)
@@ -1347,39 +1218,37 @@ static u32 smsc_crc(const u8 *buffer, size_t len, int filter)
static int smsc95xx_enable_phy_wakeup_interrupts(struct usbnet *dev, u16 mask)
{
- struct mii_if_info *mii = &dev->mii;
int ret;
netdev_dbg(dev->net, "enabling PHY wakeup interrupts\n");
/* read to clear */
- ret = smsc95xx_mdio_read_nopm(dev->net, mii->phy_id, PHY_INT_SRC);
+ ret = smsc95xx_mdio_read_nopm(dev, PHY_INT_SRC);
if (ret < 0)
return ret;
/* enable interrupt source */
- ret = smsc95xx_mdio_read_nopm(dev->net, mii->phy_id, PHY_INT_MASK);
+ ret = smsc95xx_mdio_read_nopm(dev, PHY_INT_MASK);
if (ret < 0)
return ret;
ret |= mask;
- smsc95xx_mdio_write_nopm(dev->net, mii->phy_id, PHY_INT_MASK, ret);
+ smsc95xx_mdio_write_nopm(dev, PHY_INT_MASK, ret);
return 0;
}
static int smsc95xx_link_ok_nopm(struct usbnet *dev)
{
- struct mii_if_info *mii = &dev->mii;
int ret;
/* first, a dummy read, needed to latch some MII phys */
- ret = smsc95xx_mdio_read_nopm(dev->net, mii->phy_id, MII_BMSR);
+ ret = smsc95xx_mdio_read_nopm(dev, MII_BMSR);
if (ret < 0)
return ret;
- ret = smsc95xx_mdio_read_nopm(dev->net, mii->phy_id, MII_BMSR);
+ ret = smsc95xx_mdio_read_nopm(dev, MII_BMSR);
if (ret < 0)
return ret;
@@ -1388,7 +1257,7 @@ static int smsc95xx_link_ok_nopm(struct usbnet *dev)
static int smsc95xx_enter_suspend0(struct usbnet *dev)
{
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 val;
int ret;
@@ -1427,8 +1296,7 @@ static int smsc95xx_enter_suspend0(struct usbnet *dev)
static int smsc95xx_enter_suspend1(struct usbnet *dev)
{
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
- struct mii_if_info *mii = &dev->mii;
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 val;
int ret;
@@ -1436,17 +1304,17 @@ static int smsc95xx_enter_suspend1(struct usbnet *dev)
* compatibility with non-standard link partners
*/
if (pdata->features & FEATURE_PHY_NLP_CROSSOVER)
- smsc95xx_mdio_write_nopm(dev->net, mii->phy_id, PHY_EDPD_CONFIG,
- PHY_EDPD_CONFIG_DEFAULT);
+ smsc95xx_mdio_write_nopm(dev, PHY_EDPD_CONFIG,
+ PHY_EDPD_CONFIG_DEFAULT);
/* enable energy detect power-down mode */
- ret = smsc95xx_mdio_read_nopm(dev->net, mii->phy_id, PHY_MODE_CTRL_STS);
+ ret = smsc95xx_mdio_read_nopm(dev, PHY_MODE_CTRL_STS);
if (ret < 0)
return ret;
ret |= MODE_CTRL_STS_EDPWRDOWN_;
- smsc95xx_mdio_write_nopm(dev->net, mii->phy_id, PHY_MODE_CTRL_STS, ret);
+ smsc95xx_mdio_write_nopm(dev, PHY_MODE_CTRL_STS, ret);
/* enter SUSPEND1 mode */
ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val);
@@ -1475,7 +1343,7 @@ static int smsc95xx_enter_suspend1(struct usbnet *dev)
static int smsc95xx_enter_suspend2(struct usbnet *dev)
{
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 val;
int ret;
@@ -1497,7 +1365,7 @@ static int smsc95xx_enter_suspend2(struct usbnet *dev)
static int smsc95xx_enter_suspend3(struct usbnet *dev)
{
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 val;
int ret;
@@ -1536,7 +1404,7 @@ static int smsc95xx_enter_suspend3(struct usbnet *dev)
static int smsc95xx_autosuspend(struct usbnet *dev, u32 link_up)
{
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
int ret;
if (!netif_running(dev->net)) {
@@ -1584,7 +1452,7 @@ static int smsc95xx_autosuspend(struct usbnet *dev, u32 link_up)
static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
{
struct usbnet *dev = usb_get_intfdata(intf);
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 val, link_up;
int ret;
@@ -1594,8 +1462,6 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
return ret;
}
- cancel_delayed_work_sync(&pdata->carrier_check);
-
if (pdata->suspend_flags) {
netdev_warn(dev->net, "error during last resume\n");
pdata->suspend_flags = 0;
@@ -1839,10 +1705,6 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
if (ret && PMSG_IS_AUTO(message))
usbnet_resume(intf);
- if (ret)
- schedule_delayed_work(&pdata->carrier_check,
- CARRIER_CHECK_DELAY);
-
return ret;
}
@@ -1855,14 +1717,13 @@ static int smsc95xx_resume(struct usb_interface *intf)
u32 val;
BUG_ON(!dev);
- pdata = (struct smsc95xx_priv *)(dev->data[0]);
+ pdata = dev->driver_priv;
suspend_flags = pdata->suspend_flags;
netdev_dbg(dev->net, "resume suspend_flags=0x%02x\n", suspend_flags);
/* do this first to ensure it's cleared even in error case */
pdata->suspend_flags = 0;
- schedule_delayed_work(&pdata->carrier_check, CARRIER_CHECK_DELAY);
if (suspend_flags & SUSPEND_ALLMODES) {
/* clear wake-up sources */
@@ -1893,6 +1754,7 @@ static int smsc95xx_resume(struct usb_interface *intf)
if (ret < 0)
netdev_warn(dev->net, "usbnet_resume error\n");
+ phy_init_hw(pdata->phydev);
return ret;
}
@@ -2075,7 +1937,7 @@ static struct sk_buff *smsc95xx_tx_fixup(struct usbnet *dev,
static int smsc95xx_manage_power(struct usbnet *dev, int on)
{
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
dev->intf->needs_remote_wakeup = on;
@@ -2098,7 +1960,8 @@ static const struct driver_info smsc95xx_info = {
.bind = smsc95xx_bind,
.unbind = smsc95xx_unbind,
.link_reset = smsc95xx_link_reset,
- .reset = smsc95xx_reset,
+ .reset = smsc95xx_start_phy,
+ .stop = smsc95xx_disconnect_phy,
.rx_fixup = smsc95xx_rx_fixup,
.tx_fixup = smsc95xx_tx_fixup,
.status = smsc95xx_status,
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index a475f48..7de8f0e 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -234,14 +234,14 @@ static bool veth_is_xdp_frame(void *ptr)
return (unsigned long)ptr & VETH_XDP_FLAG;
}
-static void *veth_ptr_to_xdp(void *ptr)
+static struct xdp_frame *veth_ptr_to_xdp(void *ptr)
{
return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG);
}
-static void *veth_xdp_to_ptr(void *ptr)
+static void *veth_xdp_to_ptr(struct xdp_frame *xdp)
{
- return (void *)((unsigned long)ptr | VETH_XDP_FLAG);
+ return (void *)((unsigned long)xdp | VETH_XDP_FLAG);
}
static void veth_ptr_free(void *ptr)
diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index 9edd946..dca97cd 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -1295,3 +1295,4 @@ static struct platform_driver ucc_hdlc_driver = {
module_platform_driver(ucc_hdlc_driver);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION(DRV_DESC);
diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c
index 40c04ea..2fde439 100644
--- a/drivers/net/wan/sbni.c
+++ b/drivers/net/wan/sbni.c
@@ -260,11 +260,12 @@ static int __init sbni_init(struct net_device *dev)
return sbni_isa_probe( dev );
/* otherwise we have to perform search our adapter */
- if( io[ num ] != -1 )
- dev->base_addr = io[ num ],
+ if( io[ num ] != -1 ) {
+ dev->base_addr = io[ num ];
dev->irq = irq[ num ];
- else if( scandone || io[ 0 ] != -1 )
+ } else if( scandone || io[ 0 ] != -1 ) {
return -ENODEV;
+ }
/* if io[ num ] contains non-zero address, then that is on ISA bus */
if( dev->base_addr )
@@ -399,12 +400,13 @@ sbni_probe1( struct net_device *dev, unsigned long ioaddr, int irq )
nl->maxframe = DEFAULT_FRAME_LEN;
nl->csr1.rate = baud[ num ];
- if( (nl->cur_rxl_index = rxl[ num ]) == -1 )
+ if( (nl->cur_rxl_index = rxl[ num ]) == -1 ) {
/* autotune rxl */
- nl->cur_rxl_index = DEF_RXL,
+ nl->cur_rxl_index = DEF_RXL;
nl->delta_rxl = DEF_RXL_DELTA;
- else
+ } else {
nl->delta_rxl = 0;
+ }
nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ];
if( inb( ioaddr + CSR0 ) & 0x01 )
nl->state |= FL_SLOW_MODE;
@@ -512,13 +514,15 @@ sbni_interrupt( int irq, void *dev_id )
do {
repeat = 0;
- if( inb( dev->base_addr + CSR0 ) & (RC_RDY | TR_RDY) )
- handle_channel( dev ),
+ if( inb( dev->base_addr + CSR0 ) & (RC_RDY | TR_RDY) ) {
+ handle_channel( dev );
repeat = 1;
+ }
if( nl->second && /* second channel present */
- (inb( nl->second->base_addr+CSR0 ) & (RC_RDY | TR_RDY)) )
- handle_channel( nl->second ),
+ (inb( nl->second->base_addr+CSR0 ) & (RC_RDY | TR_RDY)) ) {
+ handle_channel( nl->second );
repeat = 1;
+ }
} while( repeat );
if( nl->second )
@@ -610,11 +614,12 @@ recv_frame( struct net_device *dev )
nl->state |= FL_PREV_OK;
if( framelen > 4 )
nl->in_stats.all_rx_number++;
- } else
- nl->state &= ~FL_PREV_OK,
- change_level( dev ),
- nl->in_stats.all_rx_number++,
+ } else {
+ nl->state &= ~FL_PREV_OK;
+ change_level( dev );
+ nl->in_stats.all_rx_number++;
nl->in_stats.bad_rx_number++;
+ }
return !frame_ok || framelen > 4;
}
@@ -689,9 +694,10 @@ download_data( struct net_device *dev, u32 *crc_p )
*crc_p = calc_crc32( *crc_p, skb->data + nl->outpos, len );
/* if packet too short we should write some more bytes to pad */
- for( len = nl->framelen - len; len--; )
- outb( 0, dev->base_addr + DAT ),
+ for( len = nl->framelen - len; len--; ) {
+ outb( 0, dev->base_addr + DAT );
*crc_p = CRC32( 0, *crc_p );
+ }
}
@@ -703,9 +709,10 @@ upload_data( struct net_device *dev, unsigned framelen, unsigned frameno,
int frame_ok;
- if( is_first )
- nl->wait_frameno = frameno,
+ if( is_first ) {
+ nl->wait_frameno = frameno;
nl->inppos = 0;
+ }
if( nl->wait_frameno == frameno ) {
@@ -717,33 +724,35 @@ upload_data( struct net_device *dev, unsigned framelen, unsigned frameno,
* error was occurred... drop entire packet
*/
else if( (frame_ok = skip_tail( dev->base_addr, framelen, crc ))
- != 0 )
- nl->wait_frameno = 0,
- nl->inppos = 0,
+ != 0 ) {
+ nl->wait_frameno = 0;
+ nl->inppos = 0;
#ifdef CONFIG_SBNI_MULTILINE
- nl->master->stats.rx_errors++,
+ nl->master->stats.rx_errors++;
nl->master->stats.rx_missed_errors++;
#else
- dev->stats.rx_errors++,
+ dev->stats.rx_errors++;
dev->stats.rx_missed_errors++;
#endif
+ }
/* now skip all frames until is_first != 0 */
} else
frame_ok = skip_tail( dev->base_addr, framelen, crc );
- if( is_first && !frame_ok )
+ if( is_first && !frame_ok ) {
/*
* Frame has been broken, but we had already stored
* is_first... Drop entire packet.
*/
- nl->wait_frameno = 0,
+ nl->wait_frameno = 0;
#ifdef CONFIG_SBNI_MULTILINE
- nl->master->stats.rx_errors++,
+ nl->master->stats.rx_errors++;
nl->master->stats.rx_crc_errors++;
#else
- dev->stats.rx_errors++,
+ dev->stats.rx_errors++;
dev->stats.rx_crc_errors++;
#endif
+ }
return frame_ok;
}
@@ -782,17 +791,18 @@ interpret_ack( struct net_device *dev, unsigned ack )
if( nl->state & FL_WAIT_ACK ) {
nl->outpos += nl->framelen;
- if( --nl->tx_frameno )
+ if( --nl->tx_frameno ) {
nl->framelen = min_t(unsigned int,
nl->maxframe,
nl->tx_buf_p->len - nl->outpos);
- else
- send_complete( dev ),
+ } else {
+ send_complete( dev );
#ifdef CONFIG_SBNI_MULTILINE
netif_wake_queue( nl->master );
#else
netif_wake_queue( dev );
#endif
+ }
}
}
@@ -872,16 +882,17 @@ drop_xmit_queue( struct net_device *dev )
{
struct net_local *nl = netdev_priv(dev);
- if( nl->tx_buf_p )
- dev_kfree_skb_any( nl->tx_buf_p ),
- nl->tx_buf_p = NULL,
+ if( nl->tx_buf_p ) {
+ dev_kfree_skb_any( nl->tx_buf_p );
+ nl->tx_buf_p = NULL;
#ifdef CONFIG_SBNI_MULTILINE
- nl->master->stats.tx_errors++,
+ nl->master->stats.tx_errors++;
nl->master->stats.tx_carrier_errors++;
#else
- dev->stats.tx_errors++,
+ dev->stats.tx_errors++;
dev->stats.tx_carrier_errors++;
#endif
+ }
nl->tx_frameno = 0;
nl->framelen = 0;
@@ -1327,12 +1338,13 @@ sbni_ioctl( struct net_device *dev, struct ifreq *ifr, int cmd )
spin_lock( &nl->lock );
flags = *(struct sbni_flags*) &ifr->ifr_ifru;
- if( flags.fixed_rxl )
- nl->delta_rxl = 0,
+ if( flags.fixed_rxl ) {
+ nl->delta_rxl = 0;
nl->cur_rxl_index = flags.rxl;
- else
- nl->delta_rxl = DEF_RXL_DELTA,
+ } else {
+ nl->delta_rxl = DEF_RXL_DELTA;
nl->cur_rxl_index = DEF_RXL;
+ }
nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ];
nl->csr1.rate = flags.rate;
@@ -1526,13 +1538,16 @@ sbni_setup( char *p )
(*dest[ parm ])[ n ] = simple_strtol( p, &p, 0 );
if( !*p || *p == ')' )
return 1;
- if( *p == ';' )
- ++p, ++n, parm = 0;
- else if( *p++ != ',' )
+ if( *p == ';' ) {
+ ++p;
+ ++n;
+ parm = 0;
+ } else if( *p++ != ',' ) {
break;
- else
+ } else {
if( ++parm >= 5 )
break;
+ }
}
bad_param:
pr_err("Error in sbni kernel parameter!\n");
diff --git a/drivers/net/wan/slic_ds26522.c b/drivers/net/wan/slic_ds26522.c
index 29053be..8e3b1c7 100644
--- a/drivers/net/wan/slic_ds26522.c
+++ b/drivers/net/wan/slic_ds26522.c
@@ -22,8 +22,6 @@
#include <linux/io.h>
#include "slic_ds26522.h"
-#define DRV_NAME "ds26522"
-
#define SLIC_TRANS_LEN 1
#define SLIC_TWO_LEN 2
#define SLIC_THREE_LEN 3
diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c
index 20a4f3c..d0f3b6d 100644
--- a/drivers/net/wireguard/netlink.c
+++ b/drivers/net/wireguard/netlink.c
@@ -22,8 +22,8 @@ static struct genl_family genl_family;
static const struct nla_policy device_policy[WGDEVICE_A_MAX + 1] = {
[WGDEVICE_A_IFINDEX] = { .type = NLA_U32 },
[WGDEVICE_A_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
- [WGDEVICE_A_PRIVATE_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN },
- [WGDEVICE_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN },
+ [WGDEVICE_A_PRIVATE_KEY] = NLA_POLICY_EXACT_LEN(NOISE_PUBLIC_KEY_LEN),
+ [WGDEVICE_A_PUBLIC_KEY] = NLA_POLICY_EXACT_LEN(NOISE_PUBLIC_KEY_LEN),
[WGDEVICE_A_FLAGS] = { .type = NLA_U32 },
[WGDEVICE_A_LISTEN_PORT] = { .type = NLA_U16 },
[WGDEVICE_A_FWMARK] = { .type = NLA_U32 },
@@ -31,12 +31,12 @@ static const struct nla_policy device_policy[WGDEVICE_A_MAX + 1] = {
};
static const struct nla_policy peer_policy[WGPEER_A_MAX + 1] = {
- [WGPEER_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN },
- [WGPEER_A_PRESHARED_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_SYMMETRIC_KEY_LEN },
+ [WGPEER_A_PUBLIC_KEY] = NLA_POLICY_EXACT_LEN(NOISE_PUBLIC_KEY_LEN),
+ [WGPEER_A_PRESHARED_KEY] = NLA_POLICY_EXACT_LEN(NOISE_SYMMETRIC_KEY_LEN),
[WGPEER_A_FLAGS] = { .type = NLA_U32 },
- [WGPEER_A_ENDPOINT] = { .type = NLA_MIN_LEN, .len = sizeof(struct sockaddr) },
+ [WGPEER_A_ENDPOINT] = NLA_POLICY_MIN_LEN(sizeof(struct sockaddr)),
[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = { .type = NLA_U16 },
- [WGPEER_A_LAST_HANDSHAKE_TIME] = { .type = NLA_EXACT_LEN, .len = sizeof(struct __kernel_timespec) },
+ [WGPEER_A_LAST_HANDSHAKE_TIME] = NLA_POLICY_EXACT_LEN(sizeof(struct __kernel_timespec)),
[WGPEER_A_RX_BYTES] = { .type = NLA_U64 },
[WGPEER_A_TX_BYTES] = { .type = NLA_U64 },
[WGPEER_A_ALLOWEDIPS] = { .type = NLA_NESTED },
@@ -45,7 +45,7 @@ static const struct nla_policy peer_policy[WGPEER_A_MAX + 1] = {
static const struct nla_policy allowedip_policy[WGALLOWEDIP_A_MAX + 1] = {
[WGALLOWEDIP_A_FAMILY] = { .type = NLA_U16 },
- [WGALLOWEDIP_A_IPADDR] = { .type = NLA_MIN_LEN, .len = sizeof(struct in_addr) },
+ [WGALLOWEDIP_A_IPADDR] = NLA_POLICY_MIN_LEN(sizeof(struct in_addr)),
[WGALLOWEDIP_A_CIDR_MASK] = { .type = NLA_U8 }
};
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 3c0c33a..b379022 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -2019,8 +2019,8 @@ static void ath10k_mac_vif_ap_csa_count_down(struct ath10k_vif *arvif)
if (!arvif->is_up)
return;
- if (!ieee80211_csa_is_complete(vif)) {
- ieee80211_csa_update_counter(vif);
+ if (!ieee80211_beacon_cntdwn_is_complete(vif)) {
+ ieee80211_beacon_update_cntdwn(vif);
ret = ath10k_mac_setup_bcn_tmpl(arvif);
if (ret)
diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index a81a1ab..b661d4e 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -3878,7 +3878,7 @@ void ath10k_wmi_event_host_swba(struct ath10k *ar, struct sk_buff *skb)
* actual channel switch is done
*/
if (arvif->vif->csa_active &&
- ieee80211_csa_is_complete(arvif->vif)) {
+ ieee80211_beacon_cntdwn_is_complete(arvif->vif)) {
ieee80211_csa_finish(arvif->vif);
continue;
}
diff --git a/drivers/net/wireless/ath/ath11k/thermal.c b/drivers/net/wireless/ath/ath11k/thermal.c
index 5a7e150..c96b26f 100644
--- a/drivers/net/wireless/ath/ath11k/thermal.c
+++ b/drivers/net/wireless/ath/ath11k/thermal.c
@@ -53,7 +53,7 @@ ath11k_thermal_set_cur_throttle_state(struct thermal_cooling_device *cdev,
return ret;
}
-static struct thermal_cooling_device_ops ath11k_thermal_ops = {
+static const struct thermal_cooling_device_ops ath11k_thermal_ops = {
.get_max_state = ath11k_thermal_get_max_throttle_state,
.get_cur_state = ath11k_thermal_get_cur_throttle_state,
.set_cur_state = ath11k_thermal_set_cur_throttle_state,
diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
index 8e3437a..9fffa37 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.c
+++ b/drivers/net/wireless/ath/ath11k/wmi.c
@@ -1593,8 +1593,8 @@ int ath11k_wmi_bcn_tmpl(struct ath11k *ar, u32 vdev_id,
FIELD_PREP(WMI_TLV_LEN, sizeof(*cmd) - TLV_HDR_SIZE);
cmd->vdev_id = vdev_id;
cmd->tim_ie_offset = offs->tim_offset;
- cmd->csa_switch_count_offset = offs->csa_counter_offs[0];
- cmd->ext_csa_switch_count_offset = offs->csa_counter_offs[1];
+ cmd->csa_switch_count_offset = offs->cntdwn_counter_offs[0];
+ cmd->ext_csa_switch_count_offset = offs->cntdwn_counter_offs[1];
cmd->buf_len = bcn->len;
ptr = skb->data + sizeof(*cmd);
diff --git a/drivers/net/wireless/ath/ath9k/beacon.c b/drivers/net/wireless/ath/ath9k/beacon.c
index e36f947..4daaf9b 100644
--- a/drivers/net/wireless/ath/ath9k/beacon.c
+++ b/drivers/net/wireless/ath/ath9k/beacon.c
@@ -365,7 +365,7 @@ bool ath9k_csa_is_finished(struct ath_softc *sc, struct ieee80211_vif *vif)
if (!vif || !vif->csa_active)
return false;
- if (!ieee80211_csa_is_complete(vif))
+ if (!ieee80211_beacon_cntdwn_is_complete(vif))
return false;
ieee80211_csa_finish(vif);
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c
index f20c839..c745897 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c
@@ -514,7 +514,7 @@ bool ath9k_htc_csa_is_finished(struct ath9k_htc_priv *priv)
if (!vif || !vif->csa_active)
return false;
- if (!ieee80211_csa_is_complete(vif))
+ if (!ieee80211_beacon_cntdwn_is_complete(vif))
return false;
ieee80211_csa_finish(vif);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
index b78992e..81bc05d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
@@ -1300,8 +1300,8 @@ static void iwl_mvm_csa_count_down(struct iwl_mvm *mvm,
mvmvif->csa_countdown = true;
- if (!ieee80211_csa_is_complete(csa_vif)) {
- int c = ieee80211_csa_update_counter(csa_vif);
+ if (!ieee80211_beacon_cntdwn_is_complete(csa_vif)) {
+ int c = ieee80211_beacon_update_cntdwn(csa_vif);
iwl_mvm_mac_ctxt_beacon_changed(mvm, csa_vif);
if (csa_vif->p2p &&
@@ -1543,7 +1543,7 @@ void iwl_mvm_probe_resp_data_notif(struct iwl_mvm *mvm,
if (notif->csa_counter != IWL_PROBE_RESP_DATA_NO_CSA &&
notif->csa_counter >= 1)
- ieee80211_csa_set_counter(vif, notif->csa_counter);
+ ieee80211_beacon_set_cntdwn(vif, notif->csa_counter);
}
void iwl_mvm_channel_switch_noa_notif(struct iwl_mvm *mvm,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
index 1babc4b..8abb570 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
@@ -172,7 +172,7 @@ static void iwl_mvm_csa_noa_start(struct iwl_mvm *mvm)
* So we just do nothing here and the switch
* will be performed on the last TBTT.
*/
- if (!ieee80211_csa_is_complete(csa_vif)) {
+ if (!ieee80211_beacon_cntdwn_is_complete(csa_vif)) {
IWL_WARN(mvm, "CSA NOA started too early\n");
goto out_unlock;
}
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 9dd9d73..dce3bc9 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -1699,7 +1699,7 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac,
rcu_dereference(vif->chanctx_conf)->def.chan);
}
- if (vif->csa_active && ieee80211_csa_is_complete(vif))
+ if (vif->csa_active && ieee80211_beacon_cntdwn_is_complete(vif))
ieee80211_csa_finish(vif);
}
diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index 3d4bf72..fbfb991 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -1095,7 +1095,7 @@ EXPORT_SYMBOL_GPL(mt76_get_txpower);
static void
__mt76_csa_finish(void *priv, u8 *mac, struct ieee80211_vif *vif)
{
- if (vif->csa_active && ieee80211_csa_is_complete(vif))
+ if (vif->csa_active && ieee80211_beacon_cntdwn_is_complete(vif))
ieee80211_csa_finish(vif);
}
@@ -1120,7 +1120,7 @@ __mt76_csa_check(void *priv, u8 *mac, struct ieee80211_vif *vif)
if (!vif->csa_active)
return;
- dev->csa_complete |= ieee80211_csa_is_complete(vif);
+ dev->csa_complete |= ieee80211_beacon_cntdwn_is_complete(vif);
}
void mt76_csa_check(struct mt76_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index d0cbb28..084982e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -650,12 +650,12 @@ mt7615_mcu_add_beacon_offload(struct mt7615_dev *dev,
memcpy(req.pkt + MT_TXD_SIZE, skb->data, skb->len);
req.pkt_len = cpu_to_le16(MT_TXD_SIZE + skb->len);
req.tim_ie_pos = cpu_to_le16(MT_TXD_SIZE + offs.tim_offset);
- if (offs.csa_counter_offs[0]) {
+ if (offs.cntdwn_counter_offs[0]) {
u16 csa_offs;
- csa_offs = MT_TXD_SIZE + offs.csa_counter_offs[0] - 4;
+ csa_offs = MT_TXD_SIZE + offs.cntdwn_counter_offs[0] - 4;
req.csa_ie_pos = cpu_to_le16(csa_offs);
- req.csa_cnt = skb->data[offs.csa_counter_offs[0]];
+ req.csa_cnt = skb->data[offs.cntdwn_counter_offs[0]];
}
dev_kfree_skb(skb);
@@ -1713,10 +1713,10 @@ mt7615_mcu_uni_add_beacon_offload(struct mt7615_dev *dev,
req.beacon_tlv.pkt_len = cpu_to_le16(MT_TXD_SIZE + skb->len);
req.beacon_tlv.tim_ie_pos = cpu_to_le16(MT_TXD_SIZE + offs.tim_offset);
- if (offs.csa_counter_offs[0]) {
+ if (offs.cntdwn_counter_offs[0]) {
u16 csa_offs;
- csa_offs = MT_TXD_SIZE + offs.csa_counter_offs[0] - 4;
+ csa_offs = MT_TXD_SIZE + offs.cntdwn_counter_offs[0] - 4;
req.beacon_tlv.csa_ie_pos = cpu_to_le16(csa_offs);
}
dev_kfree_skb(skb);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index eaed5ef..ac8ec257 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -2282,7 +2282,7 @@ mt7915_mcu_beacon_csa(struct sk_buff *rskb, struct sk_buff *skb,
struct bss_info_bcn *bcn,
struct ieee80211_mutable_offsets *offs)
{
- if (offs->csa_counter_offs[0]) {
+ if (offs->cntdwn_counter_offs[0]) {
struct tlv *tlv;
struct bss_info_bcn_csa *csa;
@@ -2290,7 +2290,7 @@ mt7915_mcu_beacon_csa(struct sk_buff *rskb, struct sk_buff *skb,
sizeof(*csa), &bcn->sub_ntlv,
&bcn->len);
csa = (struct bss_info_bcn_csa *)tlv;
- csa->cnt = skb->data[offs->csa_counter_offs[0]];
+ csa->cnt = skb->data[offs->cntdwn_counter_offs[0]];
}
}
@@ -2312,8 +2312,8 @@ mt7915_mcu_beacon_cont(struct mt7915_dev *dev, struct sk_buff *rskb,
cont->pkt_len = cpu_to_le16(MT_TXD_SIZE + skb->len);
cont->tim_ofs = cpu_to_le16(offs->tim_offset);
- if (offs->csa_counter_offs[0])
- cont->csa_ofs = cpu_to_le16(offs->csa_counter_offs[0] - 4);
+ if (offs->cntdwn_counter_offs[0])
+ cont->csa_ofs = cpu_to_le16(offs->cntdwn_counter_offs[0] - 4);
buf = (u8 *)tlv + sizeof(*cont);
mt7915_mac_write_txwi(dev, (__le32 *)buf, skb, wcid, NULL,
diff --git a/drivers/nfc/st-nci/se.c b/drivers/nfc/st-nci/se.c
index f25f1ec..807eae0 100644
--- a/drivers/nfc/st-nci/se.c
+++ b/drivers/nfc/st-nci/se.c
@@ -331,8 +331,7 @@ static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev,
skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG)
return -EPROTO;
- transaction = (struct nfc_evt_transaction *)devm_kzalloc(dev,
- skb->len - 2, GFP_KERNEL);
+ transaction = devm_kzalloc(dev, skb->len - 2, GFP_KERNEL);
if (!transaction)
return -ENOMEM;
diff --git a/drivers/nfc/st21nfca/se.c b/drivers/nfc/st21nfca/se.c
index 6586378..c8bdf07 100644
--- a/drivers/nfc/st21nfca/se.c
+++ b/drivers/nfc/st21nfca/se.c
@@ -315,8 +315,7 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host,
skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG)
return -EPROTO;
- transaction = (struct nfc_evt_transaction *)devm_kzalloc(dev,
- skb->len - 2, GFP_KERNEL);
+ transaction = devm_kzalloc(dev, skb->len - 2, GFP_KERNEL);
if (!transaction)
return -ENOMEM;
diff --git a/drivers/ptp/ptp_ines.c b/drivers/ptp/ptp_ines.c
index 7711651..4700ffb 100644
--- a/drivers/ptp/ptp_ines.c
+++ b/drivers/ptp/ptp_ines.c
@@ -93,9 +93,6 @@ MODULE_LICENSE("GPL");
#define TC_E2E_PTP_V2 2
#define TC_P2P_PTP_V2 3
-#define OFF_PTP_CLOCK_ID 20
-#define OFF_PTP_PORT_NUM 28
-
#define PHY_SPEED_10 0
#define PHY_SPEED_100 1
#define PHY_SPEED_1000 2
@@ -443,57 +440,41 @@ static void ines_link_state(struct mii_timestamper *mii_ts,
static bool ines_match(struct sk_buff *skb, unsigned int ptp_class,
struct ines_timestamp *ts, struct device *dev)
{
- u8 *msgtype, *data = skb_mac_header(skb);
- unsigned int offset = 0;
- __be16 *portn, *seqid;
- __be64 *clkid;
+ struct ptp_header *hdr;
+ u16 portn, seqid;
+ u8 msgtype;
+ u64 clkid;
if (unlikely(ptp_class & PTP_CLASS_V1))
return false;
- if (ptp_class & PTP_CLASS_VLAN)
- offset += VLAN_HLEN;
-
- switch (ptp_class & PTP_CLASS_PMASK) {
- case PTP_CLASS_IPV4:
- offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
- break;
- case PTP_CLASS_IPV6:
- offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
- break;
- case PTP_CLASS_L2:
- offset += ETH_HLEN;
- break;
- default:
- return false;
- }
-
- if (skb->len + ETH_HLEN < offset + OFF_PTP_SEQUENCE_ID + sizeof(*seqid))
+ hdr = ptp_parse_header(skb, ptp_class);
+ if (!hdr)
return false;
- msgtype = data + offset;
- clkid = (__be64 *)(data + offset + OFF_PTP_CLOCK_ID);
- portn = (__be16 *)(data + offset + OFF_PTP_PORT_NUM);
- seqid = (__be16 *)(data + offset + OFF_PTP_SEQUENCE_ID);
+ msgtype = ptp_get_msgtype(hdr, ptp_class);
+ clkid = be64_to_cpup((__be64 *)&hdr->source_port_identity.clock_identity.id[0]);
+ portn = be16_to_cpu(hdr->source_port_identity.port_number);
+ seqid = be16_to_cpu(hdr->sequence_id);
- if (tag_to_msgtype(ts->tag & 0x7) != (*msgtype & 0xf)) {
+ if (tag_to_msgtype(ts->tag & 0x7) != msgtype) {
dev_dbg(dev, "msgtype mismatch ts %hhu != skb %hhu\n",
- tag_to_msgtype(ts->tag & 0x7), *msgtype & 0xf);
+ tag_to_msgtype(ts->tag & 0x7), msgtype);
return false;
}
- if (cpu_to_be64(ts->clkid) != *clkid) {
+ if (ts->clkid != clkid) {
dev_dbg(dev, "clkid mismatch ts %llx != skb %llx\n",
- cpu_to_be64(ts->clkid), *clkid);
+ ts->clkid, clkid);
return false;
}
- if (ts->portnum != ntohs(*portn)) {
+ if (ts->portnum != portn) {
dev_dbg(dev, "portn mismatch ts %hu != skb %hu\n",
- ts->portnum, ntohs(*portn));
+ ts->portnum, portn);
return false;
}
- if (ts->seqid != ntohs(*seqid)) {
+ if (ts->seqid != seqid) {
dev_dbg(dev, "seqid mismatch ts %hu != skb %hu\n",
- ts->seqid, ntohs(*seqid));
+ ts->seqid, seqid);
return false;
}
@@ -663,8 +644,7 @@ static void ines_txtstamp(struct mii_timestamper *mii_ts,
spin_unlock_irqrestore(&port->lock, flags);
- if (old_skb)
- kfree_skb(old_skb);
+ kfree_skb(old_skb);
schedule_delayed_work(&port->ts_work, 1);
}
@@ -694,35 +674,16 @@ static void ines_txtstamp_work(struct work_struct *work)
static bool is_sync_pdelay_resp(struct sk_buff *skb, int type)
{
- u8 *data = skb->data, *msgtype;
- unsigned int offset = 0;
+ struct ptp_header *hdr;
+ u8 msgtype;
- if (type & PTP_CLASS_VLAN)
- offset += VLAN_HLEN;
+ hdr = ptp_parse_header(skb, type);
+ if (!hdr)
+ return false;
- switch (type & PTP_CLASS_PMASK) {
- case PTP_CLASS_IPV4:
- offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
- break;
- case PTP_CLASS_IPV6:
- offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
- break;
- case PTP_CLASS_L2:
- offset += ETH_HLEN;
- break;
- default:
- return 0;
- }
+ msgtype = ptp_get_msgtype(hdr, type);
- if (type & PTP_CLASS_V1)
- offset += OFF_PTP_CONTROL;
-
- if (skb->len < offset + 1)
- return 0;
-
- msgtype = data + offset;
-
- switch ((*msgtype & 0xf)) {
+ switch ((msgtype & 0xf)) {
case SYNC:
case PDELAY_RESP:
return true;
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index ecfd6d1..da46af6 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -420,12 +420,6 @@ struct qeth_qdio_out_buffer {
struct qeth_card;
-enum qeth_out_q_states {
- QETH_OUT_Q_UNLOCKED,
- QETH_OUT_Q_LOCKED,
- QETH_OUT_Q_LOCKED_FLUSH,
-};
-
#define QETH_CARD_STAT_ADD(_c, _stat, _val) ((_c)->stats._stat += (_val))
#define QETH_CARD_STAT_INC(_c, _stat) QETH_CARD_STAT_ADD(_c, _stat, 1)
@@ -486,12 +480,12 @@ struct qeth_qdio_out_q {
struct qeth_qdio_out_buffer *bufs[QDIO_MAX_BUFFERS_PER_Q];
struct qdio_outbuf_state *bufstates; /* convenience pointer */
struct qeth_out_q_stats stats;
+ spinlock_t lock;
u8 next_buf_to_fill;
u8 max_elements;
u8 queue_no;
u8 do_pack;
struct qeth_card *card;
- atomic_t state;
/*
* number of buffers that are currently filled (PRIMED)
* -> these buffers are hardware-owned
@@ -680,6 +674,11 @@ struct qeth_card_blkt {
int inter_packet_jumbo;
};
+enum qeth_pnso_mode {
+ QETH_PNSO_NONE,
+ QETH_PNSO_BRIDGEPORT,
+};
+
#define QETH_BROADCAST_WITH_ECHO 0x01
#define QETH_BROADCAST_WITHOUT_ECHO 0x02
struct qeth_card_info {
@@ -696,6 +695,7 @@ struct qeth_card_info {
/* no bitfield, we take a pointer on these two: */
u8 has_lp2lp_cso_v6;
u8 has_lp2lp_cso_v4;
+ enum qeth_pnso_mode pnso_mode;
enum qeth_card_types type;
enum qeth_link_types link_type;
int broadcast_capable;
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 6a73982..e19640b 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -2702,6 +2702,7 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card)
card->qdio.out_qs[i] = queue;
queue->card = card;
queue->queue_no = i;
+ spin_lock_init(&queue->lock);
timer_setup(&queue->timer, qeth_tx_completion_timer, 0);
queue->coalesce_usecs = QETH_TX_COALESCE_USECS;
queue->max_coalesced_frames = QETH_TX_MAX_COALESCED_FRAMES;
@@ -3068,7 +3069,6 @@ static int qeth_init_qdio_queues(struct qeth_card *card)
queue->bulk_max = qeth_tx_select_bulk_max(card, queue);
atomic_set(&queue->used_buffers, 0);
atomic_set(&queue->set_pci_flags_count, 0);
- atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
netdev_tx_reset_queue(netdev_get_tx_queue(card->dev, i));
}
return 0;
@@ -3549,8 +3549,9 @@ static unsigned int qeth_rx_refill_queue(struct qeth_card *card,
static void qeth_buffer_reclaim_work(struct work_struct *work)
{
- struct qeth_card *card = container_of(work, struct qeth_card,
- buffer_reclaim_work.work);
+ struct qeth_card *card = container_of(to_delayed_work(work),
+ struct qeth_card,
+ buffer_reclaim_work);
local_bh_disable();
napi_schedule(&card->napi);
@@ -3740,37 +3741,31 @@ static void qeth_flush_queue(struct qeth_qdio_out_q *queue)
static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue)
{
- int index;
- int flush_cnt = 0;
- int q_was_packing = 0;
-
/*
* check if weed have to switch to non-packing mode or if
* we have to get a pci flag out on the queue
*/
if ((atomic_read(&queue->used_buffers) <= QETH_LOW_WATERMARK_PACK) ||
!atomic_read(&queue->set_pci_flags_count)) {
- if (atomic_xchg(&queue->state, QETH_OUT_Q_LOCKED_FLUSH) ==
- QETH_OUT_Q_UNLOCKED) {
- /*
- * If we get in here, there was no action in
- * do_send_packet. So, we check if there is a
- * packing buffer to be flushed here.
- */
- index = queue->next_buf_to_fill;
- q_was_packing = queue->do_pack;
- /* queue->do_pack may change */
- barrier();
- flush_cnt += qeth_switch_to_nonpacking_if_needed(queue);
- if (!flush_cnt &&
- !atomic_read(&queue->set_pci_flags_count))
- flush_cnt += qeth_prep_flush_pack_buffer(queue);
+ unsigned int index, flush_cnt;
+ bool q_was_packing;
+
+ spin_lock(&queue->lock);
+
+ index = queue->next_buf_to_fill;
+ q_was_packing = queue->do_pack;
+
+ flush_cnt = qeth_switch_to_nonpacking_if_needed(queue);
+ if (!flush_cnt && !atomic_read(&queue->set_pci_flags_count))
+ flush_cnt = qeth_prep_flush_pack_buffer(queue);
+
+ if (flush_cnt) {
+ qeth_flush_buffers(queue, index, flush_cnt);
if (q_was_packing)
QETH_TXQ_STAT_ADD(queue, bufs_pack, flush_cnt);
- if (flush_cnt)
- qeth_flush_buffers(queue, index, flush_cnt);
- atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
}
+
+ spin_unlock(&queue->lock);
}
}
@@ -4282,29 +4277,22 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
unsigned int offset, unsigned int hd_len,
int elements_needed)
{
+ unsigned int start_index = queue->next_buf_to_fill;
struct qeth_qdio_out_buffer *buffer;
unsigned int next_element;
struct netdev_queue *txq;
bool stopped = false;
- int start_index;
int flush_count = 0;
int do_pack = 0;
- int tmp;
int rc = 0;
- /* spin until we get the queue ... */
- while (atomic_cmpxchg(&queue->state, QETH_OUT_Q_UNLOCKED,
- QETH_OUT_Q_LOCKED) != QETH_OUT_Q_UNLOCKED);
- start_index = queue->next_buf_to_fill;
buffer = queue->bufs[queue->next_buf_to_fill];
/* Just a sanity check, the wake/stop logic should ensure that we always
* get a free buffer.
*/
- if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY) {
- atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
+ if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY)
return -EBUSY;
- }
txq = netdev_get_tx_queue(card->dev, skb_get_queue_mapping(skb));
@@ -4327,8 +4315,6 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
QETH_QDIO_BUF_EMPTY) {
qeth_flush_buffers(queue, start_index,
flush_count);
- atomic_set(&queue->state,
- QETH_OUT_Q_UNLOCKED);
rc = -EBUSY;
goto out;
}
@@ -4360,31 +4346,8 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
if (flush_count)
qeth_flush_buffers(queue, start_index, flush_count);
- else if (!atomic_read(&queue->set_pci_flags_count))
- atomic_xchg(&queue->state, QETH_OUT_Q_LOCKED_FLUSH);
- /*
- * queue->state will go from LOCKED -> UNLOCKED or from
- * LOCKED_FLUSH -> LOCKED if output_handler wanted to 'notify' us
- * (switch packing state or flush buffer to get another pci flag out).
- * In that case we will enter this loop
- */
- while (atomic_dec_return(&queue->state)) {
- start_index = queue->next_buf_to_fill;
- /* check if we can go back to non-packing state */
- tmp = qeth_switch_to_nonpacking_if_needed(queue);
- /*
- * check if we need to flush a packing buffer to get a pci
- * flag out on the queue
- */
- if (!tmp && !atomic_read(&queue->set_pci_flags_count))
- tmp = qeth_prep_flush_pack_buffer(queue);
- if (tmp) {
- qeth_flush_buffers(queue, start_index, tmp);
- flush_count += tmp;
- }
- }
+
out:
- /* at this point the queue is UNLOCKED again */
if (do_pack)
QETH_TXQ_STAT_ADD(queue, bufs_pack, flush_count);
@@ -4458,8 +4421,10 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb,
} else {
/* TODO: drop skb_orphan() once TX completion is fast enough */
skb_orphan(skb);
+ spin_lock(&queue->lock);
rc = qeth_do_send_packet(card, queue, skb, hdr, data_offset,
hd_len, elements);
+ spin_unlock(&queue->lock);
}
if (rc && !push_len)
diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h
index b459def..6541bab 100644
--- a/drivers/s390/net/qeth_core_mpc.h
+++ b/drivers/s390/net/qeth_core_mpc.h
@@ -719,15 +719,8 @@ struct qeth_sbp_port_entry {
struct net_if_token token;
} __packed;
-struct qeth_sbp_query_ports {
- __u8 primary_bp_supported;
- __u8 secondary_bp_supported;
- __u8 num_entries;
- __u8 entry_length;
- struct qeth_sbp_port_entry entry[];
-} __packed;
-
-struct qeth_sbp_state_change {
+/* For IPA_SBP_QUERY_BRIDGE_PORTS, IPA_SBP_BRIDGE_PORT_STATE_CHANGE */
+struct qeth_sbp_port_data {
__u8 primary_bp_supported;
__u8 secondary_bp_supported;
__u8 num_entries;
@@ -741,8 +734,7 @@ struct qeth_ipacmd_setbridgeport {
union {
struct qeth_sbp_query_cmds_supp query_cmds_supp;
struct qeth_sbp_set_primary set_primary;
- struct qeth_sbp_query_ports query_ports;
- struct qeth_sbp_state_change state_change;
+ struct qeth_sbp_port_data port_data;
} data;
} __packed;
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 3a94f6c..4915780 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -273,6 +273,17 @@ static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev,
return qeth_l2_send_setdelvlan(card, vid, IPA_CMD_DELVLAN);
}
+static void qeth_l2_set_pnso_mode(struct qeth_card *card,
+ enum qeth_pnso_mode mode)
+{
+ spin_lock_irq(get_ccwdev_lock(CARD_RDEV(card)));
+ WRITE_ONCE(card->info.pnso_mode, mode);
+ spin_unlock_irq(get_ccwdev_lock(CARD_RDEV(card)));
+
+ if (mode == QETH_PNSO_NONE)
+ drain_workqueue(card->event_wq);
+}
+
static void qeth_l2_stop_card(struct qeth_card *card)
{
QETH_CARD_TEXT(card, 2, "stopcard");
@@ -290,7 +301,7 @@ static void qeth_l2_stop_card(struct qeth_card *card)
qeth_qdio_clear_card(card, 0);
qeth_clear_working_pool_list(card);
- flush_workqueue(card->event_wq);
+ qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE);
qeth_flush_local_addrs(card);
card->info.promisc_mode = 0;
}
@@ -810,8 +821,6 @@ static void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card)
if (card->options.sbp.hostnotification) {
if (qeth_bridgeport_an_set(card, 1))
card->options.sbp.hostnotification = 0;
- } else {
- qeth_bridgeport_an_set(card, 0);
}
}
@@ -1090,15 +1099,14 @@ static void qeth_bridge_emit_host_event(struct qeth_card *card,
struct qeth_bridge_state_data {
struct work_struct worker;
struct qeth_card *card;
- struct qeth_sbp_state_change qports;
+ u8 role;
+ u8 state;
};
static void qeth_bridge_state_change_worker(struct work_struct *work)
{
struct qeth_bridge_state_data *data =
container_of(work, struct qeth_bridge_state_data, worker);
- /* We are only interested in the first entry - local port */
- struct qeth_sbp_port_entry *entry = &data->qports.entry[0];
char env_locrem[32];
char env_role[32];
char env_state[32];
@@ -1109,22 +1117,16 @@ static void qeth_bridge_state_change_worker(struct work_struct *work)
NULL
};
- /* Role should not change by itself, but if it did, */
- /* information from the hardware is authoritative. */
- mutex_lock(&data->card->sbp_lock);
- data->card->options.sbp.role = entry->role;
- mutex_unlock(&data->card->sbp_lock);
-
snprintf(env_locrem, sizeof(env_locrem), "BRIDGEPORT=statechange");
snprintf(env_role, sizeof(env_role), "ROLE=%s",
- (entry->role == QETH_SBP_ROLE_NONE) ? "none" :
- (entry->role == QETH_SBP_ROLE_PRIMARY) ? "primary" :
- (entry->role == QETH_SBP_ROLE_SECONDARY) ? "secondary" :
+ (data->role == QETH_SBP_ROLE_NONE) ? "none" :
+ (data->role == QETH_SBP_ROLE_PRIMARY) ? "primary" :
+ (data->role == QETH_SBP_ROLE_SECONDARY) ? "secondary" :
"<INVALID>");
snprintf(env_state, sizeof(env_state), "STATE=%s",
- (entry->state == QETH_SBP_STATE_INACTIVE) ? "inactive" :
- (entry->state == QETH_SBP_STATE_STANDBY) ? "standby" :
- (entry->state == QETH_SBP_STATE_ACTIVE) ? "active" :
+ (data->state == QETH_SBP_STATE_INACTIVE) ? "inactive" :
+ (data->state == QETH_SBP_STATE_STANDBY) ? "standby" :
+ (data->state == QETH_SBP_STATE_ACTIVE) ? "active" :
"<INVALID>");
kobject_uevent_env(&data->card->gdev->dev.kobj,
KOBJ_CHANGE, env);
@@ -1134,10 +1136,8 @@ static void qeth_bridge_state_change_worker(struct work_struct *work)
static void qeth_bridge_state_change(struct qeth_card *card,
struct qeth_ipa_cmd *cmd)
{
- struct qeth_sbp_state_change *qports =
- &cmd->data.sbp.data.state_change;
+ struct qeth_sbp_port_data *qports = &cmd->data.sbp.data.port_data;
struct qeth_bridge_state_data *data;
- int extrasize;
QETH_CARD_TEXT(card, 2, "brstchng");
if (qports->num_entries == 0) {
@@ -1148,34 +1148,50 @@ static void qeth_bridge_state_change(struct qeth_card *card,
QETH_CARD_TEXT_(card, 2, "BPsz%04x", qports->entry_length);
return;
}
- extrasize = sizeof(struct qeth_sbp_port_entry) * qports->num_entries;
- data = kzalloc(sizeof(struct qeth_bridge_state_data) + extrasize,
- GFP_ATOMIC);
+
+ data = kzalloc(sizeof(*data), GFP_ATOMIC);
if (!data) {
QETH_CARD_TEXT(card, 2, "BPSalloc");
return;
}
INIT_WORK(&data->worker, qeth_bridge_state_change_worker);
data->card = card;
- memcpy(&data->qports, qports,
- sizeof(struct qeth_sbp_state_change) + extrasize);
+ /* Information for the local port: */
+ data->role = qports->entry[0].role;
+ data->state = qports->entry[0].state;
+
queue_work(card->event_wq, &data->worker);
}
struct qeth_addr_change_data {
- struct work_struct worker;
+ struct delayed_work dwork;
struct qeth_card *card;
struct qeth_ipacmd_addr_change ac_event;
};
static void qeth_addr_change_event_worker(struct work_struct *work)
{
- struct qeth_addr_change_data *data =
- container_of(work, struct qeth_addr_change_data, worker);
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct qeth_addr_change_data *data;
+ struct qeth_card *card;
int i;
+ data = container_of(dwork, struct qeth_addr_change_data, dwork);
+ card = data->card;
+
QETH_CARD_TEXT(data->card, 4, "adrchgew");
+
+ if (READ_ONCE(card->info.pnso_mode) == QETH_PNSO_NONE)
+ goto free;
+
if (data->ac_event.lost_event_mask) {
+ /* Potential re-config in progress, try again later: */
+ if (!mutex_trylock(&card->sbp_lock)) {
+ queue_delayed_work(card->event_wq, dwork,
+ msecs_to_jiffies(100));
+ return;
+ }
+
dev_info(&data->card->gdev->dev,
"Address change notification stopped on %s (%s)\n",
data->card->dev->name,
@@ -1184,8 +1200,9 @@ static void qeth_addr_change_event_worker(struct work_struct *work)
: (data->ac_event.lost_event_mask == 0x02)
? "Bridge port state change"
: "Unknown reason");
- mutex_lock(&data->card->sbp_lock);
+
data->card->options.sbp.hostnotification = 0;
+ card->info.pnso_mode = QETH_PNSO_NONE;
mutex_unlock(&data->card->sbp_lock);
qeth_bridge_emit_host_event(data->card, anev_abort,
0, NULL, NULL);
@@ -1199,6 +1216,8 @@ static void qeth_addr_change_event_worker(struct work_struct *work)
&entry->token,
&entry->addr_lnid);
}
+
+free:
kfree(data);
}
@@ -1210,6 +1229,9 @@ static void qeth_addr_change_event(struct qeth_card *card,
struct qeth_addr_change_data *data;
int extrasize;
+ if (card->info.pnso_mode == QETH_PNSO_NONE)
+ return;
+
QETH_CARD_TEXT(card, 4, "adrchgev");
if (cmd->hdr.return_code != 0x0000) {
if (cmd->hdr.return_code == 0x0010) {
@@ -1229,11 +1251,11 @@ static void qeth_addr_change_event(struct qeth_card *card,
QETH_CARD_TEXT(card, 2, "ACNalloc");
return;
}
- INIT_WORK(&data->worker, qeth_addr_change_event_worker);
+ INIT_DELAYED_WORK(&data->dwork, qeth_addr_change_event_worker);
data->card = card;
memcpy(&data->ac_event, hostevs,
sizeof(struct qeth_ipacmd_addr_change) + extrasize);
- queue_work(card->event_wq, &data->worker);
+ queue_delayed_work(card->event_wq, &data->dwork, 0);
}
/* SETBRIDGEPORT support; sending commands */
@@ -1418,8 +1440,8 @@ static int qeth_bridgeport_query_ports_cb(struct qeth_card *card,
struct qeth_reply *reply, unsigned long data)
{
struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
- struct qeth_sbp_query_ports *qports = &cmd->data.sbp.data.query_ports;
struct _qeth_sbp_cbctl *cbctl = (struct _qeth_sbp_cbctl *)reply->param;
+ struct qeth_sbp_port_data *qports;
int rc;
QETH_CARD_TEXT(card, 2, "brqprtcb");
@@ -1427,6 +1449,7 @@ static int qeth_bridgeport_query_ports_cb(struct qeth_card *card,
if (rc)
return rc;
+ qports = &cmd->data.sbp.data.port_data;
if (qports->entry_length != sizeof(struct qeth_sbp_port_entry)) {
QETH_CARD_TEXT_(card, 2, "SBPs%04x", qports->entry_length);
return -EINVAL;
@@ -1554,9 +1577,14 @@ int qeth_bridgeport_an_set(struct qeth_card *card, int enable)
if (enable) {
qeth_bridge_emit_host_event(card, anev_reset, 0, NULL, NULL);
+ qeth_l2_set_pnso_mode(card, QETH_PNSO_BRIDGEPORT);
rc = qeth_l2_pnso(card, 1, qeth_bridgeport_an_set_cb, card);
- } else
+ if (rc)
+ qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE);
+ } else {
rc = qeth_l2_pnso(card, 0, NULL, NULL);
+ qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE);
+ }
return rc;
}
diff --git a/drivers/s390/net/qeth_l2_sys.c b/drivers/s390/net/qeth_l2_sys.c
index 86bcae9..4695d25 100644
--- a/drivers/s390/net/qeth_l2_sys.c
+++ b/drivers/s390/net/qeth_l2_sys.c
@@ -157,6 +157,7 @@ static ssize_t qeth_bridgeport_hostnotification_store(struct device *dev,
rc = -EBUSY;
else if (qeth_card_hw_is_reachable(card)) {
rc = qeth_bridgeport_an_set(card, enable);
+ /* sbp_lock ensures ordering vs notifications-stopped events */
if (!rc)
card->options.sbp.hostnotification = enable;
} else
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 4d46196..767c5bb 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -314,7 +314,8 @@ static int qeth_l3_setdelip_cb(struct qeth_card *card, struct qeth_reply *reply,
}
static int qeth_l3_send_setdelmc(struct qeth_card *card,
- struct qeth_ipaddr *addr, int ipacmd)
+ struct qeth_ipaddr *addr,
+ enum qeth_ipa_cmds ipacmd)
{
struct qeth_cmd_buffer *iob;
struct qeth_ipa_cmd *cmd;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0d7be2e..522b891 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4927,6 +4927,12 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
mask |= POLLIN | POLLRDNORM;
if (def->pollout)
mask |= POLLOUT | POLLWRNORM;
+
+ /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
+ if ((req->opcode == IORING_OP_RECVMSG) &&
+ (req->sr_msg.msg_flags & MSG_ERRQUEUE))
+ mask &= ~POLLIN;
+
mask |= POLLERR | POLLPRI;
ipt.pt._qproc = io_async_queue_proc;
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 64f3670..2f98d2f 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -279,6 +279,31 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_RECVMSG, NULL)
+/* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a
+ * fullsock and its parent fullsock cannot be traced by
+ * sk_to_full_sk().
+ *
+ * e.g. sock_ops->sk is a request_sock and it is under syncookie mode.
+ * Its listener-sk is not attached to the rsk_listener.
+ * In this case, the caller holds the listener-sk (unlocked),
+ * set its sock_ops->sk to req_sk, and call this SOCK_OPS"_SK" with
+ * the listener-sk such that the cgroup-bpf-progs of the
+ * listener-sk will be run.
+ *
+ * Regardless of syncookie mode or not,
+ * calling bpf_setsockopt on listener-sk will not make sense anyway,
+ * so passing 'sock_ops->sk == req_sk' to the bpf prog is appropriate here.
+ */
+#define BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(sock_ops, sk) \
+({ \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled) \
+ __ret = __cgroup_bpf_run_filter_sock_ops(sk, \
+ sock_ops, \
+ BPF_CGROUP_SOCK_OPS); \
+ __ret; \
+})
+
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
({ \
int __ret = 0; \
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 55f694b..c6d9f2c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -34,6 +34,8 @@ struct btf_type;
struct exception_table_entry;
struct seq_operations;
struct bpf_iter_aux_info;
+struct bpf_local_storage;
+struct bpf_local_storage_map;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
@@ -104,6 +106,25 @@ struct bpf_map_ops {
__poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
struct poll_table_struct *pts);
+ /* Functions called by bpf_local_storage maps */
+ int (*map_local_storage_charge)(struct bpf_local_storage_map *smap,
+ void *owner, u32 size);
+ void (*map_local_storage_uncharge)(struct bpf_local_storage_map *smap,
+ void *owner, u32 size);
+ struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
+
+ /* map_meta_equal must be implemented for maps that can be
+ * used as an inner map. It is a runtime check to ensure
+ * an inner map can be inserted to an outer map.
+ *
+ * Some properties of the inner map has been used during the
+ * verification time. When inserting an inner map at the runtime,
+ * map_meta_equal has to ensure the inserting map has the same
+ * properties that the verifier has used earlier.
+ */
+ bool (*map_meta_equal)(const struct bpf_map *meta0,
+ const struct bpf_map *meta1);
+
/* BTF name and id of struct allocated by map_alloc */
const char * const map_btf_name;
int *map_btf_id;
@@ -227,6 +248,9 @@ int map_check_no_btf(const struct bpf_map *map,
const struct btf_type *key_type,
const struct btf_type *value_type);
+bool bpf_map_meta_equal(const struct bpf_map *meta0,
+ const struct bpf_map *meta1);
+
extern const struct bpf_map_ops bpf_map_offload_ops;
/* function argument constraints */
@@ -309,6 +333,7 @@ struct bpf_func_proto {
* for this argument.
*/
int *ret_btf_id; /* return value btf_id */
+ bool (*allowed)(const struct bpf_prog *prog);
};
/* bpf_context is intentionally undefined structure. Pointer to bpf_context is
@@ -514,6 +539,8 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
/* these two functions are called from generated trampoline */
u64 notrace __bpf_prog_enter(void);
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
+void notrace __bpf_prog_enter_sleepable(void);
+void notrace __bpf_prog_exit_sleepable(void);
struct bpf_ksym {
unsigned long start;
@@ -709,6 +736,7 @@ struct bpf_prog_aux {
bool offload_requested;
bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
bool func_proto_unreliable;
+ bool sleepable;
enum bpf_tramp_prog_type trampoline_prog_type;
struct bpf_trampoline *trampoline;
struct hlist_node tramp_hlist;
@@ -1218,12 +1246,18 @@ typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
union bpf_iter_link_info *linfo,
struct bpf_iter_aux_info *aux);
typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux);
+typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux,
+ struct seq_file *seq);
+typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux,
+ struct bpf_link_info *info);
#define BPF_ITER_CTX_ARG_MAX 2
struct bpf_iter_reg {
const char *target;
bpf_iter_attach_target_t attach_target;
bpf_iter_detach_target_t detach_target;
+ bpf_iter_show_fdinfo_t show_fdinfo;
+ bpf_iter_fill_link_info_t fill_link_info;
u32 ctx_arg_info_size;
struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
const struct bpf_iter_seq_info *seq_info;
@@ -1250,6 +1284,10 @@ int bpf_iter_new_fd(struct bpf_link *link);
bool bpf_link_is_iter(struct bpf_link *link);
struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop);
int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx);
+void bpf_iter_map_show_fdinfo(const struct bpf_iter_aux_info *aux,
+ struct seq_file *seq);
+int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux,
+ struct bpf_link_info *info);
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
@@ -1340,6 +1378,8 @@ int btf_struct_access(struct bpf_verifier_log *log,
const struct btf_type *t, int off, int size,
enum bpf_access_type atype,
u32 *next_btf_id);
+bool btf_struct_ids_match(struct bpf_verifier_log *log,
+ int off, u32 id, u32 need_type_id);
int btf_resolve_helper_id(struct bpf_verifier_log *log,
const struct bpf_func_proto *fn, int);
@@ -1358,6 +1398,7 @@ int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog,
struct btf *btf, const struct btf_type *t);
struct bpf_prog *bpf_prog_by_id(u32 id);
+struct bpf_link *bpf_link_by_id(u32 id);
const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
#else /* !CONFIG_BPF_SYSCALL */
@@ -1637,6 +1678,7 @@ int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
struct bpf_prog *old, u32 which);
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
+int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);
#else
@@ -1658,6 +1700,12 @@ static inline int sock_map_prog_detach(const union bpf_attr *attr,
{
return -EOPNOTSUPP;
}
+
+static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value,
+ u64 flags)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_BPF_STREAM_PARSER */
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
@@ -1736,6 +1784,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
+extern const struct bpf_func_proto bpf_copy_from_user_proto;
const struct bpf_func_proto *bpf_tracing_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
@@ -1850,4 +1899,7 @@ enum bpf_text_poke_type {
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
void *addr1, void *addr2);
+struct btf_id_set;
+bool btf_id_set_contains(struct btf_id_set *set, u32 id);
+
#endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
new file mode 100644
index 0000000..b2c9463
--- /dev/null
+++ b/include/linux/bpf_local_storage.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019 Facebook
+ * Copyright 2020 Google LLC.
+ */
+
+#ifndef _BPF_LOCAL_STORAGE_H
+#define _BPF_LOCAL_STORAGE_H
+
+#include <linux/bpf.h>
+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <linux/types.h>
+#include <uapi/linux/btf.h>
+
+#define BPF_LOCAL_STORAGE_CACHE_SIZE 16
+
+struct bpf_local_storage_map_bucket {
+ struct hlist_head list;
+ raw_spinlock_t lock;
+};
+
+/* Thp map is not the primary owner of a bpf_local_storage_elem.
+ * Instead, the container object (eg. sk->sk_bpf_storage) is.
+ *
+ * The map (bpf_local_storage_map) is for two purposes
+ * 1. Define the size of the "local storage". It is
+ * the map's value_size.
+ *
+ * 2. Maintain a list to keep track of all elems such
+ * that they can be cleaned up during the map destruction.
+ *
+ * When a bpf local storage is being looked up for a
+ * particular object, the "bpf_map" pointer is actually used
+ * as the "key" to search in the list of elem in
+ * the respective bpf_local_storage owned by the object.
+ *
+ * e.g. sk->sk_bpf_storage is the mini-map with the "bpf_map" pointer
+ * as the searching key.
+ */
+struct bpf_local_storage_map {
+ struct bpf_map map;
+ /* Lookup elem does not require accessing the map.
+ *
+ * Updating/Deleting requires a bucket lock to
+ * link/unlink the elem from the map. Having
+ * multiple buckets to improve contention.
+ */
+ struct bpf_local_storage_map_bucket *buckets;
+ u32 bucket_log;
+ u16 elem_size;
+ u16 cache_idx;
+};
+
+struct bpf_local_storage_data {
+ /* smap is used as the searching key when looking up
+ * from the object's bpf_local_storage.
+ *
+ * Put it in the same cacheline as the data to minimize
+ * the number of cachelines access during the cache hit case.
+ */
+ struct bpf_local_storage_map __rcu *smap;
+ u8 data[] __aligned(8);
+};
+
+/* Linked to bpf_local_storage and bpf_local_storage_map */
+struct bpf_local_storage_elem {
+ struct hlist_node map_node; /* Linked to bpf_local_storage_map */
+ struct hlist_node snode; /* Linked to bpf_local_storage */
+ struct bpf_local_storage __rcu *local_storage;
+ struct rcu_head rcu;
+ /* 8 bytes hole */
+ /* The data is stored in aother cacheline to minimize
+ * the number of cachelines access during a cache hit.
+ */
+ struct bpf_local_storage_data sdata ____cacheline_aligned;
+};
+
+struct bpf_local_storage {
+ struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE];
+ struct hlist_head list; /* List of bpf_local_storage_elem */
+ void *owner; /* The object that owns the above "list" of
+ * bpf_local_storage_elem.
+ */
+ struct rcu_head rcu;
+ raw_spinlock_t lock; /* Protect adding/removing from the "list" */
+};
+
+/* U16_MAX is much more than enough for sk local storage
+ * considering a tcp_sock is ~2k.
+ */
+#define BPF_LOCAL_STORAGE_MAX_VALUE_SIZE \
+ min_t(u32, \
+ (KMALLOC_MAX_SIZE - MAX_BPF_STACK - \
+ sizeof(struct bpf_local_storage_elem)), \
+ (U16_MAX - sizeof(struct bpf_local_storage_elem)))
+
+#define SELEM(_SDATA) \
+ container_of((_SDATA), struct bpf_local_storage_elem, sdata)
+#define SDATA(_SELEM) (&(_SELEM)->sdata)
+
+#define BPF_LOCAL_STORAGE_CACHE_SIZE 16
+
+struct bpf_local_storage_cache {
+ spinlock_t idx_lock;
+ u64 idx_usage_counts[BPF_LOCAL_STORAGE_CACHE_SIZE];
+};
+
+#define DEFINE_BPF_STORAGE_CACHE(name) \
+static struct bpf_local_storage_cache name = { \
+ .idx_lock = __SPIN_LOCK_UNLOCKED(name.idx_lock), \
+}
+
+u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache);
+void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
+ u16 idx);
+
+/* Helper functions for bpf_local_storage */
+int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
+
+struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr);
+
+struct bpf_local_storage_data *
+bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_map *smap,
+ bool cacheit_lockit);
+
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap);
+
+int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+ const struct btf *btf,
+ const struct btf_type *key_type,
+ const struct btf_type *value_type);
+
+void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem);
+
+bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem,
+ bool uncharge_omem);
+
+void bpf_selem_unlink(struct bpf_local_storage_elem *selem);
+
+void bpf_selem_link_map(struct bpf_local_storage_map *smap,
+ struct bpf_local_storage_elem *selem);
+
+void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem);
+
+struct bpf_local_storage_elem *
+bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
+ bool charge_mem);
+
+int
+bpf_local_storage_alloc(void *owner,
+ struct bpf_local_storage_map *smap,
+ struct bpf_local_storage_elem *first_selem);
+
+struct bpf_local_storage_data *
+bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
+ void *value, u64 map_flags);
+
+#endif /* _BPF_LOCAL_STORAGE_H */
diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index af74712..aaacb6a 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -17,9 +17,28 @@
#include <linux/lsm_hook_defs.h>
#undef LSM_HOOK
+struct bpf_storage_blob {
+ struct bpf_local_storage __rcu *storage;
+};
+
+extern struct lsm_blob_sizes bpf_lsm_blob_sizes;
+
int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
const struct bpf_prog *prog);
+static inline struct bpf_storage_blob *bpf_inode(
+ const struct inode *inode)
+{
+ if (unlikely(!inode->i_security))
+ return NULL;
+
+ return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
+}
+
+extern const struct bpf_func_proto bpf_inode_storage_get_proto;
+extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
+void bpf_inode_storage_free(struct inode *inode);
+
#else /* !CONFIG_BPF_LSM */
static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
@@ -28,6 +47,16 @@ static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
return -EOPNOTSUPP;
}
+static inline struct bpf_storage_blob *bpf_inode(
+ const struct inode *inode)
+{
+ return NULL;
+}
+
+static inline void bpf_inode_storage_free(struct inode *inode)
+{
+}
+
#endif /* CONFIG_BPF_LSM */
#endif /* _LINUX_BPF_LSM_H */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index a52a568..2e6f568 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -107,6 +107,9 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
#endif
+#ifdef CONFIG_BPF_LSM
+BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
+#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
#if defined(CONFIG_XDP_SOCKETS)
BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 8b81fbb..a9af5e7 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -64,8 +64,7 @@ const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf,
u32 id, u32 *res_id);
const struct btf_type *
btf_resolve_size(const struct btf *btf, const struct btf_type *type,
- u32 *type_size, const struct btf_type **elem_type,
- u32 *total_nelems);
+ u32 *type_size);
#define for_each_member(i, struct_type, member) \
for (i = 0, member = btf_type_member(struct_type); \
diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 4867d54..210b086 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -3,6 +3,11 @@
#ifndef _LINUX_BTF_IDS_H
#define _LINUX_BTF_IDS_H
+struct btf_id_set {
+ u32 cnt;
+ u32 ids[];
+};
+
#ifdef CONFIG_DEBUG_INFO_BTF
#include <linux/compiler.h> /* for __PASTE */
@@ -62,7 +67,7 @@ asm( \
".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
"." #scope " " #name "; \n" \
#name ":; \n" \
-".popsection; \n"); \
+".popsection; \n");
#define BTF_ID_LIST(name) \
__BTF_ID_LIST(name, local) \
@@ -88,12 +93,56 @@ asm( \
".zero 4 \n" \
".popsection; \n");
+/*
+ * The BTF_SET_START/END macros pair defines sorted list of
+ * BTF IDs plus its members count, with following layout:
+ *
+ * BTF_SET_START(list)
+ * BTF_ID(type1, name1)
+ * BTF_ID(type2, name2)
+ * BTF_SET_END(list)
+ *
+ * __BTF_ID__set__list:
+ * .zero 4
+ * list:
+ * __BTF_ID__type1__name1__3:
+ * .zero 4
+ * __BTF_ID__type2__name2__4:
+ * .zero 4
+ *
+ */
+#define __BTF_SET_START(name, scope) \
+asm( \
+".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
+"." #scope " __BTF_ID__set__" #name "; \n" \
+"__BTF_ID__set__" #name ":; \n" \
+".zero 4 \n" \
+".popsection; \n");
+
+#define BTF_SET_START(name) \
+__BTF_ID_LIST(name, local) \
+__BTF_SET_START(name, local)
+
+#define BTF_SET_START_GLOBAL(name) \
+__BTF_ID_LIST(name, globl) \
+__BTF_SET_START(name, globl)
+
+#define BTF_SET_END(name) \
+asm( \
+".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
+".size __BTF_ID__set__" #name ", .-" #name " \n" \
+".popsection; \n"); \
+extern struct btf_id_set name;
+
#else
#define BTF_ID_LIST(name) static u32 name[5];
#define BTF_ID(prefix, name)
#define BTF_ID_UNUSED
#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
+#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
+#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
+#define BTF_SET_END(name)
#endif /* CONFIG_DEBUG_INFO_BTF */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index ebfb7cf..05b4052 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1236,13 +1236,17 @@ struct bpf_sock_addr_kern {
struct bpf_sock_ops_kern {
struct sock *sk;
- u32 op;
union {
u32 args[4];
u32 reply;
u32 replylong[4];
};
- u32 is_fullsock;
+ struct sk_buff *syn_skb;
+ struct sk_buff *skb;
+ void *skb_data_end;
+ u8 op;
+ u8 is_fullsock;
+ u8 remaining_opt_len;
u64 temp; /* temp and everything after is not
* initialized to 0 before calling
* the BPF program. New fields that
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 5bda8cf..2a76608 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -27,9 +27,18 @@ struct tun_xdp_hdr {
#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
struct socket *tun_get_socket(struct file *);
struct ptr_ring *tun_get_tx_ring(struct file *file);
-bool tun_is_xdp_frame(void *ptr);
-void *tun_xdp_to_ptr(void *ptr);
-void *tun_ptr_to_xdp(void *ptr);
+static inline bool tun_is_xdp_frame(void *ptr)
+{
+ return (unsigned long)ptr & TUN_XDP_FLAG;
+}
+static inline void *tun_xdp_to_ptr(struct xdp_frame *xdp)
+{
+ return (void *)((unsigned long)xdp | TUN_XDP_FLAG);
+}
+static inline struct xdp_frame *tun_ptr_to_xdp(void *ptr)
+{
+ return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG);
+}
void tun_ptr_free(void *ptr);
#else
#include <linux/err.h>
@@ -48,11 +57,11 @@ static inline bool tun_is_xdp_frame(void *ptr)
{
return false;
}
-static inline void *tun_xdp_to_ptr(void *ptr)
+static inline void *tun_xdp_to_ptr(struct xdp_frame *xdp)
{
return NULL;
}
-static inline void *tun_ptr_to_xdp(void *ptr)
+static inline struct xdp_frame *tun_ptr_to_xdp(void *ptr)
{
return NULL;
}
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 0ef2d80..84abb30 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -75,6 +75,8 @@ static inline size_t inet_diag_msg_attrs_size(void)
#ifdef CONFIG_SOCK_CGROUP_DATA
+ nla_total_size_64bit(sizeof(u64)) /* INET_DIAG_CGROUP_ID */
#endif
+ + nla_total_size(sizeof(struct inet_diag_sockopt))
+ /* INET_DIAG_SOCKOPT */
;
}
int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index a44789d..dda61d1 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -177,17 +177,6 @@ static inline int inet6_sdif(const struct sk_buff *skb)
return 0;
}
-/* can not be used in TCP layer after tcp_v6_fill_cb */
-static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb)
-{
-#if defined(CONFIG_NET_L3_MASTER_DEV)
- if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
- skb && ipv6_l3mdev_skb(IP6CB(skb)->flags))
- return true;
-#endif
- return false;
-}
-
struct tcp6_request_sock {
struct tcp_request_sock tcp6rsk_tcp;
};
@@ -345,17 +334,6 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
return (struct raw6_sock *)sk;
}
-static inline void inet_sk_copy_descendant(struct sock *sk_to,
- const struct sock *sk_from)
-{
- int ancestor_size = sizeof(struct inet_sock);
-
- if (sk_from->sk_family == PF_INET6)
- ancestor_size += sizeof(struct ipv6_pinfo);
-
- __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size);
-}
-
#define __ipv6_only_sock(sk) (sk->sk_ipv6only)
#define ipv6_only_sock(sk) (__ipv6_only_sock(sk))
#define ipv6_sk_rxinfo(sk) ((sk)->sk_family == PF_INET6 && \
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 898cbf0..3a88b69 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -358,6 +358,12 @@ static inline int mdiobus_c45_read(struct mii_bus *bus, int prtad, int devad,
return mdiobus_read(bus, prtad, mdiobus_c45_addr(devad, regnum));
}
+static inline int mdiobus_c45_write(struct mii_bus *bus, int prtad, int devad,
+ u16 regnum, u16 val)
+{
+ return mdiobus_write(bus, prtad, mdiobus_c45_addr(devad, regnum), val);
+}
+
int mdiobus_register_device(struct mdio_device *mdiodev);
int mdiobus_unregister_device(struct mdio_device *mdiodev);
bool mdiobus_is_registered_device(struct mii_bus *bus, int addr);
diff --git a/drivers/net/phy/mdio-i2c.h b/include/linux/mdio/mdio-i2c.h
similarity index 100%
rename from drivers/net/phy/mdio-i2c.h
rename to include/linux/mdio/mdio-i2c.h
diff --git a/drivers/net/phy/mdio-xgene.h b/include/linux/mdio/mdio-xgene.h
similarity index 100%
rename from drivers/net/phy/mdio-xgene.h
rename to include/linux/mdio/mdio-xgene.h
diff --git a/include/linux/net.h b/include/linux/net.h
index d48ff11..7657c64 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -41,6 +41,8 @@ struct net;
#define SOCK_PASSCRED 3
#define SOCK_PASSSEC 4
+#define PROTO_CMSG_DATA_ONLY 0x0001
+
#ifndef ARCH_HAS_SOCKET_TYPES
/**
* enum sock_type - Socket types
@@ -135,6 +137,7 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
struct proto_ops {
int family;
+ unsigned int flags;
struct module *owner;
int (*release) (struct socket *sock);
int (*bind) (struct socket *sock,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b0e303f..7f9fcfd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -618,7 +618,7 @@ struct netdev_queue {
/* Subordinate device that the queue has been assigned to */
struct net_device *sb_dev;
#ifdef CONFIG_XDP_SOCKETS
- struct xdp_umem *umem;
+ struct xsk_buff_pool *pool;
#endif
/*
* write-mostly part
@@ -640,10 +640,14 @@ struct netdev_queue {
extern int sysctl_fb_tunnels_only_for_init_net;
extern int sysctl_devconf_inherit_init_net;
+/*
+ * sysctl_fb_tunnels_only_for_init_net == 0 : For all netns
+ * == 1 : For initns only
+ * == 2 : For none.
+ */
static inline bool net_has_fallback_tunnels(const struct net *net)
{
- return net == &init_net ||
- !IS_ENABLED(CONFIG_SYSCTL) ||
+ return (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1) ||
!sysctl_fb_tunnels_only_for_init_net;
}
@@ -751,7 +755,7 @@ struct netdev_rx_queue {
struct net_device *dev;
struct xdp_rxq_info xdp_rxq;
#ifdef CONFIG_XDP_SOCKETS
- struct xdp_umem *umem;
+ struct xsk_buff_pool *pool;
#endif
} ____cacheline_aligned_in_smp;
@@ -879,7 +883,7 @@ enum bpf_netdev_command {
/* BPF program for offload callbacks, invoked at program load time. */
BPF_OFFLOAD_MAP_ALLOC,
BPF_OFFLOAD_MAP_FREE,
- XDP_SETUP_XSK_UMEM,
+ XDP_SETUP_XSK_POOL,
};
struct bpf_prog_offload_ops;
@@ -913,9 +917,9 @@ struct netdev_bpf {
struct {
struct bpf_offloaded_map *offmap;
};
- /* XDP_SETUP_XSK_UMEM */
+ /* XDP_SETUP_XSK_POOL */
struct {
- struct xdp_umem *umem;
+ struct xsk_buff_pool *pool;
u16 queue_id;
} xsk;
};
@@ -2193,6 +2197,22 @@ int netdev_get_num_tc(struct net_device *dev)
return dev->num_tc;
}
+static inline void net_prefetch(void *p)
+{
+ prefetch(p);
+#if L1_CACHE_BYTES < 128
+ prefetch((u8 *)p + L1_CACHE_BYTES);
+#endif
+}
+
+static inline void net_prefetchw(void *p)
+{
+ prefetchw(p);
+#if L1_CACHE_BYTES < 128
+ prefetchw((u8 *)p + L1_CACHE_BYTES);
+#endif
+}
+
void netdev_unbind_sb_channel(struct net_device *dev,
struct net_device *sb_dev);
int netdev_bind_sb_channel_queue(struct net_device *dev,
diff --git a/include/linux/pcs-lynx.h b/include/linux/pcs-lynx.h
new file mode 100644
index 0000000..a6440d6
--- /dev/null
+++ b/include/linux/pcs-lynx.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2020 NXP
+ * Lynx PCS helpers
+ */
+
+#ifndef __LINUX_PCS_LYNX_H
+#define __LINUX_PCS_LYNX_H
+
+#include <linux/mdio.h>
+#include <linux/phylink.h>
+
+struct lynx_pcs {
+ struct phylink_pcs pcs;
+ struct mdio_device *mdio;
+};
+
+struct lynx_pcs *lynx_pcs_create(struct mdio_device *mdio);
+
+void lynx_pcs_destroy(struct lynx_pcs *pcs);
+
+#endif /* __LINUX_PCS_LYNX_H */
diff --git a/include/linux/mdio-xpcs.h b/include/linux/pcs/pcs-xpcs.h
similarity index 87%
rename from include/linux/mdio-xpcs.h
rename to include/linux/pcs/pcs-xpcs.h
index 9a841aa..351c1c9 100644
--- a/include/linux/mdio-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -4,8 +4,8 @@
* Synopsys DesignWare XPCS helpers
*/
-#ifndef __LINUX_MDIO_XPCS_H
-#define __LINUX_MDIO_XPCS_H
+#ifndef __LINUX_PCS_XPCS_H
+#define __LINUX_PCS_XPCS_H
#include <linux/phy.h>
#include <linux/phylink.h>
@@ -29,7 +29,7 @@ struct mdio_xpcs_ops {
int (*probe)(struct mdio_xpcs_args *xpcs, phy_interface_t interface);
};
-#if IS_ENABLED(CONFIG_MDIO_XPCS)
+#if IS_ENABLED(CONFIG_PCS_XPCS)
struct mdio_xpcs_ops *mdio_xpcs_get_ops(void);
#else
static inline struct mdio_xpcs_ops *mdio_xpcs_get_ops(void)
@@ -38,4 +38,4 @@ static inline struct mdio_xpcs_ops *mdio_xpcs_get_ops(void)
}
#endif
-#endif /* __LINUX_MDIO_XPCS_H */
+#endif /* __LINUX_PCS_XPCS_H */
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index c36fb41..d81a714 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -490,4 +490,7 @@ void phylink_mii_c22_pcs_an_restart(struct mdio_device *pcs);
void phylink_mii_c45_pcs_get_state(struct mdio_device *pcs,
struct phylink_link_state *state);
+
+void phylink_decode_usxgmii_word(struct phylink_link_state *state,
+ uint16_t lpa);
#endif
diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
index dd00fa4..8437307 100644
--- a/include/linux/ptp_classify.h
+++ b/include/linux/ptp_classify.h
@@ -36,7 +36,6 @@
#define OFF_PTP_SOURCE_UUID 22 /* PTPv1 only */
#define OFF_PTP_SEQUENCE_ID 30
-#define OFF_PTP_CONTROL 32 /* PTPv1 only */
/* Below defines should actually be removed at some point in time. */
#define IP6_HLEN 40
@@ -44,6 +43,30 @@
#define OFF_IHL 14
#define IPV4_HLEN(data) (((struct iphdr *)(data + OFF_IHL))->ihl << 2)
+struct clock_identity {
+ u8 id[8];
+} __packed;
+
+struct port_identity {
+ struct clock_identity clock_identity;
+ __be16 port_number;
+} __packed;
+
+struct ptp_header {
+ u8 tsmt; /* transportSpecific | messageType */
+ u8 ver; /* reserved | versionPTP */
+ __be16 message_length;
+ u8 domain_number;
+ u8 reserved1;
+ u8 flag_field[2];
+ __be64 correction;
+ __be32 reserved2;
+ struct port_identity source_port_identity;
+ __be16 sequence_id;
+ u8 control;
+ u8 log_message_interval;
+} __packed;
+
#if defined(CONFIG_NET_PTP_CLASSIFY)
/**
* ptp_classify_raw - classify a PTP packet
@@ -57,6 +80,46 @@
*/
unsigned int ptp_classify_raw(const struct sk_buff *skb);
+/**
+ * ptp_parse_header - Get pointer to the PTP v2 header
+ * @skb: packet buffer
+ * @type: type of the packet (see ptp_classify_raw())
+ *
+ * This function takes care of the VLAN, UDP, IPv4 and IPv6 headers. The length
+ * is checked.
+ *
+ * Note, internally skb_mac_header() is used. Make sure that the @skb is
+ * initialized accordingly.
+ *
+ * Return: Pointer to the ptp v2 header or NULL if not found
+ */
+struct ptp_header *ptp_parse_header(struct sk_buff *skb, unsigned int type);
+
+/**
+ * ptp_get_msgtype - Extract ptp message type from given header
+ * @hdr: ptp header
+ * @type: type of the packet (see ptp_classify_raw())
+ *
+ * This function returns the message type for a given ptp header. It takes care
+ * of the different ptp header versions (v1 or v2).
+ *
+ * Return: The message type
+ */
+static inline u8 ptp_get_msgtype(const struct ptp_header *hdr,
+ unsigned int type)
+{
+ u8 msgtype;
+
+ if (unlikely(type & PTP_CLASS_V1)) {
+ /* msg type is located at the control field for ptp v1 */
+ msgtype = hdr->control;
+ } else {
+ msgtype = hdr->tsmt & 0x0f;
+ }
+
+ return msgtype;
+}
+
void __init ptp_classifier_init(void);
#else
static inline void ptp_classifier_init(void)
@@ -66,5 +129,10 @@ static inline unsigned int ptp_classify_raw(struct sk_buff *skb)
{
return PTP_CLASS_NONE;
}
+static inline struct ptp_header *ptp_parse_header(struct sk_buff *skb,
+ unsigned int type)
+{
+ return NULL;
+}
#endif
#endif /* _PTP_CLASSIFY_H_ */
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index cd6a5c7..56fa558 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -21,6 +21,7 @@
#include <linux/qed/common_hsi.h>
#include <linux/qed/qed_chain.h>
#include <linux/io-64-nonatomic-lo-hi.h>
+#include <net/devlink.h>
enum dcbx_protocol_type {
DCBX_PROTOCOL_ISCSI,
@@ -779,6 +780,11 @@ enum qed_nvm_flash_cmd {
QED_NVM_FLASH_CMD_NVM_MAX,
};
+struct qed_devlink {
+ struct qed_dev *cdev;
+ struct devlink_health_reporter *fw_reporter;
+};
+
struct qed_common_cb_ops {
void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc);
void (*link_update)(void *dev, struct qed_link_output *link);
@@ -844,10 +850,9 @@ struct qed_common_ops {
struct qed_dev* (*probe)(struct pci_dev *dev,
struct qed_probe_params *params);
- void (*remove)(struct qed_dev *cdev);
+ void (*remove)(struct qed_dev *cdev);
- int (*set_power_state)(struct qed_dev *cdev,
- pci_power_t state);
+ int (*set_power_state)(struct qed_dev *cdev, pci_power_t state);
void (*set_name) (struct qed_dev *cdev, char name[]);
@@ -855,50 +860,51 @@ struct qed_common_ops {
* PF params required for the call before slowpath_start is
* documented within the qed_pf_params structure definition.
*/
- void (*update_pf_params)(struct qed_dev *cdev,
- struct qed_pf_params *params);
- int (*slowpath_start)(struct qed_dev *cdev,
- struct qed_slowpath_params *params);
+ void (*update_pf_params)(struct qed_dev *cdev,
+ struct qed_pf_params *params);
- int (*slowpath_stop)(struct qed_dev *cdev);
+ int (*slowpath_start)(struct qed_dev *cdev,
+ struct qed_slowpath_params *params);
+
+ int (*slowpath_stop)(struct qed_dev *cdev);
/* Requests to use `cnt' interrupts for fastpath.
* upon success, returns number of interrupts allocated for fastpath.
*/
- int (*set_fp_int)(struct qed_dev *cdev,
- u16 cnt);
+ int (*set_fp_int)(struct qed_dev *cdev, u16 cnt);
/* Fills `info' with pointers required for utilizing interrupts */
- int (*get_fp_int)(struct qed_dev *cdev,
- struct qed_int_info *info);
+ int (*get_fp_int)(struct qed_dev *cdev, struct qed_int_info *info);
- u32 (*sb_init)(struct qed_dev *cdev,
- struct qed_sb_info *sb_info,
- void *sb_virt_addr,
- dma_addr_t sb_phy_addr,
- u16 sb_id,
- enum qed_sb_type type);
+ u32 (*sb_init)(struct qed_dev *cdev,
+ struct qed_sb_info *sb_info,
+ void *sb_virt_addr,
+ dma_addr_t sb_phy_addr,
+ u16 sb_id,
+ enum qed_sb_type type);
- u32 (*sb_release)(struct qed_dev *cdev,
- struct qed_sb_info *sb_info,
- u16 sb_id,
- enum qed_sb_type type);
+ u32 (*sb_release)(struct qed_dev *cdev,
+ struct qed_sb_info *sb_info,
+ u16 sb_id,
+ enum qed_sb_type type);
- void (*simd_handler_config)(struct qed_dev *cdev,
- void *token,
- int index,
- void (*handler)(void *));
+ void (*simd_handler_config)(struct qed_dev *cdev,
+ void *token,
+ int index,
+ void (*handler)(void *));
- void (*simd_handler_clean)(struct qed_dev *cdev,
- int index);
- int (*dbg_grc)(struct qed_dev *cdev,
- void *buffer, u32 *num_dumped_bytes);
+ void (*simd_handler_clean)(struct qed_dev *cdev, int index);
+
+ int (*dbg_grc)(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes);
int (*dbg_grc_size)(struct qed_dev *cdev);
- int (*dbg_all_data) (struct qed_dev *cdev, void *buffer);
+ int (*dbg_all_data)(struct qed_dev *cdev, void *buffer);
- int (*dbg_all_data_size) (struct qed_dev *cdev);
+ int (*dbg_all_data_size)(struct qed_dev *cdev);
+
+ int (*report_fatal_error)(struct devlink *devlink,
+ enum qed_hw_err_type err_type);
/**
* @brief can_link_change - can the instance change the link or not
@@ -1137,6 +1143,10 @@ struct qed_common_ops {
*
*/
int (*set_grc_config)(struct qed_dev *cdev, u32 cfg_id, u32 val);
+
+ struct devlink* (*devlink_register)(struct qed_dev *cdev);
+
+ void (*devlink_unregister)(struct devlink *devlink);
};
#define MASK_FIELD(_name, _value) \
diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
index d9015aa..aaaac8a 100644
--- a/include/linux/rcupdate_trace.h
+++ b/include/linux/rcupdate_trace.h
@@ -82,7 +82,14 @@ static inline void rcu_read_unlock_trace(void)
void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
void synchronize_rcu_tasks_trace(void);
void rcu_barrier_tasks_trace(void);
-
+#else
+/*
+ * The BPF JIT forms these addresses even when it doesn't call these
+ * functions, so provide definitions that result in runtime errors.
+ */
+static inline void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func) { BUG(); }
+static inline void rcu_read_lock_trace(void) { BUG(); }
+static inline void rcu_read_unlock_trace(void) { BUG(); }
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
#endif /* __LINUX_RCUPDATE_TRACE_H */
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 1e9ed84..3119928 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -340,23 +340,6 @@ static inline void sk_psock_update_proto(struct sock *sk,
struct sk_psock *psock,
struct proto *ops)
{
- /* Initialize saved callbacks and original proto only once, since this
- * function may be called multiple times for a psock, e.g. when
- * psock->progs.msg_parser is updated.
- *
- * Since we've not installed the new proto, psock is not yet in use and
- * we can initialize it without synchronization.
- */
- if (!psock->sk_proto) {
- struct proto *orig = READ_ONCE(sk->sk_prot);
-
- psock->saved_unhash = orig->unhash;
- psock->saved_close = orig->close;
- psock->saved_write_space = sk->sk_write_space;
-
- psock->sk_proto = orig;
- }
-
/* Pairs with lockless read in sk_clone_lock() */
WRITE_ONCE(sk->sk_prot, ops);
}
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 14b62d7..56ff295 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -92,6 +92,8 @@ struct tcp_options_received {
smc_ok : 1, /* SMC seen on SYN packet */
snd_wscale : 4, /* Window scaling received from sender */
rcv_wscale : 4; /* Window scaling to send to receiver */
+ u8 saw_unknown:1, /* Received unknown option */
+ unused:7;
u8 num_sacks; /* Number of SACK blocks */
u16 user_mss; /* mss requested by user in ioctl */
u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
@@ -237,14 +239,13 @@ struct tcp_sock {
repair : 1,
frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */
u8 repair_queue;
- u8 syn_data:1, /* SYN includes data */
+ u8 save_syn:2, /* Save headers of SYN packet */
+ syn_data:1, /* SYN includes data */
syn_fastopen:1, /* SYN includes Fast Open option */
syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
syn_fastopen_ch:1, /* Active TFO re-enabling probe */
syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
- save_syn:1, /* Save headers of SYN packet */
- is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
- syn_smc:1; /* SYN includes SMC */
+ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
u32 tlp_high_seq; /* snd_nxt at the time of TLP */
u32 tcp_tx_delay; /* delay (in usec) added to TX packets */
@@ -391,6 +392,9 @@ struct tcp_sock {
#if IS_ENABLED(CONFIG_MPTCP)
bool is_mptcp;
#endif
+#if IS_ENABLED(CONFIG_SMC)
+ bool syn_smc; /* SYN includes SMC */
+#endif
#ifdef CONFIG_TCP_MD5SIG
/* TCP AF-Specific parts; only used by MD5 Signature support so far */
@@ -406,7 +410,7 @@ struct tcp_sock {
* socket. Used to retransmit SYNACKs etc.
*/
struct request_sock __rcu *fastopen_rsk;
- u32 *saved_syn;
+ struct saved_syn *saved_syn;
};
enum tsq_enum {
@@ -484,6 +488,12 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp)
tp->saved_syn = NULL;
}
+static inline u32 tcp_saved_syn_len(const struct saved_syn *saved_syn)
+{
+ return saved_syn->mac_hdrlen + saved_syn->network_hdrlen +
+ saved_syn->tcp_hdrlen;
+}
+
struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
const struct sk_buff *orig_skb);
diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index 5036c94..119f4c9 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -3,13 +3,27 @@
#ifndef _BPF_SK_STORAGE_H
#define _BPF_SK_STORAGE_H
+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/bpf.h>
+#include <net/sock.h>
+#include <uapi/linux/sock_diag.h>
+#include <uapi/linux/btf.h>
+#include <linux/bpf_local_storage.h>
+
struct sock;
void bpf_sk_storage_free(struct sock *sk);
extern const struct bpf_func_proto bpf_sk_storage_get_proto;
extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
+extern const struct bpf_func_proto sk_storage_get_btf_proto;
+extern const struct bpf_func_proto sk_storage_delete_btf_proto;
+struct bpf_local_storage_elem;
struct bpf_sk_storage_diag;
struct sk_buff;
struct nlattr;
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index d9e6b9f..c9bce9b 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -678,7 +678,10 @@ struct cfg80211_bitrate_mask {
u32 legacy;
u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN];
u16 vht_mcs[NL80211_VHT_NSS_MAX];
+ u16 he_mcs[NL80211_HE_NSS_MAX];
enum nl80211_txrate_gi gi;
+ enum nl80211_he_gi he_gi;
+ enum nl80211_he_ltf he_ltf;
} control[NUM_NL80211_BANDS];
};
diff --git a/include/net/dst.h b/include/net/dst.h
index 6ae2e62..8ea8812 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -214,7 +214,7 @@ dst_allfrag(const struct dst_entry *dst)
static inline int
dst_metric_locked(const struct dst_entry *dst, int metric)
{
- return dst_metric(dst, RTAX_LOCK) & (1<<metric);
+ return dst_metric(dst, RTAX_LOCK) & (1 << metric);
}
static inline void dst_hold(struct dst_entry *dst)
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index aa8893c..c738abe 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -86,6 +86,8 @@ struct inet_connection_sock {
struct timer_list icsk_retransmit_timer;
struct timer_list icsk_delack_timer;
__u32 icsk_rto;
+ __u32 icsk_rto_min;
+ __u32 icsk_delack_max;
__u32 icsk_pmtu_cookie;
const struct tcp_congestion_ops *icsk_ca_ops;
const struct inet_connection_sock_af_ops *icsk_af_ops;
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index a3702d1..89163ef 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -296,13 +296,6 @@ static inline void __inet_sk_copy_descendant(struct sock *sk_to,
memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1,
sk_from->sk_prot->obj_size - ancestor_size);
}
-#if !(IS_ENABLED(CONFIG_IPV6))
-static inline void inet_sk_copy_descendant(struct sock *sk_to,
- const struct sock *sk_from)
-{
- __inet_sk_copy_descendant(sk_to, sk_from, sizeof(struct inet_sock));
-}
-#endif
int inet_sk_rebuild_header(struct sock *sk);
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index d7a7f7c..8fce558 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -63,6 +63,9 @@ struct ipv6_stub {
int encap_type);
#endif
struct neigh_table *nd_tbl;
+
+ int (*ipv6_fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct net *, struct sock *, struct sk_buff *));
};
extern const struct ipv6_stub *ipv6_stub __read_mostly;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 66e2bfd..ec148b3 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3736,7 +3736,7 @@ enum ieee80211_reconfig_type {
* decremented, and when they reach 1 the driver must call
* ieee80211_csa_finish(). Drivers which use ieee80211_beacon_get()
* get the csa counter decremented by mac80211, but must check if it is
- * 1 using ieee80211_csa_is_complete() after the beacon has been
+ * 1 using ieee80211_beacon_counter_is_complete() after the beacon has been
* transmitted and then call ieee80211_csa_finish().
* If the CSA count starts as zero or 1, this function will not be called,
* since there won't be any time to beacon before the switch anyway.
@@ -4763,21 +4763,21 @@ void ieee80211_tx_status_8023(struct ieee80211_hw *hw,
*/
void ieee80211_report_low_ack(struct ieee80211_sta *sta, u32 num_packets);
-#define IEEE80211_MAX_CSA_COUNTERS_NUM 2
+#define IEEE80211_MAX_CNTDWN_COUNTERS_NUM 2
/**
* struct ieee80211_mutable_offsets - mutable beacon offsets
* @tim_offset: position of TIM element
* @tim_length: size of TIM element
- * @csa_counter_offs: array of IEEE80211_MAX_CSA_COUNTERS_NUM offsets
- * to CSA counters. This array can contain zero values which
+ * @cntdwn_counter_offs: array of IEEE80211_MAX_CNTDWN_COUNTERS_NUM offsets
+ * to countdown counters. This array can contain zero values which
* should be ignored.
*/
struct ieee80211_mutable_offsets {
u16 tim_offset;
u16 tim_length;
- u16 csa_counter_offs[IEEE80211_MAX_CSA_COUNTERS_NUM];
+ u16 cntdwn_counter_offs[IEEE80211_MAX_CNTDWN_COUNTERS_NUM];
};
/**
@@ -4846,31 +4846,31 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
}
/**
- * ieee80211_csa_update_counter - request mac80211 to decrement the csa counter
+ * ieee80211_beacon_update_cntdwn - request mac80211 to decrement the beacon countdown
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
*
- * The csa counter should be updated after each beacon transmission.
+ * The beacon counter should be updated after each beacon transmission.
* This function is called implicitly when
* ieee80211_beacon_get/ieee80211_beacon_get_tim are called, however if the
* beacon frames are generated by the device, the driver should call this
- * function after each beacon transmission to sync mac80211's csa counters.
+ * function after each beacon transmission to sync mac80211's beacon countdown.
*
- * Return: new csa counter value
+ * Return: new countdown value
*/
-u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif);
+u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif);
/**
- * ieee80211_csa_set_counter - request mac80211 to set csa counter
+ * ieee80211_beacon_set_cntdwn - request mac80211 to set beacon countdown
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
* @counter: the new value for the counter
*
- * The csa counter can be changed by the device, this API should be
+ * The beacon countdown can be changed by the device, this API should be
* used by the device driver to update csa counter in mac80211.
*
- * It should never be used together with ieee80211_csa_update_counter(),
+ * It should never be used together with ieee80211_beacon_update_cntdwn(),
* as it will cause a race condition around the counter value.
*/
-void ieee80211_csa_set_counter(struct ieee80211_vif *vif, u8 counter);
+void ieee80211_beacon_set_cntdwn(struct ieee80211_vif *vif, u8 counter);
/**
* ieee80211_csa_finish - notify mac80211 about channel switch
@@ -4883,13 +4883,12 @@ void ieee80211_csa_set_counter(struct ieee80211_vif *vif, u8 counter);
void ieee80211_csa_finish(struct ieee80211_vif *vif);
/**
- * ieee80211_csa_is_complete - find out if counters reached 1
+ * ieee80211_beacon_cntdwn_is_complete - find out if countdown reached 1
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
*
- * This function returns whether the channel switch counters reached zero.
+ * This function returns whether the countdown reached zero.
*/
-bool ieee80211_csa_is_complete(struct ieee80211_vif *vif);
-
+bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif);
/**
* ieee80211_proberesp_get - retrieve a Probe Response template
diff --git a/include/net/netlink.h b/include/net/netlink.h
index c0411f1..fdd317f 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -181,8 +181,6 @@ enum {
NLA_S64,
NLA_BITFIELD32,
NLA_REJECT,
- NLA_EXACT_LEN,
- NLA_MIN_LEN,
__NLA_TYPE_MAX,
};
@@ -199,11 +197,11 @@ struct netlink_range_validation_signed {
enum nla_policy_validation {
NLA_VALIDATE_NONE,
NLA_VALIDATE_RANGE,
+ NLA_VALIDATE_RANGE_WARN_TOO_LONG,
NLA_VALIDATE_MIN,
NLA_VALIDATE_MAX,
NLA_VALIDATE_RANGE_PTR,
NLA_VALIDATE_FUNCTION,
- NLA_VALIDATE_WARN_TOO_LONG,
};
/**
@@ -222,7 +220,7 @@ enum nla_policy_validation {
* NLA_NUL_STRING Maximum length of string (excluding NUL)
* NLA_FLAG Unused
* NLA_BINARY Maximum length of attribute payload
- * NLA_MIN_LEN Minimum length of attribute payload
+ * (but see also below with the validation type)
* NLA_NESTED,
* NLA_NESTED_ARRAY Length verification is done by checking len of
* nested header (or empty); len field is used if
@@ -237,11 +235,6 @@ enum nla_policy_validation {
* just like "All other"
* NLA_BITFIELD32 Unused
* NLA_REJECT Unused
- * NLA_EXACT_LEN Attribute should have exactly this length, otherwise
- * it is rejected or warned about, the latter happening
- * if and only if the `validation_type' is set to
- * NLA_VALIDATE_WARN_TOO_LONG.
- * NLA_MIN_LEN Minimum length of attribute payload
* All other Minimum length of attribute payload
*
* Meaning of validation union:
@@ -296,6 +289,11 @@ enum nla_policy_validation {
* pointer to a struct netlink_range_validation_signed
* that indicates the min/max values.
* Use NLA_POLICY_FULL_RANGE_SIGNED().
+ *
+ * NLA_BINARY If the validation type is like the ones for integers
+ * above, then the min/max length (not value like for
+ * integers) of the attribute is enforced.
+ *
* All other Unused - but note that it's a union
*
* Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN:
@@ -309,7 +307,7 @@ enum nla_policy_validation {
* static const struct nla_policy my_policy[ATTR_MAX+1] = {
* [ATTR_FOO] = { .type = NLA_U16 },
* [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ },
- * [ATTR_BAZ] = { .type = NLA_EXACT_LEN, .len = sizeof(struct mystruct) },
+ * [ATTR_BAZ] = NLA_POLICY_EXACT_LEN(sizeof(struct mystruct)),
* [ATTR_GOO] = NLA_POLICY_BITFIELD32(myvalidflags),
* };
*/
@@ -335,9 +333,10 @@ struct nla_policy {
* nesting validation starts here.
*
* Additionally, it means that NLA_UNSPEC is actually NLA_REJECT
- * for any types >= this, so need to use NLA_MIN_LEN to get the
- * previous pure { .len = xyz } behaviour. The advantage of this
- * is that types not specified in the policy will be rejected.
+ * for any types >= this, so need to use NLA_POLICY_MIN_LEN() to
+ * get the previous pure { .len = xyz } behaviour. The advantage
+ * of this is that types not specified in the policy will be
+ * rejected.
*
* For completely new families it should be set to 1 so that the
* validation is enforced for all attributes. For existing ones
@@ -349,12 +348,6 @@ struct nla_policy {
};
};
-#define NLA_POLICY_EXACT_LEN(_len) { .type = NLA_EXACT_LEN, .len = _len }
-#define NLA_POLICY_EXACT_LEN_WARN(_len) \
- { .type = NLA_EXACT_LEN, .len = _len, \
- .validation_type = NLA_VALIDATE_WARN_TOO_LONG, }
-#define NLA_POLICY_MIN_LEN(_len) { .type = NLA_MIN_LEN, .len = _len }
-
#define NLA_POLICY_ETH_ADDR NLA_POLICY_EXACT_LEN(ETH_ALEN)
#define NLA_POLICY_ETH_ADDR_COMPAT NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN)
@@ -370,19 +363,21 @@ struct nla_policy {
{ .type = NLA_BITFIELD32, .bitfield32_valid = valid }
#define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition))
-#define NLA_ENSURE_UINT_TYPE(tp) \
+#define NLA_ENSURE_UINT_OR_BINARY_TYPE(tp) \
(__NLA_ENSURE(tp == NLA_U8 || tp == NLA_U16 || \
tp == NLA_U32 || tp == NLA_U64 || \
- tp == NLA_MSECS) + tp)
+ tp == NLA_MSECS || \
+ tp == NLA_BINARY) + tp)
#define NLA_ENSURE_SINT_TYPE(tp) \
(__NLA_ENSURE(tp == NLA_S8 || tp == NLA_S16 || \
tp == NLA_S32 || tp == NLA_S64) + tp)
-#define NLA_ENSURE_INT_TYPE(tp) \
+#define NLA_ENSURE_INT_OR_BINARY_TYPE(tp) \
(__NLA_ENSURE(tp == NLA_S8 || tp == NLA_U8 || \
tp == NLA_S16 || tp == NLA_U16 || \
tp == NLA_S32 || tp == NLA_U32 || \
tp == NLA_S64 || tp == NLA_U64 || \
- tp == NLA_MSECS) + tp)
+ tp == NLA_MSECS || \
+ tp == NLA_BINARY) + tp)
#define NLA_ENSURE_NO_VALIDATION_PTR(tp) \
(__NLA_ENSURE(tp != NLA_BITFIELD32 && \
tp != NLA_REJECT && \
@@ -390,14 +385,14 @@ struct nla_policy {
tp != NLA_NESTED_ARRAY) + tp)
#define NLA_POLICY_RANGE(tp, _min, _max) { \
- .type = NLA_ENSURE_INT_TYPE(tp), \
+ .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \
.validation_type = NLA_VALIDATE_RANGE, \
.min = _min, \
.max = _max \
}
#define NLA_POLICY_FULL_RANGE(tp, _range) { \
- .type = NLA_ENSURE_UINT_TYPE(tp), \
+ .type = NLA_ENSURE_UINT_OR_BINARY_TYPE(tp), \
.validation_type = NLA_VALIDATE_RANGE_PTR, \
.range = _range, \
}
@@ -409,13 +404,13 @@ struct nla_policy {
}
#define NLA_POLICY_MIN(tp, _min) { \
- .type = NLA_ENSURE_INT_TYPE(tp), \
+ .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \
.validation_type = NLA_VALIDATE_MIN, \
.min = _min, \
}
#define NLA_POLICY_MAX(tp, _max) { \
- .type = NLA_ENSURE_INT_TYPE(tp), \
+ .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \
.validation_type = NLA_VALIDATE_MAX, \
.max = _max, \
}
@@ -427,6 +422,15 @@ struct nla_policy {
.len = __VA_ARGS__ + 0, \
}
+#define NLA_POLICY_EXACT_LEN(_len) NLA_POLICY_RANGE(NLA_BINARY, _len, _len)
+#define NLA_POLICY_EXACT_LEN_WARN(_len) { \
+ .type = NLA_BINARY, \
+ .validation_type = NLA_VALIDATE_RANGE_WARN_TOO_LONG, \
+ .min = _len, \
+ .max = _len \
+}
+#define NLA_POLICY_MIN_LEN(_len) NLA_POLICY_MIN(NLA_BINARY, _len)
+
/**
* struct nl_info - netlink source information
* @nlh: Netlink message header of original request
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index b2eb8b4..29e41ff 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -41,6 +41,13 @@ struct request_sock_ops {
int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req);
+struct saved_syn {
+ u32 mac_hdrlen;
+ u32 network_hdrlen;
+ u32 tcp_hdrlen;
+ u8 data[];
+};
+
/* struct request_sock - mini sock to represent a connection request
*/
struct request_sock {
@@ -60,7 +67,7 @@ struct request_sock {
struct timer_list rsk_timer;
const struct request_sock_ops *rsk_ops;
struct sock *sk;
- u32 *saved_syn;
+ struct saved_syn *saved_syn;
u32 secid;
u32 peer_secid;
};
diff --git a/include/net/sock.h b/include/net/sock.h
index 064637d..7dd3051 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -246,7 +246,7 @@ struct sock_common {
/* public: */
};
-struct bpf_sk_storage;
+struct bpf_local_storage;
/**
* struct sock - network layer representation of sockets
@@ -517,7 +517,7 @@ struct sock {
void (*sk_destruct)(struct sock *sk);
struct sock_reuseport __rcu *sk_reuseport_cb;
#ifdef CONFIG_BPF_SYSCALL
- struct bpf_sk_storage __rcu *sk_bpf_storage;
+ struct bpf_local_storage __rcu *sk_bpf_storage;
#endif
struct rcu_head sk_rcu;
};
@@ -1478,7 +1478,7 @@ sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
{
if (!sk_has_account(sk))
return true;
- return size<= sk->sk_forward_alloc ||
+ return size <= sk->sk_forward_alloc ||
__sk_mem_schedule(sk, size, SK_MEM_RECV) ||
skb_pfmemalloc(skb);
}
diff --git a/include/net/tcp.h b/include/net/tcp.h
index eab6c75..e85d564 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -394,7 +394,7 @@ void tcp_metrics_init(void);
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
void tcp_close(struct sock *sk, long timeout);
void tcp_init_sock(struct sock *sk);
-void tcp_init_transfer(struct sock *sk, int bpf_op);
+void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb);
__poll_t tcp_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait);
int tcp_getsockopt(struct sock *sk, int level, int optname,
@@ -455,7 +455,8 @@ enum tcp_synack_type {
struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
- enum tcp_synack_type synack_type);
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb);
int tcp_disconnect(struct sock *sk, int flags);
void tcp_finish_connect(struct sock *sk, struct sk_buff *skb);
@@ -699,7 +700,7 @@ static inline void tcp_fast_path_check(struct sock *sk)
static inline u32 tcp_rto_min(struct sock *sk)
{
const struct dst_entry *dst = __sk_dst_get(sk);
- u32 rto_min = TCP_RTO_MIN;
+ u32 rto_min = inet_csk(sk)->icsk_rto_min;
if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
@@ -941,16 +942,6 @@ INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb));
#endif
-static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
-{
-#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
- skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
- return true;
-#endif
- return false;
-}
-
/* TCP_SKB_CB reference means this can not be used from early demux */
static inline int tcp_v4_sdif(struct sk_buff *skb)
{
@@ -2035,7 +2026,8 @@ struct tcp_request_sock_ops {
int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl, struct request_sock *req,
struct tcp_fastopen_cookie *foc,
- enum tcp_synack_type synack_type);
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb);
};
extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
@@ -2233,6 +2225,55 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
struct msghdr *msg, int len, int flags);
#endif /* CONFIG_NET_SOCK_MSG */
+#ifdef CONFIG_CGROUP_BPF
+/* Copy the listen sk's HDR_OPT_CB flags to its child.
+ *
+ * During 3-Way-HandShake, the synack is usually sent from
+ * the listen sk with the HDR_OPT_CB flags set so that
+ * bpf-prog will be called to write the BPF hdr option.
+ *
+ * In fastopen, the child sk is used to send synack instead
+ * of the listen sk. Thus, inheriting the HDR_OPT_CB flags
+ * from the listen sk gives the bpf-prog a chance to write
+ * BPF hdr option in the synack pkt during fastopen.
+ *
+ * Both fastopen and non-fastopen child will inherit the
+ * HDR_OPT_CB flags to keep the bpf-prog having a consistent
+ * behavior when deciding to clear this cb flags (or not)
+ * during the PASSIVE_ESTABLISHED_CB.
+ *
+ * In the future, other cb flags could be inherited here also.
+ */
+static inline void bpf_skops_init_child(const struct sock *sk,
+ struct sock *child)
+{
+ tcp_sk(child)->bpf_sock_ops_cb_flags =
+ tcp_sk(sk)->bpf_sock_ops_cb_flags &
+ (BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG |
+ BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
+}
+
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+ struct sk_buff *skb,
+ unsigned int end_offset)
+{
+ skops->skb = skb;
+ skops->skb_data_end = skb->data + end_offset;
+}
+#else
+static inline void bpf_skops_init_child(const struct sock *sk,
+ struct sock *child)
+{
+}
+
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+ struct sk_buff *skb,
+ unsigned int end_offset)
+{
+}
+#endif
+
/* Call BPF_SOCK_OPS program that returns an int. If the return value
* is < 0, then the BPF op failed (for example if the loaded BPF
* program does not support the chosen operation or there is no BPF
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index c9d87cc..1a9559c 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -18,25 +18,19 @@ struct xsk_queue;
struct xdp_buff;
struct xdp_umem {
- struct xsk_queue *fq;
- struct xsk_queue *cq;
- struct xsk_buff_pool *pool;
+ void *addrs;
u64 size;
u32 headroom;
u32 chunk_size;
+ u32 chunks;
+ u32 npgs;
struct user_struct *user;
refcount_t users;
- struct work_struct work;
- struct page **pgs;
- u32 npgs;
- u16 queue_id;
- u8 need_wakeup;
u8 flags;
- int id;
- struct net_device *dev;
bool zc;
- spinlock_t xsk_tx_list_lock;
- struct list_head xsk_tx_list;
+ struct page **pgs;
+ int id;
+ struct list_head xsk_dma_list;
};
struct xsk_map {
@@ -48,10 +42,11 @@ struct xsk_map {
struct xdp_sock {
/* struct sock must be the first member of struct xdp_sock */
struct sock sk;
- struct xsk_queue *rx;
+ struct xsk_queue *rx ____cacheline_aligned_in_smp;
struct net_device *dev;
struct xdp_umem *umem;
struct list_head flush_node;
+ struct xsk_buff_pool *pool;
u16 queue_id;
bool zc;
enum {
@@ -59,10 +54,9 @@ struct xdp_sock {
XSK_BOUND,
XSK_UNBOUND,
} state;
- /* Protects multiple processes in the control path */
- struct mutex mutex;
+
struct xsk_queue *tx ____cacheline_aligned_in_smp;
- struct list_head list;
+ struct list_head tx_list;
/* Mutual exclusion of NAPI TX thread and sendmsg error paths
* in the SKB destructor callback.
*/
@@ -77,6 +71,10 @@ struct xdp_sock {
struct list_head map_list;
/* Protects map_list */
spinlock_t map_list_lock;
+ /* Protects multiple processes in the control path */
+ struct mutex mutex;
+ struct xsk_queue *fq_tmp; /* Only as tmp storage before bind */
+ struct xsk_queue *cq_tmp; /* Only as tmp storage before bind */
};
#ifdef CONFIG_XDP_SOCKETS
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index ccf848f..5b1ee8a 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -11,47 +11,50 @@
#ifdef CONFIG_XDP_SOCKETS
-void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
-bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc);
-void xsk_umem_consume_tx_done(struct xdp_umem *umem);
-struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id);
-void xsk_set_rx_need_wakeup(struct xdp_umem *umem);
-void xsk_set_tx_need_wakeup(struct xdp_umem *umem);
-void xsk_clear_rx_need_wakeup(struct xdp_umem *umem);
-void xsk_clear_tx_need_wakeup(struct xdp_umem *umem);
-bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem);
+void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries);
+bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc);
+void xsk_tx_release(struct xsk_buff_pool *pool);
+struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev,
+ u16 queue_id);
+void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool);
+void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool);
+void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool);
+void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool);
+bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool);
-static inline u32 xsk_umem_get_headroom(struct xdp_umem *umem)
+static inline u32 xsk_pool_get_headroom(struct xsk_buff_pool *pool)
{
- return XDP_PACKET_HEADROOM + umem->headroom;
+ return XDP_PACKET_HEADROOM + pool->headroom;
}
-static inline u32 xsk_umem_get_chunk_size(struct xdp_umem *umem)
+static inline u32 xsk_pool_get_chunk_size(struct xsk_buff_pool *pool)
{
- return umem->chunk_size;
+ return pool->chunk_size;
}
-static inline u32 xsk_umem_get_rx_frame_size(struct xdp_umem *umem)
+static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
{
- return xsk_umem_get_chunk_size(umem) - xsk_umem_get_headroom(umem);
+ return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool);
}
-static inline void xsk_buff_set_rxq_info(struct xdp_umem *umem,
+static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
struct xdp_rxq_info *rxq)
{
- xp_set_rxq_info(umem->pool, rxq);
+ xp_set_rxq_info(pool, rxq);
}
-static inline void xsk_buff_dma_unmap(struct xdp_umem *umem,
+static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool,
unsigned long attrs)
{
- xp_dma_unmap(umem->pool, attrs);
+ xp_dma_unmap(pool, attrs);
}
-static inline int xsk_buff_dma_map(struct xdp_umem *umem, struct device *dev,
- unsigned long attrs)
+static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
+ struct device *dev, unsigned long attrs)
{
- return xp_dma_map(umem->pool, dev, attrs, umem->pgs, umem->npgs);
+ struct xdp_umem *umem = pool->umem;
+
+ return xp_dma_map(pool, dev, attrs, umem->pgs, umem->npgs);
}
static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
@@ -68,14 +71,14 @@ static inline dma_addr_t xsk_buff_xdp_get_frame_dma(struct xdp_buff *xdp)
return xp_get_frame_dma(xskb);
}
-static inline struct xdp_buff *xsk_buff_alloc(struct xdp_umem *umem)
+static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
{
- return xp_alloc(umem->pool);
+ return xp_alloc(pool);
}
-static inline bool xsk_buff_can_alloc(struct xdp_umem *umem, u32 count)
+static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count)
{
- return xp_can_alloc(umem->pool, count);
+ return xp_can_alloc(pool, count);
}
static inline void xsk_buff_free(struct xdp_buff *xdp)
@@ -85,100 +88,104 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
xp_free(xskb);
}
-static inline dma_addr_t xsk_buff_raw_get_dma(struct xdp_umem *umem, u64 addr)
+static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool,
+ u64 addr)
{
- return xp_raw_get_dma(umem->pool, addr);
+ return xp_raw_get_dma(pool, addr);
}
-static inline void *xsk_buff_raw_get_data(struct xdp_umem *umem, u64 addr)
+static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
{
- return xp_raw_get_data(umem->pool, addr);
+ return xp_raw_get_data(pool, addr);
}
-static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
+static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
{
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+ if (!pool->dma_need_sync)
+ return;
+
xp_dma_sync_for_cpu(xskb);
}
-static inline void xsk_buff_raw_dma_sync_for_device(struct xdp_umem *umem,
+static inline void xsk_buff_raw_dma_sync_for_device(struct xsk_buff_pool *pool,
dma_addr_t dma,
size_t size)
{
- xp_dma_sync_for_device(umem->pool, dma, size);
+ xp_dma_sync_for_device(pool, dma, size);
}
#else
-static inline void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
+static inline void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries)
{
}
-static inline bool xsk_umem_consume_tx(struct xdp_umem *umem,
- struct xdp_desc *desc)
+static inline bool xsk_tx_peek_desc(struct xsk_buff_pool *pool,
+ struct xdp_desc *desc)
{
return false;
}
-static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem)
+static inline void xsk_tx_release(struct xsk_buff_pool *pool)
{
}
-static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
- u16 queue_id)
+static inline struct xsk_buff_pool *
+xsk_get_pool_from_qid(struct net_device *dev, u16 queue_id)
{
return NULL;
}
-static inline void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
+static inline void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
}
-static inline void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
+static inline void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool)
{
}
-static inline void xsk_clear_rx_need_wakeup(struct xdp_umem *umem)
+static inline void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool)
{
}
-static inline void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
+static inline void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool)
{
}
-static inline bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
+static inline bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool)
{
return false;
}
-static inline u32 xsk_umem_get_headroom(struct xdp_umem *umem)
+static inline u32 xsk_pool_get_headroom(struct xsk_buff_pool *pool)
{
return 0;
}
-static inline u32 xsk_umem_get_chunk_size(struct xdp_umem *umem)
+static inline u32 xsk_pool_get_chunk_size(struct xsk_buff_pool *pool)
{
return 0;
}
-static inline u32 xsk_umem_get_rx_frame_size(struct xdp_umem *umem)
+static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
{
return 0;
}
-static inline void xsk_buff_set_rxq_info(struct xdp_umem *umem,
+static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
struct xdp_rxq_info *rxq)
{
}
-static inline void xsk_buff_dma_unmap(struct xdp_umem *umem,
+static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool,
unsigned long attrs)
{
}
-static inline int xsk_buff_dma_map(struct xdp_umem *umem, struct device *dev,
- unsigned long attrs)
+static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
+ struct device *dev, unsigned long attrs)
{
return 0;
}
@@ -193,12 +200,12 @@ static inline dma_addr_t xsk_buff_xdp_get_frame_dma(struct xdp_buff *xdp)
return 0;
}
-static inline struct xdp_buff *xsk_buff_alloc(struct xdp_umem *umem)
+static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
{
return NULL;
}
-static inline bool xsk_buff_can_alloc(struct xdp_umem *umem, u32 count)
+static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count)
{
return false;
}
@@ -207,21 +214,22 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
{
}
-static inline dma_addr_t xsk_buff_raw_get_dma(struct xdp_umem *umem, u64 addr)
+static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool,
+ u64 addr)
{
return 0;
}
-static inline void *xsk_buff_raw_get_data(struct xdp_umem *umem, u64 addr)
+static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
{
return NULL;
}
-static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
+static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
{
}
-static inline void xsk_buff_raw_dma_sync_for_device(struct xdp_umem *umem,
+static inline void xsk_buff_raw_dma_sync_for_device(struct xsk_buff_pool *pool,
dma_addr_t dma,
size_t size)
{
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 6842990..0140d08 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -13,6 +13,8 @@ struct xsk_buff_pool;
struct xdp_rxq_info;
struct xsk_queue;
struct xdp_desc;
+struct xdp_umem;
+struct xdp_sock;
struct device;
struct page;
@@ -26,34 +28,68 @@ struct xdp_buff_xsk {
struct list_head free_list_node;
};
+struct xsk_dma_map {
+ dma_addr_t *dma_pages;
+ struct device *dev;
+ struct net_device *netdev;
+ refcount_t users;
+ struct list_head list; /* Protected by the RTNL_LOCK */
+ u32 dma_pages_cnt;
+ bool dma_need_sync;
+};
+
struct xsk_buff_pool {
- struct xsk_queue *fq;
+ /* Members only used in the control path first. */
+ struct device *dev;
+ struct net_device *netdev;
+ struct list_head xsk_tx_list;
+ /* Protects modifications to the xsk_tx_list */
+ spinlock_t xsk_tx_list_lock;
+ refcount_t users;
+ struct xdp_umem *umem;
+ struct work_struct work;
struct list_head free_list;
+ u32 heads_cnt;
+ u16 queue_id;
+
+ /* Data path members as close to free_heads at the end as possible. */
+ struct xsk_queue *fq ____cacheline_aligned_in_smp;
+ struct xsk_queue *cq;
+ /* For performance reasons, each buff pool has its own array of dma_pages
+ * even when they are identical.
+ */
dma_addr_t *dma_pages;
struct xdp_buff_xsk *heads;
u64 chunk_mask;
u64 addrs_cnt;
u32 free_list_cnt;
u32 dma_pages_cnt;
- u32 heads_cnt;
u32 free_heads_cnt;
u32 headroom;
u32 chunk_size;
u32 frame_len;
+ u8 cached_need_wakeup;
+ bool uses_need_wakeup;
bool dma_need_sync;
bool unaligned;
void *addrs;
- struct device *dev;
struct xdp_buff_xsk *free_heads[];
};
/* AF_XDP core. */
-struct xsk_buff_pool *xp_create(struct page **pages, u32 nr_pages, u32 chunks,
- u32 chunk_size, u32 headroom, u64 size,
- bool unaligned);
-void xp_set_fq(struct xsk_buff_pool *pool, struct xsk_queue *fq);
+struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
+ struct xdp_umem *umem);
+int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev,
+ u16 queue_id, u16 flags);
+int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
+ struct net_device *dev, u16 queue_id);
void xp_destroy(struct xsk_buff_pool *pool);
void xp_release(struct xdp_buff_xsk *xskb);
+void xp_get_pool(struct xsk_buff_pool *pool);
+void xp_put_pool(struct xsk_buff_pool *pool);
+void xp_clear_dev(struct xsk_buff_pool *pool);
+void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs);
+void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs);
/* AF_XDP, and XDP core. */
void xp_free(struct xdp_buff_xsk *xskb);
@@ -80,9 +116,6 @@ static inline dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb)
void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb);
static inline void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb)
{
- if (!xskb->pool->dma_need_sync)
- return;
-
xp_dma_sync_for_cpu_slow(xskb);
}
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b6238b2..8dda138 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -155,6 +155,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_DEVMAP_HASH,
BPF_MAP_TYPE_STRUCT_OPS,
BPF_MAP_TYPE_RINGBUF,
+ BPF_MAP_TYPE_INODE_STORAGE,
};
/* Note that tracing related programs such as
@@ -345,6 +346,14 @@ enum bpf_link_type {
/* The verifier internal test flag. Behavior is undefined */
#define BPF_F_TEST_STATE_FREQ (1U << 3)
+/* If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will
+ * restrict map and helper usage for such programs. Sleepable BPF programs can
+ * only be attached to hooks where kernel execution context allows sleeping.
+ * Such programs are allowed to use helpers that may sleep like
+ * bpf_copy_from_user().
+ */
+#define BPF_F_SLEEPABLE (1U << 4)
+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* two extensions:
*
@@ -2807,7 +2816,7 @@ union bpf_attr {
*
* **-ERANGE** if resulting value was out of range.
*
- * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags)
+ * void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags)
* Description
* Get a bpf-local-storage from a *sk*.
*
@@ -2823,6 +2832,9 @@ union bpf_attr {
* "type". The bpf-local-storage "type" (i.e. the *map*) is
* searched against all bpf-local-storages residing at *sk*.
*
+ * *sk* is a kernel **struct sock** pointer for LSM program.
+ * *sk* is a **struct bpf_sock** pointer for other program types.
+ *
* An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be
* used such that a new bpf-local-storage will be
* created if one does not exist. *value* can be used
@@ -2835,7 +2847,7 @@ union bpf_attr {
* **NULL** if not found or there was an error in adding
* a new bpf-local-storage.
*
- * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
+ * long bpf_sk_storage_delete(struct bpf_map *map, void *sk)
* Description
* Delete a bpf-local-storage from a *sk*.
* Return
@@ -3395,6 +3407,175 @@ union bpf_attr {
* A non-negative value equal to or less than *size* on success,
* or a negative error in case of failure.
*
+ * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags)
+ * Description
+ * Load header option. Support reading a particular TCP header
+ * option for bpf program (BPF_PROG_TYPE_SOCK_OPS).
+ *
+ * If *flags* is 0, it will search the option from the
+ * sock_ops->skb_data. The comment in "struct bpf_sock_ops"
+ * has details on what skb_data contains under different
+ * sock_ops->op.
+ *
+ * The first byte of the *searchby_res* specifies the
+ * kind that it wants to search.
+ *
+ * If the searching kind is an experimental kind
+ * (i.e. 253 or 254 according to RFC6994). It also
+ * needs to specify the "magic" which is either
+ * 2 bytes or 4 bytes. It then also needs to
+ * specify the size of the magic by using
+ * the 2nd byte which is "kind-length" of a TCP
+ * header option and the "kind-length" also
+ * includes the first 2 bytes "kind" and "kind-length"
+ * itself as a normal TCP header option also does.
+ *
+ * For example, to search experimental kind 254 with
+ * 2 byte magic 0xeB9F, the searchby_res should be
+ * [ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ].
+ *
+ * To search for the standard window scale option (3),
+ * the searchby_res should be [ 3, 0, 0, .... 0 ].
+ * Note, kind-length must be 0 for regular option.
+ *
+ * Searching for No-Op (0) and End-of-Option-List (1) are
+ * not supported.
+ *
+ * *len* must be at least 2 bytes which is the minimal size
+ * of a header option.
+ *
+ * Supported flags:
+ * * **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the
+ * saved_syn packet or the just-received syn packet.
+ *
+ * Return
+ * >0 when found, the header option is copied to *searchby_res*.
+ * The return value is the total length copied.
+ *
+ * **-EINVAL** If param is invalid
+ *
+ * **-ENOMSG** The option is not found
+ *
+ * **-ENOENT** No syn packet available when
+ * **BPF_LOAD_HDR_OPT_TCP_SYN** is used
+ *
+ * **-ENOSPC** Not enough space. Only *len* number of
+ * bytes are copied.
+ *
+ * **-EFAULT** Cannot parse the header options in the packet
+ *
+ * **-EPERM** This helper cannot be used under the
+ * current sock_ops->op.
+ *
+ * long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags)
+ * Description
+ * Store header option. The data will be copied
+ * from buffer *from* with length *len* to the TCP header.
+ *
+ * The buffer *from* should have the whole option that
+ * includes the kind, kind-length, and the actual
+ * option data. The *len* must be at least kind-length
+ * long. The kind-length does not have to be 4 byte
+ * aligned. The kernel will take care of the padding
+ * and setting the 4 bytes aligned value to th->doff.
+ *
+ * This helper will check for duplicated option
+ * by searching the same option in the outgoing skb.
+ *
+ * This helper can only be called during
+ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ *
+ * Return
+ * 0 on success, or negative error in case of failure:
+ *
+ * **-EINVAL** If param is invalid
+ *
+ * **-ENOSPC** Not enough space in the header.
+ * Nothing has been written
+ *
+ * **-EEXIST** The option has already existed
+ *
+ * **-EFAULT** Cannot parse the existing header options
+ *
+ * **-EPERM** This helper cannot be used under the
+ * current sock_ops->op.
+ *
+ * long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags)
+ * Description
+ * Reserve *len* bytes for the bpf header option. The
+ * space will be used by bpf_store_hdr_opt() later in
+ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ *
+ * If bpf_reserve_hdr_opt() is called multiple times,
+ * the total number of bytes will be reserved.
+ *
+ * This helper can only be called during
+ * BPF_SOCK_OPS_HDR_OPT_LEN_CB.
+ *
+ * Return
+ * 0 on success, or negative error in case of failure:
+ *
+ * **-EINVAL** if param is invalid
+ *
+ * **-ENOSPC** Not enough space in the header.
+ *
+ * **-EPERM** This helper cannot be used under the
+ * current sock_ops->op.
+ *
+ * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags)
+ * Description
+ * Get a bpf_local_storage from an *inode*.
+ *
+ * Logically, it could be thought of as getting the value from
+ * a *map* with *inode* as the **key**. From this
+ * perspective, the usage is not much different from
+ * **bpf_map_lookup_elem**\ (*map*, **&**\ *inode*) except this
+ * helper enforces the key must be an inode and the map must also
+ * be a **BPF_MAP_TYPE_INODE_STORAGE**.
+ *
+ * Underneath, the value is stored locally at *inode* instead of
+ * the *map*. The *map* is used as the bpf-local-storage
+ * "type". The bpf-local-storage "type" (i.e. the *map*) is
+ * searched against all bpf_local_storage residing at *inode*.
+ *
+ * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ * used such that a new bpf_local_storage will be
+ * created if one does not exist. *value* can be used
+ * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ * the initial value of a bpf_local_storage. If *value* is
+ * **NULL**, the new bpf_local_storage will be zero initialized.
+ * Return
+ * A bpf_local_storage pointer is returned on success.
+ *
+ * **NULL** if not found or there was an error in adding
+ * a new bpf_local_storage.
+ *
+ * int bpf_inode_storage_delete(struct bpf_map *map, void *inode)
+ * Description
+ * Delete a bpf_local_storage from an *inode*.
+ * Return
+ * 0 on success.
+ *
+ * **-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * long bpf_d_path(struct path *path, char *buf, u32 sz)
+ * Description
+ * Return full path for given 'struct path' object, which
+ * needs to be the kernel BTF 'path' object. The path is
+ * returned in the provided buffer 'buf' of size 'sz' and
+ * is zero terminated.
+ *
+ * Return
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ *
+ * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr)
+ * Description
+ * Read *size* bytes from user space address *user_ptr* and store
+ * the data in *dst*. This is a wrapper of copy_from_user().
+ * Return
+ * 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3539,6 +3720,13 @@ union bpf_attr {
FN(skc_to_tcp_request_sock), \
FN(skc_to_udp6_sock), \
FN(get_task_stack), \
+ FN(load_hdr_opt), \
+ FN(store_hdr_opt), \
+ FN(reserve_hdr_opt), \
+ FN(inode_storage_get), \
+ FN(inode_storage_delete), \
+ FN(d_path), \
+ FN(copy_from_user), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -3648,9 +3836,13 @@ enum {
BPF_F_SYSCTL_BASE_NAME = (1ULL << 0),
};
-/* BPF_FUNC_sk_storage_get flags */
+/* BPF_FUNC_<kernel_obj>_storage_get flags */
enum {
- BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0),
+ BPF_LOCAL_STORAGE_GET_F_CREATE = (1ULL << 0),
+ /* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility
+ * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead.
+ */
+ BPF_SK_STORAGE_GET_F_CREATE = BPF_LOCAL_STORAGE_GET_F_CREATE,
};
/* BPF_FUNC_read_branch_records flags. */
@@ -4071,6 +4263,15 @@ struct bpf_link_info {
__u64 cgroup_id;
__u32 attach_type;
} cgroup;
+ struct {
+ __aligned_u64 target_name; /* in/out: target_name buffer ptr */
+ __u32 target_name_len; /* in/out: target_name buffer len */
+ union {
+ struct {
+ __u32 map_id;
+ } map;
+ };
+ } iter;
struct {
__u32 netns_ino;
__u32 attach_type;
@@ -4158,6 +4359,36 @@ struct bpf_sock_ops {
__u64 bytes_received;
__u64 bytes_acked;
__bpf_md_ptr(struct bpf_sock *, sk);
+ /* [skb_data, skb_data_end) covers the whole TCP header.
+ *
+ * BPF_SOCK_OPS_PARSE_HDR_OPT_CB: The packet received
+ * BPF_SOCK_OPS_HDR_OPT_LEN_CB: Not useful because the
+ * header has not been written.
+ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB: The header and options have
+ * been written so far.
+ * BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: The SYNACK that concludes
+ * the 3WHS.
+ * BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: The ACK that concludes
+ * the 3WHS.
+ *
+ * bpf_load_hdr_opt() can also be used to read a particular option.
+ */
+ __bpf_md_ptr(void *, skb_data);
+ __bpf_md_ptr(void *, skb_data_end);
+ __u32 skb_len; /* The total length of a packet.
+ * It includes the header, options,
+ * and payload.
+ */
+ __u32 skb_tcp_flags; /* tcp_flags of the header. It provides
+ * an easy way to check for tcp_flags
+ * without parsing skb_data.
+ *
+ * In particular, the skb_tcp_flags
+ * will still be available in
+ * BPF_SOCK_OPS_HDR_OPT_LEN even though
+ * the outgoing header has not
+ * been written yet.
+ */
};
/* Definitions for bpf_sock_ops_cb_flags */
@@ -4166,8 +4397,51 @@ enum {
BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1),
BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2),
BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3),
+ /* Call bpf for all received TCP headers. The bpf prog will be
+ * called under sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+ *
+ * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+ * for the header option related helpers that will be useful
+ * to the bpf programs.
+ *
+ * It could be used at the client/active side (i.e. connect() side)
+ * when the server told it that the server was in syncookie
+ * mode and required the active side to resend the bpf-written
+ * options. The active side can keep writing the bpf-options until
+ * it received a valid packet from the server side to confirm
+ * the earlier packet (and options) has been received. The later
+ * example patch is using it like this at the active side when the
+ * server is in syncookie mode.
+ *
+ * The bpf prog will usually turn this off in the common cases.
+ */
+ BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4),
+ /* Call bpf when kernel has received a header option that
+ * the kernel cannot handle. The bpf prog will be called under
+ * sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB.
+ *
+ * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+ * for the header option related helpers that will be useful
+ * to the bpf programs.
+ */
+ BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5),
+ /* Call bpf when the kernel is writing header options for the
+ * outgoing packet. The bpf prog will first be called
+ * to reserve space in a skb under
+ * sock_ops->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB. Then
+ * the bpf prog will be called to write the header option(s)
+ * under sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ *
+ * Please refer to the comment in BPF_SOCK_OPS_HDR_OPT_LEN_CB
+ * and BPF_SOCK_OPS_WRITE_HDR_OPT_CB for the header option
+ * related helpers that will be useful to the bpf programs.
+ *
+ * The kernel gets its chance to reserve space and write
+ * options first before the BPF program does.
+ */
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6),
/* Mask of all currently supported cb flags */
- BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF,
+ BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F,
};
/* List of known BPF sock_ops operators.
@@ -4223,6 +4497,63 @@ enum {
*/
BPF_SOCK_OPS_RTT_CB, /* Called on every RTT.
*/
+ BPF_SOCK_OPS_PARSE_HDR_OPT_CB, /* Parse the header option.
+ * It will be called to handle
+ * the packets received at
+ * an already established
+ * connection.
+ *
+ * sock_ops->skb_data:
+ * Referring to the received skb.
+ * It covers the TCP header only.
+ *
+ * bpf_load_hdr_opt() can also
+ * be used to search for a
+ * particular option.
+ */
+ BPF_SOCK_OPS_HDR_OPT_LEN_CB, /* Reserve space for writing the
+ * header option later in
+ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ * Arg1: bool want_cookie. (in
+ * writing SYNACK only)
+ *
+ * sock_ops->skb_data:
+ * Not available because no header has
+ * been written yet.
+ *
+ * sock_ops->skb_tcp_flags:
+ * The tcp_flags of the
+ * outgoing skb. (e.g. SYN, ACK, FIN).
+ *
+ * bpf_reserve_hdr_opt() should
+ * be used to reserve space.
+ */
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB, /* Write the header options
+ * Arg1: bool want_cookie. (in
+ * writing SYNACK only)
+ *
+ * sock_ops->skb_data:
+ * Referring to the outgoing skb.
+ * It covers the TCP header
+ * that has already been written
+ * by the kernel and the
+ * earlier bpf-progs.
+ *
+ * sock_ops->skb_tcp_flags:
+ * The tcp_flags of the outgoing
+ * skb. (e.g. SYN, ACK, FIN).
+ *
+ * bpf_store_hdr_opt() should
+ * be used to write the
+ * option.
+ *
+ * bpf_load_hdr_opt() can also
+ * be used to search for a
+ * particular option that
+ * has already been written
+ * by the kernel or the
+ * earlier bpf-progs.
+ */
};
/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
@@ -4250,6 +4581,63 @@ enum {
enum {
TCP_BPF_IW = 1001, /* Set TCP initial congestion window */
TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */
+ TCP_BPF_DELACK_MAX = 1003, /* Max delay ack in usecs */
+ TCP_BPF_RTO_MIN = 1004, /* Min delay ack in usecs */
+ /* Copy the SYN pkt to optval
+ *
+ * BPF_PROG_TYPE_SOCK_OPS only. It is similar to the
+ * bpf_getsockopt(TCP_SAVED_SYN) but it does not limit
+ * to only getting from the saved_syn. It can either get the
+ * syn packet from:
+ *
+ * 1. the just-received SYN packet (only available when writing the
+ * SYNACK). It will be useful when it is not necessary to
+ * save the SYN packet for latter use. It is also the only way
+ * to get the SYN during syncookie mode because the syn
+ * packet cannot be saved during syncookie.
+ *
+ * OR
+ *
+ * 2. the earlier saved syn which was done by
+ * bpf_setsockopt(TCP_SAVE_SYN).
+ *
+ * The bpf_getsockopt(TCP_BPF_SYN*) option will hide where the
+ * SYN packet is obtained.
+ *
+ * If the bpf-prog does not need the IP[46] header, the
+ * bpf-prog can avoid parsing the IP header by using
+ * TCP_BPF_SYN. Otherwise, the bpf-prog can get both
+ * IP[46] and TCP header by using TCP_BPF_SYN_IP.
+ *
+ * >0: Total number of bytes copied
+ * -ENOSPC: Not enough space in optval. Only optlen number of
+ * bytes is copied.
+ * -ENOENT: The SYN skb is not available now and the earlier SYN pkt
+ * is not saved by setsockopt(TCP_SAVE_SYN).
+ */
+ TCP_BPF_SYN = 1005, /* Copy the TCP header */
+ TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */
+ TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
+};
+
+enum {
+ BPF_LOAD_HDR_OPT_TCP_SYN = (1ULL << 0),
+};
+
+/* args[0] value during BPF_SOCK_OPS_HDR_OPT_LEN_CB and
+ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ */
+enum {
+ BPF_WRITE_HDR_TCP_CURRENT_MSS = 1, /* Kernel is finding the
+ * total option spaces
+ * required for an established
+ * sk in order to calculate the
+ * MSS. No skb is actually
+ * sent.
+ */
+ BPF_WRITE_HDR_TCP_SYNACK_COOKIE = 2, /* Kernel is in syncookie mode
+ * when sending a SYN.
+ */
};
struct bpf_perf_event_value {
diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h
index c7d6675..79f9191 100644
--- a/include/uapi/linux/gtp.h
+++ b/include/uapi/linux/gtp.h
@@ -2,6 +2,8 @@
#ifndef _UAPI_LINUX_GTP_H_
#define _UAPI_LINUX_GTP_H_
+#define GTP_GENL_MCGRP_NAME "gtp"
+
enum gtp_genl_cmds {
GTP_CMD_NEWPDP,
GTP_CMD_DELPDP,
diff --git a/include/uapi/linux/if_pppol2tp.h b/include/uapi/linux/if_pppol2tp.h
index 060b4d1..a910443 100644
--- a/include/uapi/linux/if_pppol2tp.h
+++ b/include/uapi/linux/if_pppol2tp.h
@@ -75,7 +75,7 @@ struct pppol2tpv3in6_addr {
};
/* Socket options:
- * DEBUG - bitmask of debug message categories
+ * DEBUG - bitmask of debug message categories (not used)
* SENDSEQ - 0 => don't send packets with sequence numbers
* 1 => send packets with sequence numbers
* RECVSEQ - 0 => receive packet sequence numbers are optional
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index 5ba122c..20ee93f 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -160,6 +160,7 @@ enum {
INET_DIAG_ULP_INFO,
INET_DIAG_SK_BPF_STORAGES,
INET_DIAG_CGROUP_ID,
+ INET_DIAG_SOCKOPT,
__INET_DIAG_MAX,
};
@@ -183,6 +184,23 @@ struct inet_diag_meminfo {
__u32 idiag_tmem;
};
+/* INET_DIAG_SOCKOPT */
+
+struct inet_diag_sockopt {
+ __u8 recverr:1,
+ is_icsk:1,
+ freebind:1,
+ hdrincl:1,
+ mc_loop:1,
+ transparent:1,
+ mc_all:1,
+ nodefrag:1;
+ __u8 bind_address_no_port:1,
+ recverr_rfc4884:1,
+ defer_connect:1,
+ unused:5;
+};
+
/* INET_DIAG_VEGASINFO */
struct tcpvegas_info {
diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h
index 61158f5..88a0d32 100644
--- a/include/uapi/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h
@@ -108,7 +108,7 @@ enum {
L2TP_ATTR_VLAN_ID, /* u16 (not used) */
L2TP_ATTR_COOKIE, /* 0, 4 or 8 bytes */
L2TP_ATTR_PEER_COOKIE, /* 0, 4 or 8 bytes */
- L2TP_ATTR_DEBUG, /* u32, enum l2tp_debug_flags */
+ L2TP_ATTR_DEBUG, /* u32, enum l2tp_debug_flags (not used) */
L2TP_ATTR_RECV_SEQ, /* u8 */
L2TP_ATTR_SEND_SEQ, /* u8 */
L2TP_ATTR_LNS_MODE, /* u8 */
@@ -177,7 +177,9 @@ enum l2tp_seqmode {
};
/**
- * enum l2tp_debug_flags - debug message categories for L2TP tunnels/sessions
+ * enum l2tp_debug_flags - debug message categories for L2TP tunnels/sessions.
+ *
+ * Unused.
*
* @L2TP_MSG_DEBUG: verbose debug (if compiled in)
* @L2TP_MSG_CONTROL: userspace - kernel interface
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 631f3a99..0584e0d 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -252,9 +252,13 @@
* DOC: SAE authentication offload
*
* By setting @NL80211_EXT_FEATURE_SAE_OFFLOAD flag drivers can indicate they
- * support offloading SAE authentication for WPA3-Personal networks. In
- * %NL80211_CMD_CONNECT the password for SAE should be specified using
- * %NL80211_ATTR_SAE_PASSWORD.
+ * support offloading SAE authentication for WPA3-Personal networks in station
+ * mode. Similarly @NL80211_EXT_FEATURE_SAE_OFFLOAD_AP flag can be set by
+ * drivers indicating the offload support in AP mode.
+ *
+ * The password for SAE should be specified using %NL80211_ATTR_SAE_PASSWORD in
+ * %NL80211_CMD_CONNECT and %NL80211_CMD_START_AP for station and AP mode
+ * respectively.
*/
/**
@@ -647,13 +651,9 @@
* authentication/association or not receiving a response from the AP.
* Non-zero %NL80211_ATTR_STATUS_CODE value is indicated in that case as
* well to remain backwards compatible.
- * When establishing a security association, drivers that support 4 way
- * handshake offload should send %NL80211_CMD_PORT_AUTHORIZED event when
- * the 4 way handshake is completed successfully.
* @NL80211_CMD_ROAM: Notification indicating the card/driver roamed by itself.
- * When a security association was established with the new AP (e.g. if
- * the FT protocol was used for roaming or the driver completed the 4 way
- * handshake), this event should be followed by an
+ * When a security association was established on an 802.1X network using
+ * fast transition, this event should be followed by an
* %NL80211_CMD_PORT_AUTHORIZED event.
* @NL80211_CMD_DISCONNECT: drop a given connection; also used to notify
* userspace that a connection was dropped by the AP or due to other
@@ -1067,13 +1067,11 @@
* @NL80211_CMD_DEL_PMK: For offloaded 4-Way handshake, delete the previously
* configured PMK for the authenticator address identified by
* %NL80211_ATTR_MAC.
- * @NL80211_CMD_PORT_AUTHORIZED: An event that indicates that the 4 way
- * handshake was completed successfully by the driver. The BSSID is
- * specified with %NL80211_ATTR_MAC. Drivers that support 4 way handshake
- * offload should send this event after indicating 802.11 association with
- * %NL80211_CMD_CONNECT or %NL80211_CMD_ROAM. If the 4 way handshake failed
- * %NL80211_CMD_DISCONNECT should be indicated instead.
- *
+ * @NL80211_CMD_PORT_AUTHORIZED: An event that indicates an 802.1X FT roam was
+ * completed successfully. Drivers that support 4 way handshake offload
+ * should send this event after indicating 802.1X FT assocation with
+ * %NL80211_CMD_ROAM. If the 4 way handshake failed %NL80211_CMD_DISCONNECT
+ * should be indicated instead.
* @NL80211_CMD_CONTROL_PORT_FRAME: Control Port (e.g. PAE) frame TX request
* and RX notification. This command is used both as a request to transmit
* a control port frame and as a notification that a control port frame
@@ -2082,10 +2080,10 @@ enum nl80211_commands {
* operation).
* @NL80211_ATTR_CSA_IES: Nested set of attributes containing the IE information
* for the time while performing a channel switch.
- * @NL80211_ATTR_CSA_C_OFF_BEACON: An array of offsets (u16) to the channel
- * switch counters in the beacons tail (%NL80211_ATTR_BEACON_TAIL).
- * @NL80211_ATTR_CSA_C_OFF_PRESP: An array of offsets (u16) to the channel
- * switch counters in the probe response (%NL80211_ATTR_PROBE_RESP).
+ * @NL80211_ATTR_CNTDWN_OFFS_BEACON: An array of offsets (u16) to the channel
+ * switch or color change counters in the beacons tail (%NL80211_ATTR_BEACON_TAIL).
+ * @NL80211_ATTR_CNTDWN_OFFS_PRESP: An array of offsets (u16) to the channel
+ * switch or color change counters in the probe response (%NL80211_ATTR_PROBE_RESP).
*
* @NL80211_ATTR_RXMGMT_FLAGS: flags for nl80211_send_mgmt(), u32.
* As specified in the &enum nl80211_rxmgmt_flags.
@@ -2821,8 +2819,8 @@ enum nl80211_attrs {
NL80211_ATTR_CH_SWITCH_COUNT,
NL80211_ATTR_CH_SWITCH_BLOCK_TX,
NL80211_ATTR_CSA_IES,
- NL80211_ATTR_CSA_C_OFF_BEACON,
- NL80211_ATTR_CSA_C_OFF_PRESP,
+ NL80211_ATTR_CNTDWN_OFFS_BEACON,
+ NL80211_ATTR_CNTDWN_OFFS_PRESP,
NL80211_ATTR_RXMGMT_FLAGS,
@@ -3009,6 +3007,8 @@ enum nl80211_attrs {
#define NL80211_ATTR_MESH_PARAMS NL80211_ATTR_MESH_CONFIG
#define NL80211_ATTR_IFACE_SOCKET_OWNER NL80211_ATTR_SOCKET_OWNER
#define NL80211_ATTR_SAE_DATA NL80211_ATTR_AUTH_DATA
+#define NL80211_ATTR_CSA_C_OFF_BEACON NL80211_ATTR_CNTDWN_OFFS_BEACON
+#define NL80211_ATTR_CSA_C_OFF_PRESP NL80211_ATTR_CNTDWN_OFFS_PRESP
/*
* Allow user space programs to use #ifdef on new attributes by defining them
@@ -3187,6 +3187,18 @@ enum nl80211_he_gi {
};
/**
+ * enum nl80211_he_ltf - HE long training field
+ * @NL80211_RATE_INFO_HE_1xLTF: 3.2 usec
+ * @NL80211_RATE_INFO_HE_2xLTF: 6.4 usec
+ * @NL80211_RATE_INFO_HE_4xLTF: 12.8 usec
+ */
+enum nl80211_he_ltf {
+ NL80211_RATE_INFO_HE_1XLTF,
+ NL80211_RATE_INFO_HE_2XLTF,
+ NL80211_RATE_INFO_HE_4XLTF,
+};
+
+/**
* enum nl80211_he_ru_alloc - HE RU allocation values
* @NL80211_RATE_INFO_HE_RU_ALLOC_26: 26-tone RU allocation
* @NL80211_RATE_INFO_HE_RU_ALLOC_52: 52-tone RU allocation
@@ -4741,6 +4753,10 @@ enum nl80211_key_attributes {
* @NL80211_TXRATE_VHT: VHT rates allowed for TX rate selection,
* see &struct nl80211_txrate_vht
* @NL80211_TXRATE_GI: configure GI, see &enum nl80211_txrate_gi
+ * @NL80211_TXRATE_HE: HE rates allowed for TX rate selection,
+ * see &struct nl80211_txrate_he
+ * @NL80211_TXRATE_HE_GI: configure HE GI, 0.8us, 1.6us and 3.2us.
+ * @NL80211_TXRATE_HE_LTF: configure HE LTF, 1XLTF, 2XLTF and 4XLTF.
* @__NL80211_TXRATE_AFTER_LAST: internal
* @NL80211_TXRATE_MAX: highest TX rate attribute
*/
@@ -4750,6 +4766,9 @@ enum nl80211_tx_rate_attributes {
NL80211_TXRATE_HT,
NL80211_TXRATE_VHT,
NL80211_TXRATE_GI,
+ NL80211_TXRATE_HE,
+ NL80211_TXRATE_HE_GI,
+ NL80211_TXRATE_HE_LTF,
/* keep last */
__NL80211_TXRATE_AFTER_LAST,
@@ -4767,6 +4786,15 @@ struct nl80211_txrate_vht {
__u16 mcs[NL80211_VHT_NSS_MAX];
};
+#define NL80211_HE_NSS_MAX 8
+/**
+ * struct nl80211_txrate_he - HE MCS/NSS txrate bitmap
+ * @mcs: MCS bitmap table for each NSS (array index 0 for 1 stream, etc.)
+ */
+struct nl80211_txrate_he {
+ __u16 mcs[NL80211_HE_NSS_MAX];
+};
+
enum nl80211_txrate_gi {
NL80211_TXRATE_DEFAULT_GI,
NL80211_TXRATE_FORCE_SGI,
@@ -5821,6 +5849,9 @@ enum nl80211_feature_flags {
* handshake with PSK in AP mode (PSK is passed as part of the start AP
* command).
*
+ * @NL80211_EXT_FEATURE_SAE_OFFLOAD_AP: Device wants to do SAE authentication
+ * in AP mode (SAE password is passed as part of the start AP command).
+ *
* @NUM_NL80211_EXT_FEATURES: number of extended features.
* @MAX_NL80211_EXT_FEATURES: highest extended feature index.
*/
@@ -5878,6 +5909,7 @@ enum nl80211_ext_feature_index {
NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS,
NL80211_EXT_FEATURE_OPERATING_CHANNEL_VALIDATION,
NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK,
+ NL80211_EXT_FEATURE_SAE_OFFLOAD_AP,
/* add new features before the definition below */
NUM_NL80211_EXT_FEATURES,
diff --git a/init/Kconfig b/init/Kconfig
index d6a0b31..6ecc00e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1691,6 +1691,7 @@
bool "Enable bpf() system call"
select BPF
select IRQ_WORK
+ select TASKS_TRACE_RCU
default n
help
Enable the bpf() system call that allows to manipulate eBPF
@@ -1710,6 +1711,8 @@
def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON
depends on HAVE_EBPF_JIT && BPF_JIT
+source "kernel/bpf/preload/Kconfig"
+
config USERFAULTFD
bool "Enable userfaultfd() system call"
depends on MMU
diff --git a/kernel/Makefile b/kernel/Makefile
index 9a20016..22b0760 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -12,7 +12,7 @@
notifier.o ksysfs.o cred.o reboot.o \
async.o range.o smpboot.o ucount.o regset.o
-obj-$(CONFIG_BPFILTER) += usermode_driver.o
+obj-$(CONFIG_USERMODE_DRIVER) += usermode_driver.o
obj-$(CONFIG_MODULES) += kmod.o
obj-$(CONFIG_MULTIUSER) += groups.o
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index e6eb9c0..bdc8cd1 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -5,6 +5,7 @@
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
+obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_JIT) += trampoline.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
@@ -12,6 +13,7 @@
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
+obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o
obj-$(CONFIG_BPF_SYSCALL) += offload.o
obj-$(CONFIG_BPF_SYSCALL) += net_namespace.o
endif
@@ -29,3 +31,4 @@
obj-$(CONFIG_BPF_SYSCALL) += bpf_struct_ops.o
obj-${CONFIG_BPF_LSM} += bpf_lsm.o
endif
+obj-$(CONFIG_BPF_PRELOAD) += preload/
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 8ff419b..e046fb7 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -10,6 +10,7 @@
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>
+#include <linux/rcupdate_trace.h>
#include "map_in_map.h"
@@ -487,6 +488,13 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
vma->vm_pgoff + pgoff);
}
+static bool array_map_meta_equal(const struct bpf_map *meta0,
+ const struct bpf_map *meta1)
+{
+ return meta0->max_entries == meta1->max_entries &&
+ bpf_map_meta_equal(meta0, meta1);
+}
+
struct bpf_iter_seq_array_map_info {
struct bpf_map *map;
void *percpu_value_buf;
@@ -625,6 +633,7 @@ static const struct bpf_iter_seq_info iter_seq_info = {
static int array_map_btf_id;
const struct bpf_map_ops array_map_ops = {
+ .map_meta_equal = array_map_meta_equal,
.map_alloc_check = array_map_alloc_check,
.map_alloc = array_map_alloc,
.map_free = array_map_free,
@@ -647,6 +656,7 @@ const struct bpf_map_ops array_map_ops = {
static int percpu_array_map_btf_id;
const struct bpf_map_ops percpu_array_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = array_map_alloc_check,
.map_alloc = array_map_alloc,
.map_free = array_map_free,
@@ -1003,6 +1013,11 @@ static void prog_array_map_free(struct bpf_map *map)
fd_array_map_free(map);
}
+/* prog_array->aux->{type,jited} is a runtime binding.
+ * Doing static check alone in the verifier is not enough.
+ * Thus, prog_array_map cannot be used as an inner_map
+ * and map_meta_equal is not implemented.
+ */
static int prog_array_map_btf_id;
const struct bpf_map_ops prog_array_map_ops = {
.map_alloc_check = fd_array_map_alloc_check,
@@ -1101,6 +1116,7 @@ static void perf_event_fd_array_release(struct bpf_map *map,
static int perf_event_array_map_btf_id;
const struct bpf_map_ops perf_event_array_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = fd_array_map_alloc_check,
.map_alloc = array_map_alloc,
.map_free = fd_array_map_free,
@@ -1137,6 +1153,7 @@ static void cgroup_fd_array_free(struct bpf_map *map)
static int cgroup_array_map_btf_id;
const struct bpf_map_ops cgroup_array_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = fd_array_map_alloc_check,
.map_alloc = array_map_alloc,
.map_free = cgroup_fd_array_free,
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
new file mode 100644
index 0000000..75be027
--- /dev/null
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Facebook
+ * Copyright 2020 Google LLC.
+ */
+
+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/bpf.h>
+#include <linux/bpf_local_storage.h>
+#include <net/sock.h>
+#include <uapi/linux/sock_diag.h>
+#include <uapi/linux/btf.h>
+#include <linux/bpf_lsm.h>
+#include <linux/btf_ids.h>
+#include <linux/fdtable.h>
+
+DEFINE_BPF_STORAGE_CACHE(inode_cache);
+
+static struct bpf_local_storage __rcu **
+inode_storage_ptr(void *owner)
+{
+ struct inode *inode = owner;
+ struct bpf_storage_blob *bsb;
+
+ bsb = bpf_inode(inode);
+ if (!bsb)
+ return NULL;
+ return &bsb->storage;
+}
+
+static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode,
+ struct bpf_map *map,
+ bool cacheit_lockit)
+{
+ struct bpf_local_storage *inode_storage;
+ struct bpf_local_storage_map *smap;
+ struct bpf_storage_blob *bsb;
+
+ bsb = bpf_inode(inode);
+ if (!bsb)
+ return NULL;
+
+ inode_storage = rcu_dereference(bsb->storage);
+ if (!inode_storage)
+ return NULL;
+
+ smap = (struct bpf_local_storage_map *)map;
+ return bpf_local_storage_lookup(inode_storage, smap, cacheit_lockit);
+}
+
+void bpf_inode_storage_free(struct inode *inode)
+{
+ struct bpf_local_storage_elem *selem;
+ struct bpf_local_storage *local_storage;
+ bool free_inode_storage = false;
+ struct bpf_storage_blob *bsb;
+ struct hlist_node *n;
+
+ bsb = bpf_inode(inode);
+ if (!bsb)
+ return;
+
+ rcu_read_lock();
+
+ local_storage = rcu_dereference(bsb->storage);
+ if (!local_storage) {
+ rcu_read_unlock();
+ return;
+ }
+
+ /* Netiher the bpf_prog nor the bpf-map's syscall
+ * could be modifying the local_storage->list now.
+ * Thus, no elem can be added-to or deleted-from the
+ * local_storage->list by the bpf_prog or by the bpf-map's syscall.
+ *
+ * It is racing with bpf_local_storage_map_free() alone
+ * when unlinking elem from the local_storage->list and
+ * the map's bucket->list.
+ */
+ raw_spin_lock_bh(&local_storage->lock);
+ hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
+ /* Always unlink from map before unlinking from
+ * local_storage.
+ */
+ bpf_selem_unlink_map(selem);
+ free_inode_storage = bpf_selem_unlink_storage_nolock(
+ local_storage, selem, false);
+ }
+ raw_spin_unlock_bh(&local_storage->lock);
+ rcu_read_unlock();
+
+ /* free_inoode_storage should always be true as long as
+ * local_storage->list was non-empty.
+ */
+ if (free_inode_storage)
+ kfree_rcu(local_storage, rcu);
+}
+
+static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key)
+{
+ struct bpf_local_storage_data *sdata;
+ struct file *f;
+ int fd;
+
+ fd = *(int *)key;
+ f = fget_raw(fd);
+ if (!f)
+ return NULL;
+
+ sdata = inode_storage_lookup(f->f_inode, map, true);
+ fput(f);
+ return sdata ? sdata->data : NULL;
+}
+
+static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags)
+{
+ struct bpf_local_storage_data *sdata;
+ struct file *f;
+ int fd;
+
+ fd = *(int *)key;
+ f = fget_raw(fd);
+ if (!f || !inode_storage_ptr(f->f_inode))
+ return -EBADF;
+
+ sdata = bpf_local_storage_update(f->f_inode,
+ (struct bpf_local_storage_map *)map,
+ value, map_flags);
+ fput(f);
+ return PTR_ERR_OR_ZERO(sdata);
+}
+
+static int inode_storage_delete(struct inode *inode, struct bpf_map *map)
+{
+ struct bpf_local_storage_data *sdata;
+
+ sdata = inode_storage_lookup(inode, map, false);
+ if (!sdata)
+ return -ENOENT;
+
+ bpf_selem_unlink(SELEM(sdata));
+
+ return 0;
+}
+
+static int bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
+{
+ struct file *f;
+ int fd, err;
+
+ fd = *(int *)key;
+ f = fget_raw(fd);
+ if (!f)
+ return -EBADF;
+
+ err = inode_storage_delete(f->f_inode, map);
+ fput(f);
+ return err;
+}
+
+BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
+ void *, value, u64, flags)
+{
+ struct bpf_local_storage_data *sdata;
+
+ if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
+ return (unsigned long)NULL;
+
+ /* explicitly check that the inode_storage_ptr is not
+ * NULL as inode_storage_lookup returns NULL in this case and
+ * bpf_local_storage_update expects the owner to have a
+ * valid storage pointer.
+ */
+ if (!inode_storage_ptr(inode))
+ return (unsigned long)NULL;
+
+ sdata = inode_storage_lookup(inode, map, true);
+ if (sdata)
+ return (unsigned long)sdata->data;
+
+ /* This helper must only called from where the inode is gurranteed
+ * to have a refcount and cannot be freed.
+ */
+ if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+ sdata = bpf_local_storage_update(
+ inode, (struct bpf_local_storage_map *)map, value,
+ BPF_NOEXIST);
+ return IS_ERR(sdata) ? (unsigned long)NULL :
+ (unsigned long)sdata->data;
+ }
+
+ return (unsigned long)NULL;
+}
+
+BPF_CALL_2(bpf_inode_storage_delete,
+ struct bpf_map *, map, struct inode *, inode)
+{
+ /* This helper must only called from where the inode is gurranteed
+ * to have a refcount and cannot be freed.
+ */
+ return inode_storage_delete(inode, map);
+}
+
+static int notsupp_get_next_key(struct bpf_map *map, void *key,
+ void *next_key)
+{
+ return -ENOTSUPP;
+}
+
+static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr)
+{
+ struct bpf_local_storage_map *smap;
+
+ smap = bpf_local_storage_map_alloc(attr);
+ if (IS_ERR(smap))
+ return ERR_CAST(smap);
+
+ smap->cache_idx = bpf_local_storage_cache_idx_get(&inode_cache);
+ return &smap->map;
+}
+
+static void inode_storage_map_free(struct bpf_map *map)
+{
+ struct bpf_local_storage_map *smap;
+
+ smap = (struct bpf_local_storage_map *)map;
+ bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx);
+ bpf_local_storage_map_free(smap);
+}
+
+static int inode_storage_map_btf_id;
+const struct bpf_map_ops inode_storage_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
+ .map_alloc_check = bpf_local_storage_map_alloc_check,
+ .map_alloc = inode_storage_map_alloc,
+ .map_free = inode_storage_map_free,
+ .map_get_next_key = notsupp_get_next_key,
+ .map_lookup_elem = bpf_fd_inode_storage_lookup_elem,
+ .map_update_elem = bpf_fd_inode_storage_update_elem,
+ .map_delete_elem = bpf_fd_inode_storage_delete_elem,
+ .map_check_btf = bpf_local_storage_map_check_btf,
+ .map_btf_name = "bpf_local_storage_map",
+ .map_btf_id = &inode_storage_map_btf_id,
+ .map_owner_storage_ptr = inode_storage_ptr,
+};
+
+BTF_ID_LIST(bpf_inode_storage_btf_ids)
+BTF_ID_UNUSED
+BTF_ID(struct, inode)
+
+const struct bpf_func_proto bpf_inode_storage_get_proto = {
+ .func = bpf_inode_storage_get,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_BTF_ID,
+ .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg4_type = ARG_ANYTHING,
+ .btf_id = bpf_inode_storage_btf_ids,
+};
+
+const struct bpf_func_proto bpf_inode_storage_delete_proto = {
+ .func = bpf_inode_storage_delete,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_BTF_ID,
+ .btf_id = bpf_inode_storage_btf_ids,
+};
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 8faa2ce..30833bb 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -390,10 +390,68 @@ static int bpf_iter_link_replace(struct bpf_link *link,
return ret;
}
+static void bpf_iter_link_show_fdinfo(const struct bpf_link *link,
+ struct seq_file *seq)
+{
+ struct bpf_iter_link *iter_link =
+ container_of(link, struct bpf_iter_link, link);
+ bpf_iter_show_fdinfo_t show_fdinfo;
+
+ seq_printf(seq,
+ "target_name:\t%s\n",
+ iter_link->tinfo->reg_info->target);
+
+ show_fdinfo = iter_link->tinfo->reg_info->show_fdinfo;
+ if (show_fdinfo)
+ show_fdinfo(&iter_link->aux, seq);
+}
+
+static int bpf_iter_link_fill_link_info(const struct bpf_link *link,
+ struct bpf_link_info *info)
+{
+ struct bpf_iter_link *iter_link =
+ container_of(link, struct bpf_iter_link, link);
+ char __user *ubuf = u64_to_user_ptr(info->iter.target_name);
+ bpf_iter_fill_link_info_t fill_link_info;
+ u32 ulen = info->iter.target_name_len;
+ const char *target_name;
+ u32 target_len;
+
+ if (!ulen ^ !ubuf)
+ return -EINVAL;
+
+ target_name = iter_link->tinfo->reg_info->target;
+ target_len = strlen(target_name);
+ info->iter.target_name_len = target_len + 1;
+
+ if (ubuf) {
+ if (ulen >= target_len + 1) {
+ if (copy_to_user(ubuf, target_name, target_len + 1))
+ return -EFAULT;
+ } else {
+ char zero = '\0';
+
+ if (copy_to_user(ubuf, target_name, ulen - 1))
+ return -EFAULT;
+ if (put_user(zero, ubuf + ulen - 1))
+ return -EFAULT;
+ return -ENOSPC;
+ }
+ }
+
+ fill_link_info = iter_link->tinfo->reg_info->fill_link_info;
+ if (fill_link_info)
+ return fill_link_info(&iter_link->aux, info);
+
+ return 0;
+}
+
static const struct bpf_link_ops bpf_iter_link_lops = {
.release = bpf_iter_link_release,
.dealloc = bpf_iter_link_dealloc,
.update_prog = bpf_iter_link_replace,
+ .show_fdinfo = bpf_iter_link_show_fdinfo,
+ .fill_link_info = bpf_iter_link_fill_link_info,
};
bool bpf_link_is_iter(struct bpf_link *link)
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
new file mode 100644
index 0000000..ffa7d11
--- /dev/null
+++ b/kernel/bpf/bpf_local_storage.c
@@ -0,0 +1,600 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/bpf.h>
+#include <linux/btf_ids.h>
+#include <linux/bpf_local_storage.h>
+#include <net/sock.h>
+#include <uapi/linux/sock_diag.h>
+#include <uapi/linux/btf.h>
+
+#define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE)
+
+static struct bpf_local_storage_map_bucket *
+select_bucket(struct bpf_local_storage_map *smap,
+ struct bpf_local_storage_elem *selem)
+{
+ return &smap->buckets[hash_ptr(selem, smap->bucket_log)];
+}
+
+static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
+{
+ struct bpf_map *map = &smap->map;
+
+ if (!map->ops->map_local_storage_charge)
+ return 0;
+
+ return map->ops->map_local_storage_charge(smap, owner, size);
+}
+
+static void mem_uncharge(struct bpf_local_storage_map *smap, void *owner,
+ u32 size)
+{
+ struct bpf_map *map = &smap->map;
+
+ if (map->ops->map_local_storage_uncharge)
+ map->ops->map_local_storage_uncharge(smap, owner, size);
+}
+
+static struct bpf_local_storage __rcu **
+owner_storage(struct bpf_local_storage_map *smap, void *owner)
+{
+ struct bpf_map *map = &smap->map;
+
+ return map->ops->map_owner_storage_ptr(owner);
+}
+
+static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
+{
+ return !hlist_unhashed(&selem->snode);
+}
+
+static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
+{
+ return !hlist_unhashed(&selem->map_node);
+}
+
+struct bpf_local_storage_elem *
+bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
+ void *value, bool charge_mem)
+{
+ struct bpf_local_storage_elem *selem;
+
+ if (charge_mem && mem_charge(smap, owner, smap->elem_size))
+ return NULL;
+
+ selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN);
+ if (selem) {
+ if (value)
+ memcpy(SDATA(selem)->data, value, smap->map.value_size);
+ return selem;
+ }
+
+ if (charge_mem)
+ mem_uncharge(smap, owner, smap->elem_size);
+
+ return NULL;
+}
+
+/* local_storage->lock must be held and selem->local_storage == local_storage.
+ * The caller must ensure selem->smap is still valid to be
+ * dereferenced for its smap->elem_size and smap->cache_idx.
+ */
+bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem,
+ bool uncharge_mem)
+{
+ struct bpf_local_storage_map *smap;
+ bool free_local_storage;
+ void *owner;
+
+ smap = rcu_dereference(SDATA(selem)->smap);
+ owner = local_storage->owner;
+
+ /* All uncharging on the owner must be done first.
+ * The owner may be freed once the last selem is unlinked
+ * from local_storage.
+ */
+ if (uncharge_mem)
+ mem_uncharge(smap, owner, smap->elem_size);
+
+ free_local_storage = hlist_is_singular_node(&selem->snode,
+ &local_storage->list);
+ if (free_local_storage) {
+ mem_uncharge(smap, owner, sizeof(struct bpf_local_storage));
+ local_storage->owner = NULL;
+
+ /* After this RCU_INIT, owner may be freed and cannot be used */
+ RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);
+
+ /* local_storage is not freed now. local_storage->lock is
+ * still held and raw_spin_unlock_bh(&local_storage->lock)
+ * will be done by the caller.
+ *
+ * Although the unlock will be done under
+ * rcu_read_lock(), it is more intutivie to
+ * read if kfree_rcu(local_storage, rcu) is done
+ * after the raw_spin_unlock_bh(&local_storage->lock).
+ *
+ * Hence, a "bool free_local_storage" is returned
+ * to the caller which then calls the kfree_rcu()
+ * after unlock.
+ */
+ }
+ hlist_del_init_rcu(&selem->snode);
+ if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
+ SDATA(selem))
+ RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
+
+ kfree_rcu(selem, rcu);
+
+ return free_local_storage;
+}
+
+static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
+{
+ struct bpf_local_storage *local_storage;
+ bool free_local_storage = false;
+
+ if (unlikely(!selem_linked_to_storage(selem)))
+ /* selem has already been unlinked from sk */
+ return;
+
+ local_storage = rcu_dereference(selem->local_storage);
+ raw_spin_lock_bh(&local_storage->lock);
+ if (likely(selem_linked_to_storage(selem)))
+ free_local_storage = bpf_selem_unlink_storage_nolock(
+ local_storage, selem, true);
+ raw_spin_unlock_bh(&local_storage->lock);
+
+ if (free_local_storage)
+ kfree_rcu(local_storage, rcu);
+}
+
+void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem)
+{
+ RCU_INIT_POINTER(selem->local_storage, local_storage);
+ hlist_add_head(&selem->snode, &local_storage->list);
+}
+
+void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
+{
+ struct bpf_local_storage_map *smap;
+ struct bpf_local_storage_map_bucket *b;
+
+ if (unlikely(!selem_linked_to_map(selem)))
+ /* selem has already be unlinked from smap */
+ return;
+
+ smap = rcu_dereference(SDATA(selem)->smap);
+ b = select_bucket(smap, selem);
+ raw_spin_lock_bh(&b->lock);
+ if (likely(selem_linked_to_map(selem)))
+ hlist_del_init_rcu(&selem->map_node);
+ raw_spin_unlock_bh(&b->lock);
+}
+
+void bpf_selem_link_map(struct bpf_local_storage_map *smap,
+ struct bpf_local_storage_elem *selem)
+{
+ struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
+
+ raw_spin_lock_bh(&b->lock);
+ RCU_INIT_POINTER(SDATA(selem)->smap, smap);
+ hlist_add_head_rcu(&selem->map_node, &b->list);
+ raw_spin_unlock_bh(&b->lock);
+}
+
+void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
+{
+ /* Always unlink from map before unlinking from local_storage
+ * because selem will be freed after successfully unlinked from
+ * the local_storage.
+ */
+ bpf_selem_unlink_map(selem);
+ __bpf_selem_unlink_storage(selem);
+}
+
+struct bpf_local_storage_data *
+bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_map *smap,
+ bool cacheit_lockit)
+{
+ struct bpf_local_storage_data *sdata;
+ struct bpf_local_storage_elem *selem;
+
+ /* Fast path (cache hit) */
+ sdata = rcu_dereference(local_storage->cache[smap->cache_idx]);
+ if (sdata && rcu_access_pointer(sdata->smap) == smap)
+ return sdata;
+
+ /* Slow path (cache miss) */
+ hlist_for_each_entry_rcu(selem, &local_storage->list, snode)
+ if (rcu_access_pointer(SDATA(selem)->smap) == smap)
+ break;
+
+ if (!selem)
+ return NULL;
+
+ sdata = SDATA(selem);
+ if (cacheit_lockit) {
+ /* spinlock is needed to avoid racing with the
+ * parallel delete. Otherwise, publishing an already
+ * deleted sdata to the cache will become a use-after-free
+ * problem in the next bpf_local_storage_lookup().
+ */
+ raw_spin_lock_bh(&local_storage->lock);
+ if (selem_linked_to_storage(selem))
+ rcu_assign_pointer(local_storage->cache[smap->cache_idx],
+ sdata);
+ raw_spin_unlock_bh(&local_storage->lock);
+ }
+
+ return sdata;
+}
+
+static int check_flags(const struct bpf_local_storage_data *old_sdata,
+ u64 map_flags)
+{
+ if (old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
+ /* elem already exists */
+ return -EEXIST;
+
+ if (!old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
+ /* elem doesn't exist, cannot update it */
+ return -ENOENT;
+
+ return 0;
+}
+
+int bpf_local_storage_alloc(void *owner,
+ struct bpf_local_storage_map *smap,
+ struct bpf_local_storage_elem *first_selem)
+{
+ struct bpf_local_storage *prev_storage, *storage;
+ struct bpf_local_storage **owner_storage_ptr;
+ int err;
+
+ err = mem_charge(smap, owner, sizeof(*storage));
+ if (err)
+ return err;
+
+ storage = kzalloc(sizeof(*storage), GFP_ATOMIC | __GFP_NOWARN);
+ if (!storage) {
+ err = -ENOMEM;
+ goto uncharge;
+ }
+
+ INIT_HLIST_HEAD(&storage->list);
+ raw_spin_lock_init(&storage->lock);
+ storage->owner = owner;
+
+ bpf_selem_link_storage_nolock(storage, first_selem);
+ bpf_selem_link_map(smap, first_selem);
+
+ owner_storage_ptr =
+ (struct bpf_local_storage **)owner_storage(smap, owner);
+ /* Publish storage to the owner.
+ * Instead of using any lock of the kernel object (i.e. owner),
+ * cmpxchg will work with any kernel object regardless what
+ * the running context is, bh, irq...etc.
+ *
+ * From now on, the owner->storage pointer (e.g. sk->sk_bpf_storage)
+ * is protected by the storage->lock. Hence, when freeing
+ * the owner->storage, the storage->lock must be held before
+ * setting owner->storage ptr to NULL.
+ */
+ prev_storage = cmpxchg(owner_storage_ptr, NULL, storage);
+ if (unlikely(prev_storage)) {
+ bpf_selem_unlink_map(first_selem);
+ err = -EAGAIN;
+ goto uncharge;
+
+ /* Note that even first_selem was linked to smap's
+ * bucket->list, first_selem can be freed immediately
+ * (instead of kfree_rcu) because
+ * bpf_local_storage_map_free() does a
+ * synchronize_rcu() before walking the bucket->list.
+ * Hence, no one is accessing selem from the
+ * bucket->list under rcu_read_lock().
+ */
+ }
+
+ return 0;
+
+uncharge:
+ kfree(storage);
+ mem_uncharge(smap, owner, sizeof(*storage));
+ return err;
+}
+
+/* sk cannot be going away because it is linking new elem
+ * to sk->sk_bpf_storage. (i.e. sk->sk_refcnt cannot be 0).
+ * Otherwise, it will become a leak (and other memory issues
+ * during map destruction).
+ */
+struct bpf_local_storage_data *
+bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
+ void *value, u64 map_flags)
+{
+ struct bpf_local_storage_data *old_sdata = NULL;
+ struct bpf_local_storage_elem *selem;
+ struct bpf_local_storage *local_storage;
+ int err;
+
+ /* BPF_EXIST and BPF_NOEXIST cannot be both set */
+ if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
+ /* BPF_F_LOCK can only be used in a value with spin_lock */
+ unlikely((map_flags & BPF_F_LOCK) &&
+ !map_value_has_spin_lock(&smap->map)))
+ return ERR_PTR(-EINVAL);
+
+ local_storage = rcu_dereference(*owner_storage(smap, owner));
+ if (!local_storage || hlist_empty(&local_storage->list)) {
+ /* Very first elem for the owner */
+ err = check_flags(NULL, map_flags);
+ if (err)
+ return ERR_PTR(err);
+
+ selem = bpf_selem_alloc(smap, owner, value, true);
+ if (!selem)
+ return ERR_PTR(-ENOMEM);
+
+ err = bpf_local_storage_alloc(owner, smap, selem);
+ if (err) {
+ kfree(selem);
+ mem_uncharge(smap, owner, smap->elem_size);
+ return ERR_PTR(err);
+ }
+
+ return SDATA(selem);
+ }
+
+ if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) {
+ /* Hoping to find an old_sdata to do inline update
+ * such that it can avoid taking the local_storage->lock
+ * and changing the lists.
+ */
+ old_sdata =
+ bpf_local_storage_lookup(local_storage, smap, false);
+ err = check_flags(old_sdata, map_flags);
+ if (err)
+ return ERR_PTR(err);
+ if (old_sdata && selem_linked_to_storage(SELEM(old_sdata))) {
+ copy_map_value_locked(&smap->map, old_sdata->data,
+ value, false);
+ return old_sdata;
+ }
+ }
+
+ raw_spin_lock_bh(&local_storage->lock);
+
+ /* Recheck local_storage->list under local_storage->lock */
+ if (unlikely(hlist_empty(&local_storage->list))) {
+ /* A parallel del is happening and local_storage is going
+ * away. It has just been checked before, so very
+ * unlikely. Return instead of retry to keep things
+ * simple.
+ */
+ err = -EAGAIN;
+ goto unlock_err;
+ }
+
+ old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
+ err = check_flags(old_sdata, map_flags);
+ if (err)
+ goto unlock_err;
+
+ if (old_sdata && (map_flags & BPF_F_LOCK)) {
+ copy_map_value_locked(&smap->map, old_sdata->data, value,
+ false);
+ selem = SELEM(old_sdata);
+ goto unlock;
+ }
+
+ /* local_storage->lock is held. Hence, we are sure
+ * we can unlink and uncharge the old_sdata successfully
+ * later. Hence, instead of charging the new selem now
+ * and then uncharge the old selem later (which may cause
+ * a potential but unnecessary charge failure), avoid taking
+ * a charge at all here (the "!old_sdata" check) and the
+ * old_sdata will not be uncharged later during
+ * bpf_selem_unlink_storage_nolock().
+ */
+ selem = bpf_selem_alloc(smap, owner, value, !old_sdata);
+ if (!selem) {
+ err = -ENOMEM;
+ goto unlock_err;
+ }
+
+ /* First, link the new selem to the map */
+ bpf_selem_link_map(smap, selem);
+
+ /* Second, link (and publish) the new selem to local_storage */
+ bpf_selem_link_storage_nolock(local_storage, selem);
+
+ /* Third, remove old selem, SELEM(old_sdata) */
+ if (old_sdata) {
+ bpf_selem_unlink_map(SELEM(old_sdata));
+ bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
+ false);
+ }
+
+unlock:
+ raw_spin_unlock_bh(&local_storage->lock);
+ return SDATA(selem);
+
+unlock_err:
+ raw_spin_unlock_bh(&local_storage->lock);
+ return ERR_PTR(err);
+}
+
+u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
+{
+ u64 min_usage = U64_MAX;
+ u16 i, res = 0;
+
+ spin_lock(&cache->idx_lock);
+
+ for (i = 0; i < BPF_LOCAL_STORAGE_CACHE_SIZE; i++) {
+ if (cache->idx_usage_counts[i] < min_usage) {
+ min_usage = cache->idx_usage_counts[i];
+ res = i;
+
+ /* Found a free cache_idx */
+ if (!min_usage)
+ break;
+ }
+ }
+ cache->idx_usage_counts[res]++;
+
+ spin_unlock(&cache->idx_lock);
+
+ return res;
+}
+
+void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
+ u16 idx)
+{
+ spin_lock(&cache->idx_lock);
+ cache->idx_usage_counts[idx]--;
+ spin_unlock(&cache->idx_lock);
+}
+
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
+{
+ struct bpf_local_storage_elem *selem;
+ struct bpf_local_storage_map_bucket *b;
+ unsigned int i;
+
+ /* Note that this map might be concurrently cloned from
+ * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
+ * RCU read section to finish before proceeding. New RCU
+ * read sections should be prevented via bpf_map_inc_not_zero.
+ */
+ synchronize_rcu();
+
+ /* bpf prog and the userspace can no longer access this map
+ * now. No new selem (of this map) can be added
+ * to the owner->storage or to the map bucket's list.
+ *
+ * The elem of this map can be cleaned up here
+ * or when the storage is freed e.g.
+ * by bpf_sk_storage_free() during __sk_destruct().
+ */
+ for (i = 0; i < (1U << smap->bucket_log); i++) {
+ b = &smap->buckets[i];
+
+ rcu_read_lock();
+ /* No one is adding to b->list now */
+ while ((selem = hlist_entry_safe(
+ rcu_dereference_raw(hlist_first_rcu(&b->list)),
+ struct bpf_local_storage_elem, map_node))) {
+ bpf_selem_unlink(selem);
+ cond_resched_rcu();
+ }
+ rcu_read_unlock();
+ }
+
+ /* While freeing the storage we may still need to access the map.
+ *
+ * e.g. when bpf_sk_storage_free() has unlinked selem from the map
+ * which then made the above while((selem = ...)) loop
+ * exit immediately.
+ *
+ * However, while freeing the storage one still needs to access the
+ * smap->elem_size to do the uncharging in
+ * bpf_selem_unlink_storage_nolock().
+ *
+ * Hence, wait another rcu grace period for the storage to be freed.
+ */
+ synchronize_rcu();
+
+ kvfree(smap->buckets);
+ kfree(smap);
+}
+
+int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
+{
+ if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
+ !(attr->map_flags & BPF_F_NO_PREALLOC) ||
+ attr->max_entries ||
+ attr->key_size != sizeof(int) || !attr->value_size ||
+ /* Enforce BTF for userspace sk dumping */
+ !attr->btf_key_type_id || !attr->btf_value_type_id)
+ return -EINVAL;
+
+ if (!bpf_capable())
+ return -EPERM;
+
+ if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE)
+ return -E2BIG;
+
+ return 0;
+}
+
+struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
+{
+ struct bpf_local_storage_map *smap;
+ unsigned int i;
+ u32 nbuckets;
+ u64 cost;
+ int ret;
+
+ smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN);
+ if (!smap)
+ return ERR_PTR(-ENOMEM);
+ bpf_map_init_from_attr(&smap->map, attr);
+
+ nbuckets = roundup_pow_of_two(num_possible_cpus());
+ /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
+ nbuckets = max_t(u32, 2, nbuckets);
+ smap->bucket_log = ilog2(nbuckets);
+ cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
+
+ ret = bpf_map_charge_init(&smap->map.memory, cost);
+ if (ret < 0) {
+ kfree(smap);
+ return ERR_PTR(ret);
+ }
+
+ smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
+ GFP_USER | __GFP_NOWARN);
+ if (!smap->buckets) {
+ bpf_map_charge_finish(&smap->map.memory);
+ kfree(smap);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ for (i = 0; i < nbuckets; i++) {
+ INIT_HLIST_HEAD(&smap->buckets[i].list);
+ raw_spin_lock_init(&smap->buckets[i].lock);
+ }
+
+ smap->elem_size =
+ sizeof(struct bpf_local_storage_elem) + attr->value_size;
+
+ return smap;
+}
+
+int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+ const struct btf *btf,
+ const struct btf_type *key_type,
+ const struct btf_type *value_type)
+{
+ u32 int_data;
+
+ if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
+ return -EINVAL;
+
+ int_data = *(u32 *)(key_type + 1);
+ if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
+ return -EINVAL;
+
+ return 0;
+}
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index fb27814..9cd1428 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -11,6 +11,8 @@
#include <linux/bpf_lsm.h>
#include <linux/kallsyms.h>
#include <linux/bpf_verifier.h>
+#include <net/bpf_sk_storage.h>
+#include <linux/bpf_local_storage.h>
/* For every LSM hook that allows attachment of BPF programs, declare a nop
* function where a BPF program can be attached.
@@ -45,10 +47,27 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
return 0;
}
+static const struct bpf_func_proto *
+bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_inode_storage_get:
+ return &bpf_inode_storage_get_proto;
+ case BPF_FUNC_inode_storage_delete:
+ return &bpf_inode_storage_delete_proto;
+ case BPF_FUNC_sk_storage_get:
+ return &sk_storage_get_btf_proto;
+ case BPF_FUNC_sk_storage_delete:
+ return &sk_storage_delete_btf_proto;
+ default:
+ return tracing_prog_func_proto(func_id, prog);
+ }
+}
+
const struct bpf_prog_ops lsm_prog_ops = {
};
const struct bpf_verifier_ops lsm_verifier_ops = {
- .get_func_proto = tracing_prog_func_proto,
+ .get_func_proto = bpf_lsm_func_proto,
.is_valid_access = btf_ctx_access,
};
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 969c5d4..4c3b543 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -298,8 +298,7 @@ static int check_zero_holes(const struct btf_type *t, void *data)
return -EINVAL;
mtype = btf_type_by_id(btf_vmlinux, member->type);
- mtype = btf_resolve_size(btf_vmlinux, mtype, &msize,
- NULL, NULL);
+ mtype = btf_resolve_size(btf_vmlinux, mtype, &msize);
if (IS_ERR(mtype))
return PTR_ERR(mtype);
prev_mend = moff + msize;
@@ -396,8 +395,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
u32 msize;
mtype = btf_type_by_id(btf_vmlinux, member->type);
- mtype = btf_resolve_size(btf_vmlinux, mtype, &msize,
- NULL, NULL);
+ mtype = btf_resolve_size(btf_vmlinux, mtype, &msize);
if (IS_ERR(mtype)) {
err = PTR_ERR(mtype);
goto reset_unlock;
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 91afdd4..f9ac693 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -21,6 +21,8 @@
#include <linux/btf_ids.h>
#include <linux/skmsg.h>
#include <linux/perf_event.h>
+#include <linux/bsearch.h>
+#include <linux/btf_ids.h>
#include <net/sock.h>
/* BTF (BPF Type Format) is the meta data format which describes
@@ -1079,23 +1081,27 @@ static const struct resolve_vertex *env_stack_peak(struct btf_verifier_env *env)
* *type_size: (x * y * sizeof(u32)). Hence, *type_size always
* corresponds to the return type.
* *elem_type: u32
+ * *elem_id: id of u32
* *total_nelems: (x * y). Hence, individual elem size is
* (*type_size / *total_nelems)
+ * *type_id: id of type if it's changed within the function, 0 if not
*
* type: is not an array (e.g. const struct X)
* return type: type "struct X"
* *type_size: sizeof(struct X)
* *elem_type: same as return type ("struct X")
+ * *elem_id: 0
* *total_nelems: 1
+ * *type_id: id of type if it's changed within the function, 0 if not
*/
-const struct btf_type *
-btf_resolve_size(const struct btf *btf, const struct btf_type *type,
- u32 *type_size, const struct btf_type **elem_type,
- u32 *total_nelems)
+static const struct btf_type *
+__btf_resolve_size(const struct btf *btf, const struct btf_type *type,
+ u32 *type_size, const struct btf_type **elem_type,
+ u32 *elem_id, u32 *total_nelems, u32 *type_id)
{
const struct btf_type *array_type = NULL;
- const struct btf_array *array;
- u32 i, size, nelems = 1;
+ const struct btf_array *array = NULL;
+ u32 i, size, nelems = 1, id = 0;
for (i = 0; i < MAX_RESOLVE_DEPTH; i++) {
switch (BTF_INFO_KIND(type->info)) {
@@ -1116,6 +1122,7 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type,
case BTF_KIND_VOLATILE:
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
+ id = type->type;
type = btf_type_by_id(btf, type->type);
break;
@@ -1146,10 +1153,21 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type,
*total_nelems = nelems;
if (elem_type)
*elem_type = type;
+ if (elem_id)
+ *elem_id = array ? array->type : 0;
+ if (type_id && id)
+ *type_id = id;
return array_type ? : type;
}
+const struct btf_type *
+btf_resolve_size(const struct btf *btf, const struct btf_type *type,
+ u32 *type_size)
+{
+ return __btf_resolve_size(btf, type, type_size, NULL, NULL, NULL, NULL);
+}
+
/* The input param "type_id" must point to a needs_resolve type */
static const struct btf_type *btf_type_id_resolve(const struct btf *btf,
u32 *type_id)
@@ -3870,16 +3888,22 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
return true;
}
-int btf_struct_access(struct bpf_verifier_log *log,
- const struct btf_type *t, int off, int size,
- enum bpf_access_type atype,
- u32 *next_btf_id)
+enum bpf_struct_walk_result {
+ /* < 0 error */
+ WALK_SCALAR = 0,
+ WALK_PTR,
+ WALK_STRUCT,
+};
+
+static int btf_struct_walk(struct bpf_verifier_log *log,
+ const struct btf_type *t, int off, int size,
+ u32 *next_btf_id)
{
u32 i, moff, mtrue_end, msize = 0, total_nelems = 0;
const struct btf_type *mtype, *elem_type = NULL;
const struct btf_member *member;
const char *tname, *mname;
- u32 vlen;
+ u32 vlen, elem_id, mid;
again:
tname = __btf_name_by_offset(btf_vmlinux, t->name_off);
@@ -3915,14 +3939,13 @@ int btf_struct_access(struct bpf_verifier_log *log,
/* Only allow structure for now, can be relaxed for
* other types later.
*/
- elem_type = btf_type_skip_modifiers(btf_vmlinux,
- array_elem->type, NULL);
- if (!btf_type_is_struct(elem_type))
+ t = btf_type_skip_modifiers(btf_vmlinux, array_elem->type,
+ NULL);
+ if (!btf_type_is_struct(t))
goto error;
- off = (off - moff) % elem_type->size;
- return btf_struct_access(log, elem_type, off, size, atype,
- next_btf_id);
+ off = (off - moff) % t->size;
+ goto again;
error:
bpf_log(log, "access beyond struct %s at off %u size %u\n",
@@ -3951,7 +3974,7 @@ int btf_struct_access(struct bpf_verifier_log *log,
*/
if (off <= moff &&
BITS_ROUNDUP_BYTES(end_bit) <= off + size)
- return SCALAR_VALUE;
+ return WALK_SCALAR;
/* off may be accessing a following member
*
@@ -3973,11 +3996,13 @@ int btf_struct_access(struct bpf_verifier_log *log,
break;
/* type of the field */
+ mid = member->type;
mtype = btf_type_by_id(btf_vmlinux, member->type);
mname = __btf_name_by_offset(btf_vmlinux, member->name_off);
- mtype = btf_resolve_size(btf_vmlinux, mtype, &msize,
- &elem_type, &total_nelems);
+ mtype = __btf_resolve_size(btf_vmlinux, mtype, &msize,
+ &elem_type, &elem_id, &total_nelems,
+ &mid);
if (IS_ERR(mtype)) {
bpf_log(log, "field %s doesn't have size\n", mname);
return -EFAULT;
@@ -3991,7 +4016,7 @@ int btf_struct_access(struct bpf_verifier_log *log,
if (btf_type_is_array(mtype)) {
u32 elem_idx;
- /* btf_resolve_size() above helps to
+ /* __btf_resolve_size() above helps to
* linearize a multi-dimensional array.
*
* The logic here is treating an array
@@ -4039,6 +4064,7 @@ int btf_struct_access(struct bpf_verifier_log *log,
elem_idx = (off - moff) / msize;
moff += elem_idx * msize;
mtype = elem_type;
+ mid = elem_id;
}
/* the 'off' we're looking for is either equal to start
@@ -4048,6 +4074,12 @@ int btf_struct_access(struct bpf_verifier_log *log,
/* our field must be inside that union or struct */
t = mtype;
+ /* return if the offset matches the member offset */
+ if (off == moff) {
+ *next_btf_id = mid;
+ return WALK_STRUCT;
+ }
+
/* adjust offset we're looking for */
off -= moff;
goto again;
@@ -4063,11 +4095,10 @@ int btf_struct_access(struct bpf_verifier_log *log,
mname, moff, tname, off, size);
return -EACCES;
}
-
stype = btf_type_skip_modifiers(btf_vmlinux, mtype->type, &id);
if (btf_type_is_struct(stype)) {
*next_btf_id = id;
- return PTR_TO_BTF_ID;
+ return WALK_PTR;
}
}
@@ -4084,12 +4115,84 @@ int btf_struct_access(struct bpf_verifier_log *log,
return -EACCES;
}
- return SCALAR_VALUE;
+ return WALK_SCALAR;
}
bpf_log(log, "struct %s doesn't have field at offset %d\n", tname, off);
return -EINVAL;
}
+int btf_struct_access(struct bpf_verifier_log *log,
+ const struct btf_type *t, int off, int size,
+ enum bpf_access_type atype __maybe_unused,
+ u32 *next_btf_id)
+{
+ int err;
+ u32 id;
+
+ do {
+ err = btf_struct_walk(log, t, off, size, &id);
+
+ switch (err) {
+ case WALK_PTR:
+ /* If we found the pointer or scalar on t+off,
+ * we're done.
+ */
+ *next_btf_id = id;
+ return PTR_TO_BTF_ID;
+ case WALK_SCALAR:
+ return SCALAR_VALUE;
+ case WALK_STRUCT:
+ /* We found nested struct, so continue the search
+ * by diving in it. At this point the offset is
+ * aligned with the new type, so set it to 0.
+ */
+ t = btf_type_by_id(btf_vmlinux, id);
+ off = 0;
+ break;
+ default:
+ /* It's either error or unknown return value..
+ * scream and leave.
+ */
+ if (WARN_ONCE(err > 0, "unknown btf_struct_walk return value"))
+ return -EINVAL;
+ return err;
+ }
+ } while (t);
+
+ return -EINVAL;
+}
+
+bool btf_struct_ids_match(struct bpf_verifier_log *log,
+ int off, u32 id, u32 need_type_id)
+{
+ const struct btf_type *type;
+ int err;
+
+ /* Are we already done? */
+ if (need_type_id == id && off == 0)
+ return true;
+
+again:
+ type = btf_type_by_id(btf_vmlinux, id);
+ if (!type)
+ return false;
+ err = btf_struct_walk(log, type, off, 1, &id);
+ if (err != WALK_STRUCT)
+ return false;
+
+ /* We found nested struct object. If it matches
+ * the requested ID, we're done. Otherwise let's
+ * continue the search with offset 0 in the new
+ * type.
+ */
+ if (need_type_id != id) {
+ off = 0;
+ goto again;
+ }
+
+ return true;
+}
+
int btf_resolve_helper_id(struct bpf_verifier_log *log,
const struct bpf_func_proto *fn, int arg)
{
@@ -4661,3 +4764,15 @@ u32 btf_id(const struct btf *btf)
{
return btf->id;
}
+
+static int btf_id_cmp_func(const void *a, const void *b)
+{
+ const int *pa = a, *pb = b;
+
+ return *pa - *pb;
+}
+
+bool btf_id_set_contains(struct btf_id_set *set, u32 id)
+{
+ return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
+}
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 6386b7b..7e1a8ad 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -79,8 +79,6 @@ struct bpf_cpu_map {
static DEFINE_PER_CPU(struct list_head, cpu_map_flush_list);
-static int bq_flush_to_queue(struct xdp_bulk_queue *bq);
-
static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
{
u32 value_size = attr->value_size;
@@ -658,6 +656,7 @@ static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
static int cpu_map_btf_id;
const struct bpf_map_ops cpu_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc = cpu_map_alloc,
.map_free = cpu_map_free,
.map_delete_elem = cpu_map_delete_elem,
@@ -669,7 +668,7 @@ const struct bpf_map_ops cpu_map_ops = {
.map_btf_id = &cpu_map_btf_id,
};
-static int bq_flush_to_queue(struct xdp_bulk_queue *bq)
+static void bq_flush_to_queue(struct xdp_bulk_queue *bq)
{
struct bpf_cpu_map_entry *rcpu = bq->obj;
unsigned int processed = 0, drops = 0;
@@ -678,7 +677,7 @@ static int bq_flush_to_queue(struct xdp_bulk_queue *bq)
int i;
if (unlikely(!bq->count))
- return 0;
+ return;
q = rcpu->queue;
spin_lock(&q->producer_lock);
@@ -701,13 +700,12 @@ static int bq_flush_to_queue(struct xdp_bulk_queue *bq)
/* Feedback loop via tracepoints */
trace_xdp_cpumap_enqueue(rcpu->map_id, processed, drops, to_cpu);
- return 0;
}
/* Runs under RCU-read-side, plus in softirq under NAPI protection.
* Thus, safe percpu variable access.
*/
-static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
+static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
{
struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
@@ -728,8 +726,6 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
if (!bq->flush_node.prev)
list_add(&bq->flush_node, flush_list);
-
- return 0;
}
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 10abb06..2b5ca93 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -341,14 +341,14 @@ bool dev_map_can_have_prog(struct bpf_map *map)
return false;
}
-static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
+static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
{
struct net_device *dev = bq->dev;
int sent = 0, drops = 0, err = 0;
int i;
if (unlikely(!bq->count))
- return 0;
+ return;
for (i = 0; i < bq->count; i++) {
struct xdp_frame *xdpf = bq->q[i];
@@ -369,7 +369,7 @@ static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, drops, err);
bq->dev_rx = NULL;
__list_del_clearprev(&bq->flush_node);
- return 0;
+ return;
error:
/* If ndo_xdp_xmit fails with an errno, no frames have been
* xmit'ed and it's our responsibility to them free all.
@@ -421,8 +421,8 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
/* Runs under RCU-read-side, plus in softirq under NAPI protection.
* Thus, safe percpu variable access.
*/
-static int bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
- struct net_device *dev_rx)
+static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
+ struct net_device *dev_rx)
{
struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
@@ -441,8 +441,6 @@ static int bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
if (!bq->flush_node.prev)
list_add(&bq->flush_node, flush_list);
-
- return 0;
}
static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
@@ -462,7 +460,8 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
if (unlikely(!xdpf))
return -EOVERFLOW;
- return bq_enqueue(dev, xdpf, dev_rx);
+ bq_enqueue(dev, xdpf, dev_rx);
+ return 0;
}
static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
@@ -751,6 +750,7 @@ static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
static int dev_map_btf_id;
const struct bpf_map_ops dev_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc = dev_map_alloc,
.map_free = dev_map_free,
.map_get_next_key = dev_map_get_next_key,
@@ -764,6 +764,7 @@ const struct bpf_map_ops dev_map_ops = {
static int dev_map_hash_map_btf_id;
const struct bpf_map_ops dev_map_hash_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc = dev_map_alloc,
.map_free = dev_map_free,
.map_get_next_key = dev_map_hash_get_next_key,
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 78dfff6..fe0e062 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -9,6 +9,7 @@
#include <linux/rculist_nulls.h>
#include <linux/random.h>
#include <uapi/linux/btf.h>
+#include <linux/rcupdate_trace.h>
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
#include "map_in_map.h"
@@ -577,8 +578,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
struct htab_elem *l;
u32 hash, key_size;
- /* Must be called with rcu_read_lock. */
- WARN_ON_ONCE(!rcu_read_lock_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
key_size = map->key_size;
@@ -941,7 +941,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
/* unknown flags */
return -EINVAL;
- WARN_ON_ONCE(!rcu_read_lock_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
key_size = map->key_size;
@@ -1032,7 +1032,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
/* unknown flags */
return -EINVAL;
- WARN_ON_ONCE(!rcu_read_lock_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
key_size = map->key_size;
@@ -1220,7 +1220,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
u32 hash, key_size;
int ret = -ENOENT;
- WARN_ON_ONCE(!rcu_read_lock_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
key_size = map->key_size;
@@ -1252,7 +1252,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
u32 hash, key_size;
int ret = -ENOENT;
- WARN_ON_ONCE(!rcu_read_lock_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
key_size = map->key_size;
@@ -1810,6 +1810,7 @@ static const struct bpf_iter_seq_info iter_seq_info = {
static int htab_map_btf_id;
const struct bpf_map_ops htab_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = htab_map_alloc_check,
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
@@ -1827,6 +1828,7 @@ const struct bpf_map_ops htab_map_ops = {
static int htab_lru_map_btf_id;
const struct bpf_map_ops htab_lru_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = htab_map_alloc_check,
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
@@ -1947,6 +1949,7 @@ static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key,
static int htab_percpu_map_btf_id;
const struct bpf_map_ops htab_percpu_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = htab_map_alloc_check,
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
@@ -1963,6 +1966,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
static int htab_lru_percpu_map_btf_id;
const struct bpf_map_ops htab_lru_percpu_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = htab_map_alloc_check,
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index be43ab3..5cc7425 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -601,6 +601,28 @@ const struct bpf_func_proto bpf_event_output_data_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
+BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
+ const void __user *, user_ptr)
+{
+ int ret = copy_from_user(dst, user_ptr, size);
+
+ if (unlikely(ret)) {
+ memset(dst, 0, size);
+ ret = -EFAULT;
+ }
+
+ return ret;
+}
+
+const struct bpf_func_proto bpf_copy_from_user_proto = {
+ .func = bpf_copy_from_user,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_ANYTHING,
+};
+
const struct bpf_func_proto bpf_get_current_task_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index fb878ba..b48a56f 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -20,6 +20,7 @@
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
+#include "preload/bpf_preload.h"
enum bpf_type {
BPF_TYPE_UNSPEC = 0,
@@ -369,9 +370,10 @@ static struct dentry *
bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
{
/* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future
- * extensions.
+ * extensions. That allows popoulate_bpffs() create special files.
*/
- if (strchr(dentry->d_name.name, '.'))
+ if ((dir->i_mode & S_IALLUGO) &&
+ strchr(dentry->d_name.name, '.'))
return ERR_PTR(-EPERM);
return simple_lookup(dir, dentry, flags);
@@ -409,6 +411,27 @@ static const struct inode_operations bpf_dir_iops = {
.unlink = simple_unlink,
};
+/* pin iterator link into bpffs */
+static int bpf_iter_link_pin_kernel(struct dentry *parent,
+ const char *name, struct bpf_link *link)
+{
+ umode_t mode = S_IFREG | S_IRUSR;
+ struct dentry *dentry;
+ int ret;
+
+ inode_lock(parent->d_inode);
+ dentry = lookup_one_len(name, parent, strlen(name));
+ if (IS_ERR(dentry)) {
+ inode_unlock(parent->d_inode);
+ return PTR_ERR(dentry);
+ }
+ ret = bpf_mkobj_ops(dentry, mode, link, &bpf_link_iops,
+ &bpf_iter_fops);
+ dput(dentry);
+ inode_unlock(parent->d_inode);
+ return ret;
+}
+
static int bpf_obj_do_pin(const char __user *pathname, void *raw,
enum bpf_type type)
{
@@ -638,6 +661,91 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
return 0;
}
+struct bpf_preload_ops *bpf_preload_ops;
+EXPORT_SYMBOL_GPL(bpf_preload_ops);
+
+static bool bpf_preload_mod_get(void)
+{
+ /* If bpf_preload.ko wasn't loaded earlier then load it now.
+ * When bpf_preload is built into vmlinux the module's __init
+ * function will populate it.
+ */
+ if (!bpf_preload_ops) {
+ request_module("bpf_preload");
+ if (!bpf_preload_ops)
+ return false;
+ }
+ /* And grab the reference, so the module doesn't disappear while the
+ * kernel is interacting with the kernel module and its UMD.
+ */
+ if (!try_module_get(bpf_preload_ops->owner)) {
+ pr_err("bpf_preload module get failed.\n");
+ return false;
+ }
+ return true;
+}
+
+static void bpf_preload_mod_put(void)
+{
+ if (bpf_preload_ops)
+ /* now user can "rmmod bpf_preload" if necessary */
+ module_put(bpf_preload_ops->owner);
+}
+
+static DEFINE_MUTEX(bpf_preload_lock);
+
+static int populate_bpffs(struct dentry *parent)
+{
+ struct bpf_preload_info objs[BPF_PRELOAD_LINKS] = {};
+ struct bpf_link *links[BPF_PRELOAD_LINKS] = {};
+ int err = 0, i;
+
+ /* grab the mutex to make sure the kernel interactions with bpf_preload
+ * UMD are serialized
+ */
+ mutex_lock(&bpf_preload_lock);
+
+ /* if bpf_preload.ko wasn't built into vmlinux then load it */
+ if (!bpf_preload_mod_get())
+ goto out;
+
+ if (!bpf_preload_ops->info.tgid) {
+ /* preload() will start UMD that will load BPF iterator programs */
+ err = bpf_preload_ops->preload(objs);
+ if (err)
+ goto out_put;
+ for (i = 0; i < BPF_PRELOAD_LINKS; i++) {
+ links[i] = bpf_link_by_id(objs[i].link_id);
+ if (IS_ERR(links[i])) {
+ err = PTR_ERR(links[i]);
+ goto out_put;
+ }
+ }
+ for (i = 0; i < BPF_PRELOAD_LINKS; i++) {
+ err = bpf_iter_link_pin_kernel(parent,
+ objs[i].link_name, links[i]);
+ if (err)
+ goto out_put;
+ /* do not unlink successfully pinned links even
+ * if later link fails to pin
+ */
+ links[i] = NULL;
+ }
+ /* finish() will tell UMD process to exit */
+ err = bpf_preload_ops->finish();
+ if (err)
+ goto out_put;
+ }
+out_put:
+ bpf_preload_mod_put();
+out:
+ mutex_unlock(&bpf_preload_lock);
+ for (i = 0; i < BPF_PRELOAD_LINKS && err; i++)
+ if (!IS_ERR_OR_NULL(links[i]))
+ bpf_link_put(links[i]);
+ return err;
+}
+
static int bpf_fill_super(struct super_block *sb, struct fs_context *fc)
{
static const struct tree_descr bpf_rfiles[] = { { "" } };
@@ -654,8 +762,8 @@ static int bpf_fill_super(struct super_block *sb, struct fs_context *fc)
inode = sb->s_root->d_inode;
inode->i_op = &bpf_dir_iops;
inode->i_mode &= ~S_IALLUGO;
+ populate_bpffs(sb->s_root);
inode->i_mode |= S_ISVTX | opts->mode;
-
return 0;
}
@@ -705,6 +813,8 @@ static int __init bpf_init(void)
{
int ret;
+ mutex_init(&bpf_preload_lock);
+
ret = sysfs_create_mount_point(fs_kobj, "bpf");
if (ret)
return ret;
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 44474bf..00e32f2 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -732,6 +732,7 @@ static int trie_check_btf(const struct bpf_map *map,
static int trie_map_btf_id;
const struct bpf_map_ops trie_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc = trie_alloc,
.map_free = trie_free,
.map_get_next_key = trie_get_next_key,
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 17738c9..39ab0b6 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -17,23 +17,17 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
if (IS_ERR(inner_map))
return inner_map;
- /* prog_array->aux->{type,jited} is a runtime binding.
- * Doing static check alone in the verifier is not enough.
- */
- if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
- inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
- inner_map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE ||
- inner_map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
- fdput(f);
- return ERR_PTR(-ENOTSUPP);
- }
-
/* Does not support >1 level map-in-map */
if (inner_map->inner_map_meta) {
fdput(f);
return ERR_PTR(-EINVAL);
}
+ if (!inner_map->ops->map_meta_equal) {
+ fdput(f);
+ return ERR_PTR(-ENOTSUPP);
+ }
+
if (map_value_has_spin_lock(inner_map)) {
fdput(f);
return ERR_PTR(-ENOTSUPP);
@@ -81,15 +75,14 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
return meta0->map_type == meta1->map_type &&
meta0->key_size == meta1->key_size &&
meta0->value_size == meta1->value_size &&
- meta0->map_flags == meta1->map_flags &&
- meta0->max_entries == meta1->max_entries;
+ meta0->map_flags == meta1->map_flags;
}
void *bpf_map_fd_get_ptr(struct bpf_map *map,
struct file *map_file /* not used */,
int ufd)
{
- struct bpf_map *inner_map;
+ struct bpf_map *inner_map, *inner_map_meta;
struct fd f;
f = fdget(ufd);
@@ -97,7 +90,8 @@ void *bpf_map_fd_get_ptr(struct bpf_map *map,
if (IS_ERR(inner_map))
return inner_map;
- if (bpf_map_meta_equal(map->inner_map_meta, inner_map))
+ inner_map_meta = map->inner_map_meta;
+ if (inner_map_meta->ops->map_meta_equal(inner_map_meta, inner_map))
bpf_map_inc(inner_map);
else
inner_map = ERR_PTR(-EINVAL);
diff --git a/kernel/bpf/map_in_map.h b/kernel/bpf/map_in_map.h
index a507bf6..bcb7534 100644
--- a/kernel/bpf/map_in_map.h
+++ b/kernel/bpf/map_in_map.h
@@ -11,8 +11,6 @@ struct bpf_map;
struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd);
void bpf_map_meta_free(struct bpf_map *map_meta);
-bool bpf_map_meta_equal(const struct bpf_map *meta0,
- const struct bpf_map *meta1);
void *bpf_map_fd_get_ptr(struct bpf_map *map, struct file *map_file,
int ufd);
void bpf_map_fd_put_ptr(void *ptr);
diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
index af86048..6a9542a 100644
--- a/kernel/bpf/map_iter.c
+++ b/kernel/bpf/map_iter.c
@@ -149,6 +149,19 @@ static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
bpf_map_put_with_uref(aux->map);
}
+void bpf_iter_map_show_fdinfo(const struct bpf_iter_aux_info *aux,
+ struct seq_file *seq)
+{
+ seq_printf(seq, "map_id:\t%u\n", aux->map->id);
+}
+
+int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux,
+ struct bpf_link_info *info)
+{
+ info->iter.map.map_id = aux->map->id;
+ return 0;
+}
+
DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,
struct bpf_map *map, void *key, void *value)
@@ -156,6 +169,8 @@ static const struct bpf_iter_reg bpf_map_elem_reg_info = {
.target = "bpf_map_elem",
.attach_target = bpf_iter_attach_map,
.detach_target = bpf_iter_detach_map,
+ .show_fdinfo = bpf_iter_map_show_fdinfo,
+ .fill_link_info = bpf_iter_map_fill_link_info,
.ctx_arg_info_size = 2,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__bpf_map_elem, key),
diff --git a/kernel/bpf/preload/Kconfig b/kernel/bpf/preload/Kconfig
new file mode 100644
index 0000000..ace4911
--- /dev/null
+++ b/kernel/bpf/preload/Kconfig
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config USERMODE_DRIVER
+ bool
+ default n
+
+menuconfig BPF_PRELOAD
+ bool "Preload BPF file system with kernel specific program and map iterators"
+ depends on BPF
+ # The dependency on !COMPILE_TEST prevents it from being enabled
+ # in allmodconfig or allyesconfig configurations
+ depends on !COMPILE_TEST
+ select USERMODE_DRIVER
+ help
+ This builds kernel module with several embedded BPF programs that are
+ pinned into BPF FS mount point as human readable files that are
+ useful in debugging and introspection of BPF programs and maps.
+
+if BPF_PRELOAD
+config BPF_PRELOAD_UMD
+ tristate "bpf_preload kernel module with user mode driver"
+ depends on CC_CAN_LINK
+ depends on m || CC_CAN_LINK_STATIC
+ default m
+ help
+ This builds bpf_preload kernel module with embedded user mode driver.
+endif
diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile
new file mode 100644
index 0000000..12c7b62
--- /dev/null
+++ b/kernel/bpf/preload/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
+
+LIBBPF_SRCS = $(srctree)/tools/lib/bpf/
+LIBBPF_A = $(obj)/libbpf.a
+LIBBPF_OUT = $(abspath $(obj))
+
+$(LIBBPF_A):
+ $(Q)$(MAKE) -C $(LIBBPF_SRCS) OUTPUT=$(LIBBPF_OUT)/ $(LIBBPF_OUT)/libbpf.a
+
+userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \
+ -I $(srctree)/tools/lib/ -Wno-unused-result
+
+userprogs := bpf_preload_umd
+
+bpf_preload_umd-objs := iterators/iterators.o
+bpf_preload_umd-userldlibs := $(LIBBPF_A) -lelf -lz
+
+$(obj)/bpf_preload_umd: $(LIBBPF_A)
+
+$(obj)/bpf_preload_umd_blob.o: $(obj)/bpf_preload_umd
+
+obj-$(CONFIG_BPF_PRELOAD_UMD) += bpf_preload.o
+bpf_preload-objs += bpf_preload_kern.o bpf_preload_umd_blob.o
diff --git a/kernel/bpf/preload/bpf_preload.h b/kernel/bpf/preload/bpf_preload.h
new file mode 100644
index 0000000..2f99322
--- /dev/null
+++ b/kernel/bpf/preload/bpf_preload.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BPF_PRELOAD_H
+#define _BPF_PRELOAD_H
+
+#include <linux/usermode_driver.h>
+#include "iterators/bpf_preload_common.h"
+
+struct bpf_preload_ops {
+ struct umd_info info;
+ int (*preload)(struct bpf_preload_info *);
+ int (*finish)(void);
+ struct module *owner;
+};
+extern struct bpf_preload_ops *bpf_preload_ops;
+#define BPF_PRELOAD_LINKS 2
+#endif
diff --git a/kernel/bpf/preload/bpf_preload_kern.c b/kernel/bpf/preload/bpf_preload_kern.c
new file mode 100644
index 0000000..79c5772
--- /dev/null
+++ b/kernel/bpf/preload/bpf_preload_kern.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pid.h>
+#include <linux/fs.h>
+#include <linux/sched/signal.h>
+#include "bpf_preload.h"
+
+extern char bpf_preload_umd_start;
+extern char bpf_preload_umd_end;
+
+static int preload(struct bpf_preload_info *obj);
+static int finish(void);
+
+static struct bpf_preload_ops umd_ops = {
+ .info.driver_name = "bpf_preload",
+ .preload = preload,
+ .finish = finish,
+ .owner = THIS_MODULE,
+};
+
+static int preload(struct bpf_preload_info *obj)
+{
+ int magic = BPF_PRELOAD_START;
+ loff_t pos = 0;
+ int i, err;
+ ssize_t n;
+
+ err = fork_usermode_driver(&umd_ops.info);
+ if (err)
+ return err;
+
+ /* send the start magic to let UMD proceed with loading BPF progs */
+ n = kernel_write(umd_ops.info.pipe_to_umh,
+ &magic, sizeof(magic), &pos);
+ if (n != sizeof(magic))
+ return -EPIPE;
+
+ /* receive bpf_link IDs and names from UMD */
+ pos = 0;
+ for (i = 0; i < BPF_PRELOAD_LINKS; i++) {
+ n = kernel_read(umd_ops.info.pipe_from_umh,
+ &obj[i], sizeof(*obj), &pos);
+ if (n != sizeof(*obj))
+ return -EPIPE;
+ }
+ return 0;
+}
+
+static int finish(void)
+{
+ int magic = BPF_PRELOAD_END;
+ struct pid *tgid;
+ loff_t pos = 0;
+ ssize_t n;
+
+ /* send the last magic to UMD. It will do a normal exit. */
+ n = kernel_write(umd_ops.info.pipe_to_umh,
+ &magic, sizeof(magic), &pos);
+ if (n != sizeof(magic))
+ return -EPIPE;
+ tgid = umd_ops.info.tgid;
+ wait_event(tgid->wait_pidfd, thread_group_exited(tgid));
+ umd_ops.info.tgid = NULL;
+ return 0;
+}
+
+static int __init load_umd(void)
+{
+ int err;
+
+ err = umd_load_blob(&umd_ops.info, &bpf_preload_umd_start,
+ &bpf_preload_umd_end - &bpf_preload_umd_start);
+ if (err)
+ return err;
+ bpf_preload_ops = &umd_ops;
+ return err;
+}
+
+static void __exit fini_umd(void)
+{
+ bpf_preload_ops = NULL;
+ /* kill UMD in case it's still there due to earlier error */
+ kill_pid(umd_ops.info.tgid, SIGKILL, 1);
+ umd_ops.info.tgid = NULL;
+ umd_unload_blob(&umd_ops.info);
+}
+late_initcall(load_umd);
+module_exit(fini_umd);
+MODULE_LICENSE("GPL");
diff --git a/kernel/bpf/preload/bpf_preload_umd_blob.S b/kernel/bpf/preload/bpf_preload_umd_blob.S
new file mode 100644
index 0000000..f1f4022
--- /dev/null
+++ b/kernel/bpf/preload/bpf_preload_umd_blob.S
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+ .section .init.rodata, "a"
+ .global bpf_preload_umd_start
+bpf_preload_umd_start:
+ .incbin "kernel/bpf/preload/bpf_preload_umd"
+ .global bpf_preload_umd_end
+bpf_preload_umd_end:
diff --git a/kernel/bpf/preload/iterators/.gitignore b/kernel/bpf/preload/iterators/.gitignore
new file mode 100644
index 0000000..ffdb702
--- /dev/null
+++ b/kernel/bpf/preload/iterators/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+/.output
diff --git a/kernel/bpf/preload/iterators/Makefile b/kernel/bpf/preload/iterators/Makefile
new file mode 100644
index 0000000..28fa8c1
--- /dev/null
+++ b/kernel/bpf/preload/iterators/Makefile
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: GPL-2.0
+OUTPUT := .output
+CLANG ?= clang
+LLC ?= llc
+LLVM_STRIP ?= llvm-strip
+DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool
+BPFTOOL ?= $(DEFAULT_BPFTOOL)
+LIBBPF_SRC := $(abspath ../../../../tools/lib/bpf)
+BPFOBJ := $(OUTPUT)/libbpf.a
+BPF_INCLUDE := $(OUTPUT)
+INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../../../tools/lib) \
+ -I$(abspath ../../../../tools/include/uapi)
+CFLAGS := -g -Wall
+
+abs_out := $(abspath $(OUTPUT))
+ifeq ($(V),1)
+Q =
+msg =
+else
+Q = @
+msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))";
+MAKEFLAGS += --no-print-directory
+submake_extras := feature_display=0
+endif
+
+.DELETE_ON_ERROR:
+
+.PHONY: all clean
+
+all: iterators.skel.h
+
+clean:
+ $(call msg,CLEAN)
+ $(Q)rm -rf $(OUTPUT) iterators
+
+iterators.skel.h: $(OUTPUT)/iterators.bpf.o | $(BPFTOOL)
+ $(call msg,GEN-SKEL,$@)
+ $(Q)$(BPFTOOL) gen skeleton $< > $@
+
+
+$(OUTPUT)/iterators.bpf.o: iterators.bpf.c $(BPFOBJ) | $(OUTPUT)
+ $(call msg,BPF,$@)
+ $(Q)$(CLANG) -g -O2 -target bpf $(INCLUDES) \
+ -c $(filter %.c,$^) -o $@ && \
+ $(LLVM_STRIP) -g $@
+
+$(OUTPUT):
+ $(call msg,MKDIR,$@)
+ $(Q)mkdir -p $(OUTPUT)
+
+$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)
+ $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) \
+ OUTPUT=$(abspath $(dir $@))/ $(abspath $@)
+
+$(DEFAULT_BPFTOOL):
+ $(Q)$(MAKE) $(submake_extras) -C ../../../../tools/bpf/bpftool \
+ prefix= OUTPUT=$(abs_out)/ DESTDIR=$(abs_out) install
diff --git a/kernel/bpf/preload/iterators/README b/kernel/bpf/preload/iterators/README
new file mode 100644
index 0000000..7fd6d39
--- /dev/null
+++ b/kernel/bpf/preload/iterators/README
@@ -0,0 +1,4 @@
+WARNING:
+If you change "iterators.bpf.c" do "make -j" in this directory to rebuild "iterators.skel.h".
+Make sure to have clang 10 installed.
+See Documentation/bpf/bpf_devel_QA.rst
diff --git a/kernel/bpf/preload/iterators/bpf_preload_common.h b/kernel/bpf/preload/iterators/bpf_preload_common.h
new file mode 100644
index 0000000..8464d1a
--- /dev/null
+++ b/kernel/bpf/preload/iterators/bpf_preload_common.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BPF_PRELOAD_COMMON_H
+#define _BPF_PRELOAD_COMMON_H
+
+#define BPF_PRELOAD_START 0x5555
+#define BPF_PRELOAD_END 0xAAAA
+
+struct bpf_preload_info {
+ char link_name[16];
+ int link_id;
+};
+
+#endif
diff --git a/kernel/bpf/preload/iterators/iterators.bpf.c b/kernel/bpf/preload/iterators/iterators.bpf.c
new file mode 100644
index 0000000..5ded550
--- /dev/null
+++ b/kernel/bpf/preload/iterators/iterators.bpf.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)
+struct seq_file;
+struct bpf_iter_meta {
+ struct seq_file *seq;
+ __u64 session_id;
+ __u64 seq_num;
+};
+
+struct bpf_map {
+ __u32 id;
+ char name[16];
+ __u32 max_entries;
+};
+
+struct bpf_iter__bpf_map {
+ struct bpf_iter_meta *meta;
+ struct bpf_map *map;
+};
+
+struct btf_type {
+ __u32 name_off;
+};
+
+struct btf_header {
+ __u32 str_len;
+};
+
+struct btf {
+ const char *strings;
+ struct btf_type **types;
+ struct btf_header hdr;
+};
+
+struct bpf_prog_aux {
+ __u32 id;
+ char name[16];
+ const char *attach_func_name;
+ struct bpf_prog *linked_prog;
+ struct bpf_func_info *func_info;
+ struct btf *btf;
+};
+
+struct bpf_prog {
+ struct bpf_prog_aux *aux;
+};
+
+struct bpf_iter__bpf_prog {
+ struct bpf_iter_meta *meta;
+ struct bpf_prog *prog;
+};
+#pragma clang attribute pop
+
+static const char *get_name(struct btf *btf, long btf_id, const char *fallback)
+{
+ struct btf_type **types, *t;
+ unsigned int name_off;
+ const char *str;
+
+ if (!btf)
+ return fallback;
+ str = btf->strings;
+ types = btf->types;
+ bpf_probe_read_kernel(&t, sizeof(t), types + btf_id);
+ name_off = BPF_CORE_READ(t, name_off);
+ if (name_off >= btf->hdr.str_len)
+ return fallback;
+ return str + name_off;
+}
+
+SEC("iter/bpf_map")
+int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ __u64 seq_num = ctx->meta->seq_num;
+ struct bpf_map *map = ctx->map;
+
+ if (!map)
+ return 0;
+
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " id name max_entries\n");
+
+ BPF_SEQ_PRINTF(seq, "%4u %-16s%6d\n", map->id, map->name, map->max_entries);
+ return 0;
+}
+
+SEC("iter/bpf_prog")
+int dump_bpf_prog(struct bpf_iter__bpf_prog *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ __u64 seq_num = ctx->meta->seq_num;
+ struct bpf_prog *prog = ctx->prog;
+ struct bpf_prog_aux *aux;
+
+ if (!prog)
+ return 0;
+
+ aux = prog->aux;
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " id name attached\n");
+
+ BPF_SEQ_PRINTF(seq, "%4u %-16s %s %s\n", aux->id,
+ get_name(aux->btf, aux->func_info[0].type_id, aux->name),
+ aux->attach_func_name, aux->linked_prog->aux->name);
+ return 0;
+}
+char LICENSE[] SEC("license") = "GPL";
diff --git a/kernel/bpf/preload/iterators/iterators.c b/kernel/bpf/preload/iterators/iterators.c
new file mode 100644
index 0000000..b7ff879
--- /dev/null
+++ b/kernel/bpf/preload/iterators/iterators.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <argp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/resource.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include <sys/mount.h>
+#include "iterators.skel.h"
+#include "bpf_preload_common.h"
+
+int to_kernel = -1;
+int from_kernel = 0;
+
+static int send_link_to_kernel(struct bpf_link *link, const char *link_name)
+{
+ struct bpf_preload_info obj = {};
+ struct bpf_link_info info = {};
+ __u32 info_len = sizeof(info);
+ int err;
+
+ err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &info, &info_len);
+ if (err)
+ return err;
+ obj.link_id = info.id;
+ if (strlen(link_name) >= sizeof(obj.link_name))
+ return -E2BIG;
+ strcpy(obj.link_name, link_name);
+ if (write(to_kernel, &obj, sizeof(obj)) != sizeof(obj))
+ return -EPIPE;
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ struct rlimit rlim = { RLIM_INFINITY, RLIM_INFINITY };
+ struct iterators_bpf *skel;
+ int err, magic;
+ int debug_fd;
+
+ debug_fd = open("/dev/console", O_WRONLY | O_NOCTTY | O_CLOEXEC);
+ if (debug_fd < 0)
+ return 1;
+ to_kernel = dup(1);
+ close(1);
+ dup(debug_fd);
+ /* now stdin and stderr point to /dev/console */
+
+ read(from_kernel, &magic, sizeof(magic));
+ if (magic != BPF_PRELOAD_START) {
+ printf("bad start magic %d\n", magic);
+ return 1;
+ }
+ setrlimit(RLIMIT_MEMLOCK, &rlim);
+ /* libbpf opens BPF object and loads it into the kernel */
+ skel = iterators_bpf__open_and_load();
+ if (!skel) {
+ /* iterators.skel.h is little endian.
+ * libbpf doesn't support automatic little->big conversion
+ * of BPF bytecode yet.
+ * The program load will fail in such case.
+ */
+ printf("Failed load could be due to wrong endianness\n");
+ return 1;
+ }
+ err = iterators_bpf__attach(skel);
+ if (err)
+ goto cleanup;
+
+ /* send two bpf_link IDs with names to the kernel */
+ err = send_link_to_kernel(skel->links.dump_bpf_map, "maps.debug");
+ if (err)
+ goto cleanup;
+ err = send_link_to_kernel(skel->links.dump_bpf_prog, "progs.debug");
+ if (err)
+ goto cleanup;
+
+ /* The kernel will proceed with pinnging the links in bpffs.
+ * UMD will wait on read from pipe.
+ */
+ read(from_kernel, &magic, sizeof(magic));
+ if (magic != BPF_PRELOAD_END) {
+ printf("bad final magic %d\n", magic);
+ err = -EINVAL;
+ }
+cleanup:
+ iterators_bpf__destroy(skel);
+
+ return err != 0;
+}
diff --git a/kernel/bpf/preload/iterators/iterators.skel.h b/kernel/bpf/preload/iterators/iterators.skel.h
new file mode 100644
index 0000000..c317135
--- /dev/null
+++ b/kernel/bpf/preload/iterators/iterators.skel.h
@@ -0,0 +1,410 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/* THIS FILE IS AUTOGENERATED! */
+#ifndef __ITERATORS_BPF_SKEL_H__
+#define __ITERATORS_BPF_SKEL_H__
+
+#include <stdlib.h>
+#include <bpf/libbpf.h>
+
+struct iterators_bpf {
+ struct bpf_object_skeleton *skeleton;
+ struct bpf_object *obj;
+ struct {
+ struct bpf_map *rodata;
+ } maps;
+ struct {
+ struct bpf_program *dump_bpf_map;
+ struct bpf_program *dump_bpf_prog;
+ } progs;
+ struct {
+ struct bpf_link *dump_bpf_map;
+ struct bpf_link *dump_bpf_prog;
+ } links;
+ struct iterators_bpf__rodata {
+ char dump_bpf_map____fmt[35];
+ char dump_bpf_map____fmt_1[14];
+ char dump_bpf_prog____fmt[32];
+ char dump_bpf_prog____fmt_2[17];
+ } *rodata;
+};
+
+static void
+iterators_bpf__destroy(struct iterators_bpf *obj)
+{
+ if (!obj)
+ return;
+ if (obj->skeleton)
+ bpf_object__destroy_skeleton(obj->skeleton);
+ free(obj);
+}
+
+static inline int
+iterators_bpf__create_skeleton(struct iterators_bpf *obj);
+
+static inline struct iterators_bpf *
+iterators_bpf__open_opts(const struct bpf_object_open_opts *opts)
+{
+ struct iterators_bpf *obj;
+
+ obj = (typeof(obj))calloc(1, sizeof(*obj));
+ if (!obj)
+ return NULL;
+ if (iterators_bpf__create_skeleton(obj))
+ goto err;
+ if (bpf_object__open_skeleton(obj->skeleton, opts))
+ goto err;
+
+ return obj;
+err:
+ iterators_bpf__destroy(obj);
+ return NULL;
+}
+
+static inline struct iterators_bpf *
+iterators_bpf__open(void)
+{
+ return iterators_bpf__open_opts(NULL);
+}
+
+static inline int
+iterators_bpf__load(struct iterators_bpf *obj)
+{
+ return bpf_object__load_skeleton(obj->skeleton);
+}
+
+static inline struct iterators_bpf *
+iterators_bpf__open_and_load(void)
+{
+ struct iterators_bpf *obj;
+
+ obj = iterators_bpf__open();
+ if (!obj)
+ return NULL;
+ if (iterators_bpf__load(obj)) {
+ iterators_bpf__destroy(obj);
+ return NULL;
+ }
+ return obj;
+}
+
+static inline int
+iterators_bpf__attach(struct iterators_bpf *obj)
+{
+ return bpf_object__attach_skeleton(obj->skeleton);
+}
+
+static inline void
+iterators_bpf__detach(struct iterators_bpf *obj)
+{
+ return bpf_object__detach_skeleton(obj->skeleton);
+}
+
+static inline int
+iterators_bpf__create_skeleton(struct iterators_bpf *obj)
+{
+ struct bpf_object_skeleton *s;
+
+ s = (typeof(s))calloc(1, sizeof(*s));
+ if (!s)
+ return -1;
+ obj->skeleton = s;
+
+ s->sz = sizeof(*s);
+ s->name = "iterators_bpf";
+ s->obj = &obj->obj;
+
+ /* maps */
+ s->map_cnt = 1;
+ s->map_skel_sz = sizeof(*s->maps);
+ s->maps = (typeof(s->maps))calloc(s->map_cnt, s->map_skel_sz);
+ if (!s->maps)
+ goto err;
+
+ s->maps[0].name = "iterator.rodata";
+ s->maps[0].map = &obj->maps.rodata;
+ s->maps[0].mmaped = (void **)&obj->rodata;
+
+ /* programs */
+ s->prog_cnt = 2;
+ s->prog_skel_sz = sizeof(*s->progs);
+ s->progs = (typeof(s->progs))calloc(s->prog_cnt, s->prog_skel_sz);
+ if (!s->progs)
+ goto err;
+
+ s->progs[0].name = "dump_bpf_map";
+ s->progs[0].prog = &obj->progs.dump_bpf_map;
+ s->progs[0].link = &obj->links.dump_bpf_map;
+
+ s->progs[1].name = "dump_bpf_prog";
+ s->progs[1].prog = &obj->progs.dump_bpf_prog;
+ s->progs[1].link = &obj->links.dump_bpf_prog;
+
+ s->data_sz = 7128;
+ s->data = (void *)"\
+\x7f\x45\x4c\x46\x02\x01\x01\0\0\0\0\0\0\0\0\0\x01\0\xf7\0\x01\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\x18\x18\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\x40\0\x0f\0\
+\x0e\0\x79\x12\0\0\0\0\0\0\x79\x26\0\0\0\0\0\0\x79\x17\x08\0\0\0\0\0\x15\x07\
+\x1a\0\0\0\0\0\x79\x21\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\0\0\0\0\0\0\
+\x07\x04\0\0\xe8\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x02\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\xb7\x03\0\0\x23\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\0\x61\x71\0\
+\0\0\0\0\0\x7b\x1a\xe8\xff\0\0\0\0\xb7\x01\0\0\x04\0\0\0\xbf\x72\0\0\0\0\0\0\
+\x0f\x12\0\0\0\0\0\0\x7b\x2a\xf0\xff\0\0\0\0\x61\x71\x14\0\0\0\0\0\x7b\x1a\xf8\
+\xff\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xe8\xff\xff\xff\xbf\x61\0\0\0\0\0\
+\0\x18\x02\0\0\x23\0\0\0\0\0\0\0\0\0\0\0\xb7\x03\0\0\x0e\0\0\0\xb7\x05\0\0\x18\
+\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\x79\x12\0\0\0\0\
+\0\0\x79\x26\0\0\0\0\0\0\x79\x11\x08\0\0\0\0\0\x15\x01\x3b\0\0\0\0\0\x79\x17\0\
+\0\0\0\0\0\x79\x21\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\
+\x04\0\0\xd0\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x02\0\0\x31\0\0\0\0\0\0\0\0\0\
+\0\0\xb7\x03\0\0\x20\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\0\x7b\x6a\xc8\
+\xff\0\0\0\0\x61\x71\0\0\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xb7\x03\0\0\x04\0\0\0\
+\xbf\x79\0\0\0\0\0\0\x0f\x39\0\0\0\0\0\0\x79\x71\x28\0\0\0\0\0\x79\x78\x30\0\0\
+\0\0\0\x15\x08\x18\0\0\0\0\0\xb7\x02\0\0\0\0\0\0\x0f\x21\0\0\0\0\0\0\x61\x11\
+\x04\0\0\0\0\0\x79\x83\x08\0\0\0\0\0\x67\x01\0\0\x03\0\0\0\x0f\x13\0\0\0\0\0\0\
+\x79\x86\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf8\xff\xff\xff\xb7\x02\0\
+\0\x08\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x01\0\0\0\0\0\0\x79\xa3\xf8\xff\0\0\0\0\
+\x0f\x13\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf4\xff\xff\xff\xb7\x02\0\
+\0\x04\0\0\0\x85\0\0\0\x04\0\0\0\xb7\x03\0\0\x04\0\0\0\x61\xa1\xf4\xff\0\0\0\0\
+\x61\x82\x10\0\0\0\0\0\x3d\x21\x02\0\0\0\0\0\x0f\x16\0\0\0\0\0\0\xbf\x69\0\0\0\
+\0\0\0\x7b\x9a\xd8\xff\0\0\0\0\x79\x71\x18\0\0\0\0\0\x7b\x1a\xe0\xff\0\0\0\0\
+\x79\x71\x20\0\0\0\0\0\x79\x11\0\0\0\0\0\0\x0f\x31\0\0\0\0\0\0\x7b\x1a\xe8\xff\
+\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xd0\xff\xff\xff\x79\xa1\xc8\xff\0\0\0\
+\0\x18\x02\0\0\x51\0\0\0\0\0\0\0\0\0\0\0\xb7\x03\0\0\x11\0\0\0\xb7\x05\0\0\x20\
+\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\x20\x20\x69\x64\
+\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x6d\
+\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\
+\x73\x25\x36\x64\x0a\0\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\
+\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\x74\x61\x63\x68\x65\x64\x0a\0\x25\x34\
+\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\x25\x73\x0a\0\x47\x50\x4c\0\x9f\
+\xeb\x01\0\x18\0\0\0\0\0\0\0\x1c\x04\0\0\x1c\x04\0\0\0\x05\0\0\0\0\0\0\0\0\0\
+\x02\x02\0\0\0\x01\0\0\0\x02\0\0\x04\x10\0\0\0\x13\0\0\0\x03\0\0\0\0\0\0\0\x18\
+\0\0\0\x04\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\x08\0\0\0\0\0\0\0\0\0\0\x02\x0d\0\
+\0\0\0\0\0\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x01\0\0\0\x20\0\0\0\0\0\0\x01\x04\
+\0\0\0\x20\0\0\x01\x24\0\0\0\x01\0\0\x0c\x05\0\0\0\xa3\0\0\0\x03\0\0\x04\x18\0\
+\0\0\xb1\0\0\0\x09\0\0\0\0\0\0\0\xb5\0\0\0\x0b\0\0\0\x40\0\0\0\xc0\0\0\0\x0b\0\
+\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x0a\0\0\0\xc8\0\0\0\0\0\0\x07\0\0\0\0\xd1\0\0\
+\0\0\0\0\x08\x0c\0\0\0\xd7\0\0\0\0\0\0\x01\x08\0\0\0\x40\0\0\0\x98\x01\0\0\x03\
+\0\0\x04\x18\0\0\0\xa0\x01\0\0\x0e\0\0\0\0\0\0\0\xa3\x01\0\0\x11\0\0\0\x20\0\0\
+\0\xa8\x01\0\0\x0e\0\0\0\xa0\0\0\0\xb4\x01\0\0\0\0\0\x08\x0f\0\0\0\xba\x01\0\0\
+\0\0\0\x01\x04\0\0\0\x20\0\0\0\xc7\x01\0\0\0\0\0\x01\x01\0\0\0\x08\0\0\x01\0\0\
+\0\0\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x10\0\0\0\xcc\x01\0\0\0\0\0\x01\x04\
+\0\0\0\x20\0\0\0\0\0\0\0\0\0\0\x02\x14\0\0\0\x30\x02\0\0\x02\0\0\x04\x10\0\0\0\
+\x13\0\0\0\x03\0\0\0\0\0\0\0\x43\x02\0\0\x15\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\
+\x18\0\0\0\0\0\0\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x13\0\0\0\x48\x02\0\0\x01\0\
+\0\x0c\x16\0\0\0\x94\x02\0\0\x01\0\0\x04\x08\0\0\0\x9d\x02\0\0\x19\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\x02\x1a\0\0\0\xee\x02\0\0\x06\0\0\x04\x38\0\0\0\xa0\x01\0\0\
+\x0e\0\0\0\0\0\0\0\xa3\x01\0\0\x11\0\0\0\x20\0\0\0\xfb\x02\0\0\x1b\0\0\0\xc0\0\
+\0\0\x0c\x03\0\0\x15\0\0\0\0\x01\0\0\x18\x03\0\0\x1d\0\0\0\x40\x01\0\0\x22\x03\
+\0\0\x1e\0\0\0\x80\x01\0\0\0\0\0\0\0\0\0\x02\x1c\0\0\0\0\0\0\0\0\0\0\x0a\x10\0\
+\0\0\0\0\0\0\0\0\0\x02\x1f\0\0\0\0\0\0\0\0\0\0\x02\x20\0\0\0\x6c\x03\0\0\x02\0\
+\0\x04\x08\0\0\0\x7a\x03\0\0\x0e\0\0\0\0\0\0\0\x83\x03\0\0\x0e\0\0\0\x20\0\0\0\
+\x22\x03\0\0\x03\0\0\x04\x18\0\0\0\x8d\x03\0\0\x1b\0\0\0\0\0\0\0\x95\x03\0\0\
+\x21\0\0\0\x40\0\0\0\x9b\x03\0\0\x23\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x22\0\0\
+\0\0\0\0\0\0\0\0\x02\x24\0\0\0\x9f\x03\0\0\x01\0\0\x04\x04\0\0\0\xaa\x03\0\0\
+\x0e\0\0\0\0\0\0\0\x13\x04\0\0\x01\0\0\x04\x04\0\0\0\x1c\x04\0\0\x0e\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x23\0\0\0\x92\x04\0\0\0\0\0\
+\x0e\x25\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x0e\0\0\0\
+\xa6\x04\0\0\0\0\0\x0e\x27\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\
+\x12\0\0\0\x20\0\0\0\xbc\x04\0\0\0\0\0\x0e\x29\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\
+\0\0\0\0\x1c\0\0\0\x12\0\0\0\x11\0\0\0\xd1\x04\0\0\0\0\0\x0e\x2b\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x04\0\0\0\xe8\x04\0\0\0\0\0\x0e\
+\x2d\0\0\0\x01\0\0\0\xf0\x04\0\0\x04\0\0\x0f\0\0\0\0\x26\0\0\0\0\0\0\0\x23\0\0\
+\0\x28\0\0\0\x23\0\0\0\x0e\0\0\0\x2a\0\0\0\x31\0\0\0\x20\0\0\0\x2c\0\0\0\x51\0\
+\0\0\x11\0\0\0\xf8\x04\0\0\x01\0\0\x0f\0\0\0\0\x2e\0\0\0\0\0\0\0\x04\0\0\0\0\
+\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\x6d\x65\
+\x74\x61\0\x6d\x61\x70\0\x63\x74\x78\0\x69\x6e\x74\0\x64\x75\x6d\x70\x5f\x62\
+\x70\x66\x5f\x6d\x61\x70\0\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x6d\x61\x70\0\
+\x30\x3a\x30\0\x2f\x77\x2f\x6e\x65\x74\x2d\x6e\x65\x78\x74\x2f\x6b\x65\x72\x6e\
+\x65\x6c\x2f\x62\x70\x66\x2f\x70\x72\x65\x6c\x6f\x61\x64\x2f\x69\x74\x65\x72\
+\x61\x74\x6f\x72\x73\x2f\x69\x74\x65\x72\x61\x74\x6f\x72\x73\x2e\x62\x70\x66\
+\x2e\x63\0\x09\x73\x74\x72\x75\x63\x74\x20\x73\x65\x71\x5f\x66\x69\x6c\x65\x20\
+\x2a\x73\x65\x71\x20\x3d\x20\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\
+\x65\x71\x3b\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x6d\x65\x74\x61\0\x73\x65\
+\x71\0\x73\x65\x73\x73\x69\x6f\x6e\x5f\x69\x64\0\x73\x65\x71\x5f\x6e\x75\x6d\0\
+\x73\x65\x71\x5f\x66\x69\x6c\x65\0\x5f\x5f\x75\x36\x34\0\x6c\x6f\x6e\x67\x20\
+\x6c\x6f\x6e\x67\x20\x75\x6e\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x30\x3a\
+\x31\0\x09\x73\x74\x72\x75\x63\x74\x20\x62\x70\x66\x5f\x6d\x61\x70\x20\x2a\x6d\
+\x61\x70\x20\x3d\x20\x63\x74\x78\x2d\x3e\x6d\x61\x70\x3b\0\x09\x69\x66\x20\x28\
+\x21\x6d\x61\x70\x29\0\x30\x3a\x32\0\x09\x5f\x5f\x75\x36\x34\x20\x73\x65\x71\
+\x5f\x6e\x75\x6d\x20\x3d\x20\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\
+\x65\x71\x5f\x6e\x75\x6d\x3b\0\x09\x69\x66\x20\x28\x73\x65\x71\x5f\x6e\x75\x6d\
+\x20\x3d\x3d\x20\x30\x29\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\
+\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\
+\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\
+\x74\x72\x69\x65\x73\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x6d\x61\x70\0\x69\
+\x64\0\x6e\x61\x6d\x65\0\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\0\x5f\x5f\
+\x75\x33\x32\0\x75\x6e\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x63\x68\x61\
+\x72\0\x5f\x5f\x41\x52\x52\x41\x59\x5f\x53\x49\x5a\x45\x5f\x54\x59\x50\x45\x5f\
+\x5f\0\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\
+\x71\x2c\x20\x22\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x25\x36\x64\x5c\x6e\x22\
+\x2c\x20\x6d\x61\x70\x2d\x3e\x69\x64\x2c\x20\x6d\x61\x70\x2d\x3e\x6e\x61\x6d\
+\x65\x2c\x20\x6d\x61\x70\x2d\x3e\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\
+\x29\x3b\0\x7d\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x70\
+\x72\x6f\x67\0\x70\x72\x6f\x67\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\
+\x6f\x67\0\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x09\x73\x74\
+\x72\x75\x63\x74\x20\x62\x70\x66\x5f\x70\x72\x6f\x67\x20\x2a\x70\x72\x6f\x67\
+\x20\x3d\x20\x63\x74\x78\x2d\x3e\x70\x72\x6f\x67\x3b\0\x09\x69\x66\x20\x28\x21\
+\x70\x72\x6f\x67\x29\0\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x61\x75\x78\0\x09\x61\
+\x75\x78\x20\x3d\x20\x70\x72\x6f\x67\x2d\x3e\x61\x75\x78\x3b\0\x09\x09\x42\x50\
+\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\
+\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
+\x20\x20\x20\x61\x74\x74\x61\x63\x68\x65\x64\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\
+\x5f\x70\x72\x6f\x67\x5f\x61\x75\x78\0\x61\x74\x74\x61\x63\x68\x5f\x66\x75\x6e\
+\x63\x5f\x6e\x61\x6d\x65\0\x6c\x69\x6e\x6b\x65\x64\x5f\x70\x72\x6f\x67\0\x66\
+\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\x62\x74\x66\0\x09\x42\x50\x46\x5f\x53\x45\
+\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x25\x34\x75\x20\
+\x25\x2d\x31\x36\x73\x20\x25\x73\x20\x25\x73\x5c\x6e\x22\x2c\x20\x61\x75\x78\
+\x2d\x3e\x69\x64\x2c\0\x30\x3a\x34\0\x30\x3a\x35\0\x09\x69\x66\x20\x28\x21\x62\
+\x74\x66\x29\0\x62\x70\x66\x5f\x66\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\x69\x6e\
+\x73\x6e\x5f\x6f\x66\x66\0\x74\x79\x70\x65\x5f\x69\x64\0\x30\0\x73\x74\x72\x69\
+\x6e\x67\x73\0\x74\x79\x70\x65\x73\0\x68\x64\x72\0\x62\x74\x66\x5f\x68\x65\x61\
+\x64\x65\x72\0\x73\x74\x72\x5f\x6c\x65\x6e\0\x09\x74\x79\x70\x65\x73\x20\x3d\
+\x20\x62\x74\x66\x2d\x3e\x74\x79\x70\x65\x73\x3b\0\x09\x62\x70\x66\x5f\x70\x72\
+\x6f\x62\x65\x5f\x72\x65\x61\x64\x5f\x6b\x65\x72\x6e\x65\x6c\x28\x26\x74\x2c\
+\x20\x73\x69\x7a\x65\x6f\x66\x28\x74\x29\x2c\x20\x74\x79\x70\x65\x73\x20\x2b\
+\x20\x62\x74\x66\x5f\x69\x64\x29\x3b\0\x09\x73\x74\x72\x20\x3d\x20\x62\x74\x66\
+\x2d\x3e\x73\x74\x72\x69\x6e\x67\x73\x3b\0\x62\x74\x66\x5f\x74\x79\x70\x65\0\
+\x6e\x61\x6d\x65\x5f\x6f\x66\x66\0\x09\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\x3d\
+\x20\x42\x50\x46\x5f\x43\x4f\x52\x45\x5f\x52\x45\x41\x44\x28\x74\x2c\x20\x6e\
+\x61\x6d\x65\x5f\x6f\x66\x66\x29\x3b\0\x30\x3a\x32\x3a\x30\0\x09\x69\x66\x20\
+\x28\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\x3e\x3d\x20\x62\x74\x66\x2d\x3e\x68\
+\x64\x72\x2e\x73\x74\x72\x5f\x6c\x65\x6e\x29\0\x09\x72\x65\x74\x75\x72\x6e\x20\
+\x73\x74\x72\x20\x2b\x20\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x3b\0\x30\x3a\x33\0\
+\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\0\
+\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\
+\x2e\x31\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\
+\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\
+\x5f\x66\x6d\x74\x2e\x32\0\x4c\x49\x43\x45\x4e\x53\x45\0\x2e\x72\x6f\x64\x61\
+\x74\x61\0\x6c\x69\x63\x65\x6e\x73\x65\0\x9f\xeb\x01\0\x20\0\0\0\0\0\0\0\x24\0\
+\0\0\x24\0\0\0\x44\x02\0\0\x68\x02\0\0\xa4\x01\0\0\x08\0\0\0\x31\0\0\0\x01\0\0\
+\0\0\0\0\0\x07\0\0\0\x56\x02\0\0\x01\0\0\0\0\0\0\0\x17\0\0\0\x10\0\0\0\x31\0\0\
+\0\x09\0\0\0\0\0\0\0\x42\0\0\0\x7b\0\0\0\x1e\x40\x01\0\x08\0\0\0\x42\0\0\0\x7b\
+\0\0\0\x24\x40\x01\0\x10\0\0\0\x42\0\0\0\xf2\0\0\0\x1d\x48\x01\0\x18\0\0\0\x42\
+\0\0\0\x13\x01\0\0\x06\x50\x01\0\x20\0\0\0\x42\0\0\0\x22\x01\0\0\x1d\x44\x01\0\
+\x28\0\0\0\x42\0\0\0\x47\x01\0\0\x06\x5c\x01\0\x38\0\0\0\x42\0\0\0\x5a\x01\0\0\
+\x03\x60\x01\0\x70\0\0\0\x42\0\0\0\xe0\x01\0\0\x02\x68\x01\0\xf0\0\0\0\x42\0\0\
+\0\x2e\x02\0\0\x01\x70\x01\0\x56\x02\0\0\x1a\0\0\0\0\0\0\0\x42\0\0\0\x7b\0\0\0\
+\x1e\x84\x01\0\x08\0\0\0\x42\0\0\0\x7b\0\0\0\x24\x84\x01\0\x10\0\0\0\x42\0\0\0\
+\x64\x02\0\0\x1f\x8c\x01\0\x18\0\0\0\x42\0\0\0\x88\x02\0\0\x06\x98\x01\0\x20\0\
+\0\0\x42\0\0\0\xa1\x02\0\0\x0e\xa4\x01\0\x28\0\0\0\x42\0\0\0\x22\x01\0\0\x1d\
+\x88\x01\0\x30\0\0\0\x42\0\0\0\x47\x01\0\0\x06\xa8\x01\0\x40\0\0\0\x42\0\0\0\
+\xb3\x02\0\0\x03\xac\x01\0\x80\0\0\0\x42\0\0\0\x26\x03\0\0\x02\xb4\x01\0\xb8\0\
+\0\0\x42\0\0\0\x61\x03\0\0\x06\x08\x01\0\xd0\0\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\
+\xd8\0\0\0\x42\0\0\0\xb2\x03\0\0\x0f\x14\x01\0\xe0\0\0\0\x42\0\0\0\xc7\x03\0\0\
+\x2d\x18\x01\0\xf0\0\0\0\x42\0\0\0\xfe\x03\0\0\x0d\x10\x01\0\0\x01\0\0\x42\0\0\
+\0\0\0\0\0\0\0\0\0\x08\x01\0\0\x42\0\0\0\xc7\x03\0\0\x02\x18\x01\0\x20\x01\0\0\
+\x42\0\0\0\x25\x04\0\0\x0d\x1c\x01\0\x38\x01\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\x40\
+\x01\0\0\x42\0\0\0\x25\x04\0\0\x0d\x1c\x01\0\x58\x01\0\0\x42\0\0\0\x25\x04\0\0\
+\x0d\x1c\x01\0\x60\x01\0\0\x42\0\0\0\x53\x04\0\0\x1b\x20\x01\0\x68\x01\0\0\x42\
+\0\0\0\x53\x04\0\0\x06\x20\x01\0\x70\x01\0\0\x42\0\0\0\x76\x04\0\0\x0d\x28\x01\
+\0\x78\x01\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\x80\x01\0\0\x42\0\0\0\x26\x03\0\0\x02\
+\xb4\x01\0\xf8\x01\0\0\x42\0\0\0\x2e\x02\0\0\x01\xc4\x01\0\x10\0\0\0\x31\0\0\0\
+\x07\0\0\0\0\0\0\0\x02\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\0\0\
+\0\0\0\x10\0\0\0\x02\0\0\0\xee\0\0\0\0\0\0\0\x20\0\0\0\x08\0\0\0\x1e\x01\0\0\0\
+\0\0\0\x70\0\0\0\x0d\0\0\0\x3e\0\0\0\0\0\0\0\x80\0\0\0\x0d\0\0\0\xee\0\0\0\0\0\
+\0\0\xa0\0\0\0\x0d\0\0\0\x1e\x01\0\0\0\0\0\0\x56\x02\0\0\x12\0\0\0\0\0\0\0\x14\
+\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\0\0\0\0\0\x10\0\0\0\x14\0\
+\0\0\xee\0\0\0\0\0\0\0\x20\0\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\x28\0\0\0\x08\0\0\
+\0\x1e\x01\0\0\0\0\0\0\x80\0\0\0\x1a\0\0\0\x3e\0\0\0\0\0\0\0\x90\0\0\0\x1a\0\0\
+\0\xee\0\0\0\0\0\0\0\xa8\0\0\0\x1a\0\0\0\x59\x03\0\0\0\0\0\0\xb0\0\0\0\x1a\0\0\
+\0\x5d\x03\0\0\0\0\0\0\xc0\0\0\0\x1f\0\0\0\x8b\x03\0\0\0\0\0\0\xd8\0\0\0\x20\0\
+\0\0\xee\0\0\0\0\0\0\0\xf0\0\0\0\x20\0\0\0\x3e\0\0\0\0\0\0\0\x18\x01\0\0\x24\0\
+\0\0\x3e\0\0\0\0\0\0\0\x50\x01\0\0\x1a\0\0\0\xee\0\0\0\0\0\0\0\x60\x01\0\0\x20\
+\0\0\0\x4d\x04\0\0\0\0\0\0\x88\x01\0\0\x1a\0\0\0\x1e\x01\0\0\0\0\0\0\x98\x01\0\
+\0\x1a\0\0\0\x8e\x04\0\0\0\0\0\0\xa0\x01\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd6\0\0\0\0\0\x02\0\x70\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\xc8\0\0\0\0\0\x02\0\xf0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\xcf\0\0\0\0\0\x03\0\x78\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xc1\0\0\0\0\0\x03\0\x80\
+\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xba\0\0\0\0\0\x03\0\xf8\x01\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\x14\0\0\0\x01\0\x04\0\0\0\0\0\0\0\0\0\x23\0\0\0\0\0\0\0\xf4\0\0\0\
+\x01\0\x04\0\x23\0\0\0\0\0\0\0\x0e\0\0\0\0\0\0\0\x28\0\0\0\x01\0\x04\0\x31\0\0\
+\0\0\0\0\0\x20\0\0\0\0\0\0\0\xdd\0\0\0\x01\0\x04\0\x51\0\0\0\0\0\0\0\x11\0\0\0\
+\0\0\0\0\0\0\0\0\x03\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\x03\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\xb2\0\0\0\x11\0\x05\0\0\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0\x3d\0\0\0\x12\
+\0\x02\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\x5b\0\0\0\x12\0\x03\0\0\0\0\0\0\0\0\
+\0\x08\x02\0\0\0\0\0\0\x48\0\0\0\0\0\0\0\x01\0\0\0\x0c\0\0\0\xc8\0\0\0\0\0\0\0\
+\x01\0\0\0\x0c\0\0\0\x50\0\0\0\0\0\0\0\x01\0\0\0\x0c\0\0\0\xd0\x01\0\0\0\0\0\0\
+\x01\0\0\0\x0c\0\0\0\xf0\x03\0\0\0\0\0\0\x0a\0\0\0\x0c\0\0\0\xfc\x03\0\0\0\0\0\
+\0\x0a\0\0\0\x0c\0\0\0\x08\x04\0\0\0\0\0\0\x0a\0\0\0\x0c\0\0\0\x14\x04\0\0\0\0\
+\0\0\x0a\0\0\0\x0c\0\0\0\x2c\x04\0\0\0\0\0\0\0\0\0\0\x0d\0\0\0\x2c\0\0\0\0\0\0\
+\0\0\0\0\0\x0a\0\0\0\x3c\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x50\0\0\0\0\0\0\0\0\0\
+\0\0\x0a\0\0\0\x60\0\0\0\0\0\0\0\0\0\0\0\x0a\0\0\0\x70\0\0\0\0\0\0\0\0\0\0\0\
+\x0a\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\0\x0a\0\0\0\x90\0\0\0\0\0\0\0\0\0\0\0\x0a\0\
+\0\0\xa0\0\0\0\0\0\0\0\0\0\0\0\x0a\0\0\0\xb0\0\0\0\0\0\0\0\0\0\0\0\x0a\0\0\0\
+\xc0\0\0\0\0\0\0\0\0\0\0\0\x0a\0\0\0\xd0\0\0\0\0\0\0\0\0\0\0\0\x0a\0\0\0\xe8\0\
+\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xf8\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x08\x01\0\0\
+\0\0\0\0\0\0\0\0\x0b\0\0\0\x18\x01\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x28\x01\0\0\0\
+\0\0\0\0\0\0\0\x0b\0\0\0\x38\x01\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x48\x01\0\0\0\0\
+\0\0\0\0\0\0\x0b\0\0\0\x58\x01\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x68\x01\0\0\0\0\0\
+\0\0\0\0\0\x0b\0\0\0\x78\x01\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x88\x01\0\0\0\0\0\0\
+\0\0\0\0\x0b\0\0\0\x98\x01\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xa8\x01\0\0\0\0\0\0\0\
+\0\0\0\x0b\0\0\0\xb8\x01\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xc8\x01\0\0\0\0\0\0\0\0\
+\0\0\x0b\0\0\0\xd8\x01\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xe8\x01\0\0\0\0\0\0\0\0\0\
+\0\x0b\0\0\0\xf8\x01\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x08\x02\0\0\0\0\0\0\0\0\0\0\
+\x0b\0\0\0\x18\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x28\x02\0\0\0\0\0\0\0\0\0\0\
+\x0b\0\0\0\x38\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x48\x02\0\0\0\0\0\0\0\0\0\0\
+\x0b\0\0\0\x58\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x68\x02\0\0\0\0\0\0\0\0\0\0\
+\x0b\0\0\0\x78\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x94\x02\0\0\0\0\0\0\0\0\0\0\
+\x0a\0\0\0\xa4\x02\0\0\0\0\0\0\0\0\0\0\x0a\0\0\0\xb4\x02\0\0\0\0\0\0\0\0\0\0\
+\x0a\0\0\0\xc4\x02\0\0\0\0\0\0\0\0\0\0\x0a\0\0\0\xd4\x02\0\0\0\0\0\0\0\0\0\0\
+\x0a\0\0\0\xe4\x02\0\0\0\0\0\0\0\0\0\0\x0a\0\0\0\xf4\x02\0\0\0\0\0\0\0\0\0\0\
+\x0a\0\0\0\x0c\x03\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x1c\x03\0\0\0\0\0\0\0\0\0\0\
+\x0b\0\0\0\x2c\x03\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x3c\x03\0\0\0\0\0\0\0\0\0\0\
+\x0b\0\0\0\x4c\x03\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x5c\x03\0\0\0\0\0\0\0\0\0\0\
+\x0b\0\0\0\x6c\x03\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x7c\x03\0\0\0\0\0\0\0\0\0\0\
+\x0b\0\0\0\x8c\x03\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x9c\x03\0\0\0\0\0\0\0\0\0\0\
+\x0b\0\0\0\xac\x03\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xbc\x03\0\0\0\0\0\0\0\0\0\0\
+\x0b\0\0\0\xcc\x03\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xdc\x03\0\0\0\0\0\0\0\0\0\0\
+\x0b\0\0\0\xec\x03\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xfc\x03\0\0\0\0\0\0\0\0\0\0\
+\x0b\0\0\0\x0c\x04\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x1c\x04\0\0\0\0\0\0\0\0\0\0\
+\x0b\0\0\0\x4e\x4f\x41\x42\x43\x44\x4d\0\x2e\x74\x65\x78\x74\0\x2e\x72\x65\x6c\
+\x2e\x42\x54\x46\x2e\x65\x78\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\
+\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\
+\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\
+\x61\x70\0\x2e\x72\x65\x6c\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x6d\x61\x70\0\
+\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x2e\x72\x65\x6c\x69\x74\
+\x65\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x2e\x6c\x6c\x76\x6d\x5f\x61\x64\
+\x64\x72\x73\x69\x67\0\x6c\x69\x63\x65\x6e\x73\x65\0\x2e\x73\x74\x72\x74\x61\
+\x62\0\x2e\x73\x79\x6d\x74\x61\x62\0\x2e\x72\x6f\x64\x61\x74\x61\0\x2e\x72\x65\
+\x6c\x2e\x42\x54\x46\0\x4c\x49\x43\x45\x4e\x53\x45\0\x4c\x42\x42\x31\x5f\x37\0\
+\x4c\x42\x42\x31\x5f\x36\0\x4c\x42\x42\x30\x5f\x34\0\x4c\x42\x42\x31\x5f\x33\0\
+\x4c\x42\x42\x30\x5f\x33\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\
+\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x32\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\
+\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x31\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\x4e\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\
+\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\x6d\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\x01\0\0\0\0\0\0\x08\
+\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xa1\0\0\0\
+\x01\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x48\x03\0\0\0\0\0\0\x62\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x89\0\0\0\x01\0\0\0\x03\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xaa\x03\0\0\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xad\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\xae\x03\0\0\0\0\0\0\x34\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\xe2\x0c\0\0\0\0\0\0\x2c\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\x99\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x10\x11\0\0\0\
+\0\0\0\x80\x01\0\0\0\0\0\0\x0e\0\0\0\x0d\0\0\0\x08\0\0\0\0\0\0\0\x18\0\0\0\0\0\
+\0\0\x4a\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x90\x12\0\0\0\0\0\0\
+\x20\0\0\0\0\0\0\0\x08\0\0\0\x02\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\x69\
+\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xb0\x12\0\0\0\0\0\0\x20\0\0\0\
+\0\0\0\0\x08\0\0\0\x03\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\xa9\0\0\0\x09\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd0\x12\0\0\0\0\0\0\x50\0\0\0\0\0\0\0\
+\x08\0\0\0\x06\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\x07\0\0\0\x09\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x20\x13\0\0\0\0\0\0\xe0\x03\0\0\0\0\0\0\x08\0\0\
+\0\x07\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\x7b\0\0\0\x03\x4c\xff\x6f\0\0\
+\0\x80\0\0\0\0\0\0\0\0\0\0\0\0\0\x17\0\0\0\0\0\0\x07\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x91\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\x07\x17\0\0\0\0\0\0\x0a\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0";
+
+ return 0;
+err:
+ bpf_object__destroy_skeleton(s);
+ return -1;
+}
+
+#endif /* __ITERATORS_BPF_SKEL_H__ */
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index 44184f8..0ee2347 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -257,6 +257,7 @@ static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
static int queue_map_btf_id;
const struct bpf_map_ops queue_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = queue_stack_map_alloc_check,
.map_alloc = queue_stack_map_alloc,
.map_free = queue_stack_map_free,
@@ -273,6 +274,7 @@ const struct bpf_map_ops queue_map_ops = {
static int stack_map_btf_id;
const struct bpf_map_ops stack_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = queue_stack_map_alloc_check,
.map_alloc = queue_stack_map_alloc,
.map_free = queue_stack_map_free,
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index 90b29c5..5a2ba11 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -351,6 +351,7 @@ static int reuseport_array_get_next_key(struct bpf_map *map, void *key,
static int reuseport_array_map_btf_id;
const struct bpf_map_ops reuseport_array_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = reuseport_array_alloc_check,
.map_alloc = reuseport_array_alloc,
.map_free = reuseport_array_free,
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 002f8a5..31cb04a 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -287,6 +287,7 @@ static __poll_t ringbuf_map_poll(struct bpf_map *map, struct file *filp,
static int ringbuf_map_btf_id;
const struct bpf_map_ops ringbuf_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc = ringbuf_map_alloc,
.map_free = ringbuf_map_free,
.map_mmap = ringbuf_map_mmap,
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index cfed0ac..a2fa006 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -839,6 +839,7 @@ static void stack_map_free(struct bpf_map *map)
static int stack_trace_map_btf_id;
const struct bpf_map_ops stack_trace_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc = stack_map_alloc,
.map_free = stack_map_free,
.map_get_next_key = stack_map_get_next_key,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b999e7f..178c147 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -29,6 +29,7 @@
#include <linux/bpf_lsm.h>
#include <linux/poll.h>
#include <linux/bpf-netns.h>
+#include <linux/rcupdate_trace.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@@ -90,6 +91,7 @@ int bpf_check_uarg_tail_zero(void __user *uaddr,
}
const struct bpf_map_ops bpf_map_offload_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc = bpf_map_offload_map_alloc,
.map_free = bpf_map_offload_map_free,
.map_check_btf = map_check_no_btf,
@@ -157,10 +159,11 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
if (bpf_map_is_dev_bound(map)) {
return bpf_map_offload_update_elem(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
- map->map_type == BPF_MAP_TYPE_SOCKHASH ||
- map->map_type == BPF_MAP_TYPE_SOCKMAP ||
map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
return map->ops->map_update_elem(map, key, value, flags);
+ } else if (map->map_type == BPF_MAP_TYPE_SOCKHASH ||
+ map->map_type == BPF_MAP_TYPE_SOCKMAP) {
+ return sock_map_update_elem_sys(map, key, value, flags);
} else if (IS_FD_PROG_ARRAY(map)) {
return bpf_fd_array_map_update_elem(map, f.file, key, value,
flags);
@@ -768,7 +771,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
if (map->map_type != BPF_MAP_TYPE_HASH &&
map->map_type != BPF_MAP_TYPE_ARRAY &&
map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
- map->map_type != BPF_MAP_TYPE_SK_STORAGE)
+ map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
+ map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
return -ENOTSUPP;
if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
map->value_size) {
@@ -1728,10 +1732,14 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
btf_put(prog->aux->btf);
bpf_prog_free_linfo(prog);
- if (deferred)
- call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
- else
+ if (deferred) {
+ if (prog->aux->sleepable)
+ call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu);
+ else
+ call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
+ } else {
__bpf_prog_put_rcu(&prog->aux->rcu);
+ }
}
static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
@@ -2101,6 +2109,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
BPF_F_ANY_ALIGNMENT |
BPF_F_TEST_STATE_FREQ |
+ BPF_F_SLEEPABLE |
BPF_F_TEST_RND_HI32))
return -EINVAL;
@@ -2156,6 +2165,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
}
prog->aux->offload_requested = !!attr->prog_ifindex;
+ prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
err = security_bpf_prog_alloc(prog->aux);
if (err)
@@ -4014,9 +4024,31 @@ static int link_detach(union bpf_attr *attr)
return ret;
}
-static int bpf_link_inc_not_zero(struct bpf_link *link)
+static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
{
- return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? 0 : -ENOENT;
+ return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT);
+}
+
+struct bpf_link *bpf_link_by_id(u32 id)
+{
+ struct bpf_link *link;
+
+ if (!id)
+ return ERR_PTR(-ENOENT);
+
+ spin_lock_bh(&link_idr_lock);
+ /* before link is "settled", ID is 0, pretend it doesn't exist yet */
+ link = idr_find(&link_idr, id);
+ if (link) {
+ if (link->id)
+ link = bpf_link_inc_not_zero(link);
+ else
+ link = ERR_PTR(-EAGAIN);
+ } else {
+ link = ERR_PTR(-ENOENT);
+ }
+ spin_unlock_bh(&link_idr_lock);
+ return link;
}
#define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id
@@ -4025,7 +4057,7 @@ static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
{
struct bpf_link *link;
u32 id = attr->link_id;
- int fd, err;
+ int fd;
if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID))
return -EINVAL;
@@ -4033,21 +4065,9 @@ static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- spin_lock_bh(&link_idr_lock);
- link = idr_find(&link_idr, id);
- /* before link is "settled", ID is 0, pretend it doesn't exist yet */
- if (link) {
- if (link->id)
- err = bpf_link_inc_not_zero(link);
- else
- err = -EAGAIN;
- } else {
- err = -ENOENT;
- }
- spin_unlock_bh(&link_idr_lock);
-
- if (err)
- return err;
+ link = bpf_link_by_id(id);
+ if (IS_ERR(link))
+ return PTR_ERR(link);
fd = bpf_link_new_fd(link);
if (fd < 0)
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 9be85aa..7dd523a 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -7,6 +7,8 @@
#include <linux/rbtree_latch.h>
#include <linux/perf_event.h>
#include <linux/btf.h>
+#include <linux/rcupdate_trace.h>
+#include <linux/rcupdate_wait.h>
/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -210,9 +212,12 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
* updates to trampoline would change the code from underneath the
* preempted task. Hence wait for tasks to voluntarily schedule or go
* to userspace.
+ * The same trampoline can hold both sleepable and non-sleepable progs.
+ * synchronize_rcu_tasks_trace() is needed to make sure all sleepable
+ * programs finish executing.
+ * Wait for these two grace periods together.
*/
-
- synchronize_rcu_tasks();
+ synchronize_rcu_mult(call_rcu_tasks, call_rcu_tasks_trace);
err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
&tr->func.model, flags, tprogs,
@@ -344,7 +349,14 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
goto out;
bpf_image_ksym_del(&tr->ksym);
- /* wait for tasks to get out of trampoline before freeing it */
+ /* This code will be executed when all bpf progs (both sleepable and
+ * non-sleepable) went through
+ * bpf_prog_put()->call_rcu[_tasks_trace]()->bpf_prog_free_deferred().
+ * Hence no need for another synchronize_rcu_tasks_trace() here,
+ * but synchronize_rcu_tasks() is still needed, since trampoline
+ * may not have had any sleepable programs and we need to wait
+ * for tasks to get out of trampoline code before freeing it.
+ */
synchronize_rcu_tasks();
bpf_jit_free_exec(tr->image);
hlist_del(&tr->hlist);
@@ -394,6 +406,17 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
rcu_read_unlock();
}
+void notrace __bpf_prog_enter_sleepable(void)
+{
+ rcu_read_lock_trace();
+ might_fault();
+}
+
+void notrace __bpf_prog_exit_sleepable(void)
+{
+ rcu_read_unlock_trace();
+}
+
int __weak
arch_prepare_bpf_trampoline(void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 47e74f09..86fdebb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -21,6 +21,7 @@
#include <linux/ctype.h>
#include <linux/error-injection.h>
#include <linux/bpf_lsm.h>
+#include <linux/btf_ids.h>
#include "disasm.h"
@@ -2625,11 +2626,19 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
#define MAX_PACKET_OFF 0xffff
+static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
+{
+ return prog->aux->linked_prog ? prog->aux->linked_prog->type
+ : prog->type;
+}
+
static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
const struct bpf_call_arg_meta *meta,
enum bpf_access_type t)
{
- switch (env->prog->type) {
+ enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
+
+ switch (prog_type) {
/* Program types only with direct read access go here! */
case BPF_PROG_TYPE_LWT_IN:
case BPF_PROG_TYPE_LWT_OUT:
@@ -3872,6 +3881,33 @@ static int int_ptr_type_to_size(enum bpf_arg_type type)
return -EINVAL;
}
+static int resolve_map_arg_type(struct bpf_verifier_env *env,
+ const struct bpf_call_arg_meta *meta,
+ enum bpf_arg_type *arg_type)
+{
+ if (!meta->map_ptr) {
+ /* kernel subsystem misconfigured verifier */
+ verbose(env, "invalid map_ptr to access map->type\n");
+ return -EACCES;
+ }
+
+ switch (meta->map_ptr->map_type) {
+ case BPF_MAP_TYPE_SOCKMAP:
+ case BPF_MAP_TYPE_SOCKHASH:
+ if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
+ *arg_type = ARG_PTR_TO_SOCKET;
+ } else {
+ verbose(env, "invalid arg_type for sockmap/sockhash\n");
+ return -EINVAL;
+ }
+ break;
+
+ default:
+ break;
+ }
+ return 0;
+}
+
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_call_arg_meta *meta,
const struct bpf_func_proto *fn)
@@ -3904,6 +3940,14 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return -EACCES;
}
+ if (arg_type == ARG_PTR_TO_MAP_VALUE ||
+ arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
+ arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
+ err = resolve_map_arg_type(env, meta, &arg_type);
+ if (err)
+ return err;
+ }
+
if (arg_type == ARG_PTR_TO_MAP_KEY ||
arg_type == ARG_PTR_TO_MAP_VALUE ||
arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
@@ -3960,16 +4004,21 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
goto err_type;
}
} else if (arg_type == ARG_PTR_TO_BTF_ID) {
+ bool ids_match = false;
+
expected_type = PTR_TO_BTF_ID;
if (type != expected_type)
goto err_type;
if (!fn->check_btf_id) {
if (reg->btf_id != meta->btf_id) {
- verbose(env, "Helper has type %s got %s in R%d\n",
- kernel_type_name(meta->btf_id),
- kernel_type_name(reg->btf_id), regno);
-
- return -EACCES;
+ ids_match = btf_struct_ids_match(&env->log, reg->off, reg->btf_id,
+ meta->btf_id);
+ if (!ids_match) {
+ verbose(env, "Helper has type %s got %s in R%d\n",
+ kernel_type_name(meta->btf_id),
+ kernel_type_name(reg->btf_id), regno);
+ return -EACCES;
+ }
}
} else if (!fn->check_btf_id(reg->btf_id, arg)) {
verbose(env, "Helper does not support %s in R%d\n",
@@ -3977,7 +4026,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return -EACCES;
}
- if (!tnum_is_const(reg->var_off) || reg->var_off.value || reg->off) {
+ if ((reg->off && !ids_match) || !tnum_is_const(reg->var_off) || reg->var_off.value) {
verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
regno);
return -EACCES;
@@ -4143,6 +4192,38 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return -EACCES;
}
+static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
+{
+ enum bpf_attach_type eatype = env->prog->expected_attach_type;
+ enum bpf_prog_type type = resolve_prog_type(env->prog);
+
+ if (func_id != BPF_FUNC_map_update_elem)
+ return false;
+
+ /* It's not possible to get access to a locked struct sock in these
+ * contexts, so updating is safe.
+ */
+ switch (type) {
+ case BPF_PROG_TYPE_TRACING:
+ if (eatype == BPF_TRACE_ITER)
+ return true;
+ break;
+ case BPF_PROG_TYPE_SOCKET_FILTER:
+ case BPF_PROG_TYPE_SCHED_CLS:
+ case BPF_PROG_TYPE_SCHED_ACT:
+ case BPF_PROG_TYPE_XDP:
+ case BPF_PROG_TYPE_SK_REUSEPORT:
+ case BPF_PROG_TYPE_FLOW_DISSECTOR:
+ case BPF_PROG_TYPE_SK_LOOKUP:
+ return true;
+ default:
+ break;
+ }
+
+ verbose(env, "cannot update sockmap in this context\n");
+ return false;
+}
+
static int check_map_func_compatibility(struct bpf_verifier_env *env,
struct bpf_map *map, int func_id)
{
@@ -4214,7 +4295,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_msg_redirect_map &&
func_id != BPF_FUNC_sk_select_reuseport &&
- func_id != BPF_FUNC_map_lookup_elem)
+ func_id != BPF_FUNC_map_lookup_elem &&
+ !may_update_sockmap(env, func_id))
goto error;
break;
case BPF_MAP_TYPE_SOCKHASH:
@@ -4223,7 +4305,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_msg_redirect_hash &&
func_id != BPF_FUNC_sk_select_reuseport &&
- func_id != BPF_FUNC_map_lookup_elem)
+ func_id != BPF_FUNC_map_lookup_elem &&
+ !may_update_sockmap(env, func_id))
goto error;
break;
case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
@@ -4242,6 +4325,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
func_id != BPF_FUNC_sk_storage_delete)
goto error;
break;
+ case BPF_MAP_TYPE_INODE_STORAGE:
+ if (func_id != BPF_FUNC_inode_storage_get &&
+ func_id != BPF_FUNC_inode_storage_delete)
+ goto error;
+ break;
default:
break;
}
@@ -4315,6 +4403,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
goto error;
break;
+ case BPF_FUNC_inode_storage_get:
+ case BPF_FUNC_inode_storage_delete:
+ if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
+ goto error;
+ break;
default:
break;
}
@@ -4775,6 +4868,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return -EINVAL;
}
+ if (fn->allowed && !fn->allowed(env->prog)) {
+ verbose(env, "helper call is not allowed in probe\n");
+ return -EINVAL;
+ }
+
/* With LD_ABS/IND some JITs save/restore skb from r1. */
changes_data = bpf_helper_changes_pkt_data(fn->func);
if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
@@ -5732,6 +5830,67 @@ static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
__update_reg_bounds(dst_reg);
}
+static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
+ struct bpf_reg_state *src_reg)
+{
+ bool src_known = tnum_subreg_is_const(src_reg->var_off);
+ bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
+ struct tnum var32_off = tnum_subreg(dst_reg->var_off);
+ s32 smin_val = src_reg->s32_min_value;
+
+ /* Assuming scalar64_min_max_xor will be called so it is safe
+ * to skip updating register for known case.
+ */
+ if (src_known && dst_known)
+ return;
+
+ /* We get both minimum and maximum from the var32_off. */
+ dst_reg->u32_min_value = var32_off.value;
+ dst_reg->u32_max_value = var32_off.value | var32_off.mask;
+
+ if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
+ /* XORing two positive sign numbers gives a positive,
+ * so safe to cast u32 result into s32.
+ */
+ dst_reg->s32_min_value = dst_reg->u32_min_value;
+ dst_reg->s32_max_value = dst_reg->u32_max_value;
+ } else {
+ dst_reg->s32_min_value = S32_MIN;
+ dst_reg->s32_max_value = S32_MAX;
+ }
+}
+
+static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
+ struct bpf_reg_state *src_reg)
+{
+ bool src_known = tnum_is_const(src_reg->var_off);
+ bool dst_known = tnum_is_const(dst_reg->var_off);
+ s64 smin_val = src_reg->smin_value;
+
+ if (src_known && dst_known) {
+ /* dst_reg->var_off.value has been updated earlier */
+ __mark_reg_known(dst_reg, dst_reg->var_off.value);
+ return;
+ }
+
+ /* We get both minimum and maximum from the var_off. */
+ dst_reg->umin_value = dst_reg->var_off.value;
+ dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
+
+ if (dst_reg->smin_value >= 0 && smin_val >= 0) {
+ /* XORing two positive sign numbers gives a positive,
+ * so safe to cast u64 result into s64.
+ */
+ dst_reg->smin_value = dst_reg->umin_value;
+ dst_reg->smax_value = dst_reg->umax_value;
+ } else {
+ dst_reg->smin_value = S64_MIN;
+ dst_reg->smax_value = S64_MAX;
+ }
+
+ __update_reg_bounds(dst_reg);
+}
+
static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
u64 umin_val, u64 umax_val)
{
@@ -6040,6 +6199,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
scalar32_min_max_or(dst_reg, &src_reg);
scalar_min_max_or(dst_reg, &src_reg);
break;
+ case BPF_XOR:
+ dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
+ scalar32_min_max_xor(dst_reg, &src_reg);
+ scalar_min_max_xor(dst_reg, &src_reg);
+ break;
case BPF_LSH:
if (umax_val >= insn_bitness) {
/* Shifts greater than 31 or 63 are undefined.
@@ -7287,7 +7451,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
u8 mode = BPF_MODE(insn->code);
int i, err;
- if (!may_access_skb(env->prog->type)) {
+ if (!may_access_skb(resolve_prog_type(env->prog))) {
verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
return -EINVAL;
}
@@ -7375,11 +7539,12 @@ static int check_return_code(struct bpf_verifier_env *env)
const struct bpf_prog *prog = env->prog;
struct bpf_reg_state *reg;
struct tnum range = tnum_range(0, 1);
+ enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
int err;
/* LSM and struct_ops func-ptr's return type could be "void" */
- if ((env->prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
- env->prog->type == BPF_PROG_TYPE_LSM) &&
+ if ((prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
+ prog_type == BPF_PROG_TYPE_LSM) &&
!prog->aux->attach_func_proto->type)
return 0;
@@ -7398,7 +7563,7 @@ static int check_return_code(struct bpf_verifier_env *env)
return -EACCES;
}
- switch (env->prog->type) {
+ switch (prog_type) {
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
@@ -9154,6 +9319,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
struct bpf_prog *prog)
{
+ enum bpf_prog_type prog_type = resolve_prog_type(prog);
/*
* Validate that trace type programs use preallocated hash maps.
*
@@ -9171,8 +9337,8 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
* now, but warnings are emitted so developers are made aware of
* the unsafety and can fix their programs before this is enforced.
*/
- if (is_tracing_prog_type(prog->type) && !is_preallocated_map(map)) {
- if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
+ if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
+ if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
verbose(env, "perf_event programs can only use preallocated hash map\n");
return -EINVAL;
}
@@ -9184,8 +9350,8 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
}
- if ((is_tracing_prog_type(prog->type) ||
- prog->type == BPF_PROG_TYPE_SOCKET_FILTER) &&
+ if ((is_tracing_prog_type(prog_type) ||
+ prog_type == BPF_PROG_TYPE_SOCKET_FILTER) &&
map_value_has_spin_lock(map)) {
verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
return -EINVAL;
@@ -9202,6 +9368,23 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
return -EINVAL;
}
+ if (prog->aux->sleepable)
+ switch (map->map_type) {
+ case BPF_MAP_TYPE_HASH:
+ case BPF_MAP_TYPE_LRU_HASH:
+ case BPF_MAP_TYPE_ARRAY:
+ if (!is_preallocated_map(map)) {
+ verbose(env,
+ "Sleepable programs can only use preallocated hash maps\n");
+ return -EINVAL;
+ }
+ break;
+ default:
+ verbose(env,
+ "Sleepable programs can only use array and hash maps\n");
+ return -EINVAL;
+ }
+
return 0;
}
@@ -9897,7 +10080,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
insn->code = BPF_LDX | BPF_PROBE_MEM |
BPF_SIZE((insn)->code);
env->prog->aux->num_exentries++;
- } else if (env->prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
+ } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
verbose(env, "Writes through BTF pointers are not allowed\n");
return -EINVAL;
}
@@ -10820,6 +11003,37 @@ static int check_attach_modify_return(struct bpf_prog *prog, unsigned long addr)
return -EINVAL;
}
+/* non exhaustive list of sleepable bpf_lsm_*() functions */
+BTF_SET_START(btf_sleepable_lsm_hooks)
+#ifdef CONFIG_BPF_LSM
+BTF_ID(func, bpf_lsm_bprm_committed_creds)
+#else
+BTF_ID_UNUSED
+#endif
+BTF_SET_END(btf_sleepable_lsm_hooks)
+
+static int check_sleepable_lsm_hook(u32 btf_id)
+{
+ return btf_id_set_contains(&btf_sleepable_lsm_hooks, btf_id);
+}
+
+/* list of non-sleepable functions that are otherwise on
+ * ALLOW_ERROR_INJECTION list
+ */
+BTF_SET_START(btf_non_sleepable_error_inject)
+/* Three functions below can be called from sleepable and non-sleepable context.
+ * Assume non-sleepable from bpf safety point of view.
+ */
+BTF_ID(func, __add_to_page_cache_locked)
+BTF_ID(func, should_fail_alloc_page)
+BTF_ID(func, should_failslab)
+BTF_SET_END(btf_non_sleepable_error_inject)
+
+static int check_non_sleepable_error_inject(u32 btf_id)
+{
+ return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
+}
+
static int check_attach_btf_id(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog;
@@ -10837,6 +11051,12 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
long addr;
u64 key;
+ if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
+ prog->type != BPF_PROG_TYPE_LSM) {
+ verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
+ return -EINVAL;
+ }
+
if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
return check_struct_ops_btf_id(env);
@@ -11045,13 +11265,36 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
}
}
- if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
+ if (prog->aux->sleepable) {
+ ret = -EINVAL;
+ switch (prog->type) {
+ case BPF_PROG_TYPE_TRACING:
+ /* fentry/fexit/fmod_ret progs can be sleepable only if they are
+ * attached to ALLOW_ERROR_INJECTION and are not in denylist.
+ */
+ if (!check_non_sleepable_error_inject(btf_id) &&
+ within_error_injection_list(addr))
+ ret = 0;
+ break;
+ case BPF_PROG_TYPE_LSM:
+ /* LSM progs check that they are attached to bpf_lsm_*() funcs.
+ * Only some of them are sleepable.
+ */
+ if (check_sleepable_lsm_hook(btf_id))
+ ret = 0;
+ break;
+ default:
+ break;
+ }
+ if (ret)
+ verbose(env, "%s is not sleepable\n",
+ prog->aux->attach_func_name);
+ } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
ret = check_attach_modify_return(prog, addr);
if (ret)
verbose(env, "%s() is not modifiable\n",
prog->aux->attach_func_name);
}
-
if (ret)
goto out;
tr->func.addr = (void *)addr;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index a8d4f25..b2a5380 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1098,6 +1098,52 @@ static const struct bpf_func_proto bpf_send_signal_thread_proto = {
.arg1_type = ARG_ANYTHING,
};
+BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz)
+{
+ long len;
+ char *p;
+
+ if (!sz)
+ return 0;
+
+ p = d_path(path, buf, sz);
+ if (IS_ERR(p)) {
+ len = PTR_ERR(p);
+ } else {
+ len = buf + sz - p;
+ memmove(buf, p, len);
+ }
+
+ return len;
+}
+
+BTF_SET_START(btf_allowlist_d_path)
+BTF_ID(func, vfs_truncate)
+BTF_ID(func, vfs_fallocate)
+BTF_ID(func, dentry_open)
+BTF_ID(func, vfs_getattr)
+BTF_ID(func, filp_close)
+BTF_SET_END(btf_allowlist_d_path)
+
+static bool bpf_d_path_allowed(const struct bpf_prog *prog)
+{
+ return btf_id_set_contains(&btf_allowlist_d_path, prog->aux->attach_btf_id);
+}
+
+BTF_ID_LIST(bpf_d_path_btf_ids)
+BTF_ID(struct, path)
+
+static const struct bpf_func_proto bpf_d_path_proto = {
+ .func = bpf_d_path,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
+ .btf_id = bpf_d_path_btf_ids,
+ .allowed = bpf_d_path_allowed,
+};
+
const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -1182,6 +1228,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_jiffies64_proto;
case BPF_FUNC_get_task_stack:
return &bpf_get_task_stack_proto;
+ case BPF_FUNC_copy_from_user:
+ return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
default:
return NULL;
}
@@ -1579,6 +1627,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return prog->expected_attach_type == BPF_TRACE_ITER ?
&bpf_seq_write_proto :
NULL;
+ case BPF_FUNC_d_path:
+ return &bpf_d_path_proto;
default:
return raw_tp_prog_func_proto(func_id, prog);
}
diff --git a/lib/nlattr.c b/lib/nlattr.c
index bc5b5cf..80ff9fe 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -124,6 +124,7 @@ void nla_get_range_unsigned(const struct nla_policy *pt,
range->max = U8_MAX;
break;
case NLA_U16:
+ case NLA_BINARY:
range->max = U16_MAX;
break;
case NLA_U32:
@@ -140,6 +141,7 @@ void nla_get_range_unsigned(const struct nla_policy *pt,
switch (pt->validation_type) {
case NLA_VALIDATE_RANGE:
+ case NLA_VALIDATE_RANGE_WARN_TOO_LONG:
range->min = pt->min;
range->max = pt->max;
break;
@@ -157,9 +159,10 @@ void nla_get_range_unsigned(const struct nla_policy *pt,
}
}
-static int nla_validate_int_range_unsigned(const struct nla_policy *pt,
- const struct nlattr *nla,
- struct netlink_ext_ack *extack)
+static int nla_validate_range_unsigned(const struct nla_policy *pt,
+ const struct nlattr *nla,
+ struct netlink_ext_ack *extack,
+ unsigned int validate)
{
struct netlink_range_validation range;
u64 value;
@@ -178,15 +181,39 @@ static int nla_validate_int_range_unsigned(const struct nla_policy *pt,
case NLA_MSECS:
value = nla_get_u64(nla);
break;
+ case NLA_BINARY:
+ value = nla_len(nla);
+ break;
default:
return -EINVAL;
}
nla_get_range_unsigned(pt, &range);
+ if (pt->validation_type == NLA_VALIDATE_RANGE_WARN_TOO_LONG &&
+ pt->type == NLA_BINARY && value > range.max) {
+ pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n",
+ current->comm, pt->type);
+ if (validate & NL_VALIDATE_STRICT_ATTRS) {
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "invalid attribute length");
+ return -EINVAL;
+ }
+
+ /* this assumes min <= max (don't validate against min) */
+ return 0;
+ }
+
if (value < range.min || value > range.max) {
- NL_SET_ERR_MSG_ATTR(extack, nla,
- "integer out of range");
+ bool binary = pt->type == NLA_BINARY;
+
+ if (binary)
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "binary attribute size out of range");
+ else
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "integer out of range");
+
return -ERANGE;
}
@@ -274,7 +301,8 @@ static int nla_validate_int_range_signed(const struct nla_policy *pt,
static int nla_validate_int_range(const struct nla_policy *pt,
const struct nlattr *nla,
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ unsigned int validate)
{
switch (pt->type) {
case NLA_U8:
@@ -282,7 +310,8 @@ static int nla_validate_int_range(const struct nla_policy *pt,
case NLA_U32:
case NLA_U64:
case NLA_MSECS:
- return nla_validate_int_range_unsigned(pt, nla, extack);
+ case NLA_BINARY:
+ return nla_validate_range_unsigned(pt, nla, extack, validate);
case NLA_S8:
case NLA_S16:
case NLA_S32:
@@ -313,10 +342,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
BUG_ON(pt->type > NLA_TYPE_MAX);
- if ((nla_attr_len[pt->type] && attrlen != nla_attr_len[pt->type]) ||
- (pt->type == NLA_EXACT_LEN &&
- pt->validation_type == NLA_VALIDATE_WARN_TOO_LONG &&
- attrlen != pt->len)) {
+ if (nla_attr_len[pt->type] && attrlen != nla_attr_len[pt->type]) {
pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n",
current->comm, type);
if (validate & NL_VALIDATE_STRICT_ATTRS) {
@@ -449,19 +475,10 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
"Unsupported attribute");
return -EINVAL;
}
- /* fall through */
- case NLA_MIN_LEN:
if (attrlen < pt->len)
goto out_err;
break;
- case NLA_EXACT_LEN:
- if (pt->validation_type != NLA_VALIDATE_WARN_TOO_LONG) {
- if (attrlen != pt->len)
- goto out_err;
- break;
- }
- /* fall through */
default:
if (pt->len)
minlen = pt->len;
@@ -479,9 +496,10 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
break;
case NLA_VALIDATE_RANGE_PTR:
case NLA_VALIDATE_RANGE:
+ case NLA_VALIDATE_RANGE_WARN_TOO_LONG:
case NLA_VALIDATE_MIN:
case NLA_VALIDATE_MAX:
- err = nla_validate_int_range(pt, nla, extack);
+ err = nla_validate_int_range(pt, nla, extack, validate);
if (err)
return err;
break;
@@ -816,8 +834,7 @@ EXPORT_SYMBOL(__nla_reserve);
struct nlattr *__nla_reserve_64bit(struct sk_buff *skb, int attrtype,
int attrlen, int padattr)
{
- if (nla_need_padding_for_64bit(skb))
- nla_align_64bit(skb, padattr);
+ nla_align_64bit(skb, padattr);
return __nla_reserve(skb, attrtype, attrlen);
}
diff --git a/mm/filemap.c b/mm/filemap.c
index 1aaea26..054d93a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -827,10 +827,10 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
}
EXPORT_SYMBOL_GPL(replace_page_cache_page);
-static int __add_to_page_cache_locked(struct page *page,
- struct address_space *mapping,
- pgoff_t offset, gfp_t gfp_mask,
- void **shadowp)
+noinline int __add_to_page_cache_locked(struct page *page,
+ struct address_space *mapping,
+ pgoff_t offset, gfp_t gfp_mask,
+ void **shadowp)
{
XA_STATE(xas, &mapping->i_pages, offset);
int huge = PageHuge(page);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fab5e97..0608f7f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3482,7 +3482,7 @@ static inline bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
#endif /* CONFIG_FAIL_PAGE_ALLOC */
-static noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
+noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
{
return __should_fail_alloc_page(gfp_mask, order);
}
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index a4faf5f..206d0b4 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -27,6 +27,7 @@
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/pkt_sched.h>
+#include <linux/prandom.h>
#include <linux/printk.h>
#include <linux/random.h>
#include <linux/rculist.h>
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index d35aca0e..79a7dfc 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -20,6 +20,7 @@
#include <linux/kref.h>
#include <linux/netdevice.h>
#include <linux/nl80211.h>
+#include <linux/prandom.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 717fe65..8c1148f 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -20,6 +20,7 @@
#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
+#include <linux/prandom.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 8500f56..ab6cec3 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1798,7 +1798,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb,
ret = queue_work(batadv_event_workqueue, &backbone_gw->report_work);
- /* backbone_gw is unreferenced in the report work function function
+ /* backbone_gw is unreferenced in the report work function
* if queue_work() call was successful
*/
if (!ret)
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 9fdbe30..9a47ef8 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -306,7 +306,7 @@ batadv_frag_merge_packets(struct hlist_head *chain)
* set *skb to merged packet; 2) Packet is buffered: Return true and set *skb
* to NULL; 3) Error: Return false and free skb.
*
- * Return: true when the packet is merged or buffered, false when skb is not not
+ * Return: true when the packet is merged or buffered, false when skb is not
* used.
*/
bool batadv_frag_skb_buffer(struct sk_buff **skb,
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index fa06b51..dad9964 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -599,7 +599,7 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
/* report to the other components the maximum amount of bytes that
* batman-adv can send over the wire (without considering the payload
* overhead). For example, this value is used by TT to compute the
- * maximum local table table size
+ * maximum local table size
*/
atomic_set(&bat_priv->packet_size_max, min_mtu);
@@ -977,23 +977,6 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface)
}
/**
- * batadv_hardif_remove_interfaces() - Remove all hard interfaces
- */
-void batadv_hardif_remove_interfaces(void)
-{
- struct batadv_hard_iface *hard_iface, *hard_iface_tmp;
-
- rtnl_lock();
- list_for_each_entry_safe(hard_iface, hard_iface_tmp,
- &batadv_hardif_list, list) {
- list_del_rcu(&hard_iface->list);
- batadv_hardif_generation++;
- batadv_hardif_remove_interface(hard_iface);
- }
- rtnl_unlock();
-}
-
-/**
* batadv_hard_if_event_softif() - Handle events for soft interfaces
* @event: NETDEV_* event to handle
* @net_dev: net_device which generated an event
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index bad2e50..b1855d9 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -100,7 +100,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
struct net *net, const char *iface_name);
void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
enum batadv_hard_if_cleanup autodel);
-void batadv_hardif_remove_interfaces(void);
int batadv_hardif_min_mtu(struct net_device *soft_iface);
void batadv_update_min_mtu(struct net_device *soft_iface);
void batadv_hardif_release(struct kref *ref);
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 519c08c..70fee9b 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -137,7 +137,6 @@ static void __exit batadv_exit(void)
batadv_netlink_unregister();
rtnl_link_unregister(&batadv_link_ops);
unregister_netdevice_notifier(&batadv_hard_if_notifier);
- batadv_hardif_remove_interfaces();
flush_workqueue(batadv_event_workqueue);
destroy_workqueue(batadv_event_workqueue);
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 0393bb9..a47dc33 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2020.3"
+#define BATADV_SOURCE_VERSION "2020.4"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index bdc4a1f..1622c3f5 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -207,7 +207,7 @@ static u8 batadv_mcast_mla_rtr_flags_bridge_get(struct batadv_priv *bat_priv,
return BATADV_MCAST_WANT_NO_RTR4 | BATADV_MCAST_WANT_NO_RTR6;
/* TODO: ask the bridge if a multicast router is present (the bridge
- * is capable of performing proper RFC4286 multicast multicast router
+ * is capable of performing proper RFC4286 multicast router
* discovery) instead of searching for a ff02::2 listener here
*/
ret = br_multicast_list_adjacent(dev, &bridge_mcast_list);
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 48d7078..61ddd6d 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -26,8 +26,8 @@
#include <linux/lockdep.h>
#include <linux/net.h>
#include <linux/netdevice.h>
+#include <linux/prandom.h>
#include <linux/printk.h>
-#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/seq_file.h>
@@ -250,7 +250,7 @@ static void batadv_nc_path_put(struct batadv_nc_path *nc_path)
/**
* batadv_nc_packet_free() - frees nc packet
* @nc_packet: the nc packet to free
- * @dropped: whether the packet is freed because is is dropped
+ * @dropped: whether the packet is freed because is dropped
*/
static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet,
bool dropped)
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index d267b94..8701733 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -461,7 +461,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
/**
* batadv_forw_packet_free() - free a forwarding packet
* @forw_packet: The packet to free
- * @dropped: whether the packet is freed because is is dropped
+ * @dropped: whether the packet is freed because is dropped
*
* This frees a forwarding packet and releases any resources it might
* have claimed.
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 23833a0..9d3974b 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -649,7 +649,7 @@ static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv,
/**
* batadv_interface_add_vid() - ndo_add_vid API implementation
* @dev: the netdev of the mesh interface
- * @proto: protocol of the the vlan id
+ * @proto: protocol of the vlan id
* @vid: identifier of the new vlan
*
* Set up all the internal structures for handling the new vlan on top of the
@@ -707,7 +707,7 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
/**
* batadv_interface_kill_vid() - ndo_kill_vid API implementation
* @dev: the netdev of the mesh interface
- * @proto: protocol of the the vlan id
+ * @proto: protocol of the vlan id
* @vid: identifier of the deleted vlan
*
* Destroy all the internal structures used to handle the vlan identified by vid
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index ed519ef..965336a 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1492,7 +1492,7 @@ struct batadv_tp_vars {
/** @unacked_lock: protect unacked_list */
spinlock_t unacked_lock;
- /** @last_recv_time: time time (jiffies) a msg was received */
+ /** @last_recv_time: time (jiffies) a msg was received */
unsigned long last_recv_time;
/** @refcount: number of context where the object is used */
@@ -1996,7 +1996,7 @@ struct batadv_tt_change_node {
*/
struct batadv_tt_req_node {
/**
- * @addr: mac address address of the originator this request was sent to
+ * @addr: mac address of the originator this request was sent to
*/
u8 addr[ETH_ALEN];
diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig
index 73d0b12..8ad0233 100644
--- a/net/bpfilter/Kconfig
+++ b/net/bpfilter/Kconfig
@@ -2,6 +2,7 @@
menuconfig BPFILTER
bool "BPF based packet filtering framework (BPFILTER)"
depends on NET && BPF && INET
+ select USERMODE_DRIVER
help
This builds experimental bpfilter framework that is aiming to
provide netfilter compatible functionality via BPF
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 147d525..8a71c60 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1095,8 +1095,8 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
[IFLA_BR_MCAST_IGMP_VERSION] = { .type = NLA_U8 },
[IFLA_BR_MCAST_MLD_VERSION] = { .type = NLA_U8 },
[IFLA_BR_VLAN_STATS_PER_PORT] = { .type = NLA_U8 },
- [IFLA_BR_MULTI_BOOLOPT] = { .type = NLA_EXACT_LEN,
- .len = sizeof(struct br_boolopt_multi) },
+ [IFLA_BR_MULTI_BOOLOPT] =
+ NLA_POLICY_EXACT_LEN(sizeof(struct br_boolopt_multi)),
};
static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index f9092c7..d2b8737 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1884,8 +1884,8 @@ static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb)
}
static const struct nla_policy br_vlan_db_policy[BRIDGE_VLANDB_ENTRY_MAX + 1] = {
- [BRIDGE_VLANDB_ENTRY_INFO] = { .type = NLA_EXACT_LEN,
- .len = sizeof(struct bridge_vlan_info) },
+ [BRIDGE_VLANDB_ENTRY_INFO] =
+ NLA_POLICY_EXACT_LEN(sizeof(struct bridge_vlan_info)),
[BRIDGE_VLANDB_ENTRY_RANGE] = { .type = NLA_U16 },
[BRIDGE_VLANDB_ENTRY_STATE] = { .type = NLA_U8 },
[BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = { .type = NLA_NESTED },
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index b988f48..a0d1a32 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -7,97 +7,14 @@
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <linux/btf_ids.h>
+#include <linux/bpf_local_storage.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
+#include <linux/btf_ids.h>
-#define SK_STORAGE_CREATE_FLAG_MASK \
- (BPF_F_NO_PREALLOC | BPF_F_CLONE)
-
-struct bucket {
- struct hlist_head list;
- raw_spinlock_t lock;
-};
-
-/* Thp map is not the primary owner of a bpf_sk_storage_elem.
- * Instead, the sk->sk_bpf_storage is.
- *
- * The map (bpf_sk_storage_map) is for two purposes
- * 1. Define the size of the "sk local storage". It is
- * the map's value_size.
- *
- * 2. Maintain a list to keep track of all elems such
- * that they can be cleaned up during the map destruction.
- *
- * When a bpf local storage is being looked up for a
- * particular sk, the "bpf_map" pointer is actually used
- * as the "key" to search in the list of elem in
- * sk->sk_bpf_storage.
- *
- * Hence, consider sk->sk_bpf_storage is the mini-map
- * with the "bpf_map" pointer as the searching key.
- */
-struct bpf_sk_storage_map {
- struct bpf_map map;
- /* Lookup elem does not require accessing the map.
- *
- * Updating/Deleting requires a bucket lock to
- * link/unlink the elem from the map. Having
- * multiple buckets to improve contention.
- */
- struct bucket *buckets;
- u32 bucket_log;
- u16 elem_size;
- u16 cache_idx;
-};
-
-struct bpf_sk_storage_data {
- /* smap is used as the searching key when looking up
- * from sk->sk_bpf_storage.
- *
- * Put it in the same cacheline as the data to minimize
- * the number of cachelines access during the cache hit case.
- */
- struct bpf_sk_storage_map __rcu *smap;
- u8 data[] __aligned(8);
-};
-
-/* Linked to bpf_sk_storage and bpf_sk_storage_map */
-struct bpf_sk_storage_elem {
- struct hlist_node map_node; /* Linked to bpf_sk_storage_map */
- struct hlist_node snode; /* Linked to bpf_sk_storage */
- struct bpf_sk_storage __rcu *sk_storage;
- struct rcu_head rcu;
- /* 8 bytes hole */
- /* The data is stored in aother cacheline to minimize
- * the number of cachelines access during a cache hit.
- */
- struct bpf_sk_storage_data sdata ____cacheline_aligned;
-};
-
-#define SELEM(_SDATA) container_of((_SDATA), struct bpf_sk_storage_elem, sdata)
-#define SDATA(_SELEM) (&(_SELEM)->sdata)
-#define BPF_SK_STORAGE_CACHE_SIZE 16
-
-static DEFINE_SPINLOCK(cache_idx_lock);
-static u64 cache_idx_usage_counts[BPF_SK_STORAGE_CACHE_SIZE];
-
-struct bpf_sk_storage {
- struct bpf_sk_storage_data __rcu *cache[BPF_SK_STORAGE_CACHE_SIZE];
- struct hlist_head list; /* List of bpf_sk_storage_elem */
- struct sock *sk; /* The sk that owns the the above "list" of
- * bpf_sk_storage_elem.
- */
- struct rcu_head rcu;
- raw_spinlock_t lock; /* Protect adding/removing from the "list" */
-};
-
-static struct bucket *select_bucket(struct bpf_sk_storage_map *smap,
- struct bpf_sk_storage_elem *selem)
-{
- return &smap->buckets[hash_ptr(selem, smap->bucket_log)];
-}
+DEFINE_BPF_STORAGE_CACHE(sk_cache);
static int omem_charge(struct sock *sk, unsigned int size)
{
@@ -111,445 +28,38 @@ static int omem_charge(struct sock *sk, unsigned int size)
return -ENOMEM;
}
-static bool selem_linked_to_sk(const struct bpf_sk_storage_elem *selem)
-{
- return !hlist_unhashed(&selem->snode);
-}
-
-static bool selem_linked_to_map(const struct bpf_sk_storage_elem *selem)
-{
- return !hlist_unhashed(&selem->map_node);
-}
-
-static struct bpf_sk_storage_elem *selem_alloc(struct bpf_sk_storage_map *smap,
- struct sock *sk, void *value,
- bool charge_omem)
-{
- struct bpf_sk_storage_elem *selem;
-
- if (charge_omem && omem_charge(sk, smap->elem_size))
- return NULL;
-
- selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN);
- if (selem) {
- if (value)
- memcpy(SDATA(selem)->data, value, smap->map.value_size);
- return selem;
- }
-
- if (charge_omem)
- atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
-
- return NULL;
-}
-
-/* sk_storage->lock must be held and selem->sk_storage == sk_storage.
- * The caller must ensure selem->smap is still valid to be
- * dereferenced for its smap->elem_size and smap->cache_idx.
- */
-static bool __selem_unlink_sk(struct bpf_sk_storage *sk_storage,
- struct bpf_sk_storage_elem *selem,
- bool uncharge_omem)
-{
- struct bpf_sk_storage_map *smap;
- bool free_sk_storage;
- struct sock *sk;
-
- smap = rcu_dereference(SDATA(selem)->smap);
- sk = sk_storage->sk;
-
- /* All uncharging on sk->sk_omem_alloc must be done first.
- * sk may be freed once the last selem is unlinked from sk_storage.
- */
- if (uncharge_omem)
- atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
-
- free_sk_storage = hlist_is_singular_node(&selem->snode,
- &sk_storage->list);
- if (free_sk_storage) {
- atomic_sub(sizeof(struct bpf_sk_storage), &sk->sk_omem_alloc);
- sk_storage->sk = NULL;
- /* After this RCU_INIT, sk may be freed and cannot be used */
- RCU_INIT_POINTER(sk->sk_bpf_storage, NULL);
-
- /* sk_storage is not freed now. sk_storage->lock is
- * still held and raw_spin_unlock_bh(&sk_storage->lock)
- * will be done by the caller.
- *
- * Although the unlock will be done under
- * rcu_read_lock(), it is more intutivie to
- * read if kfree_rcu(sk_storage, rcu) is done
- * after the raw_spin_unlock_bh(&sk_storage->lock).
- *
- * Hence, a "bool free_sk_storage" is returned
- * to the caller which then calls the kfree_rcu()
- * after unlock.
- */
- }
- hlist_del_init_rcu(&selem->snode);
- if (rcu_access_pointer(sk_storage->cache[smap->cache_idx]) ==
- SDATA(selem))
- RCU_INIT_POINTER(sk_storage->cache[smap->cache_idx], NULL);
-
- kfree_rcu(selem, rcu);
-
- return free_sk_storage;
-}
-
-static void selem_unlink_sk(struct bpf_sk_storage_elem *selem)
-{
- struct bpf_sk_storage *sk_storage;
- bool free_sk_storage = false;
-
- if (unlikely(!selem_linked_to_sk(selem)))
- /* selem has already been unlinked from sk */
- return;
-
- sk_storage = rcu_dereference(selem->sk_storage);
- raw_spin_lock_bh(&sk_storage->lock);
- if (likely(selem_linked_to_sk(selem)))
- free_sk_storage = __selem_unlink_sk(sk_storage, selem, true);
- raw_spin_unlock_bh(&sk_storage->lock);
-
- if (free_sk_storage)
- kfree_rcu(sk_storage, rcu);
-}
-
-static void __selem_link_sk(struct bpf_sk_storage *sk_storage,
- struct bpf_sk_storage_elem *selem)
-{
- RCU_INIT_POINTER(selem->sk_storage, sk_storage);
- hlist_add_head(&selem->snode, &sk_storage->list);
-}
-
-static void selem_unlink_map(struct bpf_sk_storage_elem *selem)
-{
- struct bpf_sk_storage_map *smap;
- struct bucket *b;
-
- if (unlikely(!selem_linked_to_map(selem)))
- /* selem has already be unlinked from smap */
- return;
-
- smap = rcu_dereference(SDATA(selem)->smap);
- b = select_bucket(smap, selem);
- raw_spin_lock_bh(&b->lock);
- if (likely(selem_linked_to_map(selem)))
- hlist_del_init_rcu(&selem->map_node);
- raw_spin_unlock_bh(&b->lock);
-}
-
-static void selem_link_map(struct bpf_sk_storage_map *smap,
- struct bpf_sk_storage_elem *selem)
-{
- struct bucket *b = select_bucket(smap, selem);
-
- raw_spin_lock_bh(&b->lock);
- RCU_INIT_POINTER(SDATA(selem)->smap, smap);
- hlist_add_head_rcu(&selem->map_node, &b->list);
- raw_spin_unlock_bh(&b->lock);
-}
-
-static void selem_unlink(struct bpf_sk_storage_elem *selem)
-{
- /* Always unlink from map before unlinking from sk_storage
- * because selem will be freed after successfully unlinked from
- * the sk_storage.
- */
- selem_unlink_map(selem);
- selem_unlink_sk(selem);
-}
-
-static struct bpf_sk_storage_data *
-__sk_storage_lookup(struct bpf_sk_storage *sk_storage,
- struct bpf_sk_storage_map *smap,
- bool cacheit_lockit)
-{
- struct bpf_sk_storage_data *sdata;
- struct bpf_sk_storage_elem *selem;
-
- /* Fast path (cache hit) */
- sdata = rcu_dereference(sk_storage->cache[smap->cache_idx]);
- if (sdata && rcu_access_pointer(sdata->smap) == smap)
- return sdata;
-
- /* Slow path (cache miss) */
- hlist_for_each_entry_rcu(selem, &sk_storage->list, snode)
- if (rcu_access_pointer(SDATA(selem)->smap) == smap)
- break;
-
- if (!selem)
- return NULL;
-
- sdata = SDATA(selem);
- if (cacheit_lockit) {
- /* spinlock is needed to avoid racing with the
- * parallel delete. Otherwise, publishing an already
- * deleted sdata to the cache will become a use-after-free
- * problem in the next __sk_storage_lookup().
- */
- raw_spin_lock_bh(&sk_storage->lock);
- if (selem_linked_to_sk(selem))
- rcu_assign_pointer(sk_storage->cache[smap->cache_idx],
- sdata);
- raw_spin_unlock_bh(&sk_storage->lock);
- }
-
- return sdata;
-}
-
-static struct bpf_sk_storage_data *
+static struct bpf_local_storage_data *
sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
{
- struct bpf_sk_storage *sk_storage;
- struct bpf_sk_storage_map *smap;
+ struct bpf_local_storage *sk_storage;
+ struct bpf_local_storage_map *smap;
sk_storage = rcu_dereference(sk->sk_bpf_storage);
if (!sk_storage)
return NULL;
- smap = (struct bpf_sk_storage_map *)map;
- return __sk_storage_lookup(sk_storage, smap, cacheit_lockit);
-}
-
-static int check_flags(const struct bpf_sk_storage_data *old_sdata,
- u64 map_flags)
-{
- if (old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
- /* elem already exists */
- return -EEXIST;
-
- if (!old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
- /* elem doesn't exist, cannot update it */
- return -ENOENT;
-
- return 0;
-}
-
-static int sk_storage_alloc(struct sock *sk,
- struct bpf_sk_storage_map *smap,
- struct bpf_sk_storage_elem *first_selem)
-{
- struct bpf_sk_storage *prev_sk_storage, *sk_storage;
- int err;
-
- err = omem_charge(sk, sizeof(*sk_storage));
- if (err)
- return err;
-
- sk_storage = kzalloc(sizeof(*sk_storage), GFP_ATOMIC | __GFP_NOWARN);
- if (!sk_storage) {
- err = -ENOMEM;
- goto uncharge;
- }
- INIT_HLIST_HEAD(&sk_storage->list);
- raw_spin_lock_init(&sk_storage->lock);
- sk_storage->sk = sk;
-
- __selem_link_sk(sk_storage, first_selem);
- selem_link_map(smap, first_selem);
- /* Publish sk_storage to sk. sk->sk_lock cannot be acquired.
- * Hence, atomic ops is used to set sk->sk_bpf_storage
- * from NULL to the newly allocated sk_storage ptr.
- *
- * From now on, the sk->sk_bpf_storage pointer is protected
- * by the sk_storage->lock. Hence, when freeing
- * the sk->sk_bpf_storage, the sk_storage->lock must
- * be held before setting sk->sk_bpf_storage to NULL.
- */
- prev_sk_storage = cmpxchg((struct bpf_sk_storage **)&sk->sk_bpf_storage,
- NULL, sk_storage);
- if (unlikely(prev_sk_storage)) {
- selem_unlink_map(first_selem);
- err = -EAGAIN;
- goto uncharge;
-
- /* Note that even first_selem was linked to smap's
- * bucket->list, first_selem can be freed immediately
- * (instead of kfree_rcu) because
- * bpf_sk_storage_map_free() does a
- * synchronize_rcu() before walking the bucket->list.
- * Hence, no one is accessing selem from the
- * bucket->list under rcu_read_lock().
- */
- }
-
- return 0;
-
-uncharge:
- kfree(sk_storage);
- atomic_sub(sizeof(*sk_storage), &sk->sk_omem_alloc);
- return err;
-}
-
-/* sk cannot be going away because it is linking new elem
- * to sk->sk_bpf_storage. (i.e. sk->sk_refcnt cannot be 0).
- * Otherwise, it will become a leak (and other memory issues
- * during map destruction).
- */
-static struct bpf_sk_storage_data *sk_storage_update(struct sock *sk,
- struct bpf_map *map,
- void *value,
- u64 map_flags)
-{
- struct bpf_sk_storage_data *old_sdata = NULL;
- struct bpf_sk_storage_elem *selem;
- struct bpf_sk_storage *sk_storage;
- struct bpf_sk_storage_map *smap;
- int err;
-
- /* BPF_EXIST and BPF_NOEXIST cannot be both set */
- if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
- /* BPF_F_LOCK can only be used in a value with spin_lock */
- unlikely((map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
- return ERR_PTR(-EINVAL);
-
- smap = (struct bpf_sk_storage_map *)map;
- sk_storage = rcu_dereference(sk->sk_bpf_storage);
- if (!sk_storage || hlist_empty(&sk_storage->list)) {
- /* Very first elem for this sk */
- err = check_flags(NULL, map_flags);
- if (err)
- return ERR_PTR(err);
-
- selem = selem_alloc(smap, sk, value, true);
- if (!selem)
- return ERR_PTR(-ENOMEM);
-
- err = sk_storage_alloc(sk, smap, selem);
- if (err) {
- kfree(selem);
- atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
- return ERR_PTR(err);
- }
-
- return SDATA(selem);
- }
-
- if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) {
- /* Hoping to find an old_sdata to do inline update
- * such that it can avoid taking the sk_storage->lock
- * and changing the lists.
- */
- old_sdata = __sk_storage_lookup(sk_storage, smap, false);
- err = check_flags(old_sdata, map_flags);
- if (err)
- return ERR_PTR(err);
- if (old_sdata && selem_linked_to_sk(SELEM(old_sdata))) {
- copy_map_value_locked(map, old_sdata->data,
- value, false);
- return old_sdata;
- }
- }
-
- raw_spin_lock_bh(&sk_storage->lock);
-
- /* Recheck sk_storage->list under sk_storage->lock */
- if (unlikely(hlist_empty(&sk_storage->list))) {
- /* A parallel del is happening and sk_storage is going
- * away. It has just been checked before, so very
- * unlikely. Return instead of retry to keep things
- * simple.
- */
- err = -EAGAIN;
- goto unlock_err;
- }
-
- old_sdata = __sk_storage_lookup(sk_storage, smap, false);
- err = check_flags(old_sdata, map_flags);
- if (err)
- goto unlock_err;
-
- if (old_sdata && (map_flags & BPF_F_LOCK)) {
- copy_map_value_locked(map, old_sdata->data, value, false);
- selem = SELEM(old_sdata);
- goto unlock;
- }
-
- /* sk_storage->lock is held. Hence, we are sure
- * we can unlink and uncharge the old_sdata successfully
- * later. Hence, instead of charging the new selem now
- * and then uncharge the old selem later (which may cause
- * a potential but unnecessary charge failure), avoid taking
- * a charge at all here (the "!old_sdata" check) and the
- * old_sdata will not be uncharged later during __selem_unlink_sk().
- */
- selem = selem_alloc(smap, sk, value, !old_sdata);
- if (!selem) {
- err = -ENOMEM;
- goto unlock_err;
- }
-
- /* First, link the new selem to the map */
- selem_link_map(smap, selem);
-
- /* Second, link (and publish) the new selem to sk_storage */
- __selem_link_sk(sk_storage, selem);
-
- /* Third, remove old selem, SELEM(old_sdata) */
- if (old_sdata) {
- selem_unlink_map(SELEM(old_sdata));
- __selem_unlink_sk(sk_storage, SELEM(old_sdata), false);
- }
-
-unlock:
- raw_spin_unlock_bh(&sk_storage->lock);
- return SDATA(selem);
-
-unlock_err:
- raw_spin_unlock_bh(&sk_storage->lock);
- return ERR_PTR(err);
+ smap = (struct bpf_local_storage_map *)map;
+ return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit);
}
static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
{
- struct bpf_sk_storage_data *sdata;
+ struct bpf_local_storage_data *sdata;
sdata = sk_storage_lookup(sk, map, false);
if (!sdata)
return -ENOENT;
- selem_unlink(SELEM(sdata));
+ bpf_selem_unlink(SELEM(sdata));
return 0;
}
-static u16 cache_idx_get(void)
-{
- u64 min_usage = U64_MAX;
- u16 i, res = 0;
-
- spin_lock(&cache_idx_lock);
-
- for (i = 0; i < BPF_SK_STORAGE_CACHE_SIZE; i++) {
- if (cache_idx_usage_counts[i] < min_usage) {
- min_usage = cache_idx_usage_counts[i];
- res = i;
-
- /* Found a free cache_idx */
- if (!min_usage)
- break;
- }
- }
- cache_idx_usage_counts[res]++;
-
- spin_unlock(&cache_idx_lock);
-
- return res;
-}
-
-static void cache_idx_free(u16 idx)
-{
- spin_lock(&cache_idx_lock);
- cache_idx_usage_counts[idx]--;
- spin_unlock(&cache_idx_lock);
-}
-
/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
- struct bpf_sk_storage_elem *selem;
- struct bpf_sk_storage *sk_storage;
+ struct bpf_local_storage_elem *selem;
+ struct bpf_local_storage *sk_storage;
bool free_sk_storage = false;
struct hlist_node *n;
@@ -565,7 +75,7 @@ void bpf_sk_storage_free(struct sock *sk)
* Thus, no elem can be added-to or deleted-from the
* sk_storage->list by the bpf_prog or by the bpf-map's syscall.
*
- * It is racing with bpf_sk_storage_map_free() alone
+ * It is racing with bpf_local_storage_map_free() alone
* when unlinking elem from the sk_storage->list and
* the map's bucket->list.
*/
@@ -574,8 +84,9 @@ void bpf_sk_storage_free(struct sock *sk)
/* Always unlink from map before unlinking from
* sk_storage.
*/
- selem_unlink_map(selem);
- free_sk_storage = __selem_unlink_sk(sk_storage, selem, true);
+ bpf_selem_unlink_map(selem);
+ free_sk_storage = bpf_selem_unlink_storage_nolock(sk_storage,
+ selem, true);
}
raw_spin_unlock_bh(&sk_storage->lock);
rcu_read_unlock();
@@ -584,132 +95,24 @@ void bpf_sk_storage_free(struct sock *sk)
kfree_rcu(sk_storage, rcu);
}
-static void bpf_sk_storage_map_free(struct bpf_map *map)
+static void sk_storage_map_free(struct bpf_map *map)
{
- struct bpf_sk_storage_elem *selem;
- struct bpf_sk_storage_map *smap;
- struct bucket *b;
- unsigned int i;
+ struct bpf_local_storage_map *smap;
- smap = (struct bpf_sk_storage_map *)map;
-
- cache_idx_free(smap->cache_idx);
-
- /* Note that this map might be concurrently cloned from
- * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
- * RCU read section to finish before proceeding. New RCU
- * read sections should be prevented via bpf_map_inc_not_zero.
- */
- synchronize_rcu();
-
- /* bpf prog and the userspace can no longer access this map
- * now. No new selem (of this map) can be added
- * to the sk->sk_bpf_storage or to the map bucket's list.
- *
- * The elem of this map can be cleaned up here
- * or
- * by bpf_sk_storage_free() during __sk_destruct().
- */
- for (i = 0; i < (1U << smap->bucket_log); i++) {
- b = &smap->buckets[i];
-
- rcu_read_lock();
- /* No one is adding to b->list now */
- while ((selem = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(&b->list)),
- struct bpf_sk_storage_elem,
- map_node))) {
- selem_unlink(selem);
- cond_resched_rcu();
- }
- rcu_read_unlock();
- }
-
- /* bpf_sk_storage_free() may still need to access the map.
- * e.g. bpf_sk_storage_free() has unlinked selem from the map
- * which then made the above while((selem = ...)) loop
- * exited immediately.
- *
- * However, the bpf_sk_storage_free() still needs to access
- * the smap->elem_size to do the uncharging in
- * __selem_unlink_sk().
- *
- * Hence, wait another rcu grace period for the
- * bpf_sk_storage_free() to finish.
- */
- synchronize_rcu();
-
- kvfree(smap->buckets);
- kfree(map);
+ smap = (struct bpf_local_storage_map *)map;
+ bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
+ bpf_local_storage_map_free(smap);
}
-/* U16_MAX is much more than enough for sk local storage
- * considering a tcp_sock is ~2k.
- */
-#define MAX_VALUE_SIZE \
- min_t(u32, \
- (KMALLOC_MAX_SIZE - MAX_BPF_STACK - sizeof(struct bpf_sk_storage_elem)), \
- (U16_MAX - sizeof(struct bpf_sk_storage_elem)))
-
-static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
+static struct bpf_map *sk_storage_map_alloc(union bpf_attr *attr)
{
- if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK ||
- !(attr->map_flags & BPF_F_NO_PREALLOC) ||
- attr->max_entries ||
- attr->key_size != sizeof(int) || !attr->value_size ||
- /* Enforce BTF for userspace sk dumping */
- !attr->btf_key_type_id || !attr->btf_value_type_id)
- return -EINVAL;
+ struct bpf_local_storage_map *smap;
- if (!bpf_capable())
- return -EPERM;
+ smap = bpf_local_storage_map_alloc(attr);
+ if (IS_ERR(smap))
+ return ERR_CAST(smap);
- if (attr->value_size > MAX_VALUE_SIZE)
- return -E2BIG;
-
- return 0;
-}
-
-static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
-{
- struct bpf_sk_storage_map *smap;
- unsigned int i;
- u32 nbuckets;
- u64 cost;
- int ret;
-
- smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN);
- if (!smap)
- return ERR_PTR(-ENOMEM);
- bpf_map_init_from_attr(&smap->map, attr);
-
- nbuckets = roundup_pow_of_two(num_possible_cpus());
- /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
- nbuckets = max_t(u32, 2, nbuckets);
- smap->bucket_log = ilog2(nbuckets);
- cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
-
- ret = bpf_map_charge_init(&smap->map.memory, cost);
- if (ret < 0) {
- kfree(smap);
- return ERR_PTR(ret);
- }
-
- smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
- GFP_USER | __GFP_NOWARN);
- if (!smap->buckets) {
- bpf_map_charge_finish(&smap->map.memory);
- kfree(smap);
- return ERR_PTR(-ENOMEM);
- }
-
- for (i = 0; i < nbuckets; i++) {
- INIT_HLIST_HEAD(&smap->buckets[i].list);
- raw_spin_lock_init(&smap->buckets[i].lock);
- }
-
- smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size;
- smap->cache_idx = cache_idx_get();
-
+ smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
return &smap->map;
}
@@ -719,26 +122,9 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,
return -ENOTSUPP;
}
-static int bpf_sk_storage_map_check_btf(const struct bpf_map *map,
- const struct btf *btf,
- const struct btf_type *key_type,
- const struct btf_type *value_type)
-{
- u32 int_data;
-
- if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
- return -EINVAL;
-
- int_data = *(u32 *)(key_type + 1);
- if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
- return -EINVAL;
-
- return 0;
-}
-
static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key)
{
- struct bpf_sk_storage_data *sdata;
+ struct bpf_local_storage_data *sdata;
struct socket *sock;
int fd, err;
@@ -756,14 +142,16 @@ static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key)
static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
void *value, u64 map_flags)
{
- struct bpf_sk_storage_data *sdata;
+ struct bpf_local_storage_data *sdata;
struct socket *sock;
int fd, err;
fd = *(int *)key;
sock = sockfd_lookup(fd, &err);
if (sock) {
- sdata = sk_storage_update(sock->sk, map, value, map_flags);
+ sdata = bpf_local_storage_update(
+ sock->sk, (struct bpf_local_storage_map *)map, value,
+ map_flags);
sockfd_put(sock);
return PTR_ERR_OR_ZERO(sdata);
}
@@ -787,14 +175,14 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
return err;
}
-static struct bpf_sk_storage_elem *
+static struct bpf_local_storage_elem *
bpf_sk_storage_clone_elem(struct sock *newsk,
- struct bpf_sk_storage_map *smap,
- struct bpf_sk_storage_elem *selem)
+ struct bpf_local_storage_map *smap,
+ struct bpf_local_storage_elem *selem)
{
- struct bpf_sk_storage_elem *copy_selem;
+ struct bpf_local_storage_elem *copy_selem;
- copy_selem = selem_alloc(smap, newsk, NULL, true);
+ copy_selem = bpf_selem_alloc(smap, newsk, NULL, true);
if (!copy_selem)
return NULL;
@@ -810,9 +198,9 @@ bpf_sk_storage_clone_elem(struct sock *newsk,
int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
{
- struct bpf_sk_storage *new_sk_storage = NULL;
- struct bpf_sk_storage *sk_storage;
- struct bpf_sk_storage_elem *selem;
+ struct bpf_local_storage *new_sk_storage = NULL;
+ struct bpf_local_storage *sk_storage;
+ struct bpf_local_storage_elem *selem;
int ret = 0;
RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
@@ -824,8 +212,8 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
goto out;
hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
- struct bpf_sk_storage_elem *copy_selem;
- struct bpf_sk_storage_map *smap;
+ struct bpf_local_storage_elem *copy_selem;
+ struct bpf_local_storage_map *smap;
struct bpf_map *map;
smap = rcu_dereference(SDATA(selem)->smap);
@@ -833,7 +221,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
continue;
/* Note that for lockless listeners adding new element
- * here can race with cleanup in bpf_sk_storage_map_free.
+ * here can race with cleanup in bpf_local_storage_map_free.
* Try to grab map refcnt to make sure that it's still
* alive and prevent concurrent removal.
*/
@@ -849,10 +237,10 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
}
if (new_sk_storage) {
- selem_link_map(smap, copy_selem);
- __selem_link_sk(new_sk_storage, copy_selem);
+ bpf_selem_link_map(smap, copy_selem);
+ bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
} else {
- ret = sk_storage_alloc(newsk, smap, copy_selem);
+ ret = bpf_local_storage_alloc(newsk, smap, copy_selem);
if (ret) {
kfree(copy_selem);
atomic_sub(smap->elem_size,
@@ -861,7 +249,8 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
goto out;
}
- new_sk_storage = rcu_dereference(copy_selem->sk_storage);
+ new_sk_storage =
+ rcu_dereference(copy_selem->local_storage);
}
bpf_map_put(map);
}
@@ -879,7 +268,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
void *, value, u64, flags)
{
- struct bpf_sk_storage_data *sdata;
+ struct bpf_local_storage_data *sdata;
if (flags > BPF_SK_STORAGE_GET_F_CREATE)
return (unsigned long)NULL;
@@ -895,7 +284,9 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
* destruction).
*/
refcount_inc_not_zero(&sk->sk_refcnt)) {
- sdata = sk_storage_update(sk, map, value, BPF_NOEXIST);
+ sdata = bpf_local_storage_update(
+ sk, (struct bpf_local_storage_map *)map, value,
+ BPF_NOEXIST);
/* sk must be a fullsock (guaranteed by verifier),
* so sock_gen_put() is unnecessary.
*/
@@ -920,18 +311,44 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
return -ENOENT;
}
+static int sk_storage_charge(struct bpf_local_storage_map *smap,
+ void *owner, u32 size)
+{
+ return omem_charge(owner, size);
+}
+
+static void sk_storage_uncharge(struct bpf_local_storage_map *smap,
+ void *owner, u32 size)
+{
+ struct sock *sk = owner;
+
+ atomic_sub(size, &sk->sk_omem_alloc);
+}
+
+static struct bpf_local_storage __rcu **
+sk_storage_ptr(void *owner)
+{
+ struct sock *sk = owner;
+
+ return &sk->sk_bpf_storage;
+}
+
static int sk_storage_map_btf_id;
const struct bpf_map_ops sk_storage_map_ops = {
- .map_alloc_check = bpf_sk_storage_map_alloc_check,
- .map_alloc = bpf_sk_storage_map_alloc,
- .map_free = bpf_sk_storage_map_free,
+ .map_meta_equal = bpf_map_meta_equal,
+ .map_alloc_check = bpf_local_storage_map_alloc_check,
+ .map_alloc = sk_storage_map_alloc,
+ .map_free = sk_storage_map_free,
.map_get_next_key = notsupp_get_next_key,
.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
.map_update_elem = bpf_fd_sk_storage_update_elem,
.map_delete_elem = bpf_fd_sk_storage_delete_elem,
- .map_check_btf = bpf_sk_storage_map_check_btf,
- .map_btf_name = "bpf_sk_storage_map",
+ .map_check_btf = bpf_local_storage_map_check_btf,
+ .map_btf_name = "bpf_local_storage_map",
.map_btf_id = &sk_storage_map_btf_id,
+ .map_local_storage_charge = sk_storage_charge,
+ .map_local_storage_uncharge = sk_storage_uncharge,
+ .map_owner_storage_ptr = sk_storage_ptr,
};
const struct bpf_func_proto bpf_sk_storage_get_proto = {
@@ -962,6 +379,30 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto = {
.arg2_type = ARG_PTR_TO_SOCKET,
};
+BTF_ID_LIST(sk_storage_btf_ids)
+BTF_ID_UNUSED
+BTF_ID(struct, sock)
+
+const struct bpf_func_proto sk_storage_get_btf_proto = {
+ .func = bpf_sk_storage_get,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_BTF_ID,
+ .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg4_type = ARG_ANYTHING,
+ .btf_id = sk_storage_btf_ids,
+};
+
+const struct bpf_func_proto sk_storage_delete_btf_proto = {
+ .func = bpf_sk_storage_delete,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_BTF_ID,
+ .btf_id = sk_storage_btf_ids,
+};
+
struct bpf_sk_storage_diag {
u32 nr_maps;
struct bpf_map *maps[];
@@ -1022,7 +463,7 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
u32 nr_maps = 0;
int rem, err;
- /* bpf_sk_storage_map is currently limited to CAP_SYS_ADMIN as
+ /* bpf_local_storage_map is currently limited to CAP_SYS_ADMIN as
* the map_alloc_check() side also does.
*/
if (!bpf_capable())
@@ -1072,13 +513,13 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);
-static int diag_get(struct bpf_sk_storage_data *sdata, struct sk_buff *skb)
+static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
{
struct nlattr *nla_stg, *nla_value;
- struct bpf_sk_storage_map *smap;
+ struct bpf_local_storage_map *smap;
/* It cannot exceed max nlattr's payload */
- BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < MAX_VALUE_SIZE);
+ BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE);
nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE);
if (!nla_stg)
@@ -1114,9 +555,9 @@ static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb,
{
/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
unsigned int diag_size = nla_total_size(0);
- struct bpf_sk_storage *sk_storage;
- struct bpf_sk_storage_elem *selem;
- struct bpf_sk_storage_map *smap;
+ struct bpf_local_storage *sk_storage;
+ struct bpf_local_storage_elem *selem;
+ struct bpf_local_storage_map *smap;
struct nlattr *nla_stgs;
unsigned int saved_len;
int err = 0;
@@ -1169,8 +610,8 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
{
/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
unsigned int diag_size = nla_total_size(0);
- struct bpf_sk_storage *sk_storage;
- struct bpf_sk_storage_data *sdata;
+ struct bpf_local_storage *sk_storage;
+ struct bpf_local_storage_data *sdata;
struct nlattr *nla_stgs;
unsigned int saved_len;
int err = 0;
@@ -1197,8 +638,8 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
saved_len = skb->len;
for (i = 0; i < diag->nr_maps; i++) {
- sdata = __sk_storage_lookup(sk_storage,
- (struct bpf_sk_storage_map *)diag->maps[i],
+ sdata = bpf_local_storage_lookup(sk_storage,
+ (struct bpf_local_storage_map *)diag->maps[i],
false);
if (!sdata)
@@ -1235,19 +676,19 @@ struct bpf_iter_seq_sk_storage_map_info {
unsigned skip_elems;
};
-static struct bpf_sk_storage_elem *
+static struct bpf_local_storage_elem *
bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
- struct bpf_sk_storage_elem *prev_selem)
+ struct bpf_local_storage_elem *prev_selem)
{
- struct bpf_sk_storage *sk_storage;
- struct bpf_sk_storage_elem *selem;
+ struct bpf_local_storage *sk_storage;
+ struct bpf_local_storage_elem *selem;
u32 skip_elems = info->skip_elems;
- struct bpf_sk_storage_map *smap;
+ struct bpf_local_storage_map *smap;
u32 bucket_id = info->bucket_id;
u32 i, count, n_buckets;
- struct bucket *b;
+ struct bpf_local_storage_map_bucket *b;
- smap = (struct bpf_sk_storage_map *)info->map;
+ smap = (struct bpf_local_storage_map *)info->map;
n_buckets = 1U << smap->bucket_log;
if (bucket_id >= n_buckets)
return NULL;
@@ -1257,7 +698,7 @@ bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
count = 0;
while (selem) {
selem = hlist_entry_safe(selem->map_node.next,
- struct bpf_sk_storage_elem, map_node);
+ struct bpf_local_storage_elem, map_node);
if (!selem) {
/* not found, unlock and go to the next bucket */
b = &smap->buckets[bucket_id++];
@@ -1265,7 +706,7 @@ bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
skip_elems = 0;
break;
}
- sk_storage = rcu_dereference_raw(selem->sk_storage);
+ sk_storage = rcu_dereference_raw(selem->local_storage);
if (sk_storage) {
info->skip_elems = skip_elems + count;
return selem;
@@ -1278,7 +719,7 @@ bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
raw_spin_lock_bh(&b->lock);
count = 0;
hlist_for_each_entry(selem, &b->list, map_node) {
- sk_storage = rcu_dereference_raw(selem->sk_storage);
+ sk_storage = rcu_dereference_raw(selem->local_storage);
if (sk_storage && count >= skip_elems) {
info->bucket_id = i;
info->skip_elems = count;
@@ -1297,7 +738,7 @@ bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct bpf_sk_storage_elem *selem;
+ struct bpf_local_storage_elem *selem;
selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL);
if (!selem)
@@ -1330,11 +771,11 @@ DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta,
void *value)
static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
- struct bpf_sk_storage_elem *selem)
+ struct bpf_local_storage_elem *selem)
{
struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
struct bpf_iter__bpf_sk_storage_map ctx = {};
- struct bpf_sk_storage *sk_storage;
+ struct bpf_local_storage *sk_storage;
struct bpf_iter_meta meta;
struct bpf_prog *prog;
int ret = 0;
@@ -1345,8 +786,8 @@ static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
ctx.meta = &meta;
ctx.map = info->map;
if (selem) {
- sk_storage = rcu_dereference_raw(selem->sk_storage);
- ctx.sk = sk_storage->sk;
+ sk_storage = rcu_dereference_raw(selem->local_storage);
+ ctx.sk = sk_storage->owner;
ctx.value = SDATA(selem)->data;
}
ret = bpf_iter_run_prog(prog, &ctx);
@@ -1363,13 +804,13 @@ static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v)
static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v)
{
struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
- struct bpf_sk_storage_map *smap;
- struct bucket *b;
+ struct bpf_local_storage_map *smap;
+ struct bpf_local_storage_map_bucket *b;
if (!v) {
(void)__bpf_sk_storage_map_seq_show(seq, v);
} else {
- smap = (struct bpf_sk_storage_map *)info->map;
+ smap = (struct bpf_local_storage_map *)info->map;
b = &smap->buckets[info->bucket_id];
raw_spin_unlock_bh(&b->lock);
}
@@ -1437,6 +878,8 @@ static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
.target = "bpf_sk_storage_map",
.attach_target = bpf_iter_attach_map,
.detach_target = bpf_iter_detach_map,
+ .show_fdinfo = bpf_iter_map_show_fdinfo,
+ .fill_link_info = bpf_iter_map_fill_link_info,
.ctx_arg_info_size = 2,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 639745d..9fcaa54 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -623,10 +623,11 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
while (length && iov_iter_count(from)) {
struct page *pages[MAX_SKB_FRAGS];
+ struct page *last_head = NULL;
size_t start;
ssize_t copied;
unsigned long truesize;
- int n = 0;
+ int refs, n = 0;
if (frag == MAX_SKB_FRAGS)
return -EMSGSIZE;
@@ -649,13 +650,37 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
} else {
refcount_add(truesize, &skb->sk->sk_wmem_alloc);
}
- while (copied) {
+ for (refs = 0; copied != 0; start = 0) {
int size = min_t(int, copied, PAGE_SIZE - start);
- skb_fill_page_desc(skb, frag++, pages[n], start, size);
- start = 0;
+ struct page *head = compound_head(pages[n]);
+
+ start += (pages[n] - head) << PAGE_SHIFT;
copied -= size;
n++;
+ if (frag) {
+ skb_frag_t *last = &skb_shinfo(skb)->frags[frag - 1];
+
+ if (head == skb_frag_page(last) &&
+ start == skb_frag_off(last) + skb_frag_size(last)) {
+ skb_frag_size_add(last, size);
+ /* We combined this page, we need to release
+ * a reference. Since compound pages refcount
+ * is shared among many pages, batch the refcount
+ * adjustments to limit false sharing.
+ */
+ last_head = head;
+ refs++;
+ continue;
+ }
+ }
+ if (refs) {
+ page_ref_sub(last_head, refs);
+ refs = 0;
+ }
+ skb_fill_page_desc(skb, frag++, head, start, size);
}
+ if (refs)
+ page_ref_sub(last_head, refs);
}
return 0;
}
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 80ec1cd..49e911c 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -5895,6 +5895,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
list_for_each_entry(devlink, &devlink_list, list) {
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
continue;
+ mutex_lock(&devlink->lock);
list_for_each_entry(port, &devlink->port_list, list) {
mutex_lock(&port->reporters_lock);
list_for_each_entry(reporter, &port->reporter_list, list) {
@@ -5909,12 +5910,14 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
NLM_F_MULTI);
if (err) {
mutex_unlock(&port->reporters_lock);
+ mutex_unlock(&devlink->lock);
goto out;
}
idx++;
}
mutex_unlock(&port->reporters_lock);
}
+ mutex_unlock(&devlink->lock);
}
out:
mutex_unlock(&devlink_mutex);
@@ -7555,11 +7558,11 @@ int devlink_port_register(struct devlink *devlink,
devlink_port->index = port_index;
devlink_port->registered = true;
spin_lock_init(&devlink_port->type_lock);
+ INIT_LIST_HEAD(&devlink_port->reporter_list);
+ mutex_init(&devlink_port->reporters_lock);
list_add_tail(&devlink_port->list, &devlink->port_list);
INIT_LIST_HEAD(&devlink_port->param_list);
mutex_unlock(&devlink->lock);
- INIT_LIST_HEAD(&devlink_port->reporter_list);
- mutex_init(&devlink_port->reporters_lock);
INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
devlink_port_type_warn_schedule(devlink_port);
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
@@ -7576,13 +7579,13 @@ void devlink_port_unregister(struct devlink_port *devlink_port)
{
struct devlink *devlink = devlink_port->devlink;
- WARN_ON(!list_empty(&devlink_port->reporter_list));
- mutex_destroy(&devlink_port->reporters_lock);
devlink_port_type_warn_cancel(devlink_port);
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
mutex_lock(&devlink->lock);
list_del(&devlink_port->list);
mutex_unlock(&devlink->lock);
+ WARN_ON(!list_empty(&devlink_port->reporter_list));
+ mutex_destroy(&devlink_port->reporters_lock);
}
EXPORT_SYMBOL_GPL(devlink_port_unregister);
diff --git a/net/core/filter.c b/net/core/filter.c
index 1f647ab..2ad9c0e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4459,6 +4459,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
} else {
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ unsigned long timeout;
if (optlen != sizeof(int))
return -EINVAL;
@@ -4480,6 +4481,20 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
tp->snd_ssthresh = val;
}
break;
+ case TCP_BPF_DELACK_MAX:
+ timeout = usecs_to_jiffies(val);
+ if (timeout > TCP_DELACK_MAX ||
+ timeout < TCP_TIMEOUT_MIN)
+ return -EINVAL;
+ inet_csk(sk)->icsk_delack_max = timeout;
+ break;
+ case TCP_BPF_RTO_MIN:
+ timeout = usecs_to_jiffies(val);
+ if (timeout > TCP_RTO_MIN ||
+ timeout < TCP_TIMEOUT_MIN)
+ return -EINVAL;
+ inet_csk(sk)->icsk_rto_min = timeout;
+ break;
case TCP_SAVE_SYN:
if (val < 0 || val > 1)
ret = -EINVAL;
@@ -4550,9 +4565,9 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname,
tp = tcp_sk(sk);
if (optlen <= 0 || !tp->saved_syn ||
- optlen > tp->saved_syn[0])
+ optlen > tcp_saved_syn_len(tp->saved_syn))
goto err_clear;
- memcpy(optval, tp->saved_syn + 1, optlen);
+ memcpy(optval, tp->saved_syn->data, optlen);
break;
default:
goto err_clear;
@@ -4654,9 +4669,99 @@ static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+static int bpf_sock_ops_get_syn(struct bpf_sock_ops_kern *bpf_sock,
+ int optname, const u8 **start)
+{
+ struct sk_buff *syn_skb = bpf_sock->syn_skb;
+ const u8 *hdr_start;
+ int ret;
+
+ if (syn_skb) {
+ /* sk is a request_sock here */
+
+ if (optname == TCP_BPF_SYN) {
+ hdr_start = syn_skb->data;
+ ret = tcp_hdrlen(syn_skb);
+ } else if (optname == TCP_BPF_SYN_IP) {
+ hdr_start = skb_network_header(syn_skb);
+ ret = skb_network_header_len(syn_skb) +
+ tcp_hdrlen(syn_skb);
+ } else {
+ /* optname == TCP_BPF_SYN_MAC */
+ hdr_start = skb_mac_header(syn_skb);
+ ret = skb_mac_header_len(syn_skb) +
+ skb_network_header_len(syn_skb) +
+ tcp_hdrlen(syn_skb);
+ }
+ } else {
+ struct sock *sk = bpf_sock->sk;
+ struct saved_syn *saved_syn;
+
+ if (sk->sk_state == TCP_NEW_SYN_RECV)
+ /* synack retransmit. bpf_sock->syn_skb will
+ * not be available. It has to resort to
+ * saved_syn (if it is saved).
+ */
+ saved_syn = inet_reqsk(sk)->saved_syn;
+ else
+ saved_syn = tcp_sk(sk)->saved_syn;
+
+ if (!saved_syn)
+ return -ENOENT;
+
+ if (optname == TCP_BPF_SYN) {
+ hdr_start = saved_syn->data +
+ saved_syn->mac_hdrlen +
+ saved_syn->network_hdrlen;
+ ret = saved_syn->tcp_hdrlen;
+ } else if (optname == TCP_BPF_SYN_IP) {
+ hdr_start = saved_syn->data +
+ saved_syn->mac_hdrlen;
+ ret = saved_syn->network_hdrlen +
+ saved_syn->tcp_hdrlen;
+ } else {
+ /* optname == TCP_BPF_SYN_MAC */
+
+ /* TCP_SAVE_SYN may not have saved the mac hdr */
+ if (!saved_syn->mac_hdrlen)
+ return -ENOENT;
+
+ hdr_start = saved_syn->data;
+ ret = saved_syn->mac_hdrlen +
+ saved_syn->network_hdrlen +
+ saved_syn->tcp_hdrlen;
+ }
+ }
+
+ *start = hdr_start;
+ return ret;
+}
+
BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
int, level, int, optname, char *, optval, int, optlen)
{
+ if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP &&
+ optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_MAC) {
+ int ret, copy_len = 0;
+ const u8 *start;
+
+ ret = bpf_sock_ops_get_syn(bpf_sock, optname, &start);
+ if (ret > 0) {
+ copy_len = ret;
+ if (optlen < copy_len) {
+ copy_len = optlen;
+ ret = -ENOSPC;
+ }
+
+ memcpy(optval, start, copy_len);
+ }
+
+ /* Zero out unused buffer at the end */
+ memset(optval + copy_len, 0, optlen - copy_len);
+
+ return ret;
+ }
+
return _bpf_getsockopt(bpf_sock->sk, level, optname, optval, optlen);
}
@@ -6150,6 +6255,232 @@ static const struct bpf_func_proto bpf_sk_assign_proto = {
.arg3_type = ARG_ANYTHING,
};
+static const u8 *bpf_search_tcp_opt(const u8 *op, const u8 *opend,
+ u8 search_kind, const u8 *magic,
+ u8 magic_len, bool *eol)
+{
+ u8 kind, kind_len;
+
+ *eol = false;
+
+ while (op < opend) {
+ kind = op[0];
+
+ if (kind == TCPOPT_EOL) {
+ *eol = true;
+ return ERR_PTR(-ENOMSG);
+ } else if (kind == TCPOPT_NOP) {
+ op++;
+ continue;
+ }
+
+ if (opend - op < 2 || opend - op < op[1] || op[1] < 2)
+ /* Something is wrong in the received header.
+ * Follow the TCP stack's tcp_parse_options()
+ * and just bail here.
+ */
+ return ERR_PTR(-EFAULT);
+
+ kind_len = op[1];
+ if (search_kind == kind) {
+ if (!magic_len)
+ return op;
+
+ if (magic_len > kind_len - 2)
+ return ERR_PTR(-ENOMSG);
+
+ if (!memcmp(&op[2], magic, magic_len))
+ return op;
+ }
+
+ op += kind_len;
+ }
+
+ return ERR_PTR(-ENOMSG);
+}
+
+BPF_CALL_4(bpf_sock_ops_load_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
+ void *, search_res, u32, len, u64, flags)
+{
+ bool eol, load_syn = flags & BPF_LOAD_HDR_OPT_TCP_SYN;
+ const u8 *op, *opend, *magic, *search = search_res;
+ u8 search_kind, search_len, copy_len, magic_len;
+ int ret;
+
+ /* 2 byte is the minimal option len except TCPOPT_NOP and
+ * TCPOPT_EOL which are useless for the bpf prog to learn
+ * and this helper disallow loading them also.
+ */
+ if (len < 2 || flags & ~BPF_LOAD_HDR_OPT_TCP_SYN)
+ return -EINVAL;
+
+ search_kind = search[0];
+ search_len = search[1];
+
+ if (search_len > len || search_kind == TCPOPT_NOP ||
+ search_kind == TCPOPT_EOL)
+ return -EINVAL;
+
+ if (search_kind == TCPOPT_EXP || search_kind == 253) {
+ /* 16 or 32 bit magic. +2 for kind and kind length */
+ if (search_len != 4 && search_len != 6)
+ return -EINVAL;
+ magic = &search[2];
+ magic_len = search_len - 2;
+ } else {
+ if (search_len)
+ return -EINVAL;
+ magic = NULL;
+ magic_len = 0;
+ }
+
+ if (load_syn) {
+ ret = bpf_sock_ops_get_syn(bpf_sock, TCP_BPF_SYN, &op);
+ if (ret < 0)
+ return ret;
+
+ opend = op + ret;
+ op += sizeof(struct tcphdr);
+ } else {
+ if (!bpf_sock->skb ||
+ bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB)
+ /* This bpf_sock->op cannot call this helper */
+ return -EPERM;
+
+ opend = bpf_sock->skb_data_end;
+ op = bpf_sock->skb->data + sizeof(struct tcphdr);
+ }
+
+ op = bpf_search_tcp_opt(op, opend, search_kind, magic, magic_len,
+ &eol);
+ if (IS_ERR(op))
+ return PTR_ERR(op);
+
+ copy_len = op[1];
+ ret = copy_len;
+ if (copy_len > len) {
+ ret = -ENOSPC;
+ copy_len = len;
+ }
+
+ memcpy(search_res, op, copy_len);
+ return ret;
+}
+
+static const struct bpf_func_proto bpf_sock_ops_load_hdr_opt_proto = {
+ .func = bpf_sock_ops_load_hdr_opt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_sock_ops_store_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
+ const void *, from, u32, len, u64, flags)
+{
+ u8 new_kind, new_kind_len, magic_len = 0, *opend;
+ const u8 *op, *new_op, *magic = NULL;
+ struct sk_buff *skb;
+ bool eol;
+
+ if (bpf_sock->op != BPF_SOCK_OPS_WRITE_HDR_OPT_CB)
+ return -EPERM;
+
+ if (len < 2 || flags)
+ return -EINVAL;
+
+ new_op = from;
+ new_kind = new_op[0];
+ new_kind_len = new_op[1];
+
+ if (new_kind_len > len || new_kind == TCPOPT_NOP ||
+ new_kind == TCPOPT_EOL)
+ return -EINVAL;
+
+ if (new_kind_len > bpf_sock->remaining_opt_len)
+ return -ENOSPC;
+
+ /* 253 is another experimental kind */
+ if (new_kind == TCPOPT_EXP || new_kind == 253) {
+ if (new_kind_len < 4)
+ return -EINVAL;
+ /* Match for the 2 byte magic also.
+ * RFC 6994: the magic could be 2 or 4 bytes.
+ * Hence, matching by 2 byte only is on the
+ * conservative side but it is the right
+ * thing to do for the 'search-for-duplication'
+ * purpose.
+ */
+ magic = &new_op[2];
+ magic_len = 2;
+ }
+
+ /* Check for duplication */
+ skb = bpf_sock->skb;
+ op = skb->data + sizeof(struct tcphdr);
+ opend = bpf_sock->skb_data_end;
+
+ op = bpf_search_tcp_opt(op, opend, new_kind, magic, magic_len,
+ &eol);
+ if (!IS_ERR(op))
+ return -EEXIST;
+
+ if (PTR_ERR(op) != -ENOMSG)
+ return PTR_ERR(op);
+
+ if (eol)
+ /* The option has been ended. Treat it as no more
+ * header option can be written.
+ */
+ return -ENOSPC;
+
+ /* No duplication found. Store the header option. */
+ memcpy(opend, from, new_kind_len);
+
+ bpf_sock->remaining_opt_len -= new_kind_len;
+ bpf_sock->skb_data_end += new_kind_len;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_sock_ops_store_hdr_opt_proto = {
+ .func = bpf_sock_ops_store_hdr_opt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_sock_ops_reserve_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
+ u32, len, u64, flags)
+{
+ if (bpf_sock->op != BPF_SOCK_OPS_HDR_OPT_LEN_CB)
+ return -EPERM;
+
+ if (flags || len < 2)
+ return -EINVAL;
+
+ if (len > bpf_sock->remaining_opt_len)
+ return -ENOSPC;
+
+ bpf_sock->remaining_opt_len -= len;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = {
+ .func = bpf_sock_ops_reserve_hdr_opt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
#endif /* CONFIG_INET */
bool bpf_helper_changes_pkt_data(void *func)
@@ -6179,6 +6510,9 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_lwt_seg6_adjust_srh ||
func == bpf_lwt_seg6_action ||
#endif
+#ifdef CONFIG_INET
+ func == bpf_sock_ops_store_hdr_opt ||
+#endif
func == bpf_lwt_in_push_encap ||
func == bpf_lwt_xmit_push_encap)
return true;
@@ -6550,6 +6884,12 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
#ifdef CONFIG_INET
+ case BPF_FUNC_load_hdr_opt:
+ return &bpf_sock_ops_load_hdr_opt_proto;
+ case BPF_FUNC_store_hdr_opt:
+ return &bpf_sock_ops_store_hdr_opt_proto;
+ case BPF_FUNC_reserve_hdr_opt:
+ return &bpf_sock_ops_reserve_hdr_opt_proto;
case BPF_FUNC_tcp_sock:
return &bpf_tcp_sock_proto;
#endif /* CONFIG_INET */
@@ -7349,6 +7689,20 @@ static bool sock_ops_is_valid_access(int off, int size,
return false;
info->reg_type = PTR_TO_SOCKET_OR_NULL;
break;
+ case offsetof(struct bpf_sock_ops, skb_data):
+ if (size != sizeof(__u64))
+ return false;
+ info->reg_type = PTR_TO_PACKET;
+ break;
+ case offsetof(struct bpf_sock_ops, skb_data_end):
+ if (size != sizeof(__u64))
+ return false;
+ info->reg_type = PTR_TO_PACKET_END;
+ break;
+ case offsetof(struct bpf_sock_ops, skb_tcp_flags):
+ bpf_ctx_record_field_size(info, size_default);
+ return bpf_ctx_narrow_access_ok(off, size,
+ size_default);
default:
if (size != size_default)
return false;
@@ -8450,17 +8804,22 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
switch (si->off) {
- case offsetof(struct bpf_sock_ops, op) ...
+ case offsetof(struct bpf_sock_ops, op):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
+ op),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern, op));
+ break;
+
+ case offsetof(struct bpf_sock_ops, replylong[0]) ...
offsetof(struct bpf_sock_ops, replylong[3]):
- BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, op) !=
- sizeof_field(struct bpf_sock_ops_kern, op));
BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, reply) !=
sizeof_field(struct bpf_sock_ops_kern, reply));
BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, replylong) !=
sizeof_field(struct bpf_sock_ops_kern, replylong));
off = si->off;
- off -= offsetof(struct bpf_sock_ops, op);
- off += offsetof(struct bpf_sock_ops_kern, op);
+ off -= offsetof(struct bpf_sock_ops, replylong[0]);
+ off += offsetof(struct bpf_sock_ops_kern, replylong[0]);
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
off);
@@ -8681,6 +9040,49 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct bpf_sock_ops, sk):
SOCK_OPS_GET_SK();
break;
+ case offsetof(struct bpf_sock_ops, skb_data_end):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
+ skb_data_end),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern,
+ skb_data_end));
+ break;
+ case offsetof(struct bpf_sock_ops, skb_data):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
+ skb),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern,
+ skb));
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
+ si->dst_reg, si->dst_reg,
+ offsetof(struct sk_buff, data));
+ break;
+ case offsetof(struct bpf_sock_ops, skb_len):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
+ skb),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern,
+ skb));
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
+ si->dst_reg, si->dst_reg,
+ offsetof(struct sk_buff, len));
+ break;
+ case offsetof(struct bpf_sock_ops, skb_tcp_flags):
+ off = offsetof(struct sk_buff, cb);
+ off += offsetof(struct tcp_skb_cb, tcp_flags);
+ *target_size = sizeof_field(struct tcp_skb_cb, tcp_flags);
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
+ skb),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern,
+ skb));
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_skb_cb,
+ tcp_flags),
+ si->dst_reg, si->dst_reg, off);
+ break;
}
return insn - insn_buf;
}
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index d964a51..e33fde0 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -107,6 +107,36 @@ unsigned int ptp_classify_raw(const struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(ptp_classify_raw);
+struct ptp_header *ptp_parse_header(struct sk_buff *skb, unsigned int type)
+{
+ u8 *ptr = skb_mac_header(skb);
+
+ if (type & PTP_CLASS_VLAN)
+ ptr += VLAN_HLEN;
+
+ switch (type & PTP_CLASS_PMASK) {
+ case PTP_CLASS_IPV4:
+ ptr += IPV4_HLEN(ptr) + UDP_HLEN;
+ break;
+ case PTP_CLASS_IPV6:
+ ptr += IP6_HLEN + UDP_HLEN;
+ break;
+ case PTP_CLASS_L2:
+ break;
+ default:
+ return NULL;
+ }
+
+ ptr += ETH_HLEN;
+
+ /* Ensure that the entire header is present in this packet. */
+ if (ptr + sizeof(struct ptp_header) > skb->data + skb->len)
+ return NULL;
+
+ return (struct ptp_header *)ptr;
+}
+EXPORT_SYMBOL_GPL(ptp_parse_header);
+
void __init ptp_classifier_init(void)
{
static struct sock_filter ptp_filter[] __initdata = {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6faf73d..bfd7483 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5955,8 +5955,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
size = SKB_WITH_OVERHEAD(ksize(data));
memcpy((struct skb_shared_info *)(data + size),
- skb_shinfo(skb), offsetof(struct skb_shared_info,
- frags[skb_shinfo(skb)->nr_frags]));
+ skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0]));
if (skb_orphan_frags(skb, gfp_mask)) {
kfree(data);
return -ENOMEM;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 6495831..4b5f7c8 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -494,14 +494,34 @@ static void sk_psock_backlog(struct work_struct *work)
struct sk_psock *sk_psock_init(struct sock *sk, int node)
{
- struct sk_psock *psock = kzalloc_node(sizeof(*psock),
- GFP_ATOMIC | __GFP_NOWARN,
- node);
- if (!psock)
- return NULL;
+ struct sk_psock *psock;
+ struct proto *prot;
+ write_lock_bh(&sk->sk_callback_lock);
+
+ if (inet_csk_has_ulp(sk)) {
+ psock = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
+ if (sk->sk_user_data) {
+ psock = ERR_PTR(-EBUSY);
+ goto out;
+ }
+
+ psock = kzalloc_node(sizeof(*psock), GFP_ATOMIC | __GFP_NOWARN, node);
+ if (!psock) {
+ psock = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ prot = READ_ONCE(sk->sk_prot);
psock->sk = sk;
- psock->eval = __SK_NONE;
+ psock->eval = __SK_NONE;
+ psock->sk_proto = prot;
+ psock->saved_unhash = prot->unhash;
+ psock->saved_close = prot->close;
+ psock->saved_write_space = sk->sk_write_space;
INIT_LIST_HEAD(&psock->link);
spin_lock_init(&psock->link_lock);
@@ -516,6 +536,8 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
rcu_assign_sk_user_data_nocopy(sk, psock);
sock_hold(sk);
+out:
+ write_unlock_bh(&sk->sk_callback_lock);
return psock;
}
EXPORT_SYMBOL_GPL(sk_psock_init);
diff --git a/net/core/sock.c b/net/core/sock.c
index 6c5c6b1..ba9e7d9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -413,18 +413,6 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
return 0;
}
-static void sock_warn_obsolete_bsdism(const char *name)
-{
- static int warned;
- static char warncomm[TASK_COMM_LEN];
- if (strcmp(warncomm, current->comm) && warned < 5) {
- strcpy(warncomm, current->comm);
- pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
- warncomm, name);
- warned++;
- }
-}
-
static bool sock_needs_netstamp(const struct sock *sk)
{
switch (sk->sk_family) {
@@ -984,7 +972,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
break;
case SO_BSDCOMPAT:
- sock_warn_obsolete_bsdism("setsockopt");
break;
case SO_PASSCRED:
@@ -1387,7 +1374,6 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_BSDCOMPAT:
- sock_warn_obsolete_bsdism("getsockopt");
break;
case SO_TIMESTAMP_OLD:
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 119f52a..078386d 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -184,8 +184,6 @@ static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)
{
struct proto *prot;
- sock_owned_by_me(sk);
-
switch (sk->sk_type) {
case SOCK_STREAM:
prot = tcp_bpf_get_proto(sk, psock);
@@ -272,8 +270,8 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
}
} else {
psock = sk_psock_init(sk, map->numa_node);
- if (!psock) {
- ret = -ENOMEM;
+ if (IS_ERR(psock)) {
+ ret = PTR_ERR(psock);
goto out_progs;
}
}
@@ -322,8 +320,8 @@ static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk)
if (!psock) {
psock = sk_psock_init(sk, map->numa_node);
- if (!psock)
- return -ENOMEM;
+ if (IS_ERR(psock))
+ return PTR_ERR(psock);
}
ret = sock_map_init_proto(sk, psock);
@@ -478,8 +476,6 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
return -EINVAL;
if (unlikely(idx >= map->max_entries))
return -E2BIG;
- if (inet_csk_has_ulp(sk))
- return -EINVAL;
link = sk_psock_init_link();
if (!link)
@@ -563,10 +559,12 @@ static bool sock_map_sk_state_allowed(const struct sock *sk)
return false;
}
-static int sock_map_update_elem(struct bpf_map *map, void *key,
- void *value, u64 flags)
+static int sock_hash_update_common(struct bpf_map *map, void *key,
+ struct sock *sk, u64 flags);
+
+int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value,
+ u64 flags)
{
- u32 idx = *(u32 *)key;
struct socket *sock;
struct sock *sk;
int ret;
@@ -595,14 +593,38 @@ static int sock_map_update_elem(struct bpf_map *map, void *key,
sock_map_sk_acquire(sk);
if (!sock_map_sk_state_allowed(sk))
ret = -EOPNOTSUPP;
+ else if (map->map_type == BPF_MAP_TYPE_SOCKMAP)
+ ret = sock_map_update_common(map, *(u32 *)key, sk, flags);
else
- ret = sock_map_update_common(map, idx, sk, flags);
+ ret = sock_hash_update_common(map, key, sk, flags);
sock_map_sk_release(sk);
out:
fput(sock->file);
return ret;
}
+static int sock_map_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 flags)
+{
+ struct sock *sk = (struct sock *)value;
+ int ret;
+
+ if (!sock_map_sk_is_suitable(sk))
+ return -EOPNOTSUPP;
+
+ local_bh_disable();
+ bh_lock_sock(sk);
+ if (!sock_map_sk_state_allowed(sk))
+ ret = -EOPNOTSUPP;
+ else if (map->map_type == BPF_MAP_TYPE_SOCKMAP)
+ ret = sock_map_update_common(map, *(u32 *)key, sk, flags);
+ else
+ ret = sock_hash_update_common(map, key, sk, flags);
+ bh_unlock_sock(sk);
+ local_bh_enable();
+ return ret;
+}
+
BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, sops,
struct bpf_map *, map, void *, key, u64, flags)
{
@@ -683,6 +705,7 @@ const struct bpf_func_proto bpf_msg_redirect_map_proto = {
static int sock_map_btf_id;
const struct bpf_map_ops sock_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc = sock_map_alloc,
.map_free = sock_map_free,
.map_get_next_key = sock_map_get_next_key,
@@ -855,8 +878,6 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
WARN_ON_ONCE(!rcu_read_lock_held());
if (unlikely(flags > BPF_EXIST))
return -EINVAL;
- if (inet_csk_has_ulp(sk))
- return -EINVAL;
link = sk_psock_init_link();
if (!link)
@@ -915,45 +936,6 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
return ret;
}
-static int sock_hash_update_elem(struct bpf_map *map, void *key,
- void *value, u64 flags)
-{
- struct socket *sock;
- struct sock *sk;
- int ret;
- u64 ufd;
-
- if (map->value_size == sizeof(u64))
- ufd = *(u64 *)value;
- else
- ufd = *(u32 *)value;
- if (ufd > S32_MAX)
- return -EINVAL;
-
- sock = sockfd_lookup(ufd, &ret);
- if (!sock)
- return ret;
- sk = sock->sk;
- if (!sk) {
- ret = -EINVAL;
- goto out;
- }
- if (!sock_map_sk_is_suitable(sk)) {
- ret = -EOPNOTSUPP;
- goto out;
- }
-
- sock_map_sk_acquire(sk);
- if (!sock_map_sk_state_allowed(sk))
- ret = -EOPNOTSUPP;
- else
- ret = sock_hash_update_common(map, key, sk, flags);
- sock_map_sk_release(sk);
-out:
- fput(sock->file);
- return ret;
-}
-
static int sock_hash_get_next_key(struct bpf_map *map, void *key,
void *key_next)
{
@@ -1219,10 +1201,11 @@ const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
static int sock_hash_map_btf_id;
const struct bpf_map_ops sock_hash_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc = sock_hash_alloc,
.map_free = sock_hash_free,
.map_get_next_key = sock_hash_get_next_key,
- .map_update_elem = sock_hash_update_elem,
+ .map_update_elem = sock_map_update_elem,
.map_delete_elem = sock_hash_delete_elem,
.map_lookup_elem = sock_hash_lookup,
.map_lookup_elem_sys_only = sock_hash_lookup_sys,
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 6ada114bb..d86d8d1 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -22,7 +22,7 @@
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
-static int two __maybe_unused = 2;
+static int two = 2;
static int three = 3;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
@@ -546,7 +546,7 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
+ .extra2 = &two,
},
{
.procname = "devconf_inherit_init_net",
@@ -587,6 +587,19 @@ static struct ctl_table netns_core_table[] = {
{ }
};
+static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str)
+{
+ /* fallback tunnels for initns only */
+ if (!strncmp(str, "initns", 6))
+ sysctl_fb_tunnels_only_for_init_net = 1;
+ /* no fallback tunnels anywhere */
+ else if (!strncmp(str, "none", 4))
+ sysctl_fb_tunnels_only_for_init_net = 2;
+
+ return 1;
+}
+__setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup);
+
static __net_init int sysctl_core_net_init(struct net *net)
{
struct ctl_table *tbl;
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 0a72510..8f3dd3b 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -274,7 +274,7 @@ void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
/**
* dccp_ackvec_clear_state - Perform house-keeping / garbage-collection
* This routine is called when the peer acknowledges the receipt of Ack Vectors
- * up to and including @ackno. While based on on section A.3 of RFC 4340, here
+ * up to and including @ackno. While based on section A.3 of RFC 4340, here
* are additional precautions to prevent corrupted buffer state. In particular,
* we use tail_ackno to identify outdated records; it always marks the earliest
* packet of group (2) in 11.4.2.
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 9c28c82..d8f3751 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -731,7 +731,7 @@ int dccp_invalid_packet(struct sk_buff *skb)
return 1;
}
/*
- * If P.Data Offset is too too large for packet, drop packet and return
+ * If P.Data Offset is too large for packet, drop packet and return
*/
if (!pskb_may_pull(skb, dccph_doff * sizeof(u32))) {
DCCP_WARN("P.Data Offset(%u) too large\n", dccph_doff);
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 0e06dfc..927c796 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -85,7 +85,7 @@ static void dccp_retransmit_timer(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
/*
- * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was
+ * More than 4MSL (8 minutes) has passed, a RESET(aborted) was
* sent, no need to retransmit, this sock is dead.
*/
if (dccp_write_timeout(sk))
diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c
index 9ef54cd..9ecda09 100644
--- a/net/ethtool/channels.c
+++ b/net/ethtool/channels.c
@@ -223,7 +223,7 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
from_channel = channels.combined_count +
min(channels.rx_count, channels.tx_count);
for (i = from_channel; i < old_total; i++)
- if (xdp_get_umem_from_qid(dev, i)) {
+ if (xsk_get_pool_from_qid(dev, i)) {
GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing zerocopy AF_XDP sockets");
return -EINVAL;
}
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 441794e..328d15c 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1706,7 +1706,7 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
min(channels.rx_count, channels.tx_count);
to_channel = curr.combined_count + max(curr.rx_count, curr.tx_count);
for (i = from_channel; i < to_channel; i++)
- if (xdp_get_umem_from_qid(dev, i))
+ if (xsk_get_pool_from_qid(dev, i))
return -EINVAL;
ret = dev->ethtool_ops->set_channels(dev, &channels);
@@ -1861,23 +1861,18 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
id.data ? (id.data * HZ) : MAX_SCHEDULE_TIMEOUT);
} else {
/* Driver expects to be called at twice the frequency in rc */
- int n = rc * 2, i, interval = HZ / n;
+ int n = rc * 2, interval = HZ / n;
+ u64 count = n * id.data, i = 0;
- /* Count down seconds */
do {
- /* Count down iterations per second */
- i = n;
- do {
- rtnl_lock();
- rc = ops->set_phys_id(dev,
- (i & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON);
- rtnl_unlock();
- if (rc)
- break;
- schedule_timeout_interruptible(interval);
- } while (!signal_pending(current) && --i != 0);
- } while (!signal_pending(current) &&
- (id.data == 0 || --id.data != 0));
+ rtnl_lock();
+ rc = ops->set_phys_id(dev,
+ (i++ & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON);
+ rtnl_unlock();
+ if (rc)
+ break;
+ schedule_timeout_interruptible(interval);
+ } while (!signal_pending(current) && (!id.data || i < count));
}
rtnl_lock();
@@ -3025,13 +3020,14 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
case TCP_V4_FLOW:
case TCP_V6_FLOW:
match->key.basic.ip_proto = IPPROTO_TCP;
+ match->mask.basic.ip_proto = 0xff;
break;
case UDP_V4_FLOW:
case UDP_V6_FLOW:
match->key.basic.ip_proto = IPPROTO_UDP;
+ match->mask.basic.ip_proto = 0xff;
break;
}
- match->mask.basic.ip_proto = 0xff;
match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_BASIC);
match->dissector.offset[FLOW_DISSECTOR_KEY_BASIC] =
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 4307503..b7260c8 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1017,6 +1017,7 @@ static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon
const struct proto_ops inet_stream_ops = {
.family = PF_INET,
+ .flags = PROTO_CMSG_DATA_ONLY,
.owner = THIS_MODULE,
.release = inet_release,
.bind = inet_bind,
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index abd0834..5308cfa 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -237,7 +237,7 @@ static struct sk_buff *fou_gro_receive(struct sock *sk,
/* We can clear the encap_mark for FOU as we are essentially doing
* one of two possible things. We are either adding an L4 tunnel
- * header to the outer L3 tunnel header, or we are are simply
+ * header to the outer L3 tunnel header, or we are simply
* treating the GRE tunnel header as though it is a UDP protocol
* specific header such as VXLAN or GENEVE.
*/
@@ -429,7 +429,7 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
/* We can clear the encap_mark for GUE as we are essentially doing
* one of two possible things. We are either adding an L4 tunnel
- * header to the outer L3 tunnel header, or we are are simply
+ * header to the outer L3 tunnel header, or we are simply
* treating the GRE tunnel header as though it is a UDP protocol
* specific header such as VXLAN or GENEVE.
*/
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index cf36f95..8f2e974 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -690,9 +690,9 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
rcu_read_unlock();
}
- tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
+ tos = icmp_pointers[type].error ? (RT_TOS(iph->tos) |
IPTOS_PREC_INTERNETCONTROL) :
- iph->tos;
+ iph->tos;
mark = IP4_REPLY_MARK(net, skb_in->mark);
if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt))
@@ -784,7 +784,7 @@ EXPORT_SYMBOL(icmp_ndo_send);
static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
{
- const struct iphdr *iph = (const struct iphdr *) skb->data;
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
const struct net_protocol *ipprot;
int protocol = iph->protocol;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 4a98dd7..93816d4 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -125,6 +125,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
bool net_admin)
{
const struct inet_sock *inet = inet_sk(sk);
+ struct inet_diag_sockopt inet_sockopt;
if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown))
goto errout;
@@ -180,6 +181,22 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
r->idiag_inode = sock_i_ino(sk);
+ memset(&inet_sockopt, 0, sizeof(inet_sockopt));
+ inet_sockopt.recverr = inet->recverr;
+ inet_sockopt.is_icsk = inet->is_icsk;
+ inet_sockopt.freebind = inet->freebind;
+ inet_sockopt.hdrincl = inet->hdrincl;
+ inet_sockopt.mc_loop = inet->mc_loop;
+ inet_sockopt.transparent = inet->transparent;
+ inet_sockopt.mc_all = inet->mc_all;
+ inet_sockopt.nodefrag = inet->nodefrag;
+ inet_sockopt.bind_address_no_port = inet->bind_address_no_port;
+ inet_sockopt.recverr_rfc4884 = inet->recverr_rfc4884;
+ inet_sockopt.defer_connect = inet->defer_connect;
+ if (nla_put(skb, INET_DIAG_SOCKOPT, sizeof(inet_sockopt),
+ &inet_sockopt))
+ goto errout;
+
return 0;
errout:
return 1;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 239e544..8cbe743 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -228,7 +228,7 @@ static void inet_unhash2(struct inet_hashinfo *h, struct sock *sk)
static inline int compute_score(struct sock *sk, struct net *net,
const unsigned short hnum, const __be32 daddr,
- const int dif, const int sdif, bool exact_dif)
+ const int dif, const int sdif)
{
int score = -1;
@@ -277,15 +277,13 @@ static struct sock *inet_lhash2_lookup(struct net *net,
const __be32 daddr, const unsigned short hnum,
const int dif, const int sdif)
{
- bool exact_dif = inet_exact_dif_match(net, skb);
struct inet_connection_sock *icsk;
struct sock *sk, *result = NULL;
int score, hiscore = 0;
inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
sk = (struct sock *)icsk;
- score = compute_score(sk, net, hnum, daddr,
- dif, sdif, exact_dif);
+ score = compute_score(sk, net, hnum, daddr, dif, sdif);
if (score > hiscore) {
result = lookup_reuseport(net, sk, skb, doff,
saddr, sport, daddr, hnum);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 948747a..da1b503 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -47,32 +47,32 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
unsigned char *iph = skb_network_header(skb);
memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options));
- memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen);
+ memcpy(iph + sizeof(struct iphdr), opt->__data, opt->optlen);
opt = &(IPCB(skb)->opt);
if (opt->srr)
- memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4);
+ memcpy(iph + opt->srr + iph[opt->srr + 1] - 4, &daddr, 4);
if (!is_frag) {
if (opt->rr_needaddr)
- ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, skb, rt);
+ ip_rt_get_source(iph + opt->rr + iph[opt->rr + 2] - 5, skb, rt);
if (opt->ts_needaddr)
- ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, skb, rt);
+ ip_rt_get_source(iph + opt->ts + iph[opt->ts + 2] - 9, skb, rt);
if (opt->ts_needtime) {
__be32 midtime;
midtime = inet_current_timestamp();
- memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4);
+ memcpy(iph + opt->ts + iph[opt->ts + 2] - 5, &midtime, 4);
}
return;
}
if (opt->rr) {
- memset(iph+opt->rr, IPOPT_NOP, iph[opt->rr+1]);
+ memset(iph + opt->rr, IPOPT_NOP, iph[opt->rr + 1]);
opt->rr = 0;
opt->rr_needaddr = 0;
}
if (opt->ts) {
- memset(iph+opt->ts, IPOPT_NOP, iph[opt->ts+1]);
+ memset(iph + opt->ts, IPOPT_NOP, iph[opt->ts + 1]);
opt->ts = 0;
opt->ts_needaddr = opt->ts_needtime = 0;
}
@@ -495,26 +495,29 @@ EXPORT_SYMBOL(ip_options_compile);
void ip_options_undo(struct ip_options *opt)
{
if (opt->srr) {
- unsigned char *optptr = opt->__data+opt->srr-sizeof(struct iphdr);
- memmove(optptr+7, optptr+3, optptr[1]-7);
- memcpy(optptr+3, &opt->faddr, 4);
+ unsigned char *optptr = opt->__data + opt->srr - sizeof(struct iphdr);
+
+ memmove(optptr + 7, optptr + 3, optptr[1] - 7);
+ memcpy(optptr + 3, &opt->faddr, 4);
}
if (opt->rr_needaddr) {
- unsigned char *optptr = opt->__data+opt->rr-sizeof(struct iphdr);
+ unsigned char *optptr = opt->__data + opt->rr - sizeof(struct iphdr);
+
optptr[2] -= 4;
- memset(&optptr[optptr[2]-1], 0, 4);
+ memset(&optptr[optptr[2] - 1], 0, 4);
}
if (opt->ts) {
- unsigned char *optptr = opt->__data+opt->ts-sizeof(struct iphdr);
+ unsigned char *optptr = opt->__data + opt->ts - sizeof(struct iphdr);
+
if (opt->ts_needtime) {
optptr[2] -= 4;
- memset(&optptr[optptr[2]-1], 0, 4);
- if ((optptr[3]&0xF) == IPOPT_TS_PRESPEC)
+ memset(&optptr[optptr[2] - 1], 0, 4);
+ if ((optptr[3] & 0xF) == IPOPT_TS_PRESPEC)
optptr[2] -= 4;
}
if (opt->ts_needaddr) {
optptr[2] -= 4;
- memset(&optptr[optptr[2]-1], 0, 4);
+ memset(&optptr[optptr[2] - 1], 0, 4);
}
}
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 61f802d..b931d0b0 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -996,7 +996,7 @@ static int __ip_append_data(struct sock *sk,
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
- maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu;
+ maxnonfragsize = ip_sk_ignore_df(sk) ? IP_MAX_MTU : mtu;
if (cork->length + length > maxnonfragsize - fragheaderlen) {
ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -1351,7 +1351,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
if (cork->flags & IPCORK_OPT)
opt = cork->opt;
- if (!(rt->dst.dev->features&NETIF_F_SG))
+ if (!(rt->dst.dev->features & NETIF_F_SG))
return -EOPNOTSUPP;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@@ -1536,7 +1536,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
ip_select_ident(net, skb, sk);
if (opt) {
- iph->ihl += opt->optlen>>2;
+ iph->ihl += opt->optlen >> 2;
ip_options_build(skb, opt, cork->addr, rt, 0);
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index d2c2235..ec60367 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1124,8 +1124,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
dev_put(dev);
err = -EINVAL;
- if (sk->sk_bound_dev_if &&
- (!midx || midx != sk->sk_bound_dev_if))
+ if (sk->sk_bound_dev_if && midx != sk->sk_bound_dev_if)
break;
inet->uc_index = ifindex;
@@ -1189,7 +1188,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
err = -EINVAL;
if (sk->sk_bound_dev_if &&
mreq.imr_ifindex != sk->sk_bound_dev_if &&
- (!midx || midx != sk->sk_bound_dev_if))
+ midx != sk->sk_bound_dev_if)
break;
inet->mc_index = mreq.imr_ifindex;
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 134e923..bf9d4cd 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -133,12 +133,9 @@ static struct nexthop *nexthop_alloc(void)
static struct nh_group *nexthop_grp_alloc(u16 num_nh)
{
- size_t sz = offsetof(struct nexthop, nh_grp)
- + sizeof(struct nh_group)
- + sizeof(struct nh_grp_entry) * num_nh;
struct nh_group *nhg;
- nhg = kzalloc(sz, GFP_KERNEL);
+ nhg = kzalloc(struct_size(nhg, nh_entries, num_nh), GFP_KERNEL);
if (nhg)
nhg->num_nh = num_nh;
@@ -279,7 +276,7 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
case AF_INET:
fib_nh = &nhi->fib_nh;
if (fib_nh->fib_nh_gw_family &&
- nla_put_u32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4))
+ nla_put_be32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4))
goto nla_put_failure;
break;
@@ -800,7 +797,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
return;
}
- newg->has_v4 = nhg->has_v4;
+ newg->has_v4 = false;
newg->mpath = nhg->mpath;
newg->fdb_nh = nhg->fdb_nh;
newg->num_nh = nhg->num_nh;
@@ -809,12 +806,18 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
nhges = nhg->nh_entries;
new_nhges = newg->nh_entries;
for (i = 0, j = 0; i < nhg->num_nh; ++i) {
+ struct nh_info *nhi;
+
/* current nexthop getting removed */
if (nhg->nh_entries[i].nh == nh) {
newg->num_nh--;
continue;
}
+ nhi = rtnl_dereference(nhges[i].nh->nh_info);
+ if (nhi->family == AF_INET)
+ newg->has_v4 = true;
+
list_del(&nhges[i].nh_list);
new_nhges[j].nh_parent = nhges[i].nh_parent;
new_nhges[j].nh = nhges[i].nh;
@@ -961,6 +964,23 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old,
return 0;
}
+static void nh_group_v4_update(struct nh_group *nhg)
+{
+ struct nh_grp_entry *nhges;
+ bool has_v4 = false;
+ int i;
+
+ nhges = nhg->nh_entries;
+ for (i = 0; i < nhg->num_nh; i++) {
+ struct nh_info *nhi;
+
+ nhi = rtnl_dereference(nhges[i].nh->nh_info);
+ if (nhi->family == AF_INET)
+ has_v4 = true;
+ }
+ nhg->has_v4 = has_v4;
+}
+
static int replace_nexthop_single(struct net *net, struct nexthop *old,
struct nexthop *new,
struct netlink_ext_ack *extack)
@@ -984,6 +1004,21 @@ static int replace_nexthop_single(struct net *net, struct nexthop *old,
rcu_assign_pointer(old->nh_info, newi);
rcu_assign_pointer(new->nh_info, oldi);
+ /* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially
+ * update IPv4 indication in all the groups using the nexthop.
+ */
+ if (oldi->family == AF_INET && newi->family == AF_INET6) {
+ struct nh_grp_entry *nhge;
+
+ list_for_each_entry(nhge, &old->grp_list, nh_list) {
+ struct nexthop *nhp = nhge->nh_parent;
+ struct nh_group *nhg;
+
+ nhg = rtnl_dereference(nhp->nh_grp);
+ nh_group_v4_update(nhg);
+ }
+ }
+
return 0;
}
@@ -1101,7 +1136,7 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
while (1) {
struct nexthop *nh;
- next = rtnl_dereference(*pp);
+ next = *pp;
if (!next)
break;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index df6fbef..248856b 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -293,7 +293,8 @@ EXPORT_SYMBOL_GPL(ping_close);
/* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */
static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
- struct sockaddr *uaddr, int addr_len) {
+ struct sockaddr *uaddr, int addr_len)
+{
struct net *net = sock_net(sk);
if (sk->sk_family == AF_INET) {
struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
@@ -310,10 +311,10 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
- chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
-
if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
chk_addr_ret = RTN_LOCAL;
+ else
+ chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
if ((!inet_can_nonlocal_bind(net, isk) &&
chk_addr_ret != RTN_LOCAL) ||
@@ -383,20 +384,6 @@ static void ping_set_saddr(struct sock *sk, struct sockaddr *saddr)
}
}
-static void ping_clear_saddr(struct sock *sk, int dif)
-{
- sk->sk_bound_dev_if = dif;
- if (sk->sk_family == AF_INET) {
- struct inet_sock *isk = inet_sk(sk);
- isk->inet_rcv_saddr = isk->inet_saddr = 0;
-#if IS_ENABLED(CONFIG_IPV6)
- } else if (sk->sk_family == AF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
- memset(&sk->sk_v6_rcv_saddr, 0, sizeof(sk->sk_v6_rcv_saddr));
- memset(&np->saddr, 0, sizeof(np->saddr));
-#endif
- }
-}
/*
* We need our own bind because there are no privileged id's == local ports.
* Moreover, we don't allow binding to multi- and broadcast addresses.
@@ -420,12 +407,13 @@ int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
goto out;
err = -EADDRINUSE;
- ping_set_saddr(sk, uaddr);
snum = ntohs(((struct sockaddr_in *)uaddr)->sin_port);
if (ping_get_port(sk, snum) != 0) {
- ping_clear_saddr(sk, dif);
+ /* Restore possibly modified sk->sk_bound_dev_if by ping_check_bind_addr(). */
+ sk->sk_bound_dev_if = dif;
goto out;
}
+ ping_set_saddr(sk, uaddr);
pr_debug("after bind(): num = %hu, dif = %d\n",
isk->inet_num,
@@ -647,7 +635,8 @@ static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
}
int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
- void *user_icmph, size_t icmph_len) {
+ void *user_icmph, size_t icmph_len)
+{
u8 type, code;
if (len > 0xFFFF)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 407956b..1170653 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -260,11 +260,12 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
err = EHOSTUNREACH;
if (code > NR_ICMP_UNREACH)
break;
- err = icmp_err_convert[code].errno;
- harderr = icmp_err_convert[code].fatal;
if (code == ICMP_FRAG_NEEDED) {
harderr = inet->pmtudisc != IP_PMTUDISC_DONT;
err = EMSGSIZE;
+ } else {
+ err = icmp_err_convert[code].errno;
+ harderr = icmp_err_convert[code].fatal;
}
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8ca6bca..2c05b86 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -623,7 +623,7 @@ static inline u32 fnhe_hashfun(__be32 daddr)
u32 hval;
net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
- hval = jhash_1word((__force u32) daddr, fnhe_hashrnd);
+ hval = jhash_1word((__force u32)daddr, fnhe_hashrnd);
return hash_32(hval, FNHE_HASH_SHIFT);
}
@@ -1013,13 +1013,14 @@ out: kfree_skb(skb);
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
struct dst_entry *dst = &rt->dst;
- u32 old_mtu = ipv4_mtu(dst);
struct fib_result res;
bool lock = false;
+ u32 old_mtu;
if (ip_mtu_locked(dst))
return;
+ old_mtu = ipv4_mtu(dst);
if (old_mtu < mtu)
return;
@@ -1061,7 +1062,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
int oif, u8 protocol)
{
- const struct iphdr *iph = (const struct iphdr *) skb->data;
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
struct flowi4 fl4;
struct rtable *rt;
u32 mark = IP4_REPLY_MARK(net, skb->mark);
@@ -1078,7 +1079,7 @@ EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
- const struct iphdr *iph = (const struct iphdr *) skb->data;
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
struct flowi4 fl4;
struct rtable *rt;
@@ -1096,7 +1097,7 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
- const struct iphdr *iph = (const struct iphdr *) skb->data;
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
struct flowi4 fl4;
struct rtable *rt;
struct dst_entry *odst = NULL;
@@ -1126,7 +1127,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
new = true;
}
- __ip_rt_update_pmtu((struct rtable *) xfrm_dst_path(&rt->dst), &fl4, mtu);
+ __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
if (!dst_check(&rt->dst, 0)) {
if (new)
@@ -1151,7 +1152,7 @@ EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
void ipv4_redirect(struct sk_buff *skb, struct net *net,
int oif, u8 protocol)
{
- const struct iphdr *iph = (const struct iphdr *) skb->data;
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
struct flowi4 fl4;
struct rtable *rt;
@@ -1167,7 +1168,7 @@ EXPORT_SYMBOL_GPL(ipv4_redirect);
void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
- const struct iphdr *iph = (const struct iphdr *) skb->data;
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
struct flowi4 fl4;
struct rtable *rt;
struct net *net = sock_net(sk);
@@ -1307,7 +1308,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
- const struct rtable *rt = (const struct rtable *) dst;
+ const struct rtable *rt = (const struct rtable *)dst;
unsigned int mtu = rt->rt_pmtu;
if (!mtu || time_after_eq(jiffies, rt->dst.expires))
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 31f3b85..57a5688 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -418,6 +418,8 @@ void tcp_init_sock(struct sock *sk)
INIT_LIST_HEAD(&tp->tsorted_sent_queue);
icsk->icsk_rto = TCP_TIMEOUT_INIT;
+ icsk->icsk_rto_min = TCP_RTO_MIN;
+ icsk->icsk_delack_max = TCP_DELACK_MAX;
tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U);
@@ -2685,6 +2687,8 @@ int tcp_disconnect(struct sock *sk, int flags)
icsk->icsk_backoff = 0;
icsk->icsk_probes_out = 0;
icsk->icsk_rto = TCP_TIMEOUT_INIT;
+ icsk->icsk_rto_min = TCP_RTO_MIN;
+ icsk->icsk_delack_max = TCP_DELACK_MAX;
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tp->snd_cwnd = TCP_INIT_CWND;
tp->snd_cwnd_cnt = 0;
@@ -3207,7 +3211,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
break;
case TCP_SAVE_SYN:
- if (val < 0 || val > 1)
+ /* 0: disable, 1: enable, 2: start from ether_header */
+ if (val < 0 || val > 2)
err = -EINVAL;
else
tp->save_syn = val;
@@ -3788,20 +3793,21 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
lock_sock(sk);
if (tp->saved_syn) {
- if (len < tp->saved_syn[0]) {
- if (put_user(tp->saved_syn[0], optlen)) {
+ if (len < tcp_saved_syn_len(tp->saved_syn)) {
+ if (put_user(tcp_saved_syn_len(tp->saved_syn),
+ optlen)) {
release_sock(sk);
return -EFAULT;
}
release_sock(sk);
return -EINVAL;
}
- len = tp->saved_syn[0];
+ len = tcp_saved_syn_len(tp->saved_syn);
if (put_user(len, optlen)) {
release_sock(sk);
return -EFAULT;
}
- if (copy_to_user(optval, tp->saved_syn + 1, len)) {
+ if (copy_to_user(optval, tp->saved_syn->data, len)) {
release_sock(sk);
return -EFAULT;
}
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 7aa68f4..37f4cb2b 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -567,10 +567,9 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
prot[TCP_BPF_TX].sendpage = tcp_bpf_sendpage;
}
-static void tcp_bpf_check_v6_needs_rebuild(struct sock *sk, struct proto *ops)
+static void tcp_bpf_check_v6_needs_rebuild(struct proto *ops)
{
- if (sk->sk_family == AF_INET6 &&
- unlikely(ops != smp_load_acquire(&tcpv6_prot_saved))) {
+ if (unlikely(ops != smp_load_acquire(&tcpv6_prot_saved))) {
spin_lock_bh(&tcpv6_prot_lock);
if (likely(ops != tcpv6_prot_saved)) {
tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV6], ops);
@@ -603,13 +602,11 @@ struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
- if (!psock->sk_proto) {
- struct proto *ops = READ_ONCE(sk->sk_prot);
-
- if (tcp_bpf_assert_proto_ops(ops))
+ if (sk->sk_family == AF_INET6) {
+ if (tcp_bpf_assert_proto_ops(psock->sk_proto))
return ERR_PTR(-EINVAL);
- tcp_bpf_check_v6_needs_rebuild(sk, ops);
+ tcp_bpf_check_v6_needs_rebuild(psock->sk_proto);
}
return &tcp_bpf_prots[family][config];
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 09b62de..af2814c 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -295,7 +295,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
refcount_set(&req->rsk_refcnt, 2);
/* Now finish processing the fastopen child socket. */
- tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
+ tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, skb);
tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 184ea55..4337841 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -138,6 +138,69 @@ void clean_acked_data_flush(void)
EXPORT_SYMBOL_GPL(clean_acked_data_flush);
#endif
+#ifdef CONFIG_CGROUP_BPF
+static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
+{
+ bool unknown_opt = tcp_sk(sk)->rx_opt.saw_unknown &&
+ BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
+ BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG);
+ bool parse_all_opt = BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
+ BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
+ struct bpf_sock_ops_kern sock_ops;
+
+ if (likely(!unknown_opt && !parse_all_opt))
+ return;
+
+ /* The skb will be handled in the
+ * bpf_skops_established() or
+ * bpf_skops_write_hdr_opt().
+ */
+ switch (sk->sk_state) {
+ case TCP_SYN_RECV:
+ case TCP_SYN_SENT:
+ case TCP_LISTEN:
+ return;
+ }
+
+ sock_owned_by_me(sk);
+
+ memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
+ sock_ops.op = BPF_SOCK_OPS_PARSE_HDR_OPT_CB;
+ sock_ops.is_fullsock = 1;
+ sock_ops.sk = sk;
+ bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));
+
+ BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
+}
+
+static void bpf_skops_established(struct sock *sk, int bpf_op,
+ struct sk_buff *skb)
+{
+ struct bpf_sock_ops_kern sock_ops;
+
+ sock_owned_by_me(sk);
+
+ memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
+ sock_ops.op = bpf_op;
+ sock_ops.is_fullsock = 1;
+ sock_ops.sk = sk;
+ /* sk with TCP_REPAIR_ON does not have skb in tcp_finish_connect */
+ if (skb)
+ bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));
+
+ BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
+}
+#else
+static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
+{
+}
+
+static void bpf_skops_established(struct sock *sk, int bpf_op,
+ struct sk_buff *skb)
+{
+}
+#endif
+
static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
unsigned int len)
{
@@ -3801,7 +3864,7 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
foc->exp = exp_opt;
}
-static void smc_parse_options(const struct tcphdr *th,
+static bool smc_parse_options(const struct tcphdr *th,
struct tcp_options_received *opt_rx,
const unsigned char *ptr,
int opsize)
@@ -3810,10 +3873,13 @@ static void smc_parse_options(const struct tcphdr *th,
if (static_branch_unlikely(&tcp_have_smc)) {
if (th->syn && !(opsize & 1) &&
opsize >= TCPOLEN_EXP_SMC_BASE &&
- get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
+ get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) {
opt_rx->smc_ok = 1;
+ return true;
+ }
}
#endif
+ return false;
}
/* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped
@@ -3874,6 +3940,7 @@ void tcp_parse_options(const struct net *net,
ptr = (const unsigned char *)(th + 1);
opt_rx->saw_tstamp = 0;
+ opt_rx->saw_unknown = 0;
while (length > 0) {
int opcode = *ptr++;
@@ -3964,15 +4031,21 @@ void tcp_parse_options(const struct net *net,
*/
if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
get_unaligned_be16(ptr) ==
- TCPOPT_FASTOPEN_MAGIC)
+ TCPOPT_FASTOPEN_MAGIC) {
tcp_parse_fastopen_option(opsize -
TCPOLEN_EXP_FASTOPEN_BASE,
ptr + 2, th->syn, foc, true);
- else
- smc_parse_options(th, opt_rx, ptr,
- opsize);
+ break;
+ }
+
+ if (smc_parse_options(th, opt_rx, ptr, opsize))
+ break;
+
+ opt_rx->saw_unknown = 1;
break;
+ default:
+ opt_rx->saw_unknown = 1;
}
ptr += opsize-2;
length -= opsize;
@@ -5590,6 +5663,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
goto discard;
}
+ bpf_skops_parse_hdr(sk, skb);
+
return true;
discard:
@@ -5798,7 +5873,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(tcp_rcv_established);
-void tcp_init_transfer(struct sock *sk, int bpf_op)
+void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -5819,7 +5894,7 @@ void tcp_init_transfer(struct sock *sk, int bpf_op)
tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
tp->snd_cwnd_stamp = tcp_jiffies32;
- tcp_call_bpf(sk, bpf_op, 0, NULL);
+ bpf_skops_established(sk, bpf_op, skb);
tcp_init_congestion_control(sk);
tcp_init_buffer_space(sk);
}
@@ -5838,7 +5913,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
sk_mark_napi_id(sk, skb);
}
- tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
+ tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, skb);
/* Prevent spurious tcp_cwnd_restart() on first data
* packet.
@@ -6310,7 +6385,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
} else {
tcp_try_undo_spurious_syn(sk);
tp->retrans_stamp = 0;
- tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
+ tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB,
+ skb);
WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
}
smp_mb();
@@ -6599,13 +6675,27 @@ static void tcp_reqsk_record_syn(const struct sock *sk,
{
if (tcp_sk(sk)->save_syn) {
u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
- u32 *copy;
+ struct saved_syn *saved_syn;
+ u32 mac_hdrlen;
+ void *base;
- copy = kmalloc(len + sizeof(u32), GFP_ATOMIC);
- if (copy) {
- copy[0] = len;
- memcpy(©[1], skb_network_header(skb), len);
- req->saved_syn = copy;
+ if (tcp_sk(sk)->save_syn == 2) { /* Save full header. */
+ base = skb_mac_header(skb);
+ mac_hdrlen = skb_mac_header_len(skb);
+ len += mac_hdrlen;
+ } else {
+ base = skb_network_header(skb);
+ mac_hdrlen = 0;
+ }
+
+ saved_syn = kmalloc(struct_size(saved_syn, data, len),
+ GFP_ATOMIC);
+ if (saved_syn) {
+ saved_syn->mac_hdrlen = mac_hdrlen;
+ saved_syn->network_hdrlen = skb_network_header_len(skb);
+ saved_syn->tcp_hdrlen = tcp_hdrlen(skb);
+ memcpy(saved_syn->data, base, len);
+ req->saved_syn = saved_syn;
}
}
}
@@ -6752,7 +6842,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
}
if (fastopen_sk) {
af_ops->send_synack(fastopen_sk, dst, &fl, req,
- &foc, TCP_SYNACK_FASTOPEN);
+ &foc, TCP_SYNACK_FASTOPEN, skb);
/* Add the child socket directly into the accept queue */
if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) {
reqsk_fastopen_remove(fastopen_sk, req, false);
@@ -6770,7 +6860,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_timeout_init((struct sock *)req));
af_ops->send_synack(sk, dst, &fl, req, &foc,
!want_cookie ? TCP_SYNACK_NORMAL :
- TCP_SYNACK_COOKIE);
+ TCP_SYNACK_COOKIE,
+ skb);
if (want_cookie) {
reqsk_free(req);
return 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5084333..af27cfa 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -575,7 +575,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
case TCP_SYN_SENT:
case TCP_SYN_RECV:
/* Only in fast or simultaneous open. If a fast open socket is
- * is already accepted it is treated as a connected one below.
+ * already accepted it is treated as a connected one below.
*/
if (fastopen && !fastopen->sk)
break;
@@ -965,7 +965,8 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
- enum tcp_synack_type synack_type)
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb)
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct flowi4 fl4;
@@ -976,7 +977,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
return -1;
- skb = tcp_make_synack(sk, dst, req, foc, synack_type);
+ skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
if (skb) {
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 495dda2..56c306e 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -548,6 +548,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->fastopen_req = NULL;
RCU_INIT_POINTER(newtp->fastopen_rsk, NULL);
+ bpf_skops_init_child(sk, newsk);
tcp_bpf_clone(sk, newsk);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 85ff417..ab79d36 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -438,6 +438,7 @@ struct tcp_out_options {
u8 ws; /* window scale, 0 to disable */
u8 num_sack_blocks; /* number of SACK blocks to include */
u8 hash_size; /* bytes in hash_location */
+ u8 bpf_opt_len; /* length of BPF hdr option */
__u8 *hash_location; /* temporary pointer, overloaded */
__u32 tsval, tsecr; /* need to include OPTION_TS */
struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
@@ -452,6 +453,145 @@ static void mptcp_options_write(__be32 *ptr, struct tcp_out_options *opts)
#endif
}
+#ifdef CONFIG_CGROUP_BPF
+static int bpf_skops_write_hdr_opt_arg0(struct sk_buff *skb,
+ enum tcp_synack_type synack_type)
+{
+ if (unlikely(!skb))
+ return BPF_WRITE_HDR_TCP_CURRENT_MSS;
+
+ if (unlikely(synack_type == TCP_SYNACK_COOKIE))
+ return BPF_WRITE_HDR_TCP_SYNACK_COOKIE;
+
+ return 0;
+}
+
+/* req, syn_skb and synack_type are used when writing synack */
+static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct sk_buff *syn_skb,
+ enum tcp_synack_type synack_type,
+ struct tcp_out_options *opts,
+ unsigned int *remaining)
+{
+ struct bpf_sock_ops_kern sock_ops;
+ int err;
+
+ if (likely(!BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG)) ||
+ !*remaining)
+ return;
+
+ /* *remaining has already been aligned to 4 bytes, so *remaining >= 4 */
+
+ /* init sock_ops */
+ memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
+
+ sock_ops.op = BPF_SOCK_OPS_HDR_OPT_LEN_CB;
+
+ if (req) {
+ /* The listen "sk" cannot be passed here because
+ * it is not locked. It would not make too much
+ * sense to do bpf_setsockopt(listen_sk) based
+ * on individual connection request also.
+ *
+ * Thus, "req" is passed here and the cgroup-bpf-progs
+ * of the listen "sk" will be run.
+ *
+ * "req" is also used here for fastopen even the "sk" here is
+ * a fullsock "child" sk. It is to keep the behavior
+ * consistent between fastopen and non-fastopen on
+ * the bpf programming side.
+ */
+ sock_ops.sk = (struct sock *)req;
+ sock_ops.syn_skb = syn_skb;
+ } else {
+ sock_owned_by_me(sk);
+
+ sock_ops.is_fullsock = 1;
+ sock_ops.sk = sk;
+ }
+
+ sock_ops.args[0] = bpf_skops_write_hdr_opt_arg0(skb, synack_type);
+ sock_ops.remaining_opt_len = *remaining;
+ /* tcp_current_mss() does not pass a skb */
+ if (skb)
+ bpf_skops_init_skb(&sock_ops, skb, 0);
+
+ err = BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(&sock_ops, sk);
+
+ if (err || sock_ops.remaining_opt_len == *remaining)
+ return;
+
+ opts->bpf_opt_len = *remaining - sock_ops.remaining_opt_len;
+ /* round up to 4 bytes */
+ opts->bpf_opt_len = (opts->bpf_opt_len + 3) & ~3;
+
+ *remaining -= opts->bpf_opt_len;
+}
+
+static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct sk_buff *syn_skb,
+ enum tcp_synack_type synack_type,
+ struct tcp_out_options *opts)
+{
+ u8 first_opt_off, nr_written, max_opt_len = opts->bpf_opt_len;
+ struct bpf_sock_ops_kern sock_ops;
+ int err;
+
+ if (likely(!max_opt_len))
+ return;
+
+ memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
+
+ sock_ops.op = BPF_SOCK_OPS_WRITE_HDR_OPT_CB;
+
+ if (req) {
+ sock_ops.sk = (struct sock *)req;
+ sock_ops.syn_skb = syn_skb;
+ } else {
+ sock_owned_by_me(sk);
+
+ sock_ops.is_fullsock = 1;
+ sock_ops.sk = sk;
+ }
+
+ sock_ops.args[0] = bpf_skops_write_hdr_opt_arg0(skb, synack_type);
+ sock_ops.remaining_opt_len = max_opt_len;
+ first_opt_off = tcp_hdrlen(skb) - max_opt_len;
+ bpf_skops_init_skb(&sock_ops, skb, first_opt_off);
+
+ err = BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(&sock_ops, sk);
+
+ if (err)
+ nr_written = 0;
+ else
+ nr_written = max_opt_len - sock_ops.remaining_opt_len;
+
+ if (nr_written < max_opt_len)
+ memset(skb->data + first_opt_off + nr_written, TCPOPT_NOP,
+ max_opt_len - nr_written);
+}
+#else
+static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct sk_buff *syn_skb,
+ enum tcp_synack_type synack_type,
+ struct tcp_out_options *opts,
+ unsigned int *remaining)
+{
+}
+
+static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct sk_buff *syn_skb,
+ enum tcp_synack_type synack_type,
+ struct tcp_out_options *opts)
+{
+}
+#endif
+
/* Write previously computed TCP options to the packet.
*
* Beware: Something in the Internet is very sensitive to the ordering of
@@ -691,6 +831,8 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
}
}
+ bpf_skops_hdr_opt_len(sk, skb, NULL, NULL, 0, opts, &remaining);
+
return MAX_TCP_OPTION_SPACE - remaining;
}
@@ -701,7 +843,8 @@ static unsigned int tcp_synack_options(const struct sock *sk,
struct tcp_out_options *opts,
const struct tcp_md5sig_key *md5,
struct tcp_fastopen_cookie *foc,
- enum tcp_synack_type synack_type)
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb)
{
struct inet_request_sock *ireq = inet_rsk(req);
unsigned int remaining = MAX_TCP_OPTION_SPACE;
@@ -758,6 +901,9 @@ static unsigned int tcp_synack_options(const struct sock *sk,
smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
+ bpf_skops_hdr_opt_len((struct sock *)sk, skb, req, syn_skb,
+ synack_type, opts, &remaining);
+
return MAX_TCP_OPTION_SPACE - remaining;
}
@@ -826,6 +972,15 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
}
+ if (unlikely(BPF_SOCK_OPS_TEST_FLAG(tp,
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG))) {
+ unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
+
+ bpf_skops_hdr_opt_len(sk, skb, NULL, NULL, 0, opts, &remaining);
+
+ size = MAX_TCP_OPTION_SPACE - remaining;
+ }
+
return size;
}
@@ -1213,6 +1368,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
}
#endif
+ /* BPF prog is the last one writing header option */
+ bpf_skops_write_hdr_opt(sk, skb, NULL, NULL, 0, &opts);
+
INDIRECT_CALL_INET(icsk->icsk_af_ops->send_check,
tcp_v6_send_check, tcp_v4_send_check,
sk, skb);
@@ -3336,20 +3494,20 @@ int tcp_send_synack(struct sock *sk)
}
/**
- * tcp_make_synack - Prepare a SYN-ACK.
- * sk: listener socket
- * dst: dst entry attached to the SYNACK
- * req: request_sock pointer
- * foc: cookie for tcp fast open
- * synack_type: Type of synback to prepare
- *
- * Allocate one skb and build a SYNACK packet.
- * @dst is consumed : Caller should not use it again.
+ * tcp_make_synack - Allocate one skb and build a SYNACK packet.
+ * @sk: listener socket
+ * @dst: dst entry attached to the SYNACK. It is consumed and caller
+ * should not use it again.
+ * @req: request_sock pointer
+ * @foc: cookie for tcp fast open
+ * @synack_type: Type of synack to prepare
+ * @syn_skb: SYN packet just received. It could be NULL for rtx case.
*/
struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
- enum tcp_synack_type synack_type)
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb)
{
struct inet_request_sock *ireq = inet_rsk(req);
const struct tcp_sock *tp = tcp_sk(sk);
@@ -3408,8 +3566,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
#endif
skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
+ /* bpf program will be interested in the tcp_flags */
+ TCP_SKB_CB(skb)->tcp_flags = TCPHDR_SYN | TCPHDR_ACK;
tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
- foc, synack_type) + sizeof(*th);
+ foc, synack_type,
+ syn_skb) + sizeof(*th);
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
@@ -3441,6 +3602,9 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
rcu_read_unlock();
#endif
+ bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb,
+ synack_type, &opts);
+
skb->skb_mstamp_ns = now;
tcp_add_tx_delay(skb, tp);
@@ -3741,6 +3905,8 @@ void tcp_send_delayed_ack(struct sock *sk)
ato = min(ato, max_ato);
}
+ ato = min_t(u32, ato, inet_csk(sk)->icsk_delack_max);
+
/* Stay within the limit we were given */
timeout = jiffies + ato;
@@ -3934,7 +4100,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
int res;
tcp_rsk(req)->txhash = net_tx_rndhash();
- res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL);
+ res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL,
+ NULL);
if (!res) {
__TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 6cebf41..5842081 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -10,7 +10,7 @@
#include <net/tcp.h>
/* These factors derived from the recommended values in the aer:
- * .01 and and 7/8.
+ * .01 and 7/8.
*/
#define TCP_SCALABLE_AI_CNT 100U
#define TCP_SCALABLE_MD_SCALE 3
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e88efba..09f0a23 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1170,7 +1170,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc.oif = inet->uc_index;
} else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
/* oif is set, packet is to local broadcast and
- * and uc_index is set. oif is most likely set
+ * uc_index is set. oif is most likely set
* by sk_bound_dev_if. If uc_index != oif check if the
* oif is an L3 master and uc_index is an L3 slave.
* If so, we want to allow the send using the uc_index.
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index eddd973..7a94791 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -22,10 +22,9 @@ static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
prot->close = sock_map_close;
}
-static void udp_bpf_check_v6_needs_rebuild(struct sock *sk, struct proto *ops)
+static void udp_bpf_check_v6_needs_rebuild(struct proto *ops)
{
- if (sk->sk_family == AF_INET6 &&
- unlikely(ops != smp_load_acquire(&udpv6_prot_saved))) {
+ if (unlikely(ops != smp_load_acquire(&udpv6_prot_saved))) {
spin_lock_bh(&udpv6_prot_lock);
if (likely(ops != udpv6_prot_saved)) {
udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV6], ops);
@@ -46,8 +45,8 @@ struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
{
int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6;
- if (!psock->sk_proto)
- udp_bpf_check_v6_needs_rebuild(sk, READ_ONCE(sk->sk_prot));
+ if (sk->sk_family == AF_INET6)
+ udp_bpf_check_v6_needs_rebuild(psock->sk_proto);
return &udp_bpf_prots[family];
}
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 9ebf3fe..c70c192 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -191,6 +191,13 @@ static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt,
return -EAFNOSUPPORT;
}
+static int eafnosupport_ipv6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct net *, struct sock *, struct sk_buff *))
+{
+ kfree_skb(skb);
+ return -EAFNOSUPPORT;
+}
+
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.ipv6_dst_lookup_flow = eafnosupport_ipv6_dst_lookup_flow,
.ipv6_route_input = eafnosupport_ipv6_route_input,
@@ -201,6 +208,7 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6,
.fib6_nh_init = eafnosupport_fib6_nh_init,
.ip6_del_rt = eafnosupport_ip6_del_rt,
+ .ipv6_fragment = eafnosupport_ipv6_fragment,
};
EXPORT_SYMBOL_GPL(ipv6_stub);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 0306509..e648fbe 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -661,6 +661,7 @@ int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
const struct proto_ops inet6_stream_ops = {
.family = PF_INET6,
+ .flags = PROTO_CMSG_DATA_ONLY,
.owner = THIS_MODULE,
.release = inet6_release,
.bind = inet6_bind,
@@ -1026,6 +1027,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
.xfrm6_rcv_encap = xfrm6_rcv_encap,
#endif
.nd_tbl = &nd_tbl,
+ .ipv6_fragment = ip6_fragment,
};
static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 2d3add9..55c290d5 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -94,7 +94,7 @@ EXPORT_SYMBOL(__inet6_lookup_established);
static inline int compute_score(struct sock *sk, struct net *net,
const unsigned short hnum,
const struct in6_addr *daddr,
- const int dif, const int sdif, bool exact_dif)
+ const int dif, const int sdif)
{
int score = -1;
@@ -138,15 +138,13 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
const __be16 sport, const struct in6_addr *daddr,
const unsigned short hnum, const int dif, const int sdif)
{
- bool exact_dif = inet6_exact_dif_match(net, skb);
struct inet_connection_sock *icsk;
struct sock *sk, *result = NULL;
int score, hiscore = 0;
inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
sk = (struct sock *)icsk;
- score = compute_score(sk, net, hnum, daddr, dif, sdif,
- exact_dif);
+ score = compute_score(sk, net, hnum, daddr, dif, sdif);
if (score > hiscore) {
result = lookup_reuseport(net, sk, skb, doff,
saddr, sport, daddr, hnum);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 25a90f3..44d68ed 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1812,10 +1812,14 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
children = 0;
child = NULL;
- if (fn_r)
- child = fn_r, children |= 1;
- if (fn_l)
- child = fn_l, children |= 2;
+ if (fn_r) {
+ child = fn_r;
+ children |= 1;
+ }
+ if (fn_l) {
+ child = fn_l;
+ children |= 2;
+ }
if (children == 3 || FIB6_SUBTREE(fn)
#ifdef CONFIG_IPV6_SUBTREES
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 305870a..87a633e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -501,7 +501,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
- enum tcp_synack_type synack_type)
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb)
{
struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *np = tcp_inet6_sk(sk);
@@ -515,7 +516,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
IPPROTO_TCP)) == NULL)
goto done;
- skb = tcp_make_synack(sk, dst, req, foc, synack_type);
+ skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
if (skb) {
__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
index 399a7e5..cf8f270 100644
--- a/net/l2tp/Makefile
+++ b/net/l2tp/Makefile
@@ -5,6 +5,8 @@
obj-$(CONFIG_L2TP) += l2tp_core.o
+CFLAGS_l2tp_core.o += -I$(src)
+
# Build l2tp as modules if L2TP is M
obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o
obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_IP)) += l2tp_ip.o
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 701fc72..7de05be 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -61,6 +61,10 @@
#include <linux/atomic.h>
#include "l2tp_core.h"
+#include "trace.h"
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
#define L2TP_DRV_VERSION "V2.0"
@@ -116,11 +120,6 @@ static bool l2tp_sk_is_v6(struct sock *sk)
}
#endif
-static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
-{
- return sk->sk_user_data;
-}
-
static inline struct l2tp_net *l2tp_pernet(const struct net *net)
{
return net_generic(net, l2tp_net_id);
@@ -151,24 +150,31 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
{
+ trace_free_tunnel(tunnel);
sock_put(tunnel->sock);
/* the tunnel is freed in the socket destructor */
}
static void l2tp_session_free(struct l2tp_session *session)
{
- struct l2tp_tunnel *tunnel = session->tunnel;
-
- if (tunnel) {
- if (WARN_ON(tunnel->magic != L2TP_TUNNEL_MAGIC))
- goto out;
- l2tp_tunnel_dec_refcount(tunnel);
- }
-
-out:
+ trace_free_session(session);
+ if (session->tunnel)
+ l2tp_tunnel_dec_refcount(session->tunnel);
kfree(session);
}
+struct l2tp_tunnel *l2tp_sk_to_tunnel(struct sock *sk)
+{
+ struct l2tp_tunnel *tunnel = sk->sk_user_data;
+
+ if (tunnel)
+ if (WARN_ON(tunnel->magic != L2TP_TUNNEL_MAGIC))
+ return NULL;
+
+ return tunnel;
+}
+EXPORT_SYMBOL_GPL(l2tp_sk_to_tunnel);
+
void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel)
{
refcount_inc(&tunnel->ref_count);
@@ -381,6 +387,8 @@ int l2tp_session_register(struct l2tp_session *session,
hlist_add_head(&session->hlist, head);
write_unlock_bh(&tunnel->hlist_lock);
+ trace_register_session(session);
+
return 0;
err_tlock_pnlock:
@@ -409,10 +417,6 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk
skb_queue_walk_safe(&session->reorder_q, skbp, tmp) {
if (L2TP_SKB_CB(skbp)->ns > ns) {
__skb_queue_before(&session->reorder_q, skbp, skb);
- l2tp_dbg(session, L2TP_MSG_SEQ,
- "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n",
- session->name, ns, L2TP_SKB_CB(skbp)->ns,
- skb_queue_len(&session->reorder_q));
atomic_long_inc(&session->stats.rx_oos_packets);
goto out;
}
@@ -445,9 +449,7 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *
/* Bump our Nr */
session->nr++;
session->nr &= session->nr_max;
-
- l2tp_dbg(session, L2TP_MSG_SEQ, "%s: updated nr to %hu\n",
- session->name, session->nr);
+ trace_session_seqnum_update(session);
}
/* call private receive handler */
@@ -472,37 +474,27 @@ static void l2tp_recv_dequeue(struct l2tp_session *session)
start:
spin_lock_bh(&session->reorder_q.lock);
skb_queue_walk_safe(&session->reorder_q, skb, tmp) {
- if (time_after(jiffies, L2TP_SKB_CB(skb)->expires)) {
+ struct l2tp_skb_cb *cb = L2TP_SKB_CB(skb);
+
+ /* If the packet has been pending on the queue for too long, discard it */
+ if (time_after(jiffies, cb->expires)) {
atomic_long_inc(&session->stats.rx_seq_discards);
atomic_long_inc(&session->stats.rx_errors);
- l2tp_dbg(session, L2TP_MSG_SEQ,
- "%s: oos pkt %u len %d discarded (too old), waiting for %u, reorder_q_len=%d\n",
- session->name, L2TP_SKB_CB(skb)->ns,
- L2TP_SKB_CB(skb)->length, session->nr,
- skb_queue_len(&session->reorder_q));
+ trace_session_pkt_expired(session, cb->ns);
session->reorder_skip = 1;
__skb_unlink(skb, &session->reorder_q);
kfree_skb(skb);
continue;
}
- if (L2TP_SKB_CB(skb)->has_seq) {
+ if (cb->has_seq) {
if (session->reorder_skip) {
- l2tp_dbg(session, L2TP_MSG_SEQ,
- "%s: advancing nr to next pkt: %u -> %u",
- session->name, session->nr,
- L2TP_SKB_CB(skb)->ns);
session->reorder_skip = 0;
- session->nr = L2TP_SKB_CB(skb)->ns;
+ session->nr = cb->ns;
+ trace_session_seqnum_reset(session);
}
- if (L2TP_SKB_CB(skb)->ns != session->nr) {
- l2tp_dbg(session, L2TP_MSG_SEQ,
- "%s: holding oos pkt %u len %d, waiting for %u, reorder_q_len=%d\n",
- session->name, L2TP_SKB_CB(skb)->ns,
- L2TP_SKB_CB(skb)->length, session->nr,
- skb_queue_len(&session->reorder_q));
+ if (cb->ns != session->nr)
goto out;
- }
}
__skb_unlink(skb, &session->reorder_q);
@@ -535,14 +527,13 @@ static int l2tp_seq_check_rx_window(struct l2tp_session *session, u32 nr)
*/
static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb)
{
- if (!l2tp_seq_check_rx_window(session, L2TP_SKB_CB(skb)->ns)) {
+ struct l2tp_skb_cb *cb = L2TP_SKB_CB(skb);
+
+ if (!l2tp_seq_check_rx_window(session, cb->ns)) {
/* Packet sequence number is outside allowed window.
* Discard it.
*/
- l2tp_dbg(session, L2TP_MSG_SEQ,
- "%s: pkt %u len %d discarded, outside window, nr=%u\n",
- session->name, L2TP_SKB_CB(skb)->ns,
- L2TP_SKB_CB(skb)->length, session->nr);
+ trace_session_pkt_outside_rx_window(session, cb->ns);
goto discard;
}
@@ -559,10 +550,10 @@ static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb)
* is seen. After nr_oos_count_max in-sequence packets, reset the
* sequence number to re-enable packet reception.
*/
- if (L2TP_SKB_CB(skb)->ns == session->nr) {
+ if (cb->ns == session->nr) {
skb_queue_tail(&session->reorder_q, skb);
} else {
- u32 nr_oos = L2TP_SKB_CB(skb)->ns;
+ u32 nr_oos = cb->ns;
u32 nr_next = (session->nr_oos + 1) & session->nr_max;
if (nr_oos == nr_next)
@@ -573,17 +564,10 @@ static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb)
session->nr_oos = nr_oos;
if (session->nr_oos_count > session->nr_oos_count_max) {
session->reorder_skip = 1;
- l2tp_dbg(session, L2TP_MSG_SEQ,
- "%s: %d oos packets received. Resetting sequence numbers\n",
- session->name, session->nr_oos_count);
}
if (!session->reorder_skip) {
atomic_long_inc(&session->stats.rx_seq_discards);
- l2tp_dbg(session, L2TP_MSG_SEQ,
- "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n",
- session->name, L2TP_SKB_CB(skb)->ns,
- L2TP_SKB_CB(skb)->length, session->nr,
- skb_queue_len(&session->reorder_q));
+ trace_session_pkt_oos(session, cb->ns);
goto discard;
}
skb_queue_tail(&session->reorder_q, skb);
@@ -660,16 +644,14 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
int length)
{
struct l2tp_tunnel *tunnel = session->tunnel;
- u32 ns = 0, nr = 0;
int offset;
/* Parse and check optional cookie */
if (session->peer_cookie_len > 0) {
if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) {
- l2tp_info(tunnel, L2TP_MSG_DATA,
- "%s: cookie mismatch (%u/%u). Discarding.\n",
- tunnel->name, tunnel->tunnel_id,
- session->session_id);
+ pr_warn_ratelimited("%s: cookie mismatch (%u/%u). Discarding.\n",
+ tunnel->name, tunnel->tunnel_id,
+ session->session_id);
atomic_long_inc(&session->stats.rx_cookie_discards);
goto discard;
}
@@ -686,32 +668,21 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
L2TP_SKB_CB(skb)->has_seq = 0;
if (tunnel->version == L2TP_HDR_VER_2) {
if (hdrflags & L2TP_HDRFLAG_S) {
- ns = ntohs(*(__be16 *)ptr);
- ptr += 2;
- nr = ntohs(*(__be16 *)ptr);
- ptr += 2;
-
/* Store L2TP info in the skb */
- L2TP_SKB_CB(skb)->ns = ns;
+ L2TP_SKB_CB(skb)->ns = ntohs(*(__be16 *)ptr);
L2TP_SKB_CB(skb)->has_seq = 1;
+ ptr += 2;
+ /* Skip past nr in the header */
+ ptr += 2;
- l2tp_dbg(session, L2TP_MSG_SEQ,
- "%s: recv data ns=%u, nr=%u, session nr=%u\n",
- session->name, ns, nr, session->nr);
}
} else if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) {
u32 l2h = ntohl(*(__be32 *)ptr);
if (l2h & 0x40000000) {
- ns = l2h & 0x00ffffff;
-
/* Store L2TP info in the skb */
- L2TP_SKB_CB(skb)->ns = ns;
+ L2TP_SKB_CB(skb)->ns = l2h & 0x00ffffff;
L2TP_SKB_CB(skb)->has_seq = 1;
-
- l2tp_dbg(session, L2TP_MSG_SEQ,
- "%s: recv data ns=%u, session nr=%u\n",
- session->name, ns, session->nr);
}
ptr += 4;
}
@@ -722,9 +693,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
* configure it so.
*/
if (!session->lns_mode && !session->send_seq) {
- l2tp_info(session, L2TP_MSG_SEQ,
- "%s: requested to enable seq numbers by LNS\n",
- session->name);
+ trace_session_seqnum_lns_enable(session);
session->send_seq = 1;
l2tp_session_set_header_len(session, tunnel->version);
}
@@ -733,9 +702,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
* If user has configured mandatory sequence numbers, discard.
*/
if (session->recv_seq) {
- l2tp_warn(session, L2TP_MSG_SEQ,
- "%s: recv data has no seq numbers when required. Discarding.\n",
- session->name);
+ pr_warn_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n",
+ session->name);
atomic_long_inc(&session->stats.rx_seq_discards);
goto discard;
}
@@ -746,15 +714,12 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
* LAC is broken. Discard the frame.
*/
if (!session->lns_mode && session->send_seq) {
- l2tp_info(session, L2TP_MSG_SEQ,
- "%s: requested to disable seq numbers by LNS\n",
- session->name);
+ trace_session_seqnum_lns_disable(session);
session->send_seq = 0;
l2tp_session_set_header_len(session, tunnel->version);
} else if (session->send_seq) {
- l2tp_warn(session, L2TP_MSG_SEQ,
- "%s: recv data has no seq numbers when required. Discarding.\n",
- session->name);
+ pr_warn_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n",
+ session->name);
atomic_long_inc(&session->stats.rx_seq_discards);
goto discard;
}
@@ -816,9 +781,6 @@ static void l2tp_session_queue_purge(struct l2tp_session *session)
{
struct sk_buff *skb = NULL;
- if (WARN_ON(session->magic != L2TP_SESSION_MAGIC))
- return;
-
while ((skb = skb_dequeue(&session->reorder_q))) {
atomic_long_inc(&session->stats.rx_errors);
kfree_skb(skb);
@@ -847,22 +809,11 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
/* Short packet? */
if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) {
- l2tp_info(tunnel, L2TP_MSG_DATA,
- "%s: recv short packet (len=%d)\n",
- tunnel->name, skb->len);
+ pr_warn_ratelimited("%s: recv short packet (len=%d)\n",
+ tunnel->name, skb->len);
goto error;
}
- /* Trace packet contents, if enabled */
- if (tunnel->debug & L2TP_MSG_DATA) {
- length = min(32u, skb->len);
- if (!pskb_may_pull(skb, length))
- goto error;
-
- pr_debug("%s: recv\n", tunnel->name);
- print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, skb->data, length);
- }
-
/* Point to L2TP header */
optr = skb->data;
ptr = skb->data;
@@ -873,9 +824,8 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
/* Check protocol version */
version = hdrflags & L2TP_HDR_VER_MASK;
if (version != tunnel->version) {
- l2tp_info(tunnel, L2TP_MSG_DATA,
- "%s: recv protocol version mismatch: got %d expected %d\n",
- tunnel->name, version, tunnel->version);
+ pr_warn_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n",
+ tunnel->name, version, tunnel->version);
goto error;
}
@@ -883,12 +833,8 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
length = skb->len;
/* If type is control packet, it is handled by userspace. */
- if (hdrflags & L2TP_HDRFLAG_T) {
- l2tp_dbg(tunnel, L2TP_MSG_DATA,
- "%s: recv control packet, len=%d\n",
- tunnel->name, length);
+ if (hdrflags & L2TP_HDRFLAG_T)
goto error;
- }
/* Skip flags */
ptr += 2;
@@ -917,9 +863,8 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
l2tp_session_dec_refcount(session);
/* Not found? Pass to userspace to deal with */
- l2tp_info(tunnel, L2TP_MSG_DATA,
- "%s: no session found (%u/%u). Passing up.\n",
- tunnel->name, tunnel_id, session_id);
+ pr_warn_ratelimited("%s: no session found (%u/%u). Passing up.\n",
+ tunnel->name, tunnel_id, session_id);
goto error;
}
@@ -949,12 +894,17 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct l2tp_tunnel *tunnel;
+ /* Note that this is called from the encap_rcv hook inside an
+ * RCU-protected region, but without the socket being locked.
+ * Hence we use rcu_dereference_sk_user_data to access the
+ * tunnel data structure rather the usual l2tp_sk_to_tunnel
+ * accessor function.
+ */
tunnel = rcu_dereference_sk_user_data(sk);
if (!tunnel)
goto pass_up;
-
- l2tp_dbg(tunnel, L2TP_MSG_DATA, "%s: received %d bytes\n",
- tunnel->name, skb->len);
+ if (WARN_ON(tunnel->magic != L2TP_TUNNEL_MAGIC))
+ goto pass_up;
if (l2tp_udp_recv_core(tunnel, skb))
goto pass_up;
@@ -993,8 +943,7 @@ static int l2tp_build_l2tpv2_header(struct l2tp_session *session, void *buf)
*bufp++ = 0;
session->ns++;
session->ns &= 0xffff;
- l2tp_dbg(session, L2TP_MSG_SEQ, "%s: updated ns to %u\n",
- session->name, session->ns);
+ trace_session_seqnum_update(session);
}
return bufp - optr;
@@ -1030,9 +979,7 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
l2h = 0x40000000 | session->ns;
session->ns++;
session->ns &= 0xffffff;
- l2tp_dbg(session, L2TP_MSG_SEQ,
- "%s: updated ns to %u\n",
- session->name, session->ns);
+ trace_session_seqnum_update(session);
}
*((__be32 *)bufp) = htonl(l2h);
@@ -1042,74 +989,39 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
return bufp - optr;
}
-static void l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
- struct flowi *fl, size_t data_len)
+/* Queue the packet to IP for output: tunnel socket lock must be held */
+static int l2tp_xmit_queue(struct l2tp_tunnel *tunnel, struct sk_buff *skb, struct flowi *fl)
{
- struct l2tp_tunnel *tunnel = session->tunnel;
- unsigned int len = skb->len;
- int error;
+ int err;
- /* Debug */
- if (session->send_seq)
- l2tp_dbg(session, L2TP_MSG_DATA, "%s: send %zd bytes, ns=%u\n",
- session->name, data_len, session->ns - 1);
- else
- l2tp_dbg(session, L2TP_MSG_DATA, "%s: send %zd bytes\n",
- session->name, data_len);
-
- if (session->debug & L2TP_MSG_DATA) {
- int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
- unsigned char *datap = skb->data + uhlen;
-
- pr_debug("%s: xmit\n", session->name);
- print_hex_dump_bytes("", DUMP_PREFIX_OFFSET,
- datap, min_t(size_t, 32, len - uhlen));
- }
-
- /* Queue the packet to IP for output */
skb->ignore_df = 1;
skb_dst_drop(skb);
#if IS_ENABLED(CONFIG_IPV6)
if (l2tp_sk_is_v6(tunnel->sock))
- error = inet6_csk_xmit(tunnel->sock, skb, NULL);
+ err = inet6_csk_xmit(tunnel->sock, skb, NULL);
else
#endif
- error = ip_queue_xmit(tunnel->sock, skb, fl);
+ err = ip_queue_xmit(tunnel->sock, skb, fl);
- /* Update stats */
- if (error >= 0) {
- atomic_long_inc(&tunnel->stats.tx_packets);
- atomic_long_add(len, &tunnel->stats.tx_bytes);
- atomic_long_inc(&session->stats.tx_packets);
- atomic_long_add(len, &session->stats.tx_bytes);
- } else {
- atomic_long_inc(&tunnel->stats.tx_errors);
- atomic_long_inc(&session->stats.tx_errors);
- }
+ return err >= 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}
-/* If caller requires the skb to have a ppp header, the header must be
- * inserted in the skb data before calling this function.
- */
-int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len)
+static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb)
{
- int data_len = skb->len;
struct l2tp_tunnel *tunnel = session->tunnel;
+ unsigned int data_len = skb->len;
struct sock *sk = tunnel->sock;
- struct flowi *fl;
- struct udphdr *uh;
- struct inet_sock *inet;
- int headroom;
- int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
- int udp_len;
+ int headroom, uhlen, udp_len;
int ret = NET_XMIT_SUCCESS;
+ struct inet_sock *inet;
+ struct udphdr *uh;
/* Check that there's enough headroom in the skb to insert IP,
* UDP and L2TP headers. If not enough, expand it to
* make room. Adjust truesize.
*/
- headroom = NET_SKB_PAD + sizeof(struct iphdr) +
- uhlen + hdr_len;
+ uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(*uh) : 0;
+ headroom = NET_SKB_PAD + sizeof(struct iphdr) + uhlen + session->hdr_len;
if (skb_cow_head(skb, headroom)) {
kfree_skb(skb);
return NET_XMIT_DROP;
@@ -1117,14 +1029,13 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
/* Setup L2TP header */
if (tunnel->version == L2TP_HDR_VER_2)
- l2tp_build_l2tpv2_header(session, __skb_push(skb, hdr_len));
+ l2tp_build_l2tpv2_header(session, __skb_push(skb, session->hdr_len));
else
- l2tp_build_l2tpv3_header(session, __skb_push(skb, hdr_len));
+ l2tp_build_l2tpv3_header(session, __skb_push(skb, session->hdr_len));
/* Reset skb netfilter state */
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
- IPSKB_REROUTED);
+ IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED);
nf_reset_ct(skb);
bh_lock_sock(sk);
@@ -1144,7 +1055,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
}
inet = inet_sk(sk);
- fl = &inet->cork.fl;
switch (tunnel->encap) {
case L2TP_ENCAPTYPE_UDP:
/* Setup UDP header */
@@ -1153,7 +1063,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
uh = udp_hdr(skb);
uh->source = inet->inet_sport;
uh->dest = inet->inet_dport;
- udp_len = uhlen + hdr_len + data_len;
+ udp_len = uhlen + session->hdr_len + data_len;
uh->len = htons(udp_len);
/* Calculate UDP checksum if configured to do so */
@@ -1172,12 +1082,34 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
break;
}
- l2tp_xmit_core(session, skb, fl, data_len);
+ ret = l2tp_xmit_queue(tunnel, skb, &inet->cork.fl);
+
out_unlock:
bh_unlock_sock(sk);
return ret;
}
+
+/* If caller requires the skb to have a ppp header, the header must be
+ * inserted in the skb data before calling this function.
+ */
+int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb)
+{
+ unsigned int len = skb->len;
+ int ret;
+
+ ret = l2tp_xmit_core(session, skb);
+ if (ret == NET_XMIT_SUCCESS) {
+ atomic_long_inc(&session->tunnel->stats.tx_packets);
+ atomic_long_add(len, &session->tunnel->stats.tx_bytes);
+ atomic_long_inc(&session->stats.tx_packets);
+ atomic_long_add(len, &session->stats.tx_bytes);
+ } else {
+ atomic_long_inc(&session->tunnel->stats.tx_errors);
+ atomic_long_inc(&session->stats.tx_errors);
+ }
+ return ret;
+}
EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
/*****************************************************************************
@@ -1190,13 +1122,11 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
*/
static void l2tp_tunnel_destruct(struct sock *sk)
{
- struct l2tp_tunnel *tunnel = l2tp_tunnel(sk);
+ struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk);
if (!tunnel)
goto end;
- l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing...\n", tunnel->name);
-
/* Disable udp encapsulation */
switch (tunnel->encap) {
case L2TP_ENCAPTYPE_UDP:
@@ -1255,34 +1185,16 @@ static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
struct hlist_node *tmp;
struct l2tp_session *session;
- l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing all sessions...\n",
- tunnel->name);
-
write_lock_bh(&tunnel->hlist_lock);
tunnel->acpt_newsess = false;
for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
again:
hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
session = hlist_entry(walk, struct l2tp_session, hlist);
-
- l2tp_info(session, L2TP_MSG_CONTROL,
- "%s: closing session\n", session->name);
-
hlist_del_init(&session->hlist);
- if (test_and_set_bit(0, &session->dead))
- goto again;
-
write_unlock_bh(&tunnel->hlist_lock);
-
- l2tp_session_unhash(session);
- l2tp_session_queue_purge(session);
-
- if (session->session_close)
- (*session->session_close)(session);
-
- l2tp_session_dec_refcount(session);
-
+ l2tp_session_delete(session);
write_lock_bh(&tunnel->hlist_lock);
/* Now restart from the beginning of this hash
@@ -1299,7 +1211,7 @@ static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
/* Tunnel socket destroy hook for UDP encapsulation */
static void l2tp_udp_encap_destroy(struct sock *sk)
{
- struct l2tp_tunnel *tunnel = l2tp_tunnel(sk);
+ struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk);
if (tunnel)
l2tp_tunnel_delete(tunnel);
@@ -1464,7 +1376,7 @@ static int l2tp_tunnel_sock_create(struct net *net,
static struct lock_class_key l2tp_socket_class;
-int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
+int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
{
struct l2tp_tunnel *tunnel = NULL;
@@ -1483,16 +1395,12 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
tunnel->version = version;
tunnel->tunnel_id = tunnel_id;
tunnel->peer_tunnel_id = peer_tunnel_id;
- tunnel->debug = L2TP_DEFAULT_DEBUG_FLAGS;
tunnel->magic = L2TP_TUNNEL_MAGIC;
sprintf(&tunnel->name[0], "tunl %u", tunnel_id);
rwlock_init(&tunnel->hlist_lock);
tunnel->acpt_newsess = true;
- if (cfg)
- tunnel->debug = cfg->debug;
-
tunnel->encap = encap;
refcount_set(&tunnel->ref_count, 1);
@@ -1597,6 +1505,8 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
"l2tp_sock");
sk->sk_allocation = GFP_ATOMIC;
+ trace_register_tunnel(tunnel);
+
if (tunnel->fd >= 0)
sockfd_put(sock);
@@ -1617,6 +1527,7 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_register);
void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
{
if (!test_and_set_bit(0, &tunnel->dead)) {
+ trace_delete_tunnel(tunnel);
l2tp_tunnel_inc_refcount(tunnel);
queue_work(l2tp_wq, &tunnel->del_work);
}
@@ -1628,6 +1539,7 @@ void l2tp_session_delete(struct l2tp_session *session)
if (test_and_set_bit(0, &session->dead))
return;
+ trace_delete_session(session);
l2tp_session_unhash(session);
l2tp_session_queue_purge(session);
if (session->session_close)
@@ -1686,12 +1598,8 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
INIT_HLIST_NODE(&session->hlist);
INIT_HLIST_NODE(&session->global_hlist);
- /* Inherit debug options from tunnel */
- session->debug = tunnel->debug;
-
if (cfg) {
session->pwtype = cfg->pw_type;
- session->debug = cfg->debug;
session->send_seq = cfg->send_seq;
session->recv_seq = cfg->recv_seq;
session->lns_mode = cfg->lns_mode;
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 3468d6b..cb21d90 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -51,7 +51,6 @@ struct l2tp_session_cfg {
unsigned int lns_mode:1; /* behave as LNS?
* LAC enables sequence numbers under LNS control.
*/
- int debug; /* bitmask of debug message categories */
u16 l2specific_type; /* Layer 2 specific type */
u8 cookie[8]; /* optional cookie */
int cookie_len; /* 0, 4 or 8 bytes */
@@ -66,6 +65,7 @@ struct l2tp_session_cfg {
* Is linked into a per-tunnel session hashlist; and in the case of an L2TPv3 session into
* an additional per-net ("global") hashlist.
*/
+#define L2TP_SESSION_NAME_MAX 32
struct l2tp_session {
int magic; /* should be L2TP_SESSION_MAGIC */
long dead;
@@ -90,14 +90,13 @@ struct l2tp_session {
struct hlist_node hlist; /* hash list node */
refcount_t ref_count;
- char name[32]; /* for logging */
+ char name[L2TP_SESSION_NAME_MAX]; /* for logging */
char ifname[IFNAMSIZ];
unsigned int recv_seq:1; /* expect receive packets with sequence numbers? */
unsigned int send_seq:1; /* send packets with sequence numbers? */
unsigned int lns_mode:1; /* behave as LNS?
* LAC enables sequence numbers under LNS control.
*/
- int debug; /* bitmask of debug message categories */
int reorder_timeout; /* configured reorder timeout (in jiffies) */
int reorder_skip; /* set if skip to next nr */
enum l2tp_pwtype pwtype;
@@ -131,7 +130,6 @@ struct l2tp_session {
/* L2TP tunnel configuration */
struct l2tp_tunnel_cfg {
- int debug; /* bitmask of debug message categories */
enum l2tp_encap_type encap;
/* Used only for kernel-created sockets */
@@ -154,6 +152,7 @@ struct l2tp_tunnel_cfg {
* Maintains a hashlist of sessions belonging to the tunnel instance.
* Is linked into a per-net list of tunnels.
*/
+#define L2TP_TUNNEL_NAME_MAX 20
struct l2tp_tunnel {
int magic; /* Should be L2TP_TUNNEL_MAGIC */
@@ -170,8 +169,7 @@ struct l2tp_tunnel {
u32 peer_tunnel_id;
int version; /* 2=>L2TPv2, 3=>L2TPv3 */
- char name[20]; /* for logging */
- int debug; /* bitmask of debug message categories */
+ char name[L2TP_TUNNEL_NAME_MAX]; /* for logging */
enum l2tp_encap_type encap;
struct l2tp_stats stats;
@@ -237,7 +235,7 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
* Creation of a new instance is a two-step process: create, then register.
* Destruction is triggered using the *_delete functions, and completes asynchronously.
*/
-int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
+int l2tp_tunnel_create(int fd, int version, u32 tunnel_id,
u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
struct l2tp_tunnel **tunnelp);
int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
@@ -263,8 +261,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
/* Transmit path helpers for sending packets over the tunnel socket. */
void l2tp_session_set_header_len(struct l2tp_session *session, int version);
-int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb,
- int hdr_len);
+int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb);
/* Pseudowire management.
* Pseudowires should register with l2tp core on module init, and unregister
@@ -276,6 +273,11 @@ void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
/* IOCTL helper for IP encap modules. */
int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg);
+/* Extract the tunnel structure from a socket's sk_user_data pointer,
+ * validating the tunnel magic feather.
+ */
+struct l2tp_tunnel *l2tp_sk_to_tunnel(struct sock *sk);
+
static inline int l2tp_get_l2specific_len(struct l2tp_session *session)
{
switch (session->l2specific_type) {
@@ -337,19 +339,6 @@ static inline int l2tp_v3_ensure_opt_in_linear(struct l2tp_session *session, str
return 0;
}
-#define l2tp_printk(ptr, type, func, fmt, ...) \
-do { \
- if (((ptr)->debug) & (type)) \
- func(fmt, ##__VA_ARGS__); \
-} while (0)
-
-#define l2tp_warn(ptr, type, fmt, ...) \
- l2tp_printk(ptr, type, pr_warn, fmt, ##__VA_ARGS__)
-#define l2tp_info(ptr, type, fmt, ...) \
- l2tp_printk(ptr, type, pr_info, fmt, ##__VA_ARGS__)
-#define l2tp_dbg(ptr, type, fmt, ...) \
- l2tp_printk(ptr, type, pr_debug, fmt, ##__VA_ARGS__)
-
#define MODULE_ALIAS_L2TP_PWTYPE(type) \
MODULE_ALIAS("net-l2tp-type-" __stringify(type))
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 96cb960..bca75be 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -167,7 +167,7 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
tunnel->sock ? refcount_read(&tunnel->sock->sk_refcnt) : 0,
refcount_read(&tunnel->ref_count));
seq_printf(m, " %08x rx %ld/%ld/%ld rx %ld/%ld/%ld\n",
- tunnel->debug,
+ 0,
atomic_long_read(&tunnel->stats.tx_packets),
atomic_long_read(&tunnel->stats.tx_bytes),
atomic_long_read(&tunnel->stats.tx_errors),
@@ -192,7 +192,7 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
session->recv_seq ? 'R' : '-',
session->send_seq ? 'S' : '-',
session->lns_mode ? "LNS" : "LAC",
- session->debug,
+ 0,
jiffies_to_msecs(session->reorder_timeout));
seq_printf(m, " offset 0 l2specific %hu/%hu\n",
session->l2specific_type, l2tp_get_l2specific_len(session));
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 7ed2b4e..6cd97c75 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -76,7 +76,7 @@ static netdev_tx_t l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev
struct l2tp_eth *priv = netdev_priv(dev);
struct l2tp_session *session = priv->session;
unsigned int len = skb->len;
- int ret = l2tp_xmit_skb(session, skb, session->hdr_len);
+ int ret = l2tp_xmit_skb(session, skb);
if (likely(ret == NET_XMIT_SUCCESS)) {
atomic_long_add(len, &priv->tx_bytes);
@@ -128,17 +128,6 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
struct net_device *dev;
struct l2tp_eth *priv;
- if (session->debug & L2TP_MSG_DATA) {
- unsigned int length;
-
- length = min(32u, skb->len);
- if (!pskb_may_pull(skb, length))
- goto error;
-
- pr_debug("%s: eth recv\n", session->name);
- print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, skb->data, length);
- }
-
if (!pskb_may_pull(skb, ETH_HLEN))
goto error;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index df2a35b..97ae125 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -118,7 +118,6 @@ static int l2tp_ip_recv(struct sk_buff *skb)
struct l2tp_session *session;
struct l2tp_tunnel *tunnel = NULL;
struct iphdr *iph;
- int length;
if (!pskb_may_pull(skb, 4))
goto discard;
@@ -147,20 +146,6 @@ static int l2tp_ip_recv(struct sk_buff *skb)
if (!tunnel)
goto discard_sess;
- /* Trace packet contents, if enabled */
- if (tunnel->debug & L2TP_MSG_DATA) {
- length = min(32u, skb->len);
- if (!pskb_may_pull(skb, length))
- goto discard_sess;
-
- /* Point to L2TP header */
- optr = skb->data;
- ptr = skb->data;
- ptr += 4;
- pr_debug("%s: ip recv\n", tunnel->name);
- print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
- }
-
if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
goto discard_sess;
@@ -248,8 +233,8 @@ static void l2tp_ip_close(struct sock *sk, long timeout)
static void l2tp_ip_destroy_sock(struct sock *sk)
{
+ struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk);
struct sk_buff *skb;
- struct l2tp_tunnel *tunnel = sk->sk_user_data;
while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
kfree_skb(skb);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index bc757bc..e5e5036 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -131,7 +131,6 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
struct l2tp_session *session;
struct l2tp_tunnel *tunnel = NULL;
struct ipv6hdr *iph;
- int length;
if (!pskb_may_pull(skb, 4))
goto discard;
@@ -160,20 +159,6 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
if (!tunnel)
goto discard_sess;
- /* Trace packet contents, if enabled */
- if (tunnel->debug & L2TP_MSG_DATA) {
- length = min(32u, skb->len);
- if (!pskb_may_pull(skb, length))
- goto discard_sess;
-
- /* Point to L2TP header */
- optr = skb->data;
- ptr = skb->data;
- ptr += 4;
- pr_debug("%s: ip recv\n", tunnel->name);
- print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
- }
-
if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
goto discard_sess;
@@ -262,7 +247,7 @@ static void l2tp_ip6_close(struct sock *sk, long timeout)
static void l2tp_ip6_destroy_sock(struct sock *sk)
{
- struct l2tp_tunnel *tunnel = sk->sk_user_data;
+ struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk);
lock_sock(sk);
ip6_flush_pending_frames(sk);
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index def78ee..83c015f 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -229,14 +229,11 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
goto out;
}
- if (attrs[L2TP_ATTR_DEBUG])
- cfg.debug = nla_get_u32(attrs[L2TP_ATTR_DEBUG]);
-
ret = -EINVAL;
switch (cfg.encap) {
case L2TP_ENCAPTYPE_UDP:
case L2TP_ENCAPTYPE_IP:
- ret = l2tp_tunnel_create(net, fd, proto_version, tunnel_id,
+ ret = l2tp_tunnel_create(fd, proto_version, tunnel_id,
peer_tunnel_id, &cfg, &tunnel);
break;
}
@@ -307,9 +304,6 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info
goto out;
}
- if (info->attrs[L2TP_ATTR_DEBUG])
- tunnel->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
-
ret = l2tp_tunnel_notify(&l2tp_nl_family, info,
tunnel, L2TP_CMD_TUNNEL_MODIFY);
@@ -400,7 +394,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
if (nla_put_u8(skb, L2TP_ATTR_PROTO_VERSION, tunnel->version) ||
nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) ||
nla_put_u32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id) ||
- nla_put_u32(skb, L2TP_ATTR_DEBUG, tunnel->debug) ||
+ nla_put_u32(skb, L2TP_ATTR_DEBUG, 0) ||
nla_put_u16(skb, L2TP_ATTR_ENCAP_TYPE, tunnel->encap))
goto nla_put_failure;
@@ -605,9 +599,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
cfg.ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
}
- if (info->attrs[L2TP_ATTR_DEBUG])
- cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
-
if (info->attrs[L2TP_ATTR_RECV_SEQ])
cfg.recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
@@ -689,9 +680,6 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
goto out;
}
- if (info->attrs[L2TP_ATTR_DEBUG])
- session->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
-
if (info->attrs[L2TP_ATTR_RECV_SEQ])
session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
@@ -730,7 +718,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
nla_put_u32(skb, L2TP_ATTR_SESSION_ID, session->session_id) ||
nla_put_u32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id) ||
nla_put_u32(skb, L2TP_ATTR_PEER_SESSION_ID, session->peer_session_id) ||
- nla_put_u32(skb, L2TP_ATTR_DEBUG, session->debug) ||
+ nla_put_u32(skb, L2TP_ATTR_DEBUG, 0) ||
nla_put_u16(skb, L2TP_ATTR_PW_TYPE, session->pwtype))
goto nla_put_failure;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 13c3153..aea85f9 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -237,17 +237,9 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
if (sk->sk_state & PPPOX_BOUND) {
struct pppox_sock *po;
- l2tp_dbg(session, L2TP_MSG_DATA,
- "%s: recv %d byte data frame, passing to ppp\n",
- session->name, data_len);
-
po = pppox_sk(sk);
ppp_input(&po->chan, skb);
} else {
- l2tp_dbg(session, L2TP_MSG_DATA,
- "%s: recv %d byte data frame, passing to L2TP socket\n",
- session->name, data_len);
-
if (sock_queue_rcv_skb(sk, skb) < 0) {
atomic_long_inc(&session->stats.rx_errors);
kfree_skb(skb);
@@ -259,7 +251,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
no_sock:
rcu_read_unlock();
- l2tp_info(session, L2TP_MSG_DATA, "%s: no socket\n", session->name);
+ pr_warn_ratelimited("%s: no socket in recv\n", session->name);
kfree_skb(skb);
}
@@ -324,7 +316,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
}
local_bh_disable();
- l2tp_xmit_skb(session, skb, session->hdr_len);
+ l2tp_xmit_skb(session, skb);
local_bh_enable();
sock_put(sk);
@@ -383,7 +375,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
skb->data[1] = PPP_UI;
local_bh_disable();
- l2tp_xmit_skb(session, skb, session->hdr_len);
+ l2tp_xmit_skb(session, skb);
local_bh_enable();
sock_put(sk);
@@ -710,7 +702,6 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
if (!tunnel) {
struct l2tp_tunnel_cfg tcfg = {
.encap = L2TP_ENCAPTYPE_UDP,
- .debug = 0,
};
/* Prevent l2tp_tunnel_register() from trying to set up
@@ -721,7 +712,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
goto end;
}
- error = l2tp_tunnel_create(sock_net(sk), info.fd,
+ error = l2tp_tunnel_create(info.fd,
info.version,
info.tunnel_id,
info.peer_tunnel_id, &tcfg,
@@ -840,8 +831,6 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
drop_refcnt = false;
sk->sk_state = PPPOX_CONNECTED;
- l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n",
- session->name);
end:
if (error) {
@@ -1076,6 +1065,9 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
if (!session)
return -ENOTCONN;
+ if (WARN_ON(session->magic != L2TP_SESSION_MAGIC))
+ return -EBADF;
+
/* Not defined for tunnels */
if (!session->session_id && !session->peer_session_id)
return -ENOSYS;
@@ -1090,6 +1082,9 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
if (!session)
return -ENOTCONN;
+ if (WARN_ON(session->magic != L2TP_SESSION_MAGIC))
+ return -EBADF;
+
/* Not defined for tunnels */
if (!session->session_id && !session->peer_session_id)
return -ENOSYS;
@@ -1103,6 +1098,9 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
if (!session)
return -ENOTCONN;
+ if (WARN_ON(session->magic != L2TP_SESSION_MAGIC))
+ return -EBADF;
+
/* Session 0 represents the parent tunnel */
if (!session->session_id && !session->peer_session_id) {
u32 session_id;
@@ -1157,9 +1155,7 @@ static int pppol2tp_tunnel_setsockopt(struct sock *sk,
switch (optname) {
case PPPOL2TP_SO_DEBUG:
- tunnel->debug = val;
- l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: set debug=%x\n",
- tunnel->name, tunnel->debug);
+ /* Tunnel debug flags option is deprecated */
break;
default:
@@ -1185,9 +1181,6 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
break;
}
session->recv_seq = !!val;
- l2tp_info(session, L2TP_MSG_CONTROL,
- "%s: set recv_seq=%d\n",
- session->name, session->recv_seq);
break;
case PPPOL2TP_SO_SENDSEQ:
@@ -1203,9 +1196,6 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
}
l2tp_session_set_header_len(session, session->tunnel->version);
- l2tp_info(session, L2TP_MSG_CONTROL,
- "%s: set send_seq=%d\n",
- session->name, session->send_seq);
break;
case PPPOL2TP_SO_LNSMODE:
@@ -1214,22 +1204,14 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
break;
}
session->lns_mode = !!val;
- l2tp_info(session, L2TP_MSG_CONTROL,
- "%s: set lns_mode=%d\n",
- session->name, session->lns_mode);
break;
case PPPOL2TP_SO_DEBUG:
- session->debug = val;
- l2tp_info(session, L2TP_MSG_CONTROL, "%s: set debug=%x\n",
- session->name, session->debug);
+ /* Session debug flags option is deprecated */
break;
case PPPOL2TP_SO_REORDERTO:
session->reorder_timeout = msecs_to_jiffies(val);
- l2tp_info(session, L2TP_MSG_CONTROL,
- "%s: set reorder_timeout=%d\n",
- session->name, session->reorder_timeout);
break;
default:
@@ -1297,9 +1279,8 @@ static int pppol2tp_tunnel_getsockopt(struct sock *sk,
switch (optname) {
case PPPOL2TP_SO_DEBUG:
- *val = tunnel->debug;
- l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: get debug=%x\n",
- tunnel->name, tunnel->debug);
+ /* Tunnel debug flags option is deprecated */
+ *val = 0;
break;
default:
@@ -1321,32 +1302,23 @@ static int pppol2tp_session_getsockopt(struct sock *sk,
switch (optname) {
case PPPOL2TP_SO_RECVSEQ:
*val = session->recv_seq;
- l2tp_info(session, L2TP_MSG_CONTROL,
- "%s: get recv_seq=%d\n", session->name, *val);
break;
case PPPOL2TP_SO_SENDSEQ:
*val = session->send_seq;
- l2tp_info(session, L2TP_MSG_CONTROL,
- "%s: get send_seq=%d\n", session->name, *val);
break;
case PPPOL2TP_SO_LNSMODE:
*val = session->lns_mode;
- l2tp_info(session, L2TP_MSG_CONTROL,
- "%s: get lns_mode=%d\n", session->name, *val);
break;
case PPPOL2TP_SO_DEBUG:
- *val = session->debug;
- l2tp_info(session, L2TP_MSG_CONTROL, "%s: get debug=%d\n",
- session->name, *val);
+ /* Session debug flags option is deprecated */
+ *val = 0;
break;
case PPPOL2TP_SO_REORDERTO:
*val = (int)jiffies_to_msecs(session->reorder_timeout);
- l2tp_info(session, L2TP_MSG_CONTROL,
- "%s: get reorder_timeout=%d\n", session->name, *val);
break;
default:
@@ -1534,7 +1506,7 @@ static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
(tunnel == tunnel->sock->sk_user_data) ? 'Y' : 'N',
refcount_read(&tunnel->ref_count) - 1);
seq_printf(m, " %08x %ld/%ld/%ld %ld/%ld/%ld\n",
- tunnel->debug,
+ 0,
atomic_long_read(&tunnel->stats.tx_packets),
atomic_long_read(&tunnel->stats.tx_bytes),
atomic_long_read(&tunnel->stats.tx_errors),
@@ -1580,7 +1552,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
session->recv_seq ? 'R' : '-',
session->send_seq ? 'S' : '-',
session->lns_mode ? "LNS" : "LAC",
- session->debug,
+ 0,
jiffies_to_msecs(session->reorder_timeout));
seq_printf(m, " %hu/%hu %ld/%ld/%ld %ld/%ld/%ld\n",
session->nr, session->ns,
diff --git a/net/l2tp/trace.h b/net/l2tp/trace.h
new file mode 100644
index 0000000..8596eaa
--- /dev/null
+++ b/net/l2tp/trace.h
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM l2tp
+
+#if !defined(_TRACE_L2TP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_L2TP_H
+
+#include <linux/tracepoint.h>
+#include <linux/l2tp.h>
+#include "l2tp_core.h"
+
+#define encap_type_name(e) { L2TP_ENCAPTYPE_##e, #e }
+#define show_encap_type_name(val) \
+ __print_symbolic(val, \
+ encap_type_name(UDP), \
+ encap_type_name(IP))
+
+#define pw_type_name(p) { L2TP_PWTYPE_##p, #p }
+#define show_pw_type_name(val) \
+ __print_symbolic(val, \
+ pw_type_name(ETH_VLAN), \
+ pw_type_name(ETH), \
+ pw_type_name(PPP), \
+ pw_type_name(PPP_AC), \
+ pw_type_name(IP))
+
+DECLARE_EVENT_CLASS(tunnel_only_evt,
+ TP_PROTO(struct l2tp_tunnel *tunnel),
+ TP_ARGS(tunnel),
+ TP_STRUCT__entry(
+ __array(char, name, L2TP_TUNNEL_NAME_MAX)
+ ),
+ TP_fast_assign(
+ memcpy(__entry->name, tunnel->name, L2TP_TUNNEL_NAME_MAX);
+ ),
+ TP_printk("%s", __entry->name)
+);
+
+DECLARE_EVENT_CLASS(session_only_evt,
+ TP_PROTO(struct l2tp_session *session),
+ TP_ARGS(session),
+ TP_STRUCT__entry(
+ __array(char, name, L2TP_SESSION_NAME_MAX)
+ ),
+ TP_fast_assign(
+ memcpy(__entry->name, session->name, L2TP_SESSION_NAME_MAX);
+ ),
+ TP_printk("%s", __entry->name)
+);
+
+TRACE_EVENT(register_tunnel,
+ TP_PROTO(struct l2tp_tunnel *tunnel),
+ TP_ARGS(tunnel),
+ TP_STRUCT__entry(
+ __array(char, name, L2TP_TUNNEL_NAME_MAX)
+ __field(int, fd)
+ __field(u32, tid)
+ __field(u32, ptid)
+ __field(int, version)
+ __field(enum l2tp_encap_type, encap)
+ ),
+ TP_fast_assign(
+ memcpy(__entry->name, tunnel->name, L2TP_TUNNEL_NAME_MAX);
+ __entry->fd = tunnel->fd;
+ __entry->tid = tunnel->tunnel_id;
+ __entry->ptid = tunnel->peer_tunnel_id;
+ __entry->version = tunnel->version;
+ __entry->encap = tunnel->encap;
+ ),
+ TP_printk("%s: type=%s encap=%s version=L2TPv%d tid=%u ptid=%u fd=%d",
+ __entry->name,
+ __entry->fd > 0 ? "managed" : "unmanaged",
+ show_encap_type_name(__entry->encap),
+ __entry->version,
+ __entry->tid,
+ __entry->ptid,
+ __entry->fd)
+);
+
+DEFINE_EVENT(tunnel_only_evt, delete_tunnel,
+ TP_PROTO(struct l2tp_tunnel *tunnel),
+ TP_ARGS(tunnel)
+);
+
+DEFINE_EVENT(tunnel_only_evt, free_tunnel,
+ TP_PROTO(struct l2tp_tunnel *tunnel),
+ TP_ARGS(tunnel)
+);
+
+TRACE_EVENT(register_session,
+ TP_PROTO(struct l2tp_session *session),
+ TP_ARGS(session),
+ TP_STRUCT__entry(
+ __array(char, name, L2TP_SESSION_NAME_MAX)
+ __field(u32, tid)
+ __field(u32, ptid)
+ __field(u32, sid)
+ __field(u32, psid)
+ __field(enum l2tp_pwtype, pwtype)
+ ),
+ TP_fast_assign(
+ memcpy(__entry->name, session->name, L2TP_SESSION_NAME_MAX);
+ __entry->tid = session->tunnel ? session->tunnel->tunnel_id : 0;
+ __entry->ptid = session->tunnel ? session->tunnel->peer_tunnel_id : 0;
+ __entry->sid = session->session_id;
+ __entry->psid = session->peer_session_id;
+ __entry->pwtype = session->pwtype;
+ ),
+ TP_printk("%s: pseudowire=%s sid=%u psid=%u tid=%u ptid=%u",
+ __entry->name,
+ show_pw_type_name(__entry->pwtype),
+ __entry->sid,
+ __entry->psid,
+ __entry->sid,
+ __entry->psid)
+);
+
+DEFINE_EVENT(session_only_evt, delete_session,
+ TP_PROTO(struct l2tp_session *session),
+ TP_ARGS(session)
+);
+
+DEFINE_EVENT(session_only_evt, free_session,
+ TP_PROTO(struct l2tp_session *session),
+ TP_ARGS(session)
+);
+
+DEFINE_EVENT(session_only_evt, session_seqnum_lns_enable,
+ TP_PROTO(struct l2tp_session *session),
+ TP_ARGS(session)
+);
+
+DEFINE_EVENT(session_only_evt, session_seqnum_lns_disable,
+ TP_PROTO(struct l2tp_session *session),
+ TP_ARGS(session)
+);
+
+DECLARE_EVENT_CLASS(session_seqnum_evt,
+ TP_PROTO(struct l2tp_session *session),
+ TP_ARGS(session),
+ TP_STRUCT__entry(
+ __array(char, name, L2TP_SESSION_NAME_MAX)
+ __field(u32, ns)
+ __field(u32, nr)
+ ),
+ TP_fast_assign(
+ memcpy(__entry->name, session->name, L2TP_SESSION_NAME_MAX);
+ __entry->ns = session->ns;
+ __entry->nr = session->nr;
+ ),
+ TP_printk("%s: ns=%u nr=%u",
+ __entry->name,
+ __entry->ns,
+ __entry->nr)
+);
+
+DEFINE_EVENT(session_seqnum_evt, session_seqnum_update,
+ TP_PROTO(struct l2tp_session *session),
+ TP_ARGS(session)
+);
+
+DEFINE_EVENT(session_seqnum_evt, session_seqnum_reset,
+ TP_PROTO(struct l2tp_session *session),
+ TP_ARGS(session)
+);
+
+DECLARE_EVENT_CLASS(session_pkt_discard_evt,
+ TP_PROTO(struct l2tp_session *session, u32 pkt_ns),
+ TP_ARGS(session, pkt_ns),
+ TP_STRUCT__entry(
+ __array(char, name, L2TP_SESSION_NAME_MAX)
+ __field(u32, pkt_ns)
+ __field(u32, my_nr)
+ __field(u32, reorder_q_len)
+ ),
+ TP_fast_assign(
+ memcpy(__entry->name, session->name, L2TP_SESSION_NAME_MAX);
+ __entry->pkt_ns = pkt_ns,
+ __entry->my_nr = session->nr;
+ __entry->reorder_q_len = skb_queue_len(&session->reorder_q);
+ ),
+ TP_printk("%s: pkt_ns=%u my_nr=%u reorder_q_len=%u",
+ __entry->name,
+ __entry->pkt_ns,
+ __entry->my_nr,
+ __entry->reorder_q_len)
+);
+
+DEFINE_EVENT(session_pkt_discard_evt, session_pkt_expired,
+ TP_PROTO(struct l2tp_session *session, u32 pkt_ns),
+ TP_ARGS(session, pkt_ns)
+);
+
+DEFINE_EVENT(session_pkt_discard_evt, session_pkt_outside_rx_window,
+ TP_PROTO(struct l2tp_session *session, u32 pkt_ns),
+ TP_ARGS(session, pkt_ns)
+);
+
+DEFINE_EVENT(session_pkt_discard_evt, session_pkt_oos,
+ TP_PROTO(struct l2tp_session *session, u32 pkt_ns),
+ TP_ARGS(session, pkt_ns)
+);
+
+#endif /* _TRACE_L2TP_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 313ba97..cd4cf84 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -350,7 +350,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
sta->sta.addr, tid);
/* We have no API to update the timeout value in the
* driver so reject the timeout update if the timeout
- * changed. If if did not change, i.e., no real update,
+ * changed. If it did not change, i.e., no real update,
* just reply with success.
*/
rcu_read_lock();
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 87fddd8..b4e39e3 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -826,9 +826,9 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
memcpy(new->data, resp, resp_len);
if (csa)
- memcpy(new->csa_counter_offsets, csa->counter_offsets_presp,
+ memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_presp,
csa->n_counter_offsets_presp *
- sizeof(new->csa_counter_offsets[0]));
+ sizeof(new->cntdwn_counter_offsets[0]));
rcu_assign_pointer(sdata->u.ap.probe_resp, new);
if (old)
@@ -926,10 +926,10 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
new->tail_len = new_tail_len;
if (csa) {
- new->csa_current_counter = csa->count;
- memcpy(new->csa_counter_offsets, csa->counter_offsets_beacon,
+ new->cntdwn_current_counter = csa->count;
+ memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_beacon,
csa->n_counter_offsets_beacon *
- sizeof(new->csa_counter_offsets[0]));
+ sizeof(new->cntdwn_counter_offsets[0]));
}
/* copy in head */
@@ -3186,9 +3186,9 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
break;
if ((params->n_counter_offsets_beacon >
- IEEE80211_MAX_CSA_COUNTERS_NUM) ||
+ IEEE80211_MAX_CNTDWN_COUNTERS_NUM) ||
(params->n_counter_offsets_presp >
- IEEE80211_MAX_CSA_COUNTERS_NUM))
+ IEEE80211_MAX_CNTDWN_COUNTERS_NUM))
return -EINVAL;
csa.counter_offsets_beacon = params->counter_offsets_beacon;
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 53632c2..c096396 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -145,9 +145,9 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
*pos++ = csa_settings->block_tx ? 1 : 0;
*pos++ = ieee80211_frequency_to_channel(
csa_settings->chandef.chan->center_freq);
- presp->csa_counter_offsets[0] = (pos - presp->head);
+ presp->cntdwn_counter_offsets[0] = (pos - presp->head);
*pos++ = csa_settings->count;
- presp->csa_current_counter = csa_settings->count;
+ presp->cntdwn_current_counter = csa_settings->count;
}
/* put the remaining rates in WLAN_EID_EXT_SUPP_RATES */
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 0b1eaec..6bf8796 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -259,15 +259,15 @@ struct beacon_data {
u8 *head, *tail;
int head_len, tail_len;
struct ieee80211_meshconf_ie *meshconf;
- u16 csa_counter_offsets[IEEE80211_MAX_CSA_COUNTERS_NUM];
- u8 csa_current_counter;
+ u16 cntdwn_counter_offsets[IEEE80211_MAX_CNTDWN_COUNTERS_NUM];
+ u8 cntdwn_current_counter;
struct rcu_head rcu_head;
};
struct probe_resp {
struct rcu_head rcu_head;
int len;
- u16 csa_counter_offsets[IEEE80211_MAX_CSA_COUNTERS_NUM];
+ u16 cntdwn_counter_offsets[IEEE80211_MAX_CNTDWN_COUNTERS_NUM];
u8 data[];
};
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index b4a2efe..523380a 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1168,7 +1168,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT;
}
- local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CSA_COUNTERS_NUM;
+ local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CNTDWN_COUNTERS_NUM;
/*
* We use the number of queues for feature tests (QoS, HT) internally
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 7ecd801..ce5825d 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -672,7 +672,7 @@ void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh)
* @hdr: 802.11 frame header
* @fc: frame control field
* @meshda: destination address in the mesh
- * @meshsa: source address address in the mesh. Same as TA, as frame is
+ * @meshsa: source address in the mesh. Same as TA, as frame is
* locally originated.
*
* Return the length of the 802.11 (does not include a mesh control header)
@@ -864,8 +864,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
*pos++ = 0x0;
*pos++ = ieee80211_frequency_to_channel(
csa->settings.chandef.chan->center_freq);
- bcn->csa_current_counter = csa->settings.count;
- bcn->csa_counter_offsets[0] = hdr_len + 6;
+ bcn->cntdwn_current_counter = csa->settings.count;
+ bcn->cntdwn_counter_offsets[0] = hdr_len + 6;
*pos++ = csa->settings.count;
*pos++ = WLAN_EID_CHAN_SWITCH_PARAM;
*pos++ = 6;
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index f470d1a..1ac7b8c 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -916,7 +916,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
if (beacon)
for (i = 0; i < params->n_csa_offsets; i++)
data[params->csa_offsets[i]] =
- beacon->csa_current_counter;
+ beacon->cntdwn_current_counter;
rcu_read_unlock();
}
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index dca01d7..d213600 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -4538,14 +4538,14 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
return 0;
}
-static void ieee80211_set_csa(struct ieee80211_sub_if_data *sdata,
- struct beacon_data *beacon)
+static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata,
+ struct beacon_data *beacon)
{
struct probe_resp *resp;
u8 *beacon_data;
size_t beacon_data_len;
int i;
- u8 count = beacon->csa_current_counter;
+ u8 count = beacon->cntdwn_current_counter;
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
@@ -4565,36 +4565,36 @@ static void ieee80211_set_csa(struct ieee80211_sub_if_data *sdata,
}
rcu_read_lock();
- for (i = 0; i < IEEE80211_MAX_CSA_COUNTERS_NUM; ++i) {
+ for (i = 0; i < IEEE80211_MAX_CNTDWN_COUNTERS_NUM; ++i) {
resp = rcu_dereference(sdata->u.ap.probe_resp);
- if (beacon->csa_counter_offsets[i]) {
- if (WARN_ON_ONCE(beacon->csa_counter_offsets[i] >=
+ if (beacon->cntdwn_counter_offsets[i]) {
+ if (WARN_ON_ONCE(beacon->cntdwn_counter_offsets[i] >=
beacon_data_len)) {
rcu_read_unlock();
return;
}
- beacon_data[beacon->csa_counter_offsets[i]] = count;
+ beacon_data[beacon->cntdwn_counter_offsets[i]] = count;
}
if (sdata->vif.type == NL80211_IFTYPE_AP && resp)
- resp->data[resp->csa_counter_offsets[i]] = count;
+ resp->data[resp->cntdwn_counter_offsets[i]] = count;
}
rcu_read_unlock();
}
-static u8 __ieee80211_csa_update_counter(struct beacon_data *beacon)
+static u8 __ieee80211_beacon_update_cntdwn(struct beacon_data *beacon)
{
- beacon->csa_current_counter--;
+ beacon->cntdwn_current_counter--;
/* the counter should never reach 0 */
- WARN_ON_ONCE(!beacon->csa_current_counter);
+ WARN_ON_ONCE(!beacon->cntdwn_current_counter);
- return beacon->csa_current_counter;
+ return beacon->cntdwn_current_counter;
}
-u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif)
+u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct beacon_data *beacon = NULL;
@@ -4612,15 +4612,15 @@ u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif)
if (!beacon)
goto unlock;
- count = __ieee80211_csa_update_counter(beacon);
+ count = __ieee80211_beacon_update_cntdwn(beacon);
unlock:
rcu_read_unlock();
return count;
}
-EXPORT_SYMBOL(ieee80211_csa_update_counter);
+EXPORT_SYMBOL(ieee80211_beacon_update_cntdwn);
-void ieee80211_csa_set_counter(struct ieee80211_vif *vif, u8 counter)
+void ieee80211_beacon_set_cntdwn(struct ieee80211_vif *vif, u8 counter)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct beacon_data *beacon = NULL;
@@ -4637,15 +4637,15 @@ void ieee80211_csa_set_counter(struct ieee80211_vif *vif, u8 counter)
if (!beacon)
goto unlock;
- if (counter < beacon->csa_current_counter)
- beacon->csa_current_counter = counter;
+ if (counter < beacon->cntdwn_current_counter)
+ beacon->cntdwn_current_counter = counter;
unlock:
rcu_read_unlock();
}
-EXPORT_SYMBOL(ieee80211_csa_set_counter);
+EXPORT_SYMBOL(ieee80211_beacon_set_cntdwn);
-bool ieee80211_csa_is_complete(struct ieee80211_vif *vif)
+bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct beacon_data *beacon = NULL;
@@ -4688,20 +4688,21 @@ bool ieee80211_csa_is_complete(struct ieee80211_vif *vif)
goto out;
}
- if (!beacon->csa_counter_offsets[0])
+ if (!beacon->cntdwn_counter_offsets[0])
goto out;
- if (WARN_ON_ONCE(beacon->csa_counter_offsets[0] > beacon_data_len))
+ if (WARN_ON_ONCE(beacon->cntdwn_counter_offsets[0] > beacon_data_len))
goto out;
- if (beacon_data[beacon->csa_counter_offsets[0]] == 1)
+ if (beacon_data[beacon->cntdwn_counter_offsets[0]] == 1)
ret = true;
+
out:
rcu_read_unlock();
return ret;
}
-EXPORT_SYMBOL(ieee80211_csa_is_complete);
+EXPORT_SYMBOL(ieee80211_beacon_cntdwn_is_complete);
static int ieee80211_beacon_protect(struct sk_buff *skb,
struct ieee80211_local *local,
@@ -4761,11 +4762,11 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
beacon = rcu_dereference(ap->beacon);
if (beacon) {
- if (beacon->csa_counter_offsets[0]) {
+ if (beacon->cntdwn_counter_offsets[0]) {
if (!is_template)
- __ieee80211_csa_update_counter(beacon);
+ ieee80211_beacon_update_cntdwn(vif);
- ieee80211_set_csa(sdata, beacon);
+ ieee80211_set_beacon_cntdwn(sdata, beacon);
}
/*
@@ -4809,11 +4810,11 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
if (!beacon)
goto out;
- if (beacon->csa_counter_offsets[0]) {
+ if (beacon->cntdwn_counter_offsets[0]) {
if (!is_template)
- __ieee80211_csa_update_counter(beacon);
+ __ieee80211_beacon_update_cntdwn(beacon);
- ieee80211_set_csa(sdata, beacon);
+ ieee80211_set_beacon_cntdwn(sdata, beacon);
}
skb = dev_alloc_skb(local->tx_headroom + beacon->head_len +
@@ -4833,16 +4834,16 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
if (!beacon)
goto out;
- if (beacon->csa_counter_offsets[0]) {
+ if (beacon->cntdwn_counter_offsets[0]) {
if (!is_template)
/* TODO: For mesh csa_counter is in TU, so
* decrementing it by one isn't correct, but
* for now we leave it consistent with overall
* mac80211's behavior.
*/
- __ieee80211_csa_update_counter(beacon);
+ __ieee80211_beacon_update_cntdwn(beacon);
- ieee80211_set_csa(sdata, beacon);
+ ieee80211_set_beacon_cntdwn(sdata, beacon);
}
if (ifmsh->sync_ops)
@@ -4874,13 +4875,13 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
if (offs && beacon) {
int i;
- for (i = 0; i < IEEE80211_MAX_CSA_COUNTERS_NUM; i++) {
- u16 csa_off = beacon->csa_counter_offsets[i];
+ for (i = 0; i < IEEE80211_MAX_CNTDWN_COUNTERS_NUM; i++) {
+ u16 csa_off = beacon->cntdwn_counter_offsets[i];
if (!csa_off)
continue;
- offs->csa_counter_offs[i] = csa_off_base + csa_off;
+ offs->cntdwn_counter_offs[i] = csa_off_base + csa_off;
}
}
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index c8820c4..2c208d2 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -384,8 +384,8 @@ mptcp_pm_addr_policy[MPTCP_PM_ADDR_ATTR_MAX + 1] = {
[MPTCP_PM_ADDR_ATTR_FAMILY] = { .type = NLA_U16, },
[MPTCP_PM_ADDR_ATTR_ID] = { .type = NLA_U8, },
[MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_U32, },
- [MPTCP_PM_ADDR_ATTR_ADDR6] = { .type = NLA_EXACT_LEN,
- .len = sizeof(struct in6_addr), },
+ [MPTCP_PM_ADDR_ATTR_ADDR6] =
+ NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
[MPTCP_PM_ADDR_ATTR_PORT] = { .type = NLA_U16 },
[MPTCP_PM_ADDR_ATTR_FLAGS] = { .type = NLA_U32 },
[MPTCP_PM_ADDR_ATTR_IF_IDX] = { .type = NLA_S32 },
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 365ba96..6831962 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -24,8 +24,6 @@
#include "protocol.h"
#include "mib.h"
-#define MPTCP_SAME_STATE TCP_MAX_STATES
-
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
struct mptcp6_sock {
struct mptcp_sock msk;
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index f73a838..dc8c39f 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -612,9 +612,8 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry,
audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info);
if (audit_buf != NULL) {
audit_log_format(audit_buf,
- " nlbl_domain=%s res=%u",
- entry->domain ? entry->domain : "(default)",
- ret_val == 0 ? 1 : 0);
+ " nlbl_domain=%s res=1",
+ entry->domain ? entry->domain : "(default)");
audit_log_end(audit_buf);
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index d2d1448..f9efd2c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -848,7 +848,7 @@ static int netlink_autobind(struct socket *sock)
*
* Test to see if the opener of the socket we received the message
* from had when the netlink socket was created and the sender of the
- * message has has the capability @cap in the user namespace @user_ns.
+ * message has the capability @cap in the user namespace @user_ns.
*/
bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
struct user_namespace *user_ns, int cap)
@@ -867,7 +867,7 @@ EXPORT_SYMBOL(__netlink_ns_capable);
*
* Test to see if the opener of the socket we received the message
* from had when the netlink socket was created and the sender of the
- * message has has the capability @cap in the user namespace @user_ns.
+ * message has the capability @cap in the user namespace @user_ns.
*/
bool netlink_ns_capable(const struct sk_buff *skb,
struct user_namespace *user_ns, int cap)
@@ -883,7 +883,7 @@ EXPORT_SYMBOL(netlink_ns_capable);
*
* Test to see if the opener of the socket we received the message
* from had when the netlink socket was created and the sender of the
- * message has has the capability @cap in all user namespaces.
+ * message has the capability @cap in all user namespaces.
*/
bool netlink_capable(const struct sk_buff *skb, int cap)
{
@@ -898,7 +898,7 @@ EXPORT_SYMBOL(netlink_capable);
*
* Test to see if the opener of the socket we received the message
* from had when the netlink socket was created and the sender of the
- * message has has the capability @cap over the network namespace of
+ * message has the capability @cap over the network namespace of
* the socket we received the message from.
*/
bool netlink_net_capable(const struct sk_buff *skb, int cap)
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 1eb65a7..3a718e3 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -222,7 +222,7 @@ static int genl_validate_assign_mc_groups(struct genl_family *family)
family->mcgrp_offset = first_id;
- /* if still initializing, can't and don't need to to realloc bitmaps */
+ /* if still initializing, can't and don't need to realloc bitmaps */
if (!init_net.genl_sock)
return 0;
diff --git a/net/netlink/policy.c b/net/netlink/policy.c
index 641ffbd..62f977f 100644
--- a/net/netlink/policy.c
+++ b/net/netlink/policy.c
@@ -254,12 +254,6 @@ int netlink_policy_dump_write(struct sk_buff *skb, unsigned long _state)
pt->bitfield32_valid))
goto nla_put_failure;
break;
- case NLA_EXACT_LEN:
- type = NL_ATTR_TYPE_BINARY;
- if (nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MIN_LENGTH, pt->len) ||
- nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MAX_LENGTH, pt->len))
- goto nla_put_failure;
- break;
case NLA_STRING:
case NLA_NUL_STRING:
case NLA_BINARY:
@@ -269,14 +263,27 @@ int netlink_policy_dump_write(struct sk_buff *skb, unsigned long _state)
type = NL_ATTR_TYPE_NUL_STRING;
else
type = NL_ATTR_TYPE_BINARY;
- if (pt->len && nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MAX_LENGTH,
- pt->len))
+
+ if (pt->validation_type == NLA_VALIDATE_RANGE ||
+ pt->validation_type == NLA_VALIDATE_RANGE_WARN_TOO_LONG) {
+ struct netlink_range_validation range;
+
+ nla_get_range_unsigned(pt, &range);
+
+ if (range.min &&
+ nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MIN_LENGTH,
+ range.min))
+ goto nla_put_failure;
+
+ if (range.max < U16_MAX &&
+ nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MAX_LENGTH,
+ range.max))
+ goto nla_put_failure;
+ } else if (pt->len &&
+ nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MAX_LENGTH,
+ pt->len)) {
goto nla_put_failure;
- break;
- case NLA_MIN_LEN:
- type = NL_ATTR_TYPE_BINARY;
- if (nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MIN_LENGTH, pt->len))
- goto nla_put_failure;
+ }
break;
case NLA_FLAG:
type = NL_ATTR_TYPE_FLAG;
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 2611657..855f2c1 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -9,7 +9,6 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/openvswitch.h>
-#include <linux/netfilter_ipv6.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
@@ -742,7 +741,8 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
return 0;
}
-static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int ovs_vport_output(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
struct vport *vport = data->vport;
@@ -848,13 +848,9 @@ static void ovs_fragment(struct net *net, struct vport *vport,
ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
refdst_drop(orig_dst);
} else if (key->eth.type == htons(ETH_P_IPV6)) {
- const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
unsigned long orig_dst;
struct rt6_info ovs_rt;
- if (!v6ops)
- goto err;
-
prepare_frag(vport, skb, orig_network_offset,
ovs_key_mac_proto(key));
memset(&ovs_rt, 0, sizeof(ovs_rt));
@@ -866,7 +862,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
skb_dst_set_noref(skb, &ovs_rt.dst);
IP6CB(skb)->frag_max_size = mru;
- v6ops->fragment(net, skb->sk, skb, ovs_vport_output);
+ ipv6_stub->ipv6_fragment(net, skb->sk, skb, ovs_vport_output);
refdst_drop(orig_dst);
} else {
WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
@@ -925,7 +921,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
upcall.mru = OVS_CB(skb)->mru;
for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
- a = nla_next(a, &rem)) {
+ a = nla_next(a, &rem)) {
switch (nla_type(a)) {
case OVS_USERSPACE_ATTR_USERDATA:
upcall.userdata = a;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 6e47ef7..00df39b 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -182,7 +182,7 @@ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
head = vport_hash_bucket(dp, port_no);
hlist_for_each_entry_rcu(vport, head, dp_hash_node,
- lockdep_ovsl_is_held()) {
+ lockdep_ovsl_is_held()) {
if (vport->port_no == port_no)
return vport;
}
@@ -254,7 +254,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
error = ovs_execute_actions(dp, skb, sf_acts, key);
if (unlikely(error))
net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
- ovs_dp_name(dp), error);
+ ovs_dp_name(dp), error);
stats_counter = &stats->n_hit;
@@ -302,7 +302,7 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
const struct sw_flow_key *key,
const struct dp_upcall_info *upcall_info,
- uint32_t cutlen)
+ uint32_t cutlen)
{
unsigned int gso_type = skb_shinfo(skb)->gso_type;
struct sw_flow_key later_key;
@@ -1080,11 +1080,12 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
}
/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
-static noinline_for_stack struct sw_flow_actions *get_flow_actions(struct net *net,
- const struct nlattr *a,
- const struct sw_flow_key *key,
- const struct sw_flow_mask *mask,
- bool log)
+static noinline_for_stack
+struct sw_flow_actions *get_flow_actions(struct net *net,
+ const struct nlattr *a,
+ const struct sw_flow_key *key,
+ const struct sw_flow_mask *mask,
+ bool log)
{
struct sw_flow_actions *acts;
struct sw_flow_key masked_key;
@@ -1383,7 +1384,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
ovs_notify(&dp_flow_genl_family, reply, info);
} else {
- netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply));
+ netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
+ PTR_ERR(reply));
}
}
@@ -1513,7 +1515,7 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
int err;
ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
- flags, cmd);
+ flags, cmd);
if (!ovs_header)
goto error;
@@ -1572,11 +1574,13 @@ static struct datapath *lookup_datapath(struct net *net,
return dp ? dp : ERR_PTR(-ENODEV);
}
-static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
+static void ovs_dp_reset_user_features(struct sk_buff *skb,
+ struct genl_info *info)
{
struct datapath *dp;
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+ dp = lookup_datapath(sock_net(skb->sk), info->userhdr,
+ info->attrs);
if (IS_ERR(dp))
return;
@@ -2075,7 +2079,7 @@ static unsigned int ovs_get_max_headroom(struct datapath *dp)
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
- lockdep_ovsl_is_held()) {
+ lockdep_ovsl_is_held()) {
dev = vport->dev;
dev_headroom = netdev_get_fwd_headroom(dev);
if (dev_headroom > max_headroom)
@@ -2093,10 +2097,11 @@ static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
int i;
dp->max_headroom = new_headroom;
- for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
+ for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
- lockdep_ovsl_is_held())
+ lockdep_ovsl_is_held())
netdev_set_rx_headroom(vport->dev, new_headroom);
+ }
}
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
@@ -2476,13 +2481,19 @@ static int __init dp_register_genl(void)
static int __net_init ovs_init_net(struct net *net)
{
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+ int err;
INIT_LIST_HEAD(&ovs_net->dps);
INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);
+
+ err = ovs_ct_init(net);
+ if (err)
+ return err;
+
schedule_delayed_work(&ovs_net->masks_rebalance,
msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
- return ovs_ct_init(net);
+ return 0;
}
static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
@@ -2551,7 +2562,8 @@ static int __init dp_init(void)
{
int err;
- BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof_field(struct sk_buff, cb));
+ BUILD_BUG_ON(sizeof(struct ovs_skb_cb) >
+ sizeof_field(struct sk_buff, cb));
pr_info("Open vSwitch switching datapath\n");
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index e223584..87c286a 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -111,12 +111,16 @@ static void flow_free(struct sw_flow *flow)
if (ovs_identifier_is_key(&flow->id))
kfree(flow->id.unmasked_key);
if (flow->sf_acts)
- ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts);
+ ovs_nla_free_flow_actions((struct sw_flow_actions __force *)
+ flow->sf_acts);
/* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask))
+ for (cpu = 0; cpu < nr_cpu_ids;
+ cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
if (flow->stats[cpu])
kmem_cache_free(flow_stats_cache,
(struct sw_flow_stats __force *)flow->stats[cpu]);
+ }
+
kmem_cache_free(flow_cache, flow);
}
@@ -164,7 +168,6 @@ static struct table_instance *table_instance_alloc(int new_size)
ti->n_buckets = new_size;
ti->node_ver = 0;
- ti->keep_flows = false;
get_random_bytes(&ti->hash_seed, sizeof(u32));
return ti;
@@ -192,7 +195,7 @@ static void tbl_mask_array_reset_counters(struct mask_array *ma)
* zero based counter we store the value at reset, and subtract it
* later when processing.
*/
- for (i = 0; i < ma->max; i++) {
+ for (i = 0; i < ma->max; i++) {
ma->masks_usage_zero_cntr[i] = 0;
for_each_possible_cpu(cpu) {
@@ -273,7 +276,7 @@ static int tbl_mask_array_add_mask(struct flow_table *tbl,
if (ma_count >= ma->max) {
err = tbl_mask_array_realloc(tbl, ma->max +
- MASK_ARRAY_SIZE_MIN);
+ MASK_ARRAY_SIZE_MIN);
if (err)
return err;
@@ -288,7 +291,7 @@ static int tbl_mask_array_add_mask(struct flow_table *tbl,
BUG_ON(ovsl_dereference(ma->masks[ma_count]));
rcu_assign_pointer(ma->masks[ma_count], new);
- WRITE_ONCE(ma->count, ma_count +1);
+ WRITE_ONCE(ma->count, ma_count + 1);
return 0;
}
@@ -309,10 +312,10 @@ static void tbl_mask_array_del_mask(struct flow_table *tbl,
return;
found:
- WRITE_ONCE(ma->count, ma_count -1);
+ WRITE_ONCE(ma->count, ma_count - 1);
- rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]);
- RCU_INIT_POINTER(ma->masks[ma_count -1], NULL);
+ rcu_assign_pointer(ma->masks[i], ma->masks[ma_count - 1]);
+ RCU_INIT_POINTER(ma->masks[ma_count - 1], NULL);
kfree_rcu(mask, rcu);
@@ -448,26 +451,23 @@ int ovs_flow_tbl_init(struct flow_table *table)
static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
{
- struct table_instance *ti = container_of(rcu, struct table_instance, rcu);
+ struct table_instance *ti;
+ ti = container_of(rcu, struct table_instance, rcu);
__table_instance_destroy(ti);
}
static void table_instance_flow_free(struct flow_table *table,
- struct table_instance *ti,
- struct table_instance *ufid_ti,
- struct sw_flow *flow,
- bool count)
+ struct table_instance *ti,
+ struct table_instance *ufid_ti,
+ struct sw_flow *flow)
{
hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
- if (count)
- table->count--;
+ table->count--;
if (ovs_identifier_is_ufid(&flow->id)) {
hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
-
- if (count)
- table->ufid_count--;
+ table->ufid_count--;
}
flow_mask_remove(table, flow->mask);
@@ -480,22 +480,25 @@ void table_instance_flow_flush(struct flow_table *table,
{
int i;
- if (ti->keep_flows)
- return;
-
for (i = 0; i < ti->n_buckets; i++) {
- struct sw_flow *flow;
struct hlist_head *head = &ti->buckets[i];
struct hlist_node *n;
+ struct sw_flow *flow;
hlist_for_each_entry_safe(flow, n, head,
flow_table.node[ti->node_ver]) {
table_instance_flow_free(table, ti, ufid_ti,
- flow, false);
+ flow);
ovs_flow_free(flow, true);
}
}
+
+ if (WARN_ON(table->count != 0 ||
+ table->ufid_count != 0)) {
+ table->count = 0;
+ table->ufid_count = 0;
+ }
}
static void table_instance_destroy(struct table_instance *ti,
@@ -596,8 +599,6 @@ static void flow_table_copy_flows(struct table_instance *old,
lockdep_ovsl_is_held())
table_instance_insert(new, flow);
}
-
- old->keep_flows = true;
}
static struct table_instance *table_instance_rehash(struct table_instance *ti,
@@ -632,8 +633,6 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
rcu_assign_pointer(flow_table->ti, new_ti);
rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti);
flow_table->last_rehash = jiffies;
- flow_table->count = 0;
- flow_table->ufid_count = 0;
table_instance_flow_flush(flow_table, old_ti, old_ufid_ti);
table_instance_destroy(old_ti, old_ufid_ti);
@@ -661,7 +660,7 @@ static int flow_key_start(const struct sw_flow_key *key)
return 0;
else
return rounddown(offsetof(struct sw_flow_key, phy),
- sizeof(long));
+ sizeof(long));
}
static bool cmp_key(const struct sw_flow_key *key1,
@@ -673,7 +672,7 @@ static bool cmp_key(const struct sw_flow_key *key1,
long diffs = 0;
int i;
- for (i = key_start; i < key_end; i += sizeof(long))
+ for (i = key_start; i < key_end; i += sizeof(long))
diffs |= *cp1++ ^ *cp2++;
return diffs == 0;
@@ -713,7 +712,7 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
(*n_mask_hit)++;
hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver],
- lockdep_ovsl_is_held()) {
+ lockdep_ovsl_is_held()) {
if (flow->mask == mask && flow->flow_table.hash == hash &&
flow_cmp_masked_key(flow, &masked_key, &mask->range))
return flow;
@@ -897,7 +896,8 @@ static bool ovs_flow_cmp_ufid(const struct sw_flow *flow,
return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len);
}
-bool ovs_flow_cmp(const struct sw_flow *flow, const struct sw_flow_match *match)
+bool ovs_flow_cmp(const struct sw_flow *flow,
+ const struct sw_flow_match *match)
{
if (ovs_identifier_is_ufid(&flow->id))
return flow_cmp_masked_key(flow, match->key, &match->range);
@@ -916,7 +916,7 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
hash = ufid_hash(ufid);
head = find_bucket(ti, hash);
hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver],
- lockdep_ovsl_is_held()) {
+ lockdep_ovsl_is_held()) {
if (flow->ufid_table.hash == hash &&
ovs_flow_cmp_ufid(flow, ufid))
return flow;
@@ -950,7 +950,7 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
BUG_ON(table->count == 0);
- table_instance_flow_free(table, ti, ufid_ti, flow, true);
+ table_instance_flow_free(table, ti, ufid_ti, flow);
}
static struct sw_flow_mask *mask_alloc(void)
@@ -1107,7 +1107,7 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
if (!masks_and_count)
return;
- for (i = 0; i < ma->max; i++) {
+ for (i = 0; i < ma->max; i++) {
struct sw_flow_mask *mask;
unsigned int start;
int cpu;
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 6e7d4ac..d8fb7a3 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -53,7 +53,6 @@ struct table_instance {
struct rcu_head rcu;
int node_ver;
u32 hash_seed;
- bool keep_flows;
};
struct flow_table {
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 0d44c5c..82d801f 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -98,7 +98,7 @@ struct vport *ovs_vport_locate(const struct net *net, const char *name)
struct vport *vport;
hlist_for_each_entry_rcu(vport, bucket, hash_node,
- lockdep_ovsl_is_held())
+ lockdep_ovsl_is_held())
if (!strcmp(name, ovs_vport_name(vport)) &&
net_eq(ovs_dp_get_net(vport->dp), net))
return vport;
@@ -118,7 +118,7 @@ struct vport *ovs_vport_locate(const struct net *net, const char *name)
* vport_free().
*/
struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
- const struct vport_parms *parms)
+ const struct vport_parms *parms)
{
struct vport *vport;
size_t alloc_size;
@@ -397,7 +397,8 @@ int ovs_vport_get_upcall_portids(const struct vport *vport,
*
* Returns the portid of the target socket. Must be called with rcu_read_lock.
*/
-u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb)
+u32 ovs_vport_find_upcall_portid(const struct vport *vport,
+ struct sk_buff *skb)
{
struct vport_portids *ids;
u32 ids_index;
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 2c36191..47f9128 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1039,7 +1039,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = {
[TCA_CT_ACTION] = { .type = NLA_U16 },
- [TCA_CT_PARMS] = { .type = NLA_EXACT_LEN, .len = sizeof(struct tc_ct) },
+ [TCA_CT_PARMS] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_ct)),
[TCA_CT_ZONE] = { .type = NLA_U16 },
[TCA_CT_MARK] = { .type = NLA_U32 },
[TCA_CT_MARK_MASK] = { .type = NLA_U32 },
@@ -1049,10 +1049,8 @@ static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = {
.len = 128 / BITS_PER_BYTE },
[TCA_CT_NAT_IPV4_MIN] = { .type = NLA_U32 },
[TCA_CT_NAT_IPV4_MAX] = { .type = NLA_U32 },
- [TCA_CT_NAT_IPV6_MIN] = { .type = NLA_EXACT_LEN,
- .len = sizeof(struct in6_addr) },
- [TCA_CT_NAT_IPV6_MAX] = { .type = NLA_EXACT_LEN,
- .len = sizeof(struct in6_addr) },
+ [TCA_CT_NAT_IPV6_MIN] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
+ [TCA_CT_NAT_IPV6_MAX] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
[TCA_CT_NAT_PORT_MIN] = { .type = NLA_U16 },
[TCA_CT_NAT_PORT_MAX] = { .type = NLA_U16 },
};
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index b5042f3..e88fa19 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -144,9 +144,8 @@ static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a,
}
static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = {
- [TCA_CTINFO_ACT] = { .type = NLA_EXACT_LEN,
- .len = sizeof(struct
- tc_ctinfo) },
+ [TCA_CTINFO_ACT] =
+ NLA_POLICY_EXACT_LEN(sizeof(struct tc_ctinfo)),
[TCA_CTINFO_ZONE] = { .type = NLA_U16 },
[TCA_CTINFO_PARMS_DSCP_MASK] = { .type = NLA_U32 },
[TCA_CTINFO_PARMS_DSCP_STATEMASK] = { .type = NLA_U32 },
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index 1fb8d42..f5dd4d1 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -159,8 +159,8 @@ static const struct nla_policy entry_policy[TCA_GATE_ENTRY_MAX + 1] = {
};
static const struct nla_policy gate_policy[TCA_GATE_MAX + 1] = {
- [TCA_GATE_PARMS] = { .len = sizeof(struct tc_gate),
- .type = NLA_EXACT_LEN },
+ [TCA_GATE_PARMS] =
+ NLA_POLICY_EXACT_LEN(sizeof(struct tc_gate)),
[TCA_GATE_PRIORITY] = { .type = NLA_S32 },
[TCA_GATE_ENTRY_LIST] = { .type = NLA_NESTED },
[TCA_GATE_BASE_TIME] = { .type = NLA_U64 },
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 8d73546..fdb69d4 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1351,7 +1351,7 @@ static void sctp_select_active_and_retran_path(struct sctp_association *asoc)
}
/* We did not find anything useful for a possible retransmission
- * path; either primary path that we found is the the same as
+ * path; either primary path that we found is the same as
* the current one, or we didn't generally find an active one.
*/
if (trans_sec == NULL)
@@ -1537,7 +1537,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
/* If we've reached or overflowed our receive buffer, announce
* a 0 rwnd if rwnd would still be positive. Store the
- * the potential pressure overflow so that the window can be restored
+ * potential pressure overflow so that the window can be restored
* back to original value.
*/
if (rx_count >= asoc->base.sk->sk_rcvbuf)
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 9e289c7..589868d 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -445,7 +445,7 @@ struct sctp_shared_key *sctp_auth_get_shkey(
}
/*
- * Initialize all the possible digest transforms that we can use. Right now
+ * Initialize all the possible digest transforms that we can use. Right
* now, the supported digests are SHA1 and SHA256. We do this here once
* because of the restrictiong that transforms may only be allocated in
* user context. This forces us to pre-allocated all possible transforms
@@ -810,7 +810,7 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
}
/* Set a new shared key on either endpoint or association. If the
- * the key with a same ID already exists, replace the key (remove the
+ * key with a same ID already exists, replace the key (remove the
* old key and add a new one).
*/
int sctp_auth_set_key(struct sctp_endpoint *ep,
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 701c5a4..53e5ed7 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -505,7 +505,7 @@ int sctp_in_scope(struct net *net, const union sctp_addr *addr,
return 0;
/*
* For INIT and INIT-ACK address list, let L be the level of
- * of requested destination address, sender and receiver
+ * requested destination address, sender and receiver
* SHOULD include all of its addresses with level greater
* than or equal to L.
*
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index ab6a997..fd4f824 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -179,7 +179,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
__func__, asoc, max_data);
}
- /* If the the peer requested that we authenticate DATA chunks
+ /* If the peer requested that we authenticate DATA chunks
* we need to account for bundling of the AUTH chunks along with
* DATA.
*/
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index d19db22..2583323 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -372,7 +372,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
* Level 3 - private addresses.
* Level 4 - global addresses
* For INIT and INIT-ACK address list, let L be the level of
- * of requested destination address, sender and receiver
+ * requested destination address, sender and receiver
* SHOULD include all of its addresses with level greater
* than or equal to L.
*
@@ -1483,10 +1483,10 @@ static __init int sctp_init(void)
num_entries = (1UL << order) * PAGE_SIZE /
sizeof(struct sctp_bind_hashbucket);
- /* And finish by rounding it down to the nearest power of two
- * this wastes some memory of course, but its needed because
+ /* And finish by rounding it down to the nearest power of two.
+ * This wastes some memory of course, but it's needed because
* the hash function operates based on the assumption that
- * that the number of entries is a power of two
+ * the number of entries is a power of two.
*/
sctp_port_hashsize = rounddown_pow_of_two(num_entries);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index c11c245..9a56ae2 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1235,7 +1235,7 @@ static struct sctp_chunk *sctp_make_op_error_space(
/* Create an Operation Error chunk of a fixed size, specifically,
* min(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT) - overheads.
- * This is a helper function to allocate an error chunk for for those
+ * This is a helper function to allocate an error chunk for those
* invalid parameter codes in which we may not want to report all the
* errors, if the incoming chunk is large. If it can't fit in a single
* packet, we ignore it.
@@ -1780,7 +1780,7 @@ struct sctp_association *sctp_unpack_cookie(
* for init collision case of lost COOKIE ACK.
* If skb has been timestamped, then use the stamp, otherwise
* use current time. This introduces a small possibility that
- * that a cookie may be considered expired, but his would only slow
+ * a cookie may be considered expired, but this would only slow
* down the new association establishment instead of every packet.
*/
if (sock_flag(ep->base.sk, SOCK_TIMESTAMP))
@@ -2319,7 +2319,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
/* This implementation defaults to making the first transport
* added as the primary transport. The source address seems to
- * be a a better choice than any of the embedded addresses.
+ * be a better choice than any of the embedded addresses.
*/
if (!sctp_assoc_add_peer(asoc, peer_addr, gfp, SCTP_ACTIVE))
goto nomem;
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 1c6c640..407fed4 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -740,7 +740,7 @@ static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq)
/* Helper function to gather skbs that have possibly become
- * ordered by an an incoming chunk.
+ * ordered by an incoming chunk.
*/
static void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq,
struct sctp_ulpevent *event)
diff --git a/net/socket.c b/net/socket.c
index 0c01446..82262e1 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2628,9 +2628,11 @@ long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
struct user_msghdr __user *umsg,
struct sockaddr __user *uaddr, unsigned int flags)
{
- /* disallow ancillary data requests from this path */
- if (msg->msg_control || msg->msg_controllen)
- return -EINVAL;
+ if (msg->msg_control || msg->msg_controllen) {
+ /* disallow ancillary data reqs unless cmsg is plain data */
+ if (!(sock->ops->flags & PROTO_CMSG_DATA_ONLY))
+ return -EINVAL;
+ }
return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
}
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 999eee1..6c86e2a 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -108,8 +108,10 @@ proc_dodebug(struct ctl_table *table, int write, void *buffer, size_t *lenp,
left -= (s - tmpbuf);
if (left && !isspace(*s))
return -EINVAL;
- while (left && isspace(*s))
- left--, s++;
+ while (left && isspace(*s)) {
+ left--;
+ s++;
+ }
} else
left = 0;
*(unsigned int *) table->data = value;
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 4f6dc74..37d8695 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -109,6 +109,11 @@ static void __net_exit tipc_exit_net(struct net *net)
{
tipc_detach_loopback(net);
tipc_net_stop(net);
+
+ /* Make sure the tipc_net_finalize_work stopped
+ * before releasing the resources.
+ */
+ flush_scheduled_work();
tipc_bcast_stop(net);
tipc_nametbl_stop(net);
tipc_sk_rht_destroy(net);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index b736255..a2989f2 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -216,11 +216,6 @@ enum {
#define TIPC_BC_RETR_LIM (jiffies + msecs_to_jiffies(10))
#define TIPC_UC_RETR_TIME (jiffies + msecs_to_jiffies(1))
-/*
- * Interval between NACKs when packets arrive out of order
- */
-#define TIPC_NACK_INTV (TIPC_MIN_LINK_WIN * 2)
-
/* Link FSM states:
*/
enum {
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index ebd280e..dd93e8e 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -52,7 +52,6 @@
#define NAGLE_START_MAX 1024
#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
#define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */
-#define TIPC_FWD_MSG 1
#define TIPC_MAX_PORT 0xffffffff
#define TIPC_MIN_PORT 1
#define TIPC_ACK_RATE 4 /* ACK at 1/4 of of rcv window size */
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index bbc52b0..002b085 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -330,12 +330,13 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
tls_ctx_free(sk, ctx);
}
-static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
- int __user *optlen)
+static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
+ int __user *optlen, int tx)
{
int rc = 0;
struct tls_context *ctx = tls_get_ctx(sk);
struct tls_crypto_info *crypto_info;
+ struct cipher_context *cctx;
int len;
if (get_user(len, optlen))
@@ -352,7 +353,13 @@ static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
}
/* get user crypto info */
- crypto_info = &ctx->crypto_send.info;
+ if (tx) {
+ crypto_info = &ctx->crypto_send.info;
+ cctx = &ctx->tx;
+ } else {
+ crypto_info = &ctx->crypto_recv.info;
+ cctx = &ctx->rx;
+ }
if (!TLS_CRYPTO_INFO_READY(crypto_info)) {
rc = -EBUSY;
@@ -379,9 +386,9 @@ static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
}
lock_sock(sk);
memcpy(crypto_info_aes_gcm_128->iv,
- ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+ cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
TLS_CIPHER_AES_GCM_128_IV_SIZE);
- memcpy(crypto_info_aes_gcm_128->rec_seq, ctx->tx.rec_seq,
+ memcpy(crypto_info_aes_gcm_128->rec_seq, cctx->rec_seq,
TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
release_sock(sk);
if (copy_to_user(optval,
@@ -403,9 +410,9 @@ static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
}
lock_sock(sk);
memcpy(crypto_info_aes_gcm_256->iv,
- ctx->tx.iv + TLS_CIPHER_AES_GCM_256_SALT_SIZE,
+ cctx->iv + TLS_CIPHER_AES_GCM_256_SALT_SIZE,
TLS_CIPHER_AES_GCM_256_IV_SIZE);
- memcpy(crypto_info_aes_gcm_256->rec_seq, ctx->tx.rec_seq,
+ memcpy(crypto_info_aes_gcm_256->rec_seq, cctx->rec_seq,
TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE);
release_sock(sk);
if (copy_to_user(optval,
@@ -429,7 +436,9 @@ static int do_tls_getsockopt(struct sock *sk, int optname,
switch (optname) {
case TLS_TX:
- rc = do_tls_getsockopt_tx(sk, optval, optlen);
+ case TLS_RX:
+ rc = do_tls_getsockopt_conf(sk, optval, optlen,
+ optname == TLS_TX);
break;
default:
rc = -ENOPROTOOPT;
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 67b0389..2ebc2a6 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -466,8 +466,8 @@ extern struct work_struct cfg80211_disconnect_work;
*
* Checks if chandef is usable and we can/need start CAC on such channel.
*
- * Return: Return true if all channels available and at least
- * one channel require CAC (NL80211_DFS_USABLE)
+ * Return: true if all channels available and at least
+ * one channel requires CAC (NL80211_DFS_USABLE)
*/
bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy,
const struct cfg80211_chan_def *chandef);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2c9e9a2..52a35e7 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -336,6 +336,13 @@ static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = {
.len = NL80211_MAX_SUPP_HT_RATES },
[NL80211_TXRATE_VHT] = NLA_POLICY_EXACT_LEN_WARN(sizeof(struct nl80211_txrate_vht)),
[NL80211_TXRATE_GI] = { .type = NLA_U8 },
+ [NL80211_TXRATE_HE] = NLA_POLICY_EXACT_LEN(sizeof(struct nl80211_txrate_he)),
+ [NL80211_TXRATE_HE_GI] = NLA_POLICY_RANGE(NLA_U8,
+ NL80211_RATE_INFO_HE_GI_0_8,
+ NL80211_RATE_INFO_HE_GI_3_2),
+ [NL80211_TXRATE_HE_LTF] = NLA_POLICY_RANGE(NLA_U8,
+ NL80211_RATE_INFO_HE_1XLTF,
+ NL80211_RATE_INFO_HE_4XLTF),
};
static const struct nla_policy
@@ -539,7 +546,10 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_BG_SCAN_PERIOD] = { .type = NLA_U16 },
[NL80211_ATTR_WDEV] = { .type = NLA_U64 },
[NL80211_ATTR_USER_REG_HINT_TYPE] = { .type = NLA_U32 },
- [NL80211_ATTR_AUTH_DATA] = { .type = NLA_BINARY, },
+
+ /* need to include at least Auth Transaction and Status Code */
+ [NL80211_ATTR_AUTH_DATA] = NLA_POLICY_MIN_LEN(4),
+
[NL80211_ATTR_VHT_CAPABILITY] = NLA_POLICY_EXACT_LEN_WARN(NL80211_VHT_CAPABILITY_LEN),
[NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 },
[NL80211_ATTR_P2P_CTWINDOW] = NLA_POLICY_MAX(NLA_U8, 127),
@@ -561,23 +571,30 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY,
.len = IEEE80211_MAX_DATA_LEN },
[NL80211_ATTR_CRIT_PROT_ID] = { .type = NLA_U16 },
- [NL80211_ATTR_MAX_CRIT_PROT_DURATION] = { .type = NLA_U16 },
+ [NL80211_ATTR_MAX_CRIT_PROT_DURATION] =
+ NLA_POLICY_MAX(NLA_U16, NL80211_CRIT_PROTO_MAX_DURATION),
[NL80211_ATTR_PEER_AID] =
NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID),
[NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 },
[NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG },
[NL80211_ATTR_CSA_IES] = { .type = NLA_NESTED },
- [NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_BINARY },
- [NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_BINARY },
- [NL80211_ATTR_STA_SUPPORTED_CHANNELS] = { .type = NLA_BINARY },
- [NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES] = { .type = NLA_BINARY },
+ [NL80211_ATTR_CNTDWN_OFFS_BEACON] = { .type = NLA_BINARY },
+ [NL80211_ATTR_CNTDWN_OFFS_PRESP] = { .type = NLA_BINARY },
+ [NL80211_ATTR_STA_SUPPORTED_CHANNELS] = NLA_POLICY_MIN_LEN(2),
+ /*
+ * The value of the Length field of the Supported Operating
+ * Classes element is between 2 and 253.
+ */
+ [NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES] =
+ NLA_POLICY_RANGE(NLA_BINARY, 2, 253),
[NL80211_ATTR_HANDLE_DFS] = { .type = NLA_FLAG },
[NL80211_ATTR_OPMODE_NOTIF] = { .type = NLA_U8 },
[NL80211_ATTR_VENDOR_ID] = { .type = NLA_U32 },
[NL80211_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 },
[NL80211_ATTR_VENDOR_DATA] = { .type = NLA_BINARY },
- [NL80211_ATTR_QOS_MAP] = { .type = NLA_BINARY,
- .len = IEEE80211_QOS_MAP_LEN_MAX },
+ [NL80211_ATTR_QOS_MAP] = NLA_POLICY_RANGE(NLA_BINARY,
+ IEEE80211_QOS_MAP_LEN_MIN,
+ IEEE80211_QOS_MAP_LEN_MAX),
[NL80211_ATTR_MAC_HINT] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
[NL80211_ATTR_WIPHY_FREQ_HINT] = { .type = NLA_U32 },
[NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 },
@@ -625,15 +642,17 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
.len = FILS_ERP_MAX_RRK_LEN },
[NL80211_ATTR_FILS_CACHE_ID] = NLA_POLICY_EXACT_LEN_WARN(2),
[NL80211_ATTR_PMK] = { .type = NLA_BINARY, .len = PMK_MAX_LEN },
+ [NL80211_ATTR_PMKR0_NAME] = NLA_POLICY_EXACT_LEN(WLAN_PMK_NAME_LEN),
[NL80211_ATTR_SCHED_SCAN_MULTI] = { .type = NLA_FLAG },
[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT] = { .type = NLA_FLAG },
[NL80211_ATTR_TXQ_LIMIT] = { .type = NLA_U32 },
[NL80211_ATTR_TXQ_MEMORY_LIMIT] = { .type = NLA_U32 },
[NL80211_ATTR_TXQ_QUANTUM] = { .type = NLA_U32 },
- [NL80211_ATTR_HE_CAPABILITY] = { .type = NLA_BINARY,
- .len = NL80211_HE_MAX_CAPABILITY_LEN },
-
+ [NL80211_ATTR_HE_CAPABILITY] =
+ NLA_POLICY_RANGE(NLA_BINARY,
+ NL80211_HE_MIN_CAPABILITY_LEN,
+ NL80211_HE_MAX_CAPABILITY_LEN),
[NL80211_ATTR_FTM_RESPONDER] =
NLA_POLICY_NESTED(nl80211_ftm_responder_policy),
[NL80211_ATTR_TIMEOUT] = NLA_POLICY_MIN(NLA_U32, 1),
@@ -654,10 +673,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_RECEIVE_MULTICAST] = { .type = NLA_FLAG },
[NL80211_ATTR_WIPHY_FREQ_OFFSET] = NLA_POLICY_RANGE(NLA_U32, 0, 999),
[NL80211_ATTR_SCAN_FREQ_KHZ] = { .type = NLA_NESTED },
- [NL80211_ATTR_HE_6GHZ_CAPABILITY] = {
- .type = NLA_EXACT_LEN,
- .len = sizeof(struct ieee80211_he_6ghz_capa),
- },
+ [NL80211_ATTR_HE_6GHZ_CAPABILITY] =
+ NLA_POLICY_EXACT_LEN(sizeof(struct ieee80211_he_6ghz_capa)),
};
/* policy for the key attributes */
@@ -703,7 +720,7 @@ nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = {
[NL80211_WOWLAN_TCP_DST_MAC] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
[NL80211_WOWLAN_TCP_SRC_PORT] = { .type = NLA_U16 },
[NL80211_WOWLAN_TCP_DST_PORT] = { .type = NLA_U16 },
- [NL80211_WOWLAN_TCP_DATA_PAYLOAD] = { .type = NLA_MIN_LEN, .len = 1 },
+ [NL80211_WOWLAN_TCP_DATA_PAYLOAD] = NLA_POLICY_MIN_LEN(1),
[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ] = {
.len = sizeof(struct nl80211_wowlan_tcp_data_seq)
},
@@ -711,8 +728,8 @@ nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = {
.len = sizeof(struct nl80211_wowlan_tcp_data_token)
},
[NL80211_WOWLAN_TCP_DATA_INTERVAL] = { .type = NLA_U32 },
- [NL80211_WOWLAN_TCP_WAKE_PAYLOAD] = { .type = NLA_MIN_LEN, .len = 1 },
- [NL80211_WOWLAN_TCP_WAKE_MASK] = { .type = NLA_MIN_LEN, .len = 1 },
+ [NL80211_WOWLAN_TCP_WAKE_PAYLOAD] = NLA_POLICY_MIN_LEN(1),
+ [NL80211_WOWLAN_TCP_WAKE_MASK] = NLA_POLICY_MIN_LEN(1),
};
#endif /* CONFIG_PM */
@@ -738,7 +755,7 @@ nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = {
.type = NLA_BINARY,
.len = NL80211_KCK_EXT_LEN
},
- [NL80211_REKEY_DATA_REPLAY_CTR] = NLA_POLICY_EXACT_LEN_WARN(NL80211_REPLAY_CTR_LEN),
+ [NL80211_REKEY_DATA_REPLAY_CTR] = NLA_POLICY_EXACT_LEN(NL80211_REPLAY_CTR_LEN),
[NL80211_REKEY_DATA_AKM] = { .type = NLA_U32 },
};
@@ -778,7 +795,8 @@ nl80211_bss_select_policy[NL80211_BSS_SELECT_ATTR_MAX + 1] = {
/* policy for NAN function attributes */
static const struct nla_policy
nl80211_nan_func_policy[NL80211_NAN_FUNC_ATTR_MAX + 1] = {
- [NL80211_NAN_FUNC_TYPE] = { .type = NLA_U8 },
+ [NL80211_NAN_FUNC_TYPE] =
+ NLA_POLICY_MAX(NLA_U8, NL80211_NAN_FUNC_MAX_TYPE),
[NL80211_NAN_FUNC_SERVICE_ID] = {
.len = NL80211_NAN_FUNC_SERVICE_ID_LEN },
[NL80211_NAN_FUNC_PUBLISH_TYPE] = { .type = NLA_U8 },
@@ -4419,21 +4437,106 @@ static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband,
return true;
}
+static u16 he_mcs_map_to_mcs_mask(u8 he_mcs_map)
+{
+ switch (he_mcs_map) {
+ case IEEE80211_HE_MCS_NOT_SUPPORTED:
+ return 0;
+ case IEEE80211_HE_MCS_SUPPORT_0_7:
+ return 0x00FF;
+ case IEEE80211_HE_MCS_SUPPORT_0_9:
+ return 0x03FF;
+ case IEEE80211_HE_MCS_SUPPORT_0_11:
+ return 0xFFF;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static void he_build_mcs_mask(u16 he_mcs_map,
+ u16 he_mcs_mask[NL80211_HE_NSS_MAX])
+{
+ u8 nss;
+
+ for (nss = 0; nss < NL80211_HE_NSS_MAX; nss++) {
+ he_mcs_mask[nss] = he_mcs_map_to_mcs_mask(he_mcs_map & 0x03);
+ he_mcs_map >>= 2;
+ }
+}
+
+static u16 he_get_txmcsmap(struct genl_info *info,
+ const struct ieee80211_sta_he_cap *he_cap)
+{
+ struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ __le16 tx_mcs;
+
+ switch (wdev->chandef.width) {
+ case NL80211_CHAN_WIDTH_80P80:
+ tx_mcs = he_cap->he_mcs_nss_supp.tx_mcs_80p80;
+ break;
+ case NL80211_CHAN_WIDTH_160:
+ tx_mcs = he_cap->he_mcs_nss_supp.tx_mcs_160;
+ break;
+ default:
+ tx_mcs = he_cap->he_mcs_nss_supp.tx_mcs_80;
+ break;
+ }
+ return le16_to_cpu(tx_mcs);
+}
+
+static bool he_set_mcs_mask(struct genl_info *info,
+ struct wireless_dev *wdev,
+ struct ieee80211_supported_band *sband,
+ struct nl80211_txrate_he *txrate,
+ u16 mcs[NL80211_HE_NSS_MAX])
+{
+ const struct ieee80211_sta_he_cap *he_cap;
+ u16 tx_mcs_mask[NL80211_HE_NSS_MAX] = {};
+ u16 tx_mcs_map = 0;
+ u8 i;
+
+ he_cap = ieee80211_get_he_iftype_cap(sband, wdev->iftype);
+ if (!he_cap)
+ return false;
+
+ memset(mcs, 0, sizeof(u16) * NL80211_HE_NSS_MAX);
+
+ tx_mcs_map = he_get_txmcsmap(info, he_cap);
+
+ /* Build he_mcs_mask from HE capabilities */
+ he_build_mcs_mask(tx_mcs_map, tx_mcs_mask);
+
+ for (i = 0; i < NL80211_HE_NSS_MAX; i++) {
+ if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i])
+ mcs[i] = txrate->mcs[i];
+ else
+ return false;
+ }
+
+ return true;
+}
+
static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
struct nlattr *attrs[],
enum nl80211_attrs attr,
- struct cfg80211_bitrate_mask *mask)
+ struct cfg80211_bitrate_mask *mask,
+ struct net_device *dev)
{
struct nlattr *tb[NL80211_TXRATE_MAX + 1];
struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
int rem, i;
struct nlattr *tx_rates;
struct ieee80211_supported_band *sband;
- u16 vht_tx_mcs_map;
+ u16 vht_tx_mcs_map, he_tx_mcs_map;
memset(mask, 0, sizeof(*mask));
/* Default to all rates enabled */
for (i = 0; i < NUM_NL80211_BANDS; i++) {
+ const struct ieee80211_sta_he_cap *he_cap;
+
sband = rdev->wiphy.bands[i];
if (!sband)
@@ -4449,6 +4552,16 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map);
vht_build_mcs_mask(vht_tx_mcs_map, mask->control[i].vht_mcs);
+
+ he_cap = ieee80211_get_he_iftype_cap(sband, wdev->iftype);
+ if (!he_cap)
+ continue;
+
+ he_tx_mcs_map = he_get_txmcsmap(info, he_cap);
+ he_build_mcs_mask(he_tx_mcs_map, mask->control[i].he_mcs);
+
+ mask->control[i].he_gi = 0xFF;
+ mask->control[i].he_ltf = 0xFF;
}
/* if no rates are given set it back to the defaults */
@@ -4504,13 +4617,25 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
if (mask->control[band].gi > NL80211_TXRATE_FORCE_LGI)
return -EINVAL;
}
+ if (tb[NL80211_TXRATE_HE] &&
+ !he_set_mcs_mask(info, wdev, sband,
+ nla_data(tb[NL80211_TXRATE_HE]),
+ mask->control[band].he_mcs))
+ return -EINVAL;
+ if (tb[NL80211_TXRATE_HE_GI])
+ mask->control[band].he_gi =
+ nla_get_u8(tb[NL80211_TXRATE_HE_GI]);
+ if (tb[NL80211_TXRATE_HE_LTF])
+ mask->control[band].he_ltf =
+ nla_get_u8(tb[NL80211_TXRATE_HE_LTF]);
if (mask->control[band].legacy == 0) {
- /* don't allow empty legacy rates if HT or VHT
+ /* don't allow empty legacy rates if HT, VHT or HE
* are not even supported.
*/
if (!(rdev->wiphy.bands[band]->ht_cap.ht_supported ||
- rdev->wiphy.bands[band]->vht_cap.vht_supported))
+ rdev->wiphy.bands[band]->vht_cap.vht_supported ||
+ ieee80211_get_he_iftype_cap(sband, wdev->iftype)))
return -EINVAL;
for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++)
@@ -4521,6 +4646,10 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
if (mask->control[band].vht_mcs[i])
goto out;
+ for (i = 0; i < NL80211_HE_NSS_MAX; i++)
+ if (mask->control[band].he_mcs[i])
+ goto out;
+
/* legacy and mcs rates may not be both empty */
return -EINVAL;
}
@@ -4831,8 +4960,9 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev,
return false;
return true;
case NL80211_CMD_START_AP:
- /* SAE not supported yet */
- if (auth_type == NL80211_AUTHTYPE_SAE)
+ if (!wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_SAE_OFFLOAD_AP) &&
+ auth_type == NL80211_AUTHTYPE_SAE)
return false;
/* FILS not supported yet */
if (auth_type == NL80211_AUTHTYPE_FILS_SK ||
@@ -4896,8 +5026,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
params.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
params.ssid_len =
nla_len(info->attrs[NL80211_ATTR_SSID]);
- if (params.ssid_len == 0 ||
- params.ssid_len > IEEE80211_MAX_SSID_LEN)
+ if (params.ssid_len == 0)
return -EINVAL;
}
@@ -4966,7 +5095,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_TX_RATES]) {
err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
NL80211_ATTR_TX_RATES,
- ¶ms.beacon_rate);
+ ¶ms.beacon_rate,
+ dev);
if (err)
return err;
@@ -5837,11 +5967,9 @@ static int nl80211_parse_sta_channel_info(struct genl_info *info,
nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_CHANNELS]);
/*
* Need to include at least one (first channel, number of
- * channels) tuple for each subband, and must have proper
- * tuples for the rest of the data as well.
+ * channels) tuple for each subband (checked in policy),
+ * and must have proper tuples for the rest of the data as well.
*/
- if (params->supported_channels_len < 2)
- return -EINVAL;
if (params->supported_channels_len % 2)
return -EINVAL;
}
@@ -5851,13 +5979,6 @@ static int nl80211_parse_sta_channel_info(struct genl_info *info,
nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES]);
params->supported_oper_classes_len =
nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES]);
- /*
- * The value of the Length field of the Supported Operating
- * Classes element is between 2 and 253.
- */
- if (params->supported_oper_classes_len < 2 ||
- params->supported_oper_classes_len > 253)
- return -EINVAL;
}
return 0;
}
@@ -5880,9 +6001,6 @@ static int nl80211_set_station_tdls(struct genl_info *info,
nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
params->he_capa_len =
nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
-
- if (params->he_capa_len < NL80211_HE_MIN_CAPABILITY_LEN)
- return -EINVAL;
}
err = nl80211_parse_sta_channel_info(info, params);
@@ -6141,10 +6259,6 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
params.he_capa_len =
nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
-
- /* max len is validated in nla policy */
- if (params.he_capa_len < NL80211_HE_MIN_CAPABILITY_LEN)
- return -EINVAL;
}
if (info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY])
@@ -8416,23 +8530,14 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
}
if (ssid) {
- if (nla_len(ssid) > IEEE80211_MAX_SSID_LEN) {
- err = -EINVAL;
- goto out_free;
- }
memcpy(request->match_sets[i].ssid.ssid,
nla_data(ssid), nla_len(ssid));
request->match_sets[i].ssid.ssid_len =
nla_len(ssid);
}
- if (bssid) {
- if (nla_len(bssid) != ETH_ALEN) {
- err = -EINVAL;
- goto out_free;
- }
+ if (bssid)
memcpy(request->match_sets[i].bssid,
nla_data(bssid), ETH_ALEN);
- }
/* special attribute - old implementation w/a */
request->match_sets[i].rssi_thold = default_match_rssi;
@@ -8787,10 +8892,10 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
- if (!csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON])
+ if (!csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON])
return -EINVAL;
- len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]);
+ len = nla_len(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]);
if (!len || (len % sizeof(u16)))
return -EINVAL;
@@ -8801,7 +8906,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
params.counter_offsets_beacon =
- nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]);
+ nla_data(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]);
/* sanity checks - counters should fit and be the same */
for (i = 0; i < params.n_counter_offsets_beacon; i++) {
@@ -8814,8 +8919,8 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
- if (csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]) {
- len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]);
+ if (csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]) {
+ len = nla_len(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]);
if (!len || (len % sizeof(u16)))
return -EINVAL;
@@ -8826,7 +8931,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
params.counter_offsets_presp =
- nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]);
+ nla_data(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]);
/* sanity checks - counters should fit and be the same */
for (i = 0; i < params.n_counter_offsets_presp; i++) {
@@ -9309,9 +9414,6 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
auth_data = nla_data(info->attrs[NL80211_ATTR_AUTH_DATA]);
auth_data_len = nla_len(info->attrs[NL80211_ATTR_AUTH_DATA]);
- /* need to include at least Auth Transaction and Status Code */
- if (auth_data_len < 4)
- return -EINVAL;
}
local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
@@ -9451,7 +9553,9 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
if (info->attrs[NL80211_ATTR_SAE_PASSWORD]) {
if (!wiphy_ext_feature_isset(&rdev->wiphy,
- NL80211_EXT_FEATURE_SAE_OFFLOAD))
+ NL80211_EXT_FEATURE_SAE_OFFLOAD) &&
+ !wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_SAE_OFFLOAD_AP))
return -EINVAL;
settings->sae_pwd =
nla_data(info->attrs[NL80211_ATTR_SAE_PASSWORD]);
@@ -10798,7 +10902,8 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
return -EOPNOTSUPP;
err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
- NL80211_ATTR_TX_RATES, &mask);
+ NL80211_ATTR_TX_RATES, &mask,
+ dev);
if (err)
return err;
@@ -11406,7 +11511,8 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_TX_RATES]) {
err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
NL80211_ATTR_TX_RATES,
- &setup.beacon_rate);
+ &setup.beacon_rate,
+ dev);
if (err)
return err;
@@ -12358,8 +12464,6 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
if (!tb[NL80211_REKEY_DATA_REPLAY_CTR] || !tb[NL80211_REKEY_DATA_KEK] ||
!tb[NL80211_REKEY_DATA_KCK])
return -EINVAL;
- if (nla_len(tb[NL80211_REKEY_DATA_REPLAY_CTR]) != NL80211_REPLAY_CTR_LEN)
- return -ERANGE;
if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN &&
!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK &&
nla_len(tb[NL80211_REKEY_DATA_KEK]) == NL80211_KEK_EXT_LEN))
@@ -12684,8 +12788,7 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
func->cookie = cfg80211_assign_cookie(rdev);
- if (!tb[NL80211_NAN_FUNC_TYPE] ||
- nla_get_u8(tb[NL80211_NAN_FUNC_TYPE]) > NL80211_NAN_FUNC_MAX_TYPE) {
+ if (!tb[NL80211_NAN_FUNC_TYPE]) {
err = -EINVAL;
goto out;
}
@@ -13175,9 +13278,6 @@ static int nl80211_crit_protocol_start(struct sk_buff *skb,
duration =
nla_get_u16(info->attrs[NL80211_ATTR_MAX_CRIT_PROT_DURATION]);
- if (duration > NL80211_CRIT_PROTO_MAX_DURATION)
- return -ERANGE;
-
ret = rdev_crit_proto_start(rdev, wdev, proto, duration);
if (!ret)
rdev->crit_proto_nlportid = info->snd_portid;
@@ -13562,8 +13662,7 @@ static int nl80211_set_qos_map(struct sk_buff *skb,
pos = nla_data(info->attrs[NL80211_ATTR_QOS_MAP]);
len = nla_len(info->attrs[NL80211_ATTR_QOS_MAP]);
- if (len % 2 || len < IEEE80211_QOS_MAP_LEN_MIN ||
- len > IEEE80211_QOS_MAP_LEN_MAX)
+ if (len % 2)
return -EINVAL;
qos_map = kzalloc(sizeof(struct cfg80211_qos_map), GFP_KERNEL);
@@ -13831,17 +13930,9 @@ static int nl80211_set_pmk(struct sk_buff *skb, struct genl_info *info)
goto out;
}
- if (info->attrs[NL80211_ATTR_PMKR0_NAME]) {
- int r0_name_len = nla_len(info->attrs[NL80211_ATTR_PMKR0_NAME]);
-
- if (r0_name_len != WLAN_PMK_NAME_LEN) {
- ret = -EINVAL;
- goto out;
- }
-
+ if (info->attrs[NL80211_ATTR_PMKR0_NAME])
pmk_conf.pmk_r0_name =
nla_data(info->attrs[NL80211_ATTR_PMKR0_NAME]);
- }
ret = rdev_set_pmk(rdev, dev, &pmk_conf);
out:
@@ -13900,8 +13991,7 @@ static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_SSID]) {
params.ssid.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
- if (params.ssid.ssid_len == 0 ||
- params.ssid.ssid_len > IEEE80211_MAX_SSID_LEN)
+ if (params.ssid.ssid_len == 0)
return -EINVAL;
memcpy(params.ssid.ssid,
nla_data(info->attrs[NL80211_ATTR_SSID]),
@@ -14202,7 +14292,7 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev,
if (tid_conf->txrate_type != NL80211_TX_RATE_AUTOMATIC) {
attr = NL80211_TID_CONFIG_ATTR_TX_RATE;
err = nl80211_parse_tx_bitrate_mask(info, attrs, attr,
- &tid_conf->txrate_mask);
+ &tid_conf->txrate_mask, dev);
if (err)
return err;
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index d8a90d3..0ab7808 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1594,7 +1594,7 @@ freq_reg_info_regd(u32 center_freq,
/*
* We only need to know if one frequency rule was
- * was in center_freq's band, that's enough, so lets
+ * in center_freq's band, that's enough, so let's
* not overwrite it once found
*/
if (!band_rule_found)
@@ -1691,57 +1691,18 @@ static uint32_t reg_rule_to_chan_bw_flags(const struct ieee80211_regdomain *regd
return bw_flags;
}
-/*
- * Note that right now we assume the desired channel bandwidth
- * is always 20 MHz for each individual channel (HT40 uses 20 MHz
- * per channel, the primary and the extension channel).
- */
-static void handle_channel(struct wiphy *wiphy,
- enum nl80211_reg_initiator initiator,
- struct ieee80211_channel *chan)
+static void handle_channel_single_rule(struct wiphy *wiphy,
+ enum nl80211_reg_initiator initiator,
+ struct ieee80211_channel *chan,
+ u32 flags,
+ struct regulatory_request *lr,
+ struct wiphy *request_wiphy,
+ const struct ieee80211_reg_rule *reg_rule)
{
- u32 flags, bw_flags = 0;
- const struct ieee80211_reg_rule *reg_rule = NULL;
+ u32 bw_flags = 0;
const struct ieee80211_power_rule *power_rule = NULL;
- struct wiphy *request_wiphy = NULL;
- struct regulatory_request *lr = get_last_request();
const struct ieee80211_regdomain *regd;
- request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx);
-
- flags = chan->orig_flags;
-
- reg_rule = freq_reg_info(wiphy, ieee80211_channel_to_khz(chan));
- if (IS_ERR(reg_rule)) {
- /*
- * We will disable all channels that do not match our
- * received regulatory rule unless the hint is coming
- * from a Country IE and the Country IE had no information
- * about a band. The IEEE 802.11 spec allows for an AP
- * to send only a subset of the regulatory rules allowed,
- * so an AP in the US that only supports 2.4 GHz may only send
- * a country IE with information for the 2.4 GHz band
- * while 5 GHz is still supported.
- */
- if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE &&
- PTR_ERR(reg_rule) == -ERANGE)
- return;
-
- if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER &&
- request_wiphy && request_wiphy == wiphy &&
- request_wiphy->regulatory_flags & REGULATORY_STRICT_REG) {
- pr_debug("Disabling freq %d.%03d MHz for good\n",
- chan->center_freq, chan->freq_offset);
- chan->orig_flags |= IEEE80211_CHAN_DISABLED;
- chan->flags = chan->orig_flags;
- } else {
- pr_debug("Disabling freq %d.%03d MHz\n",
- chan->center_freq, chan->freq_offset);
- chan->flags |= IEEE80211_CHAN_DISABLED;
- }
- return;
- }
-
regd = reg_get_regdomain(wiphy);
power_rule = ®_rule->power_rule;
@@ -1803,6 +1764,204 @@ static void handle_channel(struct wiphy *wiphy,
chan->max_power = chan->max_reg_power;
}
+static void handle_channel_adjacent_rules(struct wiphy *wiphy,
+ enum nl80211_reg_initiator initiator,
+ struct ieee80211_channel *chan,
+ u32 flags,
+ struct regulatory_request *lr,
+ struct wiphy *request_wiphy,
+ const struct ieee80211_reg_rule *rrule1,
+ const struct ieee80211_reg_rule *rrule2,
+ struct ieee80211_freq_range *comb_range)
+{
+ u32 bw_flags1 = 0;
+ u32 bw_flags2 = 0;
+ const struct ieee80211_power_rule *power_rule1 = NULL;
+ const struct ieee80211_power_rule *power_rule2 = NULL;
+ const struct ieee80211_regdomain *regd;
+
+ regd = reg_get_regdomain(wiphy);
+
+ power_rule1 = &rrule1->power_rule;
+ power_rule2 = &rrule2->power_rule;
+ bw_flags1 = reg_rule_to_chan_bw_flags(regd, rrule1, chan);
+ bw_flags2 = reg_rule_to_chan_bw_flags(regd, rrule2, chan);
+
+ if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER &&
+ request_wiphy && request_wiphy == wiphy &&
+ request_wiphy->regulatory_flags & REGULATORY_STRICT_REG) {
+ /* This guarantees the driver's requested regulatory domain
+ * will always be used as a base for further regulatory
+ * settings
+ */
+ chan->flags =
+ map_regdom_flags(rrule1->flags) |
+ map_regdom_flags(rrule2->flags) |
+ bw_flags1 |
+ bw_flags2;
+ chan->orig_flags = chan->flags;
+ chan->max_antenna_gain =
+ min_t(int, MBI_TO_DBI(power_rule1->max_antenna_gain),
+ MBI_TO_DBI(power_rule2->max_antenna_gain));
+ chan->orig_mag = chan->max_antenna_gain;
+ chan->max_reg_power =
+ min_t(int, MBM_TO_DBM(power_rule1->max_eirp),
+ MBM_TO_DBM(power_rule2->max_eirp));
+ chan->max_power = chan->max_reg_power;
+ chan->orig_mpwr = chan->max_reg_power;
+
+ if (chan->flags & IEEE80211_CHAN_RADAR) {
+ chan->dfs_cac_ms = IEEE80211_DFS_MIN_CAC_TIME_MS;
+ if (rrule1->dfs_cac_ms || rrule2->dfs_cac_ms)
+ chan->dfs_cac_ms = max_t(unsigned int,
+ rrule1->dfs_cac_ms,
+ rrule2->dfs_cac_ms);
+ }
+
+ return;
+ }
+
+ chan->dfs_state = NL80211_DFS_USABLE;
+ chan->dfs_state_entered = jiffies;
+
+ chan->beacon_found = false;
+ chan->flags = flags | bw_flags1 | bw_flags2 |
+ map_regdom_flags(rrule1->flags) |
+ map_regdom_flags(rrule2->flags);
+
+ /* reg_rule_to_chan_bw_flags may forbids 10 and forbids 20 MHz
+ * (otherwise no adj. rule case), recheck therefore
+ */
+ if (cfg80211_does_bw_fit_range(comb_range,
+ ieee80211_channel_to_khz(chan),
+ MHZ_TO_KHZ(10)))
+ chan->flags &= ~IEEE80211_CHAN_NO_10MHZ;
+ if (cfg80211_does_bw_fit_range(comb_range,
+ ieee80211_channel_to_khz(chan),
+ MHZ_TO_KHZ(20)))
+ chan->flags &= ~IEEE80211_CHAN_NO_20MHZ;
+
+ chan->max_antenna_gain =
+ min_t(int, chan->orig_mag,
+ min_t(int,
+ MBI_TO_DBI(power_rule1->max_antenna_gain),
+ MBI_TO_DBI(power_rule2->max_antenna_gain)));
+ chan->max_reg_power = min_t(int,
+ MBM_TO_DBM(power_rule1->max_eirp),
+ MBM_TO_DBM(power_rule2->max_eirp));
+
+ if (chan->flags & IEEE80211_CHAN_RADAR) {
+ if (rrule1->dfs_cac_ms || rrule2->dfs_cac_ms)
+ chan->dfs_cac_ms = max_t(unsigned int,
+ rrule1->dfs_cac_ms,
+ rrule2->dfs_cac_ms);
+ else
+ chan->dfs_cac_ms = IEEE80211_DFS_MIN_CAC_TIME_MS;
+ }
+
+ if (chan->orig_mpwr) {
+ /* Devices that use REGULATORY_COUNTRY_IE_FOLLOW_POWER
+ * will always follow the passed country IE power settings.
+ */
+ if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE &&
+ wiphy->regulatory_flags & REGULATORY_COUNTRY_IE_FOLLOW_POWER)
+ chan->max_power = chan->max_reg_power;
+ else
+ chan->max_power = min(chan->orig_mpwr,
+ chan->max_reg_power);
+ } else {
+ chan->max_power = chan->max_reg_power;
+ }
+}
+
+/* Note that right now we assume the desired channel bandwidth
+ * is always 20 MHz for each individual channel (HT40 uses 20 MHz
+ * per channel, the primary and the extension channel).
+ */
+static void handle_channel(struct wiphy *wiphy,
+ enum nl80211_reg_initiator initiator,
+ struct ieee80211_channel *chan)
+{
+ const u32 orig_chan_freq = ieee80211_channel_to_khz(chan);
+ struct regulatory_request *lr = get_last_request();
+ struct wiphy *request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx);
+ const struct ieee80211_reg_rule *rrule = NULL;
+ const struct ieee80211_reg_rule *rrule1 = NULL;
+ const struct ieee80211_reg_rule *rrule2 = NULL;
+
+ u32 flags = chan->orig_flags;
+
+ rrule = freq_reg_info(wiphy, orig_chan_freq);
+ if (IS_ERR(rrule)) {
+ /* check for adjacent match, therefore get rules for
+ * chan - 20 MHz and chan + 20 MHz and test
+ * if reg rules are adjacent
+ */
+ rrule1 = freq_reg_info(wiphy,
+ orig_chan_freq - MHZ_TO_KHZ(20));
+ rrule2 = freq_reg_info(wiphy,
+ orig_chan_freq + MHZ_TO_KHZ(20));
+ if (!IS_ERR(rrule1) && !IS_ERR(rrule2)) {
+ struct ieee80211_freq_range comb_range;
+
+ if (rrule1->freq_range.end_freq_khz !=
+ rrule2->freq_range.start_freq_khz)
+ goto disable_chan;
+
+ comb_range.start_freq_khz =
+ rrule1->freq_range.start_freq_khz;
+ comb_range.end_freq_khz =
+ rrule2->freq_range.end_freq_khz;
+ comb_range.max_bandwidth_khz =
+ min_t(u32,
+ rrule1->freq_range.max_bandwidth_khz,
+ rrule2->freq_range.max_bandwidth_khz);
+
+ if (!cfg80211_does_bw_fit_range(&comb_range,
+ orig_chan_freq,
+ MHZ_TO_KHZ(20)))
+ goto disable_chan;
+
+ handle_channel_adjacent_rules(wiphy, initiator, chan,
+ flags, lr, request_wiphy,
+ rrule1, rrule2,
+ &comb_range);
+ return;
+ }
+
+disable_chan:
+ /* We will disable all channels that do not match our
+ * received regulatory rule unless the hint is coming
+ * from a Country IE and the Country IE had no information
+ * about a band. The IEEE 802.11 spec allows for an AP
+ * to send only a subset of the regulatory rules allowed,
+ * so an AP in the US that only supports 2.4 GHz may only send
+ * a country IE with information for the 2.4 GHz band
+ * while 5 GHz is still supported.
+ */
+ if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE &&
+ PTR_ERR(rrule) == -ERANGE)
+ return;
+
+ if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER &&
+ request_wiphy && request_wiphy == wiphy &&
+ request_wiphy->regulatory_flags & REGULATORY_STRICT_REG) {
+ pr_debug("Disabling freq %d.%03d MHz for good\n",
+ chan->center_freq, chan->freq_offset);
+ chan->orig_flags |= IEEE80211_CHAN_DISABLED;
+ chan->flags = chan->orig_flags;
+ } else {
+ pr_debug("Disabling freq %d.%03d MHz\n",
+ chan->center_freq, chan->freq_offset);
+ chan->flags |= IEEE80211_CHAN_DISABLED;
+ }
+ return;
+ }
+
+ handle_channel_single_rule(wiphy, initiator, chan, flags, lr,
+ request_wiphy, rrule);
+}
+
static void handle_band(struct wiphy *wiphy,
enum nl80211_reg_initiator initiator,
struct ieee80211_supported_band *sband)
@@ -3170,7 +3329,7 @@ static void restore_custom_reg_settings(struct wiphy *wiphy)
* - send a user regulatory hint if applicable
*
* Device drivers that send a regulatory hint for a specific country
- * keep their own regulatory domain on wiphy->regd so that does does
+ * keep their own regulatory domain on wiphy->regd so that does
* not need to be remembered.
*/
static void restore_regulatory_settings(bool reset_user, bool cached)
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 04f2d19..84fc8ab 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -55,7 +55,7 @@
*
* Also note that the hidden_beacon_bss pointer is only relevant
* if the driver uses something other than the IEs, e.g. private
- * data stored stored in the BSS struct, since the beacon IEs are
+ * data stored in the BSS struct, since the beacon IEs are
* also linked into the probe response struct.
*/
@@ -1488,7 +1488,7 @@ static const struct element
ielen - (mbssid_end - ie));
/*
- * If is is not the last subelement in current MBSSID IE or there isn't
+ * If it is not the last subelement in current MBSSID IE or there isn't
* a next MBSSID IE - profile is complete.
*/
if ((sub_elem->data + sub_elem->datalen < mbssid_end - 1) ||
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 079ce32..38df713 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -24,7 +24,7 @@
/*
* Software SME in cfg80211, using auth/assoc/deauth calls to the
- * driver. This is is for implementing nl80211's connect/disconnect
+ * driver. This is for implementing nl80211's connect/disconnect
* and wireless extensions (if configured.)
*/
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 4d2160c..78f2927 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -497,7 +497,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
/*
* We only need to store WEP keys, since they're the only keys that
- * can be be set before a connection is established and persist after
+ * can be set before a connection is established and persist after
* disconnecting.
*/
if (!addr && (params->cipher == WLAN_CIPHER_SUITE_WEP40 ||
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index e97db37..a7227b4 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -23,162 +23,6 @@
static DEFINE_IDA(umem_ida);
-void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
-{
- unsigned long flags;
-
- if (!xs->tx)
- return;
-
- spin_lock_irqsave(&umem->xsk_tx_list_lock, flags);
- list_add_rcu(&xs->list, &umem->xsk_tx_list);
- spin_unlock_irqrestore(&umem->xsk_tx_list_lock, flags);
-}
-
-void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
-{
- unsigned long flags;
-
- if (!xs->tx)
- return;
-
- spin_lock_irqsave(&umem->xsk_tx_list_lock, flags);
- list_del_rcu(&xs->list);
- spin_unlock_irqrestore(&umem->xsk_tx_list_lock, flags);
-}
-
-/* The umem is stored both in the _rx struct and the _tx struct as we do
- * not know if the device has more tx queues than rx, or the opposite.
- * This might also change during run time.
- */
-static int xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem,
- u16 queue_id)
-{
- if (queue_id >= max_t(unsigned int,
- dev->real_num_rx_queues,
- dev->real_num_tx_queues))
- return -EINVAL;
-
- if (queue_id < dev->real_num_rx_queues)
- dev->_rx[queue_id].umem = umem;
- if (queue_id < dev->real_num_tx_queues)
- dev->_tx[queue_id].umem = umem;
-
- return 0;
-}
-
-struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
- u16 queue_id)
-{
- if (queue_id < dev->real_num_rx_queues)
- return dev->_rx[queue_id].umem;
- if (queue_id < dev->real_num_tx_queues)
- return dev->_tx[queue_id].umem;
-
- return NULL;
-}
-EXPORT_SYMBOL(xdp_get_umem_from_qid);
-
-static void xdp_clear_umem_at_qid(struct net_device *dev, u16 queue_id)
-{
- if (queue_id < dev->real_num_rx_queues)
- dev->_rx[queue_id].umem = NULL;
- if (queue_id < dev->real_num_tx_queues)
- dev->_tx[queue_id].umem = NULL;
-}
-
-int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
- u16 queue_id, u16 flags)
-{
- bool force_zc, force_copy;
- struct netdev_bpf bpf;
- int err = 0;
-
- ASSERT_RTNL();
-
- force_zc = flags & XDP_ZEROCOPY;
- force_copy = flags & XDP_COPY;
-
- if (force_zc && force_copy)
- return -EINVAL;
-
- if (xdp_get_umem_from_qid(dev, queue_id))
- return -EBUSY;
-
- err = xdp_reg_umem_at_qid(dev, umem, queue_id);
- if (err)
- return err;
-
- umem->dev = dev;
- umem->queue_id = queue_id;
-
- if (flags & XDP_USE_NEED_WAKEUP) {
- umem->flags |= XDP_UMEM_USES_NEED_WAKEUP;
- /* Tx needs to be explicitly woken up the first time.
- * Also for supporting drivers that do not implement this
- * feature. They will always have to call sendto().
- */
- xsk_set_tx_need_wakeup(umem);
- }
-
- dev_hold(dev);
-
- if (force_copy)
- /* For copy-mode, we are done. */
- return 0;
-
- if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_wakeup) {
- err = -EOPNOTSUPP;
- goto err_unreg_umem;
- }
-
- bpf.command = XDP_SETUP_XSK_UMEM;
- bpf.xsk.umem = umem;
- bpf.xsk.queue_id = queue_id;
-
- err = dev->netdev_ops->ndo_bpf(dev, &bpf);
- if (err)
- goto err_unreg_umem;
-
- umem->zc = true;
- return 0;
-
-err_unreg_umem:
- if (!force_zc)
- err = 0; /* fallback to copy mode */
- if (err)
- xdp_clear_umem_at_qid(dev, queue_id);
- return err;
-}
-
-void xdp_umem_clear_dev(struct xdp_umem *umem)
-{
- struct netdev_bpf bpf;
- int err;
-
- ASSERT_RTNL();
-
- if (!umem->dev)
- return;
-
- if (umem->zc) {
- bpf.command = XDP_SETUP_XSK_UMEM;
- bpf.xsk.umem = NULL;
- bpf.xsk.queue_id = umem->queue_id;
-
- err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
-
- if (err)
- WARN(1, "failed to disable umem!\n");
- }
-
- xdp_clear_umem_at_qid(umem->dev, umem->queue_id);
-
- dev_put(umem->dev);
- umem->dev = NULL;
- umem->zc = false;
-}
-
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
@@ -195,38 +39,33 @@ static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
}
}
+static void xdp_umem_addr_unmap(struct xdp_umem *umem)
+{
+ vunmap(umem->addrs);
+ umem->addrs = NULL;
+}
+
+static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages,
+ u32 nr_pages)
+{
+ umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+ if (!umem->addrs)
+ return -ENOMEM;
+ return 0;
+}
+
static void xdp_umem_release(struct xdp_umem *umem)
{
- rtnl_lock();
- xdp_umem_clear_dev(umem);
- rtnl_unlock();
-
+ umem->zc = false;
ida_simple_remove(&umem_ida, umem->id);
- if (umem->fq) {
- xskq_destroy(umem->fq);
- umem->fq = NULL;
- }
-
- if (umem->cq) {
- xskq_destroy(umem->cq);
- umem->cq = NULL;
- }
-
- xp_destroy(umem->pool);
+ xdp_umem_addr_unmap(umem);
xdp_umem_unpin_pages(umem);
xdp_umem_unaccount_pages(umem);
kfree(umem);
}
-static void xdp_umem_release_deferred(struct work_struct *work)
-{
- struct xdp_umem *umem = container_of(work, struct xdp_umem, work);
-
- xdp_umem_release(umem);
-}
-
void xdp_get_umem(struct xdp_umem *umem)
{
refcount_inc(&umem->users);
@@ -237,10 +76,8 @@ void xdp_put_umem(struct xdp_umem *umem)
if (!umem)
return;
- if (refcount_dec_and_test(&umem->users)) {
- INIT_WORK(&umem->work, xdp_umem_release_deferred);
- schedule_work(&umem->work);
- }
+ if (refcount_dec_and_test(&umem->users))
+ xdp_umem_release(umem);
}
static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
@@ -319,8 +156,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
return -EINVAL;
}
- if (mr->flags & ~(XDP_UMEM_UNALIGNED_CHUNK_FLAG |
- XDP_UMEM_USES_NEED_WAKEUP))
+ if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG)
return -EINVAL;
if (!unaligned_chunks && !is_power_of_2(chunk_size))
@@ -356,13 +192,13 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->size = size;
umem->headroom = headroom;
umem->chunk_size = chunk_size;
+ umem->chunks = chunks;
umem->npgs = (u32)npgs;
umem->pgs = NULL;
umem->user = NULL;
umem->flags = mr->flags;
- INIT_LIST_HEAD(&umem->xsk_tx_list);
- spin_lock_init(&umem->xsk_tx_list_lock);
+ INIT_LIST_HEAD(&umem->xsk_dma_list);
refcount_set(&umem->users, 1);
err = xdp_umem_account_pages(umem);
@@ -373,15 +209,13 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (err)
goto out_account;
- umem->pool = xp_create(umem->pgs, umem->npgs, chunks, chunk_size,
- headroom, size, unaligned_chunks);
- if (!umem->pool) {
- err = -ENOMEM;
- goto out_pin;
- }
+ err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs);
+ if (err)
+ goto out_unpin;
+
return 0;
-out_pin:
+out_unpin:
xdp_umem_unpin_pages(umem);
out_account:
xdp_umem_unaccount_pages(umem);
@@ -413,8 +247,3 @@ struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
return umem;
}
-
-bool xdp_umem_validate_queues(struct xdp_umem *umem)
-{
- return umem->fq && umem->cq;
-}
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
index 32067fe..181fdda 100644
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -8,14 +8,8 @@
#include <net/xdp_sock_drv.h>
-int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
- u16 queue_id, u16 flags);
-void xdp_umem_clear_dev(struct xdp_umem *umem);
-bool xdp_umem_validate_queues(struct xdp_umem *umem);
void xdp_get_umem(struct xdp_umem *umem);
void xdp_put_umem(struct xdp_umem *umem);
-void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs);
-void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs);
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr);
#endif /* XDP_UMEM_H_ */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index c323162..5eb6662 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -36,68 +36,108 @@ static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
{
return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) &&
- READ_ONCE(xs->umem->fq);
+ (xs->pool->fq || READ_ONCE(xs->fq_tmp));
}
-void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
+void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
- if (umem->need_wakeup & XDP_WAKEUP_RX)
+ if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
return;
- umem->fq->ring->flags |= XDP_RING_NEED_WAKEUP;
- umem->need_wakeup |= XDP_WAKEUP_RX;
+ pool->fq->ring->flags |= XDP_RING_NEED_WAKEUP;
+ pool->cached_need_wakeup |= XDP_WAKEUP_RX;
}
EXPORT_SYMBOL(xsk_set_rx_need_wakeup);
-void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
+void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool)
{
struct xdp_sock *xs;
- if (umem->need_wakeup & XDP_WAKEUP_TX)
+ if (pool->cached_need_wakeup & XDP_WAKEUP_TX)
return;
rcu_read_lock();
- list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
+ list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
}
rcu_read_unlock();
- umem->need_wakeup |= XDP_WAKEUP_TX;
+ pool->cached_need_wakeup |= XDP_WAKEUP_TX;
}
EXPORT_SYMBOL(xsk_set_tx_need_wakeup);
-void xsk_clear_rx_need_wakeup(struct xdp_umem *umem)
+void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool)
{
- if (!(umem->need_wakeup & XDP_WAKEUP_RX))
+ if (!(pool->cached_need_wakeup & XDP_WAKEUP_RX))
return;
- umem->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP;
- umem->need_wakeup &= ~XDP_WAKEUP_RX;
+ pool->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP;
+ pool->cached_need_wakeup &= ~XDP_WAKEUP_RX;
}
EXPORT_SYMBOL(xsk_clear_rx_need_wakeup);
-void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
+void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool)
{
struct xdp_sock *xs;
- if (!(umem->need_wakeup & XDP_WAKEUP_TX))
+ if (!(pool->cached_need_wakeup & XDP_WAKEUP_TX))
return;
rcu_read_lock();
- list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
+ list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP;
}
rcu_read_unlock();
- umem->need_wakeup &= ~XDP_WAKEUP_TX;
+ pool->cached_need_wakeup &= ~XDP_WAKEUP_TX;
}
EXPORT_SYMBOL(xsk_clear_tx_need_wakeup);
-bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
+bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool)
{
- return umem->flags & XDP_UMEM_USES_NEED_WAKEUP;
+ return pool->uses_need_wakeup;
}
-EXPORT_SYMBOL(xsk_umem_uses_need_wakeup);
+EXPORT_SYMBOL(xsk_uses_need_wakeup);
+
+struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev,
+ u16 queue_id)
+{
+ if (queue_id < dev->real_num_rx_queues)
+ return dev->_rx[queue_id].pool;
+ if (queue_id < dev->real_num_tx_queues)
+ return dev->_tx[queue_id].pool;
+
+ return NULL;
+}
+EXPORT_SYMBOL(xsk_get_pool_from_qid);
+
+void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id)
+{
+ if (queue_id < dev->real_num_rx_queues)
+ dev->_rx[queue_id].pool = NULL;
+ if (queue_id < dev->real_num_tx_queues)
+ dev->_tx[queue_id].pool = NULL;
+}
+
+/* The buffer pool is stored both in the _rx struct and the _tx struct as we do
+ * not know if the device has more tx queues than rx, or the opposite.
+ * This might also change during run time.
+ */
+int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
+ u16 queue_id)
+{
+ if (queue_id >= max_t(unsigned int,
+ dev->real_num_rx_queues,
+ dev->real_num_tx_queues))
+ return -EINVAL;
+
+ if (queue_id < dev->real_num_rx_queues)
+ dev->_rx[queue_id].pool = pool;
+ if (queue_id < dev->real_num_tx_queues)
+ dev->_tx[queue_id].pool = pool;
+
+ return 0;
+}
void xp_release(struct xdp_buff_xsk *xskb)
{
@@ -155,12 +195,12 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len,
struct xdp_buff *xsk_xdp;
int err;
- if (len > xsk_umem_get_rx_frame_size(xs->umem)) {
+ if (len > xsk_pool_get_rx_frame_size(xs->pool)) {
xs->rx_dropped++;
return -ENOSPC;
}
- xsk_xdp = xsk_buff_alloc(xs->umem);
+ xsk_xdp = xsk_buff_alloc(xs->pool);
if (!xsk_xdp) {
xs->rx_dropped++;
return -ENOSPC;
@@ -208,7 +248,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp,
static void xsk_flush(struct xdp_sock *xs)
{
xskq_prod_submit(xs->rx);
- __xskq_cons_release(xs->umem->fq);
+ __xskq_cons_release(xs->pool->fq);
sock_def_readable(&xs->sk);
}
@@ -249,32 +289,32 @@ void __xsk_map_flush(void)
}
}
-void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
+void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries)
{
- xskq_prod_submit_n(umem->cq, nb_entries);
+ xskq_prod_submit_n(pool->cq, nb_entries);
}
-EXPORT_SYMBOL(xsk_umem_complete_tx);
+EXPORT_SYMBOL(xsk_tx_completed);
-void xsk_umem_consume_tx_done(struct xdp_umem *umem)
+void xsk_tx_release(struct xsk_buff_pool *pool)
{
struct xdp_sock *xs;
rcu_read_lock();
- list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
+ list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
__xskq_cons_release(xs->tx);
xs->sk.sk_write_space(&xs->sk);
}
rcu_read_unlock();
}
-EXPORT_SYMBOL(xsk_umem_consume_tx_done);
+EXPORT_SYMBOL(xsk_tx_release);
-bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
+bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
{
struct xdp_sock *xs;
rcu_read_lock();
- list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
- if (!xskq_cons_peek_desc(xs->tx, desc, umem)) {
+ list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
+ if (!xskq_cons_peek_desc(xs->tx, desc, pool)) {
xs->tx->queue_empty_descs++;
continue;
}
@@ -284,7 +324,7 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
- if (xskq_prod_reserve_addr(umem->cq, desc->addr))
+ if (xskq_prod_reserve_addr(pool->cq, desc->addr))
goto out;
xskq_cons_release(xs->tx);
@@ -296,7 +336,7 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
rcu_read_unlock();
return false;
}
-EXPORT_SYMBOL(xsk_umem_consume_tx);
+EXPORT_SYMBOL(xsk_tx_peek_desc);
static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
{
@@ -322,7 +362,7 @@ static void xsk_destruct_skb(struct sk_buff *skb)
unsigned long flags;
spin_lock_irqsave(&xs->tx_completion_lock, flags);
- xskq_prod_submit_addr(xs->umem->cq, addr);
+ xskq_prod_submit_addr(xs->pool->cq, addr);
spin_unlock_irqrestore(&xs->tx_completion_lock, flags);
sock_wfree(skb);
@@ -342,7 +382,7 @@ static int xsk_generic_xmit(struct sock *sk)
if (xs->queue_id >= xs->dev->real_num_tx_queues)
goto out;
- while (xskq_cons_peek_desc(xs->tx, &desc, xs->umem)) {
+ while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
char *buffer;
u64 addr;
u32 len;
@@ -359,14 +399,14 @@ static int xsk_generic_xmit(struct sock *sk)
skb_put(skb, len);
addr = desc.addr;
- buffer = xsk_buff_raw_get_data(xs->umem, addr);
+ buffer = xsk_buff_raw_get_data(xs->pool, addr);
err = skb_store_bits(skb, 0, buffer, len);
/* This is the backpressure mechanism for the Tx path.
* Reserve space in the completion queue and only proceed
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
- if (unlikely(err) || xskq_prod_reserve(xs->umem->cq)) {
+ if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
kfree_skb(skb);
goto out;
}
@@ -431,16 +471,16 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
__poll_t mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
- struct xdp_umem *umem;
+ struct xsk_buff_pool *pool;
if (unlikely(!xsk_is_bound(xs)))
return mask;
- umem = xs->umem;
+ pool = xs->pool;
- if (umem->need_wakeup) {
+ if (pool->cached_need_wakeup) {
if (xs->zc)
- xsk_wakeup(xs, umem->need_wakeup);
+ xsk_wakeup(xs, pool->cached_need_wakeup);
else
/* Poll needs to drive Tx also in copy mode */
__xsk_sendmsg(sk);
@@ -481,7 +521,7 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
WRITE_ONCE(xs->state, XSK_UNBOUND);
/* Wait for driver to stop using the xdp socket. */
- xdp_del_sk_umem(xs->umem, xs);
+ xp_del_xsk(xs->pool, xs);
xs->dev = NULL;
synchronize_net();
dev_put(dev);
@@ -559,6 +599,8 @@ static int xsk_release(struct socket *sock)
xskq_destroy(xs->rx);
xskq_destroy(xs->tx);
+ xskq_destroy(xs->fq_tmp);
+ xskq_destroy(xs->cq_tmp);
sock_orphan(sk);
sock->sk = NULL;
@@ -586,6 +628,11 @@ static struct socket *xsk_lookup_xsk_from_fd(int fd)
return sock;
}
+static bool xsk_validate_queues(struct xdp_sock *xs)
+{
+ return xs->fq_tmp && xs->cq_tmp;
+}
+
static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
@@ -654,29 +701,64 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
sockfd_put(sock);
goto out_unlock;
}
- if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
- err = -EINVAL;
- sockfd_put(sock);
- goto out_unlock;
+
+ if (umem_xs->queue_id != qid || umem_xs->dev != dev) {
+ /* Share the umem with another socket on another qid
+ * and/or device.
+ */
+ xs->pool = xp_create_and_assign_umem(xs,
+ umem_xs->umem);
+ if (!xs->pool) {
+ sockfd_put(sock);
+ goto out_unlock;
+ }
+
+ err = xp_assign_dev_shared(xs->pool, umem_xs->umem,
+ dev, qid);
+ if (err) {
+ xp_destroy(xs->pool);
+ sockfd_put(sock);
+ goto out_unlock;
+ }
+ } else {
+ /* Share the buffer pool with the other socket. */
+ if (xs->fq_tmp || xs->cq_tmp) {
+ /* Do not allow setting your own fq or cq. */
+ err = -EINVAL;
+ sockfd_put(sock);
+ goto out_unlock;
+ }
+
+ xp_get_pool(umem_xs->pool);
+ xs->pool = umem_xs->pool;
}
xdp_get_umem(umem_xs->umem);
WRITE_ONCE(xs->umem, umem_xs->umem);
sockfd_put(sock);
- } else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
+ } else if (!xs->umem || !xsk_validate_queues(xs)) {
err = -EINVAL;
goto out_unlock;
} else {
/* This xsk has its own umem. */
- err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
- if (err)
+ xs->pool = xp_create_and_assign_umem(xs, xs->umem);
+ if (!xs->pool) {
+ err = -ENOMEM;
goto out_unlock;
+ }
+
+ err = xp_assign_dev(xs->pool, dev, qid, flags);
+ if (err) {
+ xp_destroy(xs->pool);
+ xs->pool = NULL;
+ goto out_unlock;
+ }
}
xs->dev = dev;
xs->zc = xs->umem->zc;
xs->queue_id = qid;
- xdp_add_sk_umem(xs->umem, xs);
+ xp_add_xsk(xs->pool, xs);
out_unlock:
if (err) {
@@ -782,16 +864,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
mutex_unlock(&xs->mutex);
return -EBUSY;
}
- if (!xs->umem) {
- mutex_unlock(&xs->mutex);
- return -EINVAL;
- }
- q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq :
- &xs->umem->cq;
+ q = (optname == XDP_UMEM_FILL_RING) ? &xs->fq_tmp :
+ &xs->cq_tmp;
err = xsk_init_queue(entries, q, true);
- if (optname == XDP_UMEM_FILL_RING)
- xp_set_fq(xs->umem->pool, *q);
mutex_unlock(&xs->mutex);
return err;
}
@@ -858,7 +934,7 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
if (extra_stats) {
stats.rx_ring_full = xs->rx_queue_full;
stats.rx_fill_ring_empty_descs =
- xs->umem ? xskq_nb_queue_empty_descs(xs->umem->fq) : 0;
+ xs->pool ? xskq_nb_queue_empty_descs(xs->pool->fq) : 0;
stats.tx_ring_empty_descs = xskq_nb_queue_empty_descs(xs->tx);
} else {
stats.rx_dropped += xs->rx_queue_full;
@@ -960,7 +1036,6 @@ static int xsk_mmap(struct file *file, struct socket *sock,
unsigned long size = vma->vm_end - vma->vm_start;
struct xdp_sock *xs = xdp_sk(sock->sk);
struct xsk_queue *q = NULL;
- struct xdp_umem *umem;
unsigned long pfn;
struct page *qpg;
@@ -972,16 +1047,12 @@ static int xsk_mmap(struct file *file, struct socket *sock,
} else if (offset == XDP_PGOFF_TX_RING) {
q = READ_ONCE(xs->tx);
} else {
- umem = READ_ONCE(xs->umem);
- if (!umem)
- return -EINVAL;
-
/* Matches the smp_wmb() in XDP_UMEM_REG */
smp_rmb();
if (offset == XDP_UMEM_PGOFF_FILL_RING)
- q = READ_ONCE(umem->fq);
+ q = READ_ONCE(xs->fq_tmp);
else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
- q = READ_ONCE(umem->cq);
+ q = READ_ONCE(xs->cq_tmp);
}
if (!q)
@@ -1019,8 +1090,8 @@ static int xsk_notifier(struct notifier_block *this,
xsk_unbind_dev(xs);
- /* Clear device references in umem. */
- xdp_umem_clear_dev(xs->umem);
+ /* Clear device references. */
+ xp_clear_dev(xs->pool);
}
mutex_unlock(&xs->mutex);
}
@@ -1064,7 +1135,7 @@ static void xsk_destruct(struct sock *sk)
if (!sock_flag(sk, SOCK_DEAD))
return;
- xdp_put_umem(xs->umem);
+ xp_put_pool(xs->pool);
sk_refcnt_debug_dec(sk);
}
@@ -1072,8 +1143,8 @@ static void xsk_destruct(struct sock *sk)
static int xsk_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
- struct sock *sk;
struct xdp_sock *xs;
+ struct sock *sk;
if (!ns_capable(net->user_ns, CAP_NET_RAW))
return -EPERM;
diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
index 455ddd4..da1f73e 100644
--- a/net/xdp/xsk.h
+++ b/net/xdp/xsk.h
@@ -11,13 +11,6 @@
#define XSK_NEXT_PG_CONTIG_SHIFT 0
#define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT)
-/* Flags for the umem flags field.
- *
- * The NEED_WAKEUP flag is 1 due to the reuse of the flags field for public
- * flags. See inlude/uapi/include/linux/if_xdp.h.
- */
-#define XDP_UMEM_USES_NEED_WAKEUP BIT(1)
-
struct xdp_ring_offset_v1 {
__u64 producer;
__u64 consumer;
@@ -51,5 +44,8 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
struct xdp_sock **map_entry);
int xsk_map_inc(struct xsk_map *map);
void xsk_map_put(struct xsk_map *map);
+void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id);
+int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
+ u16 queue_id);
#endif /* XSK_H_ */
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index a2044c2..795d7c8 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -2,21 +2,37 @@
#include <net/xsk_buff_pool.h>
#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
+#include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
+#include <linux/swiotlb.h>
#include "xsk_queue.h"
+#include "xdp_umem.h"
+#include "xsk.h"
-static void xp_addr_unmap(struct xsk_buff_pool *pool)
+void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs)
{
- vunmap(pool->addrs);
+ unsigned long flags;
+
+ if (!xs->tx)
+ return;
+
+ spin_lock_irqsave(&pool->xsk_tx_list_lock, flags);
+ list_add_rcu(&xs->tx_list, &pool->xsk_tx_list);
+ spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags);
}
-static int xp_addr_map(struct xsk_buff_pool *pool,
- struct page **pages, u32 nr_pages)
+void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs)
{
- pool->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
- if (!pool->addrs)
- return -ENOMEM;
- return 0;
+ unsigned long flags;
+
+ if (!xs->tx)
+ return;
+
+ spin_lock_irqsave(&pool->xsk_tx_list_lock, flags);
+ list_del_rcu(&xs->tx_list);
+ spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags);
}
void xp_destroy(struct xsk_buff_pool *pool)
@@ -24,59 +40,61 @@ void xp_destroy(struct xsk_buff_pool *pool)
if (!pool)
return;
- xp_addr_unmap(pool);
kvfree(pool->heads);
kvfree(pool);
}
-struct xsk_buff_pool *xp_create(struct page **pages, u32 nr_pages, u32 chunks,
- u32 chunk_size, u32 headroom, u64 size,
- bool unaligned)
+struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
+ struct xdp_umem *umem)
{
struct xsk_buff_pool *pool;
struct xdp_buff_xsk *xskb;
- int err;
u32 i;
- pool = kvzalloc(struct_size(pool, free_heads, chunks), GFP_KERNEL);
+ pool = kvzalloc(struct_size(pool, free_heads, umem->chunks),
+ GFP_KERNEL);
if (!pool)
goto out;
- pool->heads = kvcalloc(chunks, sizeof(*pool->heads), GFP_KERNEL);
+ pool->heads = kvcalloc(umem->chunks, sizeof(*pool->heads), GFP_KERNEL);
if (!pool->heads)
goto out;
- pool->chunk_mask = ~((u64)chunk_size - 1);
- pool->addrs_cnt = size;
- pool->heads_cnt = chunks;
- pool->free_heads_cnt = chunks;
- pool->headroom = headroom;
- pool->chunk_size = chunk_size;
- pool->unaligned = unaligned;
- pool->frame_len = chunk_size - headroom - XDP_PACKET_HEADROOM;
+ pool->chunk_mask = ~((u64)umem->chunk_size - 1);
+ pool->addrs_cnt = umem->size;
+ pool->heads_cnt = umem->chunks;
+ pool->free_heads_cnt = umem->chunks;
+ pool->headroom = umem->headroom;
+ pool->chunk_size = umem->chunk_size;
+ pool->unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
+ pool->frame_len = umem->chunk_size - umem->headroom -
+ XDP_PACKET_HEADROOM;
+ pool->umem = umem;
+ pool->addrs = umem->addrs;
INIT_LIST_HEAD(&pool->free_list);
+ INIT_LIST_HEAD(&pool->xsk_tx_list);
+ spin_lock_init(&pool->xsk_tx_list_lock);
+ refcount_set(&pool->users, 1);
+
+ pool->fq = xs->fq_tmp;
+ pool->cq = xs->cq_tmp;
+ xs->fq_tmp = NULL;
+ xs->cq_tmp = NULL;
for (i = 0; i < pool->free_heads_cnt; i++) {
xskb = &pool->heads[i];
xskb->pool = pool;
- xskb->xdp.frame_sz = chunk_size - headroom;
+ xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
pool->free_heads[i] = xskb;
}
- err = xp_addr_map(pool, pages, nr_pages);
- if (!err)
- return pool;
+ return pool;
out:
xp_destroy(pool);
return NULL;
}
-void xp_set_fq(struct xsk_buff_pool *pool, struct xsk_queue *fq)
-{
- pool->fq = fq;
-}
-
void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq)
{
u32 i;
@@ -86,70 +104,320 @@ void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq)
}
EXPORT_SYMBOL(xp_set_rxq_info);
-void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
+static void xp_disable_drv_zc(struct xsk_buff_pool *pool)
+{
+ struct netdev_bpf bpf;
+ int err;
+
+ ASSERT_RTNL();
+
+ if (pool->umem->zc) {
+ bpf.command = XDP_SETUP_XSK_POOL;
+ bpf.xsk.pool = NULL;
+ bpf.xsk.queue_id = pool->queue_id;
+
+ err = pool->netdev->netdev_ops->ndo_bpf(pool->netdev, &bpf);
+
+ if (err)
+ WARN(1, "Failed to disable zero-copy!\n");
+ }
+}
+
+static int __xp_assign_dev(struct xsk_buff_pool *pool,
+ struct net_device *netdev, u16 queue_id, u16 flags)
+{
+ bool force_zc, force_copy;
+ struct netdev_bpf bpf;
+ int err = 0;
+
+ ASSERT_RTNL();
+
+ force_zc = flags & XDP_ZEROCOPY;
+ force_copy = flags & XDP_COPY;
+
+ if (force_zc && force_copy)
+ return -EINVAL;
+
+ if (xsk_get_pool_from_qid(netdev, queue_id))
+ return -EBUSY;
+
+ pool->netdev = netdev;
+ pool->queue_id = queue_id;
+ err = xsk_reg_pool_at_qid(netdev, pool, queue_id);
+ if (err)
+ return err;
+
+ if (flags & XDP_USE_NEED_WAKEUP) {
+ pool->uses_need_wakeup = true;
+ /* Tx needs to be explicitly woken up the first time.
+ * Also for supporting drivers that do not implement this
+ * feature. They will always have to call sendto().
+ */
+ pool->cached_need_wakeup = XDP_WAKEUP_TX;
+ }
+
+ dev_hold(netdev);
+
+ if (force_copy)
+ /* For copy-mode, we are done. */
+ return 0;
+
+ if (!netdev->netdev_ops->ndo_bpf ||
+ !netdev->netdev_ops->ndo_xsk_wakeup) {
+ err = -EOPNOTSUPP;
+ goto err_unreg_pool;
+ }
+
+ bpf.command = XDP_SETUP_XSK_POOL;
+ bpf.xsk.pool = pool;
+ bpf.xsk.queue_id = queue_id;
+
+ err = netdev->netdev_ops->ndo_bpf(netdev, &bpf);
+ if (err)
+ goto err_unreg_pool;
+
+ if (!pool->dma_pages) {
+ WARN(1, "Driver did not DMA map zero-copy buffers");
+ goto err_unreg_xsk;
+ }
+ pool->umem->zc = true;
+ return 0;
+
+err_unreg_xsk:
+ xp_disable_drv_zc(pool);
+err_unreg_pool:
+ if (!force_zc)
+ err = 0; /* fallback to copy mode */
+ if (err)
+ xsk_clear_pool_at_qid(netdev, queue_id);
+ return err;
+}
+
+int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev,
+ u16 queue_id, u16 flags)
+{
+ return __xp_assign_dev(pool, dev, queue_id, flags);
+}
+
+int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
+ struct net_device *dev, u16 queue_id)
+{
+ u16 flags;
+
+ /* One fill and completion ring required for each queue id. */
+ if (!pool->fq || !pool->cq)
+ return -EINVAL;
+
+ flags = umem->zc ? XDP_ZEROCOPY : XDP_COPY;
+ if (pool->uses_need_wakeup)
+ flags |= XDP_USE_NEED_WAKEUP;
+
+ return __xp_assign_dev(pool, dev, queue_id, flags);
+}
+
+void xp_clear_dev(struct xsk_buff_pool *pool)
+{
+ if (!pool->netdev)
+ return;
+
+ xp_disable_drv_zc(pool);
+ xsk_clear_pool_at_qid(pool->netdev, pool->queue_id);
+ dev_put(pool->netdev);
+ pool->netdev = NULL;
+}
+
+static void xp_release_deferred(struct work_struct *work)
+{
+ struct xsk_buff_pool *pool = container_of(work, struct xsk_buff_pool,
+ work);
+
+ rtnl_lock();
+ xp_clear_dev(pool);
+ rtnl_unlock();
+
+ if (pool->fq) {
+ xskq_destroy(pool->fq);
+ pool->fq = NULL;
+ }
+
+ if (pool->cq) {
+ xskq_destroy(pool->cq);
+ pool->cq = NULL;
+ }
+
+ xdp_put_umem(pool->umem);
+ xp_destroy(pool);
+}
+
+void xp_get_pool(struct xsk_buff_pool *pool)
+{
+ refcount_inc(&pool->users);
+}
+
+void xp_put_pool(struct xsk_buff_pool *pool)
+{
+ if (!pool)
+ return;
+
+ if (refcount_dec_and_test(&pool->users)) {
+ INIT_WORK(&pool->work, xp_release_deferred);
+ schedule_work(&pool->work);
+ }
+}
+
+static struct xsk_dma_map *xp_find_dma_map(struct xsk_buff_pool *pool)
+{
+ struct xsk_dma_map *dma_map;
+
+ list_for_each_entry(dma_map, &pool->umem->xsk_dma_list, list) {
+ if (dma_map->netdev == pool->netdev)
+ return dma_map;
+ }
+
+ return NULL;
+}
+
+static struct xsk_dma_map *xp_create_dma_map(struct device *dev, struct net_device *netdev,
+ u32 nr_pages, struct xdp_umem *umem)
+{
+ struct xsk_dma_map *dma_map;
+
+ dma_map = kzalloc(sizeof(*dma_map), GFP_KERNEL);
+ if (!dma_map)
+ return NULL;
+
+ dma_map->dma_pages = kvcalloc(nr_pages, sizeof(*dma_map->dma_pages), GFP_KERNEL);
+ if (!dma_map) {
+ kfree(dma_map);
+ return NULL;
+ }
+
+ dma_map->netdev = netdev;
+ dma_map->dev = dev;
+ dma_map->dma_need_sync = false;
+ dma_map->dma_pages_cnt = nr_pages;
+ refcount_set(&dma_map->users, 0);
+ list_add(&dma_map->list, &umem->xsk_dma_list);
+ return dma_map;
+}
+
+static void xp_destroy_dma_map(struct xsk_dma_map *dma_map)
+{
+ list_del(&dma_map->list);
+ kvfree(dma_map->dma_pages);
+ kfree(dma_map);
+}
+
+static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
{
dma_addr_t *dma;
u32 i;
- if (pool->dma_pages_cnt == 0)
- return;
-
- for (i = 0; i < pool->dma_pages_cnt; i++) {
- dma = &pool->dma_pages[i];
+ for (i = 0; i < dma_map->dma_pages_cnt; i++) {
+ dma = &dma_map->dma_pages[i];
if (*dma) {
- dma_unmap_page_attrs(pool->dev, *dma, PAGE_SIZE,
+ dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE,
DMA_BIDIRECTIONAL, attrs);
*dma = 0;
}
}
+ xp_destroy_dma_map(dma_map);
+}
+
+void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
+{
+ struct xsk_dma_map *dma_map;
+
+ if (pool->dma_pages_cnt == 0)
+ return;
+
+ dma_map = xp_find_dma_map(pool);
+ if (!dma_map) {
+ WARN(1, "Could not find dma_map for device");
+ return;
+ }
+
+ if (!refcount_dec_and_test(&dma_map->users))
+ return;
+
+ __xp_dma_unmap(dma_map, attrs);
kvfree(pool->dma_pages);
pool->dma_pages_cnt = 0;
pool->dev = NULL;
}
EXPORT_SYMBOL(xp_dma_unmap);
-static void xp_check_dma_contiguity(struct xsk_buff_pool *pool)
+static void xp_check_dma_contiguity(struct xsk_dma_map *dma_map)
{
u32 i;
- for (i = 0; i < pool->dma_pages_cnt - 1; i++) {
- if (pool->dma_pages[i] + PAGE_SIZE == pool->dma_pages[i + 1])
- pool->dma_pages[i] |= XSK_NEXT_PG_CONTIG_MASK;
+ for (i = 0; i < dma_map->dma_pages_cnt - 1; i++) {
+ if (dma_map->dma_pages[i] + PAGE_SIZE == dma_map->dma_pages[i + 1])
+ dma_map->dma_pages[i] |= XSK_NEXT_PG_CONTIG_MASK;
else
- pool->dma_pages[i] &= ~XSK_NEXT_PG_CONTIG_MASK;
+ dma_map->dma_pages[i] &= ~XSK_NEXT_PG_CONTIG_MASK;
}
}
+static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_map)
+{
+ pool->dma_pages = kvcalloc(dma_map->dma_pages_cnt, sizeof(*pool->dma_pages), GFP_KERNEL);
+ if (!pool->dma_pages)
+ return -ENOMEM;
+
+ pool->dev = dma_map->dev;
+ pool->dma_pages_cnt = dma_map->dma_pages_cnt;
+ pool->dma_need_sync = dma_map->dma_need_sync;
+ refcount_inc(&dma_map->users);
+ memcpy(pool->dma_pages, dma_map->dma_pages,
+ pool->dma_pages_cnt * sizeof(*pool->dma_pages));
+
+ return 0;
+}
+
int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
unsigned long attrs, struct page **pages, u32 nr_pages)
{
+ struct xsk_dma_map *dma_map;
dma_addr_t dma;
+ int err;
u32 i;
- pool->dma_pages = kvcalloc(nr_pages, sizeof(*pool->dma_pages),
- GFP_KERNEL);
- if (!pool->dma_pages)
+ dma_map = xp_find_dma_map(pool);
+ if (dma_map) {
+ err = xp_init_dma_info(pool, dma_map);
+ if (err)
+ return err;
+
+ return 0;
+ }
+
+ dma_map = xp_create_dma_map(dev, pool->netdev, nr_pages, pool->umem);
+ if (!dma_map)
return -ENOMEM;
- pool->dev = dev;
- pool->dma_pages_cnt = nr_pages;
- pool->dma_need_sync = false;
-
- for (i = 0; i < pool->dma_pages_cnt; i++) {
+ for (i = 0; i < dma_map->dma_pages_cnt; i++) {
dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
DMA_BIDIRECTIONAL, attrs);
if (dma_mapping_error(dev, dma)) {
- xp_dma_unmap(pool, attrs);
+ __xp_dma_unmap(dma_map, attrs);
return -ENOMEM;
}
if (dma_need_sync(dev, dma))
- pool->dma_need_sync = true;
- pool->dma_pages[i] = dma;
+ dma_map->dma_need_sync = true;
+ dma_map->dma_pages[i] = dma;
}
if (pool->unaligned)
- xp_check_dma_contiguity(pool);
+ xp_check_dma_contiguity(dma_map);
+
+ err = xp_init_dma_info(pool, dma_map);
+ if (err) {
+ __xp_dma_unmap(dma_map, attrs);
+ return err;
+ }
+
return 0;
}
EXPORT_SYMBOL(xp_dma_map);
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index 21e9c2d..5bd8ea9 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -46,6 +46,7 @@ static int xsk_diag_put_rings_cfg(const struct xdp_sock *xs,
static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
{
+ struct xsk_buff_pool *pool = xs->pool;
struct xdp_umem *umem = xs->umem;
struct xdp_diag_umem du = {};
int err;
@@ -58,8 +59,8 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
du.num_pages = umem->npgs;
du.chunk_size = umem->chunk_size;
du.headroom = umem->headroom;
- du.ifindex = umem->dev ? umem->dev->ifindex : 0;
- du.queue_id = umem->queue_id;
+ du.ifindex = pool->netdev ? pool->netdev->ifindex : 0;
+ du.queue_id = pool->queue_id;
du.flags = 0;
if (umem->zc)
du.flags |= XDP_DU_F_ZEROCOPY;
@@ -67,10 +68,11 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du);
- if (!err && umem->fq)
- err = xsk_diag_put_ring(umem->fq, XDP_DIAG_UMEM_FILL_RING, nlskb);
- if (!err && umem->cq) {
- err = xsk_diag_put_ring(umem->cq, XDP_DIAG_UMEM_COMPLETION_RING,
+ if (!err && pool->fq)
+ err = xsk_diag_put_ring(pool->fq,
+ XDP_DIAG_UMEM_FILL_RING, nlskb);
+ if (!err && pool->cq) {
+ err = xsk_diag_put_ring(pool->cq, XDP_DIAG_UMEM_COMPLETION_RING,
nlskb);
}
return err;
@@ -83,7 +85,7 @@ static int xsk_diag_put_stats(const struct xdp_sock *xs, struct sk_buff *nlskb)
du.n_rx_dropped = xs->rx_dropped;
du.n_rx_invalid = xskq_nb_invalid_descs(xs->rx);
du.n_rx_full = xs->rx_queue_full;
- du.n_fill_ring_empty = xs->umem ? xskq_nb_queue_empty_descs(xs->umem->fq) : 0;
+ du.n_fill_ring_empty = xs->pool ? xskq_nb_queue_empty_descs(xs->pool->fq) : 0;
du.n_tx_invalid = xskq_nb_invalid_descs(xs->tx);
du.n_tx_ring_empty = xskq_nb_queue_empty_descs(xs->tx);
return nla_put(nlskb, XDP_DIAG_STATS, sizeof(du), &du);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index bf42cfd..2d883f6 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -166,9 +166,9 @@ static inline bool xp_validate_desc(struct xsk_buff_pool *pool,
static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q,
struct xdp_desc *d,
- struct xdp_umem *umem)
+ struct xsk_buff_pool *pool)
{
- if (!xp_validate_desc(umem->pool, d)) {
+ if (!xp_validate_desc(pool, d)) {
q->invalid_descs++;
return false;
}
@@ -177,14 +177,14 @@ static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q,
static inline bool xskq_cons_read_desc(struct xsk_queue *q,
struct xdp_desc *desc,
- struct xdp_umem *umem)
+ struct xsk_buff_pool *pool)
{
while (q->cached_cons != q->cached_prod) {
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
u32 idx = q->cached_cons & q->ring_mask;
*desc = ring->desc[idx];
- if (xskq_cons_is_valid_desc(q, desc, umem))
+ if (xskq_cons_is_valid_desc(q, desc, pool))
return true;
q->cached_cons++;
@@ -236,11 +236,11 @@ static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr)
static inline bool xskq_cons_peek_desc(struct xsk_queue *q,
struct xdp_desc *desc,
- struct xdp_umem *umem)
+ struct xsk_buff_pool *pool)
{
if (q->cached_prod == q->cached_cons)
xskq_cons_get_entries(q);
- return xskq_cons_read_desc(q, desc, umem);
+ return xskq_cons_read_desc(q, desc, pool);
}
static inline void xskq_cons_release(struct xsk_queue *q)
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 8367adb..2a4fd66 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -254,8 +254,16 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
spin_unlock_bh(&map->lock);
}
+static bool xsk_map_meta_equal(const struct bpf_map *meta0,
+ const struct bpf_map *meta1)
+{
+ return meta0->max_entries == meta1->max_entries &&
+ bpf_map_meta_equal(meta0, meta1);
+}
+
static int xsk_map_btf_id;
const struct bpf_map_ops xsk_map_ops = {
+ .map_meta_equal = xsk_map_meta_equal,
.map_alloc = xsk_map_alloc,
.map_free = xsk_map_free,
.map_get_next_key = xsk_map_get_next_key,
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index f87ee02..4f1ed0e 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -48,6 +48,7 @@
tprogs-y += cpustat
tprogs-y += xdp_adjust_tail
tprogs-y += xdpsock
+tprogs-y += xsk_fwd
tprogs-y += xdp_fwd
tprogs-y += task_fd_query
tprogs-y += xdp_sample_pkts
@@ -71,12 +72,12 @@
tracex5-objs := tracex5_user.o $(TRACE_HELPERS)
tracex6-objs := tracex6_user.o
tracex7-objs := tracex7_user.o
-test_probe_write_user-objs := bpf_load.o test_probe_write_user_user.o
-trace_output-objs := bpf_load.o trace_output_user.o $(TRACE_HELPERS)
-lathist-objs := bpf_load.o lathist_user.o
-offwaketime-objs := bpf_load.o offwaketime_user.o $(TRACE_HELPERS)
-spintest-objs := bpf_load.o spintest_user.o $(TRACE_HELPERS)
-map_perf_test-objs := bpf_load.o map_perf_test_user.o
+test_probe_write_user-objs := test_probe_write_user_user.o
+trace_output-objs := trace_output_user.o $(TRACE_HELPERS)
+lathist-objs := lathist_user.o
+offwaketime-objs := offwaketime_user.o $(TRACE_HELPERS)
+spintest-objs := spintest_user.o $(TRACE_HELPERS)
+map_perf_test-objs := map_perf_test_user.o
test_overhead-objs := bpf_load.o test_overhead_user.o
test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o
test_cgrp2_attach-objs := test_cgrp2_attach.o
@@ -86,7 +87,7 @@
# reuse xdp1 source intentionally
xdp2-objs := xdp1_user.o
xdp_router_ipv4-objs := xdp_router_ipv4_user.o
-test_current_task_under_cgroup-objs := bpf_load.o $(CGROUP_HELPERS) \
+test_current_task_under_cgroup-objs := $(CGROUP_HELPERS) \
test_current_task_under_cgroup_user.o
trace_event-objs := trace_event_user.o $(TRACE_HELPERS)
sampleip-objs := sampleip_user.o $(TRACE_HELPERS)
@@ -100,10 +101,11 @@
xdp_redirect_cpu-objs := bpf_load.o xdp_redirect_cpu_user.o
xdp_monitor-objs := bpf_load.o xdp_monitor_user.o
xdp_rxq_info-objs := xdp_rxq_info_user.o
-syscall_tp-objs := bpf_load.o syscall_tp_user.o
-cpustat-objs := bpf_load.o cpustat_user.o
+syscall_tp-objs := syscall_tp_user.o
+cpustat-objs := cpustat_user.o
xdp_adjust_tail-objs := xdp_adjust_tail_user.o
xdpsock-objs := xdpsock_user.o
+xsk_fwd-objs := xsk_fwd.o
xdp_fwd-objs := xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
@@ -203,6 +205,7 @@
TPROGLDLIBS_map_perf_test += -lrt
TPROGLDLIBS_test_overhead += -lrt
TPROGLDLIBS_xdpsock += -pthread
+TPROGLDLIBS_xsk_fwd += -pthread
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make M=samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c
index a86a19d..5aefd19 100644
--- a/samples/bpf/cpustat_kern.c
+++ b/samples/bpf/cpustat_kern.c
@@ -51,28 +51,28 @@ static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };
#define MAP_OFF_PSTATE_IDX 3
#define MAP_OFF_NUM 4
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = MAX_CPU * MAP_OFF_NUM,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU * MAP_OFF_NUM);
+} my_map SEC(".maps");
/* cstate_duration records duration time for every idle state per CPU */
-struct bpf_map_def SEC("maps") cstate_duration = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = MAX_CPU * MAX_CSTATE_ENTRIES,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU * MAX_CSTATE_ENTRIES);
+} cstate_duration SEC(".maps");
/* pstate_duration records duration time for every operating point per CPU */
-struct bpf_map_def SEC("maps") pstate_duration = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = MAX_CPU * MAX_PSTATE_ENTRIES,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU * MAX_PSTATE_ENTRIES);
+} pstate_duration SEC(".maps");
/*
* The trace events for cpu_idle and cpu_frequency are taken from:
diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c
index 869a994..9667598 100644
--- a/samples/bpf/cpustat_user.c
+++ b/samples/bpf/cpustat_user.c
@@ -9,7 +9,6 @@
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
-#include <linux/bpf.h>
#include <locale.h>
#include <sys/types.h>
#include <sys/stat.h>
@@ -18,7 +17,9 @@
#include <sys/wait.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
+
+static int cstate_map_fd, pstate_map_fd;
#define MAX_CPU 8
#define MAX_PSTATE_ENTRIES 5
@@ -181,21 +182,50 @@ static void int_exit(int sig)
{
cpu_stat_inject_cpu_idle_event();
cpu_stat_inject_cpu_frequency_event();
- cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_update(cstate_map_fd, pstate_map_fd);
cpu_stat_print();
exit(0);
}
int main(int argc, char **argv)
{
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
int ret;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ cstate_map_fd = bpf_object__find_map_fd_by_name(obj, "cstate_duration");
+ pstate_map_fd = bpf_object__find_map_fd_by_name(obj, "pstate_duration");
+ if (cstate_map_fd < 0 || pstate_map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
ret = cpu_stat_inject_cpu_idle_event();
@@ -210,10 +240,13 @@ int main(int argc, char **argv)
signal(SIGTERM, int_exit);
while (1) {
- cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_update(cstate_map_fd, pstate_map_fd);
cpu_stat_print();
sleep(5);
}
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/lathist_kern.c b/samples/bpf/lathist_kern.c
index ca9c2e4..4adfcbb 100644
--- a/samples/bpf/lathist_kern.c
+++ b/samples/bpf/lathist_kern.c
@@ -18,12 +18,12 @@
* trace_preempt_[on|off] tracepoints hooks is not supported.
*/
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(u64),
- .max_entries = MAX_CPU,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU);
+} my_map SEC(".maps");
SEC("kprobe/trace_preempt_off")
int bpf_prog1(struct pt_regs *ctx)
@@ -61,12 +61,12 @@ static unsigned int log2l(unsigned long v)
return log2(v);
}
-struct bpf_map_def SEC("maps") my_lat = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(long),
- .max_entries = MAX_CPU * MAX_ENTRIES,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, long);
+ __uint(max_entries, MAX_CPU * MAX_ENTRIES);
+} my_lat SEC(".maps");
SEC("kprobe/trace_preempt_on")
int bpf_prog2(struct pt_regs *ctx)
diff --git a/samples/bpf/lathist_user.c b/samples/bpf/lathist_user.c
index 2ff2839..7d8ff24 100644
--- a/samples/bpf/lathist_user.c
+++ b/samples/bpf/lathist_user.c
@@ -6,9 +6,8 @@
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
-#include <linux/bpf.h>
+#include <bpf/libbpf.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
#define MAX_ENTRIES 20
#define MAX_CPU 4
@@ -81,20 +80,51 @@ static void get_data(int fd)
int main(int argc, char **argv)
{
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
+ int map_fd, i = 0;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_lat");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[i] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
}
while (1) {
- get_data(map_fd[1]);
+ get_data(map_fd);
print_hist();
sleep(5);
}
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c
index e74ee1c..14b7929 100644
--- a/samples/bpf/offwaketime_kern.c
+++ b/samples/bpf/offwaketime_kern.c
@@ -28,38 +28,38 @@ struct key_t {
u32 tret;
};
-struct bpf_map_def SEC("maps") counts = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct key_t),
- .value_size = sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct key_t);
+ __type(value, u64);
+ __uint(max_entries, 10000);
+} counts SEC(".maps");
-struct bpf_map_def SEC("maps") start = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, 10000);
+} start SEC(".maps");
struct wokeby_t {
char name[TASK_COMM_LEN];
u32 ret;
};
-struct bpf_map_def SEC("maps") wokeby = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(struct wokeby_t),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, struct wokeby_t);
+ __uint(max_entries, 10000);
+} wokeby SEC(".maps");
-struct bpf_map_def SEC("maps") stackmap = {
- .type = BPF_MAP_TYPE_STACK_TRACE,
- .key_size = sizeof(u32),
- .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
+ __uint(max_entries, 10000);
+} stackmap SEC(".maps");
#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c
index 51c7da5..5734cfd 100644
--- a/samples/bpf/offwaketime_user.c
+++ b/samples/bpf/offwaketime_user.c
@@ -5,19 +5,19 @@
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
-#include <linux/bpf.h>
-#include <string.h>
#include <linux/perf_event.h>
#include <errno.h>
-#include <assert.h>
#include <stdbool.h>
#include <sys/resource.h>
#include <bpf/libbpf.h>
-#include "bpf_load.h"
+#include <bpf/bpf.h>
#include "trace_helpers.h"
#define PRINT_RAW_ADDR 0
+/* counts, stackmap */
+static int map_fd[2];
+
static void print_ksym(__u64 addr)
{
struct ksym *sym;
@@ -52,14 +52,14 @@ static void print_stack(struct key_t *key, __u64 count)
int i;
printf("%s;", key->target);
- if (bpf_map_lookup_elem(map_fd[3], &key->tret, ip) != 0) {
+ if (bpf_map_lookup_elem(map_fd[1], &key->tret, ip) != 0) {
printf("---;");
} else {
for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--)
print_ksym(ip[i]);
}
printf("-;");
- if (bpf_map_lookup_elem(map_fd[3], &key->wret, ip) != 0) {
+ if (bpf_map_lookup_elem(map_fd[1], &key->wret, ip) != 0) {
printf("---;");
} else {
for (i = 0; i < PERF_MAX_STACK_DEPTH; i++)
@@ -96,23 +96,54 @@ static void int_exit(int sig)
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_object *obj = NULL;
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ int delay = 1, i = 0;
char filename[256];
- int delay = 1;
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
-
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
if (load_kallsyms()) {
printf("failed to process /proc/kallsyms\n");
return 2;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "counts");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "stackmap");
+ if (map_fd[0] < 0 || map_fd[1] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ bpf_object__for_each_program(prog, obj) {
+ links[i] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
}
if (argc > 1)
@@ -120,5 +151,10 @@ int main(int argc, char **argv)
sleep(delay);
print_stacks(map_fd[0]);
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/spintest_kern.c b/samples/bpf/spintest_kern.c
index f508af3..455da77 100644
--- a/samples/bpf/spintest_kern.c
+++ b/samples/bpf/spintest_kern.c
@@ -12,25 +12,25 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(long),
- .max_entries = 1024,
-};
-struct bpf_map_def SEC("maps") my_map2 = {
- .type = BPF_MAP_TYPE_PERCPU_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(long),
- .max_entries = 1024,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, long);
+ __type(value, long);
+ __uint(max_entries, 1024);
+} my_map SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(key_size, sizeof(long));
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, 1024);
+} my_map2 SEC(".maps");
-struct bpf_map_def SEC("maps") stackmap = {
- .type = BPF_MAP_TYPE_STACK_TRACE,
- .key_size = sizeof(u32),
- .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
+ __uint(max_entries, 10000);
+} stackmap SEC(".maps");
#define PROG(foo) \
int foo(struct pt_regs *ctx) \
diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c
index fb430ea..847da92 100644
--- a/samples/bpf/spintest_user.c
+++ b/samples/bpf/spintest_user.c
@@ -1,40 +1,77 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <unistd.h>
-#include <linux/bpf.h>
#include <string.h>
#include <assert.h>
#include <sys/resource.h>
#include <bpf/libbpf.h>
-#include "bpf_load.h"
+#include <bpf/bpf.h>
#include "trace_helpers.h"
int main(int ac, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ char filename[256], symbol[256];
+ struct bpf_object *obj = NULL;
+ struct bpf_link *links[20];
long key, next_key, value;
- char filename[256];
+ struct bpf_program *prog;
+ int map_fd, i, j = 0;
+ const char *title;
struct ksym *sym;
- int i;
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
if (load_kallsyms()) {
printf("failed to process /proc/kallsyms\n");
return 2;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ title = bpf_program__title(prog, false);
+ if (sscanf(title, "kprobe/%s", symbol) != 1)
+ continue;
+
+ /* Attach prog only when symbol exists */
+ if (ksym_get_addr(symbol)) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
+ }
}
for (i = 0; i < 5; i++) {
key = 0;
printf("kprobing funcs:");
- while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
- bpf_map_lookup_elem(map_fd[0], &next_key, &value);
+ while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) {
+ bpf_map_lookup_elem(map_fd, &next_key, &value);
assert(next_key == value);
sym = ksym_search(value);
key = next_key;
@@ -48,10 +85,15 @@ int main(int ac, char **argv)
if (key)
printf("\n");
key = 0;
- while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0)
- bpf_map_delete_elem(map_fd[0], &next_key);
+ while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0)
+ bpf_map_delete_elem(map_fd, &next_key);
sleep(1);
}
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/syscall_tp_kern.c b/samples/bpf/syscall_tp_kern.c
index 5a62b03..50231c2 100644
--- a/samples/bpf/syscall_tp_kern.c
+++ b/samples/bpf/syscall_tp_kern.c
@@ -18,19 +18,19 @@ struct syscalls_exit_open_args {
long ret;
};
-struct bpf_map_def SEC("maps") enter_open_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u32);
+ __uint(max_entries, 1);
+} enter_open_map SEC(".maps");
-struct bpf_map_def SEC("maps") exit_open_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u32);
+ __uint(max_entries, 1);
+} exit_open_map SEC(".maps");
static __always_inline void count(void *map)
{
diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c
index 57014ba..76a1d001 100644
--- a/samples/bpf/syscall_tp_user.c
+++ b/samples/bpf/syscall_tp_user.c
@@ -5,16 +5,12 @@
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
-#include <signal.h>
-#include <linux/bpf.h>
#include <string.h>
#include <linux/perf_event.h>
#include <errno.h>
-#include <assert.h>
-#include <stdbool.h>
#include <sys/resource.h>
+#include <bpf/libbpf.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
/* This program verifies bpf attachment to tracepoint sys_enter_* and sys_exit_*.
* This requires kernel CONFIG_FTRACE_SYSCALLS to be set.
@@ -49,16 +45,44 @@ static void verify_map(int map_id)
static int test(char *filename, int num_progs)
{
- int i, fd, map0_fds[num_progs], map1_fds[num_progs];
+ int map0_fds[num_progs], map1_fds[num_progs], fd, i, j = 0;
+ struct bpf_link *links[num_progs * 4];
+ struct bpf_object *objs[num_progs];
+ struct bpf_program *prog;
for (i = 0; i < num_progs; i++) {
- if (load_bpf_file(filename)) {
- fprintf(stderr, "%s", bpf_log_buf);
- return 1;
+ objs[i] = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(objs[i])) {
+ fprintf(stderr, "opening BPF object file failed\n");
+ objs[i] = NULL;
+ goto cleanup;
}
- printf("prog #%d: map ids %d %d\n", i, map_fd[0], map_fd[1]);
- map0_fds[i] = map_fd[0];
- map1_fds[i] = map_fd[1];
+
+ /* load BPF program */
+ if (bpf_object__load(objs[i])) {
+ fprintf(stderr, "loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map0_fds[i] = bpf_object__find_map_fd_by_name(objs[i],
+ "enter_open_map");
+ map1_fds[i] = bpf_object__find_map_fd_by_name(objs[i],
+ "exit_open_map");
+ if (map0_fds[i] < 0 || map1_fds[i] < 0) {
+ fprintf(stderr, "finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, objs[i]) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
+ }
+ printf("prog #%d: map ids %d %d\n", i, map0_fds[i], map1_fds[i]);
}
/* current load_bpf_file has perf_event_open default pid = -1
@@ -80,6 +104,12 @@ static int test(char *filename, int num_progs)
verify_map(map1_fds[i]);
}
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ for (i--; i >= 0; i--)
+ bpf_object__close(objs[i]);
return 0;
}
diff --git a/samples/bpf/task_fd_query_kern.c b/samples/bpf/task_fd_query_kern.c
index 278ade5..c821294 100644
--- a/samples/bpf/task_fd_query_kern.c
+++ b/samples/bpf/task_fd_query_kern.c
@@ -10,7 +10,7 @@ int bpf_prog1(struct pt_regs *ctx)
return 0;
}
-SEC("kretprobe/blk_account_io_completion")
+SEC("kretprobe/blk_account_io_done")
int bpf_prog2(struct pt_regs *ctx)
{
return 0;
diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c
index ff2e9c1..4a74531 100644
--- a/samples/bpf/task_fd_query_user.c
+++ b/samples/bpf/task_fd_query_user.c
@@ -314,7 +314,7 @@ int main(int argc, char **argv)
/* test two functions in the corresponding *_kern.c file */
CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_mq_start_request",
BPF_FD_TYPE_KPROBE));
- CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion",
+ CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_done",
BPF_FD_TYPE_KRETPROBE));
/* test nondebug fs kprobe */
diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c
index 6dc4f41..fbd43e2 100644
--- a/samples/bpf/test_current_task_under_cgroup_kern.c
+++ b/samples/bpf/test_current_task_under_cgroup_kern.c
@@ -10,23 +10,24 @@
#include <linux/version.h>
#include <bpf/bpf_helpers.h>
#include <uapi/linux/utsname.h>
+#include "trace_common.h"
-struct bpf_map_def SEC("maps") cgroup_map = {
- .type = BPF_MAP_TYPE_CGROUP_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u32));
+ __uint(max_entries, 1);
+} cgroup_map SEC(".maps");
-struct bpf_map_def SEC("maps") perf_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, 1);
+} perf_map SEC(".maps");
/* Writes the last PID that called sync to a map at index 0 */
-SEC("kprobe/sys_sync")
+SEC("kprobe/" SYSCALL(sys_sync))
int bpf_prog1(struct pt_regs *ctx)
{
u64 pid = bpf_get_current_pid_tgid();
diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c
index 06e9f8c..ac251a4 100644
--- a/samples/bpf/test_current_task_under_cgroup_user.c
+++ b/samples/bpf/test_current_task_under_cgroup_user.c
@@ -4,10 +4,9 @@
#define _GNU_SOURCE
#include <stdio.h>
-#include <linux/bpf.h>
#include <unistd.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
#define CGROUP_PATH "/my-cgroup"
@@ -15,13 +14,44 @@
int main(int argc, char **argv)
{
pid_t remote_pid, local_pid = getpid();
- int cg2, idx = 0, rc = 0;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ int cg2, idx = 0, rc = 1;
+ struct bpf_object *obj;
char filename[256];
+ int map_fd[2];
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "cgroup_map");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "perf_map");
+ if (map_fd[0] < 0 || map_fd[1] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
if (setup_cgroup_environment())
@@ -70,12 +100,14 @@ int main(int argc, char **argv)
goto err;
}
- goto out;
-err:
- rc = 1;
+ rc = 0;
-out:
+err:
close(cg2);
cleanup_cgroup_environment();
+
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return rc;
}
diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user_kern.c
index fd651a6..220a964 100644
--- a/samples/bpf/test_probe_write_user_kern.c
+++ b/samples/bpf/test_probe_write_user_kern.c
@@ -13,12 +13,12 @@
#include <bpf/bpf_core_read.h>
#include "trace_common.h"
-struct bpf_map_def SEC("maps") dnat_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct sockaddr_in),
- .value_size = sizeof(struct sockaddr_in),
- .max_entries = 256,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct sockaddr_in);
+ __type(value, struct sockaddr_in);
+ __uint(max_entries, 256);
+} dnat_map SEC(".maps");
/* kprobe is NOT a stable ABI
* kernel functions can be removed, renamed or completely change semantics.
diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c
index 045eb5e..00ccfb8 100644
--- a/samples/bpf/test_probe_write_user_user.c
+++ b/samples/bpf/test_probe_write_user_user.c
@@ -1,21 +1,22 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <assert.h>
-#include <linux/bpf.h>
#include <unistd.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include <sys/socket.h>
-#include <string.h>
#include <netinet/in.h>
#include <arpa/inet.h>
int main(int ac, char **argv)
{
- int serverfd, serverconnfd, clientfd;
- socklen_t sockaddr_len;
- struct sockaddr serv_addr, mapped_addr, tmp_addr;
struct sockaddr_in *serv_addr_in, *mapped_addr_in, *tmp_addr_in;
+ struct sockaddr serv_addr, mapped_addr, tmp_addr;
+ int serverfd, serverconnfd, clientfd, map_fd;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ socklen_t sockaddr_len;
char filename[256];
char *ip;
@@ -24,10 +25,35 @@ int main(int ac, char **argv)
tmp_addr_in = (struct sockaddr_in *)&tmp_addr;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (libbpf_get_error(prog)) {
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "dnat_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
assert((serverfd = socket(AF_INET, SOCK_STREAM, 0)) > 0);
@@ -51,7 +77,7 @@ int main(int ac, char **argv)
mapped_addr_in->sin_port = htons(5555);
mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255");
- assert(!bpf_map_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY));
+ assert(!bpf_map_update_elem(map_fd, &mapped_addr, &serv_addr, BPF_ANY));
assert(listen(serverfd, 5) == 0);
@@ -75,5 +101,8 @@ int main(int ac, char **argv)
/* Is the server's getsockname = the socket getpeername */
assert(memcmp(&serv_addr, &tmp_addr, sizeof(struct sockaddr_in)) == 0);
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c
index 1d7d422..b64815a 100644
--- a/samples/bpf/trace_output_kern.c
+++ b/samples/bpf/trace_output_kern.c
@@ -2,15 +2,16 @@
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "trace_common.h"
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(u32),
- .max_entries = 2,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(u32));
+ __uint(max_entries, 2);
+} my_map SEC(".maps");
-SEC("kprobe/sys_write")
+SEC("kprobe/" SYSCALL(sys_write))
int bpf_prog1(struct pt_regs *ctx)
{
struct S {
diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c
index 60a17dd..364b987 100644
--- a/samples/bpf/trace_output_user.c
+++ b/samples/bpf/trace_output_user.c
@@ -1,23 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <stdio.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <string.h>
#include <fcntl.h>
#include <poll.h>
-#include <linux/perf_event.h>
-#include <linux/bpf.h>
-#include <errno.h>
-#include <assert.h>
-#include <sys/syscall.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
#include <time.h>
#include <signal.h>
#include <bpf/libbpf.h>
-#include "bpf_load.h"
-#include "perf-sys.h"
static __u64 time_get_ns(void)
{
@@ -57,20 +44,48 @@ static void print_bpf_output(void *ctx, int cpu, void *data, __u32 size)
int main(int argc, char **argv)
{
struct perf_buffer_opts pb_opts = {};
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
struct perf_buffer *pb;
+ struct bpf_object *obj;
+ int map_fd, ret = 0;
char filename[256];
FILE *f;
- int ret;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (libbpf_get_error(prog)) {
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
pb_opts.sample_cb = print_bpf_output;
- pb = perf_buffer__new(map_fd[0], 8, &pb_opts);
+ pb = perf_buffer__new(map_fd, 8, &pb_opts);
ret = libbpf_get_error(pb);
if (ret) {
printf("failed to setup perf_buffer: %d\n", ret);
@@ -84,5 +99,9 @@ int main(int argc, char **argv)
while ((ret = perf_buffer__poll(pb, 1000)) >= 0 && cnt < MAX_CNT) {
}
kill(0, SIGINT);
+
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return ret;
}
diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c
index 659613c..710a441 100644
--- a/samples/bpf/tracex3_kern.c
+++ b/samples/bpf/tracex3_kern.c
@@ -49,7 +49,7 @@ struct {
__uint(max_entries, SLOTS);
} lat_map SEC(".maps");
-SEC("kprobe/blk_account_io_completion")
+SEC("kprobe/blk_account_io_done")
int bpf_prog2(struct pt_regs *ctx)
{
long rq = PT_REGS_PARM1(ctx);
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 19c6794..4cead34 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -613,7 +613,16 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
{
struct xsk_umem_info *umem;
struct xsk_umem_config cfg = {
- .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ /* We recommend that you set the fill ring size >= HW RX ring size +
+ * AF_XDP RX ring size. Make sure you fill up the fill ring
+ * with buffers at regular intervals, and you will with this setting
+ * avoid allocation failures in the driver. These are usually quite
+ * expensive since drivers have not been written to assume that
+ * allocation failures are common. For regular sockets, kernel
+ * allocated memory is used that only runs out in OOM situations
+ * that should be rare.
+ */
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2,
.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
.frame_size = opt_xsk_frame_size,
.frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
@@ -640,13 +649,13 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
u32 idx;
ret = xsk_ring_prod__reserve(&umem->fq,
- XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
- if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
+ XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, &idx);
+ if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS * 2)
exit_with_error(-ret);
- for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
+ for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS * 2; i++)
*xsk_ring_prod__fill_addr(&umem->fq, idx++) =
i * opt_xsk_frame_size;
- xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
+ xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS * 2);
}
static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
@@ -888,9 +897,6 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
if (!xsk->outstanding_tx)
return;
- if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
- kick_tx(xsk);
-
ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size :
xsk->outstanding_tx;
@@ -1004,7 +1010,7 @@ static void rx_drop_all(void)
}
}
-static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb, int batch_size)
+static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
{
u32 idx;
unsigned int i;
@@ -1017,14 +1023,14 @@ static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb, int batch_size)
for (i = 0; i < batch_size; i++) {
struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx,
idx + i);
- tx_desc->addr = (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
+ tx_desc->addr = (*frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
tx_desc->len = PKT_SIZE;
}
xsk_ring_prod__submit(&xsk->tx, batch_size);
xsk->outstanding_tx += batch_size;
- frame_nb += batch_size;
- frame_nb %= NUM_FRAMES;
+ *frame_nb += batch_size;
+ *frame_nb %= NUM_FRAMES;
complete_tx_only(xsk, batch_size);
}
@@ -1080,7 +1086,7 @@ static void tx_only_all(void)
}
for (i = 0; i < num_socks; i++)
- tx_only(xsks[i], frame_nb[i], batch_size);
+ tx_only(xsks[i], &frame_nb[i], batch_size);
pkt_cnt += batch_size;
diff --git a/samples/bpf/xsk_fwd.c b/samples/bpf/xsk_fwd.c
new file mode 100644
index 0000000..1cd97c8
--- /dev/null
+++ b/samples/bpf/xsk_fwd.c
@@ -0,0 +1,1085 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2020 Intel Corporation. */
+
+#define _GNU_SOURCE
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <netinet/ether.h>
+#include <net/if.h>
+
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <linux/if_xdp.h>
+
+#include <bpf/libbpf.h>
+#include <bpf/xsk.h>
+#include <bpf/bpf.h>
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+typedef __u64 u64;
+typedef __u32 u32;
+typedef __u16 u16;
+typedef __u8 u8;
+
+/* This program illustrates the packet forwarding between multiple AF_XDP
+ * sockets in multi-threaded environment. All threads are sharing a common
+ * buffer pool, with each socket having its own private buffer cache.
+ *
+ * Example 1: Single thread handling two sockets. The packets received by socket
+ * A (interface IFA, queue QA) are forwarded to socket B (interface IFB, queue
+ * QB), while the packets received by socket B are forwarded to socket A. The
+ * thread is running on CPU core X:
+ *
+ * ./xsk_fwd -i IFA -q QA -i IFB -q QB -c X
+ *
+ * Example 2: Two threads, each handling two sockets. The thread running on CPU
+ * core X forwards all the packets received by socket A to socket B, and all the
+ * packets received by socket B to socket A. The thread running on CPU core Y is
+ * performing the same packet forwarding between sockets C and D:
+ *
+ * ./xsk_fwd -i IFA -q QA -i IFB -q QB -i IFC -q QC -i IFD -q QD
+ * -c CX -c CY
+ */
+
+/*
+ * Buffer pool and buffer cache
+ *
+ * For packet forwarding, the packet buffers are typically allocated from the
+ * pool for packet reception and freed back to the pool for further reuse once
+ * the packet transmission is completed.
+ *
+ * The buffer pool is shared between multiple threads. In order to minimize the
+ * access latency to the shared buffer pool, each thread creates one (or
+ * several) buffer caches, which, unlike the buffer pool, are private to the
+ * thread that creates them and therefore cannot be shared with other threads.
+ * The access to the shared pool is only needed either (A) when the cache gets
+ * empty due to repeated buffer allocations and it needs to be replenished from
+ * the pool, or (B) when the cache gets full due to repeated buffer free and it
+ * needs to be flushed back to the pull.
+ *
+ * In a packet forwarding system, a packet received on any input port can
+ * potentially be transmitted on any output port, depending on the forwarding
+ * configuration. For AF_XDP sockets, for this to work with zero-copy of the
+ * packet buffers when, it is required that the buffer pool memory fits into the
+ * UMEM area shared by all the sockets.
+ */
+
+struct bpool_params {
+ u32 n_buffers;
+ u32 buffer_size;
+ int mmap_flags;
+
+ u32 n_users_max;
+ u32 n_buffers_per_slab;
+};
+
+/* This buffer pool implementation organizes the buffers into equally sized
+ * slabs of *n_buffers_per_slab*. Initially, there are *n_slabs* slabs in the
+ * pool that are completely filled with buffer pointers (full slabs).
+ *
+ * Each buffer cache has a slab for buffer allocation and a slab for buffer
+ * free, with both of these slabs initially empty. When the cache's allocation
+ * slab goes empty, it is swapped with one of the available full slabs from the
+ * pool, if any is available. When the cache's free slab goes full, it is
+ * swapped for one of the empty slabs from the pool, which is guaranteed to
+ * succeed.
+ *
+ * Partially filled slabs never get traded between the cache and the pool
+ * (except when the cache itself is destroyed), which enables fast operation
+ * through pointer swapping.
+ */
+struct bpool {
+ struct bpool_params params;
+ pthread_mutex_t lock;
+ void *addr;
+
+ u64 **slabs;
+ u64 **slabs_reserved;
+ u64 *buffers;
+ u64 *buffers_reserved;
+
+ u64 n_slabs;
+ u64 n_slabs_reserved;
+ u64 n_buffers;
+
+ u64 n_slabs_available;
+ u64 n_slabs_reserved_available;
+
+ struct xsk_umem_config umem_cfg;
+ struct xsk_ring_prod umem_fq;
+ struct xsk_ring_cons umem_cq;
+ struct xsk_umem *umem;
+};
+
+static struct bpool *
+bpool_init(struct bpool_params *params,
+ struct xsk_umem_config *umem_cfg)
+{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ u64 n_slabs, n_slabs_reserved, n_buffers, n_buffers_reserved;
+ u64 slabs_size, slabs_reserved_size;
+ u64 buffers_size, buffers_reserved_size;
+ u64 total_size, i;
+ struct bpool *bp;
+ u8 *p;
+ int status;
+
+ /* mmap prep. */
+ if (setrlimit(RLIMIT_MEMLOCK, &r))
+ return NULL;
+
+ /* bpool internals dimensioning. */
+ n_slabs = (params->n_buffers + params->n_buffers_per_slab - 1) /
+ params->n_buffers_per_slab;
+ n_slabs_reserved = params->n_users_max * 2;
+ n_buffers = n_slabs * params->n_buffers_per_slab;
+ n_buffers_reserved = n_slabs_reserved * params->n_buffers_per_slab;
+
+ slabs_size = n_slabs * sizeof(u64 *);
+ slabs_reserved_size = n_slabs_reserved * sizeof(u64 *);
+ buffers_size = n_buffers * sizeof(u64);
+ buffers_reserved_size = n_buffers_reserved * sizeof(u64);
+
+ total_size = sizeof(struct bpool) +
+ slabs_size + slabs_reserved_size +
+ buffers_size + buffers_reserved_size;
+
+ /* bpool memory allocation. */
+ p = calloc(total_size, sizeof(u8));
+ if (!p)
+ return NULL;
+
+ /* bpool memory initialization. */
+ bp = (struct bpool *)p;
+ memcpy(&bp->params, params, sizeof(*params));
+ bp->params.n_buffers = n_buffers;
+
+ bp->slabs = (u64 **)&p[sizeof(struct bpool)];
+ bp->slabs_reserved = (u64 **)&p[sizeof(struct bpool) +
+ slabs_size];
+ bp->buffers = (u64 *)&p[sizeof(struct bpool) +
+ slabs_size + slabs_reserved_size];
+ bp->buffers_reserved = (u64 *)&p[sizeof(struct bpool) +
+ slabs_size + slabs_reserved_size + buffers_size];
+
+ bp->n_slabs = n_slabs;
+ bp->n_slabs_reserved = n_slabs_reserved;
+ bp->n_buffers = n_buffers;
+
+ for (i = 0; i < n_slabs; i++)
+ bp->slabs[i] = &bp->buffers[i * params->n_buffers_per_slab];
+ bp->n_slabs_available = n_slabs;
+
+ for (i = 0; i < n_slabs_reserved; i++)
+ bp->slabs_reserved[i] = &bp->buffers_reserved[i *
+ params->n_buffers_per_slab];
+ bp->n_slabs_reserved_available = n_slabs_reserved;
+
+ for (i = 0; i < n_buffers; i++)
+ bp->buffers[i] = i * params->buffer_size;
+
+ /* lock. */
+ status = pthread_mutex_init(&bp->lock, NULL);
+ if (status) {
+ free(p);
+ return NULL;
+ }
+
+ /* mmap. */
+ bp->addr = mmap(NULL,
+ n_buffers * params->buffer_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | params->mmap_flags,
+ -1,
+ 0);
+ if (bp->addr == MAP_FAILED) {
+ pthread_mutex_destroy(&bp->lock);
+ free(p);
+ return NULL;
+ }
+
+ /* umem. */
+ status = xsk_umem__create(&bp->umem,
+ bp->addr,
+ bp->params.n_buffers * bp->params.buffer_size,
+ &bp->umem_fq,
+ &bp->umem_cq,
+ umem_cfg);
+ if (status) {
+ munmap(bp->addr, bp->params.n_buffers * bp->params.buffer_size);
+ pthread_mutex_destroy(&bp->lock);
+ free(p);
+ return NULL;
+ }
+ memcpy(&bp->umem_cfg, umem_cfg, sizeof(*umem_cfg));
+
+ return bp;
+}
+
+static void
+bpool_free(struct bpool *bp)
+{
+ if (!bp)
+ return;
+
+ xsk_umem__delete(bp->umem);
+ munmap(bp->addr, bp->params.n_buffers * bp->params.buffer_size);
+ pthread_mutex_destroy(&bp->lock);
+ free(bp);
+}
+
+struct bcache {
+ struct bpool *bp;
+
+ u64 *slab_cons;
+ u64 *slab_prod;
+
+ u64 n_buffers_cons;
+ u64 n_buffers_prod;
+};
+
+static u32
+bcache_slab_size(struct bcache *bc)
+{
+ struct bpool *bp = bc->bp;
+
+ return bp->params.n_buffers_per_slab;
+}
+
+static struct bcache *
+bcache_init(struct bpool *bp)
+{
+ struct bcache *bc;
+
+ bc = calloc(1, sizeof(struct bcache));
+ if (!bc)
+ return NULL;
+
+ bc->bp = bp;
+ bc->n_buffers_cons = 0;
+ bc->n_buffers_prod = 0;
+
+ pthread_mutex_lock(&bp->lock);
+ if (bp->n_slabs_reserved_available == 0) {
+ pthread_mutex_unlock(&bp->lock);
+ free(bc);
+ return NULL;
+ }
+
+ bc->slab_cons = bp->slabs_reserved[bp->n_slabs_reserved_available - 1];
+ bc->slab_prod = bp->slabs_reserved[bp->n_slabs_reserved_available - 2];
+ bp->n_slabs_reserved_available -= 2;
+ pthread_mutex_unlock(&bp->lock);
+
+ return bc;
+}
+
+static void
+bcache_free(struct bcache *bc)
+{
+ struct bpool *bp;
+
+ if (!bc)
+ return;
+
+ /* In order to keep this example simple, the case of freeing any
+ * existing buffers from the cache back to the pool is ignored.
+ */
+
+ bp = bc->bp;
+ pthread_mutex_lock(&bp->lock);
+ bp->slabs_reserved[bp->n_slabs_reserved_available] = bc->slab_prod;
+ bp->slabs_reserved[bp->n_slabs_reserved_available + 1] = bc->slab_cons;
+ bp->n_slabs_reserved_available += 2;
+ pthread_mutex_unlock(&bp->lock);
+
+ free(bc);
+}
+
+/* To work correctly, the implementation requires that the *n_buffers* input
+ * argument is never greater than the buffer pool's *n_buffers_per_slab*. This
+ * is typically the case, with one exception taking place when large number of
+ * buffers are allocated at init time (e.g. for the UMEM fill queue setup).
+ */
+static inline u32
+bcache_cons_check(struct bcache *bc, u32 n_buffers)
+{
+ struct bpool *bp = bc->bp;
+ u64 n_buffers_per_slab = bp->params.n_buffers_per_slab;
+ u64 n_buffers_cons = bc->n_buffers_cons;
+ u64 n_slabs_available;
+ u64 *slab_full;
+
+ /*
+ * Consumer slab is not empty: Use what's available locally. Do not
+ * look for more buffers from the pool when the ask can only be
+ * partially satisfied.
+ */
+ if (n_buffers_cons)
+ return (n_buffers_cons < n_buffers) ?
+ n_buffers_cons :
+ n_buffers;
+
+ /*
+ * Consumer slab is empty: look to trade the current consumer slab
+ * (full) for a full slab from the pool, if any is available.
+ */
+ pthread_mutex_lock(&bp->lock);
+ n_slabs_available = bp->n_slabs_available;
+ if (!n_slabs_available) {
+ pthread_mutex_unlock(&bp->lock);
+ return 0;
+ }
+
+ n_slabs_available--;
+ slab_full = bp->slabs[n_slabs_available];
+ bp->slabs[n_slabs_available] = bc->slab_cons;
+ bp->n_slabs_available = n_slabs_available;
+ pthread_mutex_unlock(&bp->lock);
+
+ bc->slab_cons = slab_full;
+ bc->n_buffers_cons = n_buffers_per_slab;
+ return n_buffers;
+}
+
+static inline u64
+bcache_cons(struct bcache *bc)
+{
+ u64 n_buffers_cons = bc->n_buffers_cons - 1;
+ u64 buffer;
+
+ buffer = bc->slab_cons[n_buffers_cons];
+ bc->n_buffers_cons = n_buffers_cons;
+ return buffer;
+}
+
+static inline void
+bcache_prod(struct bcache *bc, u64 buffer)
+{
+ struct bpool *bp = bc->bp;
+ u64 n_buffers_per_slab = bp->params.n_buffers_per_slab;
+ u64 n_buffers_prod = bc->n_buffers_prod;
+ u64 n_slabs_available;
+ u64 *slab_empty;
+
+ /*
+ * Producer slab is not yet full: store the current buffer to it.
+ */
+ if (n_buffers_prod < n_buffers_per_slab) {
+ bc->slab_prod[n_buffers_prod] = buffer;
+ bc->n_buffers_prod = n_buffers_prod + 1;
+ return;
+ }
+
+ /*
+ * Producer slab is full: trade the cache's current producer slab
+ * (full) for an empty slab from the pool, then store the current
+ * buffer to the new producer slab. As one full slab exists in the
+ * cache, it is guaranteed that there is at least one empty slab
+ * available in the pool.
+ */
+ pthread_mutex_lock(&bp->lock);
+ n_slabs_available = bp->n_slabs_available;
+ slab_empty = bp->slabs[n_slabs_available];
+ bp->slabs[n_slabs_available] = bc->slab_prod;
+ bp->n_slabs_available = n_slabs_available + 1;
+ pthread_mutex_unlock(&bp->lock);
+
+ slab_empty[0] = buffer;
+ bc->slab_prod = slab_empty;
+ bc->n_buffers_prod = 1;
+}
+
+/*
+ * Port
+ *
+ * Each of the forwarding ports sits on top of an AF_XDP socket. In order for
+ * packet forwarding to happen with no packet buffer copy, all the sockets need
+ * to share the same UMEM area, which is used as the buffer pool memory.
+ */
+#ifndef MAX_BURST_RX
+#define MAX_BURST_RX 64
+#endif
+
+#ifndef MAX_BURST_TX
+#define MAX_BURST_TX 64
+#endif
+
+struct burst_rx {
+ u64 addr[MAX_BURST_RX];
+ u32 len[MAX_BURST_RX];
+};
+
+struct burst_tx {
+ u64 addr[MAX_BURST_TX];
+ u32 len[MAX_BURST_TX];
+ u32 n_pkts;
+};
+
+struct port_params {
+ struct xsk_socket_config xsk_cfg;
+ struct bpool *bp;
+ const char *iface;
+ u32 iface_queue;
+};
+
+struct port {
+ struct port_params params;
+
+ struct bcache *bc;
+
+ struct xsk_ring_cons rxq;
+ struct xsk_ring_prod txq;
+ struct xsk_ring_prod umem_fq;
+ struct xsk_ring_cons umem_cq;
+ struct xsk_socket *xsk;
+ int umem_fq_initialized;
+
+ u64 n_pkts_rx;
+ u64 n_pkts_tx;
+};
+
+static void
+port_free(struct port *p)
+{
+ if (!p)
+ return;
+
+ /* To keep this example simple, the code to free the buffers from the
+ * socket's receive and transmit queues, as well as from the UMEM fill
+ * and completion queues, is not included.
+ */
+
+ if (p->xsk)
+ xsk_socket__delete(p->xsk);
+
+ bcache_free(p->bc);
+
+ free(p);
+}
+
+static struct port *
+port_init(struct port_params *params)
+{
+ struct port *p;
+ u32 umem_fq_size, pos = 0;
+ int status, i;
+
+ /* Memory allocation and initialization. */
+ p = calloc(sizeof(struct port), 1);
+ if (!p)
+ return NULL;
+
+ memcpy(&p->params, params, sizeof(p->params));
+ umem_fq_size = params->bp->umem_cfg.fill_size;
+
+ /* bcache. */
+ p->bc = bcache_init(params->bp);
+ if (!p->bc ||
+ (bcache_slab_size(p->bc) < umem_fq_size) ||
+ (bcache_cons_check(p->bc, umem_fq_size) < umem_fq_size)) {
+ port_free(p);
+ return NULL;
+ }
+
+ /* xsk socket. */
+ status = xsk_socket__create_shared(&p->xsk,
+ params->iface,
+ params->iface_queue,
+ params->bp->umem,
+ &p->rxq,
+ &p->txq,
+ &p->umem_fq,
+ &p->umem_cq,
+ ¶ms->xsk_cfg);
+ if (status) {
+ port_free(p);
+ return NULL;
+ }
+
+ /* umem fq. */
+ xsk_ring_prod__reserve(&p->umem_fq, umem_fq_size, &pos);
+
+ for (i = 0; i < umem_fq_size; i++)
+ *xsk_ring_prod__fill_addr(&p->umem_fq, pos + i) =
+ bcache_cons(p->bc);
+
+ xsk_ring_prod__submit(&p->umem_fq, umem_fq_size);
+ p->umem_fq_initialized = 1;
+
+ return p;
+}
+
+static inline u32
+port_rx_burst(struct port *p, struct burst_rx *b)
+{
+ u32 n_pkts, pos, i;
+
+ /* Free buffers for FQ replenish. */
+ n_pkts = ARRAY_SIZE(b->addr);
+
+ n_pkts = bcache_cons_check(p->bc, n_pkts);
+ if (!n_pkts)
+ return 0;
+
+ /* RXQ. */
+ n_pkts = xsk_ring_cons__peek(&p->rxq, n_pkts, &pos);
+ if (!n_pkts) {
+ if (xsk_ring_prod__needs_wakeup(&p->umem_fq)) {
+ struct pollfd pollfd = {
+ .fd = xsk_socket__fd(p->xsk),
+ .events = POLLIN,
+ };
+
+ poll(&pollfd, 1, 0);
+ }
+ return 0;
+ }
+
+ for (i = 0; i < n_pkts; i++) {
+ b->addr[i] = xsk_ring_cons__rx_desc(&p->rxq, pos + i)->addr;
+ b->len[i] = xsk_ring_cons__rx_desc(&p->rxq, pos + i)->len;
+ }
+
+ xsk_ring_cons__release(&p->rxq, n_pkts);
+ p->n_pkts_rx += n_pkts;
+
+ /* UMEM FQ. */
+ for ( ; ; ) {
+ int status;
+
+ status = xsk_ring_prod__reserve(&p->umem_fq, n_pkts, &pos);
+ if (status == n_pkts)
+ break;
+
+ if (xsk_ring_prod__needs_wakeup(&p->umem_fq)) {
+ struct pollfd pollfd = {
+ .fd = xsk_socket__fd(p->xsk),
+ .events = POLLIN,
+ };
+
+ poll(&pollfd, 1, 0);
+ }
+ }
+
+ for (i = 0; i < n_pkts; i++)
+ *xsk_ring_prod__fill_addr(&p->umem_fq, pos + i) =
+ bcache_cons(p->bc);
+
+ xsk_ring_prod__submit(&p->umem_fq, n_pkts);
+
+ return n_pkts;
+}
+
+static inline void
+port_tx_burst(struct port *p, struct burst_tx *b)
+{
+ u32 n_pkts, pos, i;
+ int status;
+
+ /* UMEM CQ. */
+ n_pkts = p->params.bp->umem_cfg.comp_size;
+
+ n_pkts = xsk_ring_cons__peek(&p->umem_cq, n_pkts, &pos);
+
+ for (i = 0; i < n_pkts; i++) {
+ u64 addr = *xsk_ring_cons__comp_addr(&p->umem_cq, pos + i);
+
+ bcache_prod(p->bc, addr);
+ }
+
+ xsk_ring_cons__release(&p->umem_cq, n_pkts);
+
+ /* TXQ. */
+ n_pkts = b->n_pkts;
+
+ for ( ; ; ) {
+ status = xsk_ring_prod__reserve(&p->txq, n_pkts, &pos);
+ if (status == n_pkts)
+ break;
+
+ if (xsk_ring_prod__needs_wakeup(&p->txq))
+ sendto(xsk_socket__fd(p->xsk), NULL, 0, MSG_DONTWAIT,
+ NULL, 0);
+ }
+
+ for (i = 0; i < n_pkts; i++) {
+ xsk_ring_prod__tx_desc(&p->txq, pos + i)->addr = b->addr[i];
+ xsk_ring_prod__tx_desc(&p->txq, pos + i)->len = b->len[i];
+ }
+
+ xsk_ring_prod__submit(&p->txq, n_pkts);
+ if (xsk_ring_prod__needs_wakeup(&p->txq))
+ sendto(xsk_socket__fd(p->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+ p->n_pkts_tx += n_pkts;
+}
+
+/*
+ * Thread
+ *
+ * Packet forwarding threads.
+ */
+#ifndef MAX_PORTS_PER_THREAD
+#define MAX_PORTS_PER_THREAD 16
+#endif
+
+struct thread_data {
+ struct port *ports_rx[MAX_PORTS_PER_THREAD];
+ struct port *ports_tx[MAX_PORTS_PER_THREAD];
+ u32 n_ports_rx;
+ struct burst_rx burst_rx;
+ struct burst_tx burst_tx[MAX_PORTS_PER_THREAD];
+ u32 cpu_core_id;
+ int quit;
+};
+
+static void swap_mac_addresses(void *data)
+{
+ struct ether_header *eth = (struct ether_header *)data;
+ struct ether_addr *src_addr = (struct ether_addr *)ð->ether_shost;
+ struct ether_addr *dst_addr = (struct ether_addr *)ð->ether_dhost;
+ struct ether_addr tmp;
+
+ tmp = *src_addr;
+ *src_addr = *dst_addr;
+ *dst_addr = tmp;
+}
+
+static void *
+thread_func(void *arg)
+{
+ struct thread_data *t = arg;
+ cpu_set_t cpu_cores;
+ u32 i;
+
+ CPU_ZERO(&cpu_cores);
+ CPU_SET(t->cpu_core_id, &cpu_cores);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpu_cores);
+
+ for (i = 0; !t->quit; i = (i + 1) & (t->n_ports_rx - 1)) {
+ struct port *port_rx = t->ports_rx[i];
+ struct port *port_tx = t->ports_tx[i];
+ struct burst_rx *brx = &t->burst_rx;
+ struct burst_tx *btx = &t->burst_tx[i];
+ u32 n_pkts, j;
+
+ /* RX. */
+ n_pkts = port_rx_burst(port_rx, brx);
+ if (!n_pkts)
+ continue;
+
+ /* Process & TX. */
+ for (j = 0; j < n_pkts; j++) {
+ u64 addr = xsk_umem__add_offset_to_addr(brx->addr[j]);
+ u8 *pkt = xsk_umem__get_data(port_rx->params.bp->addr,
+ addr);
+
+ swap_mac_addresses(pkt);
+
+ btx->addr[btx->n_pkts] = brx->addr[j];
+ btx->len[btx->n_pkts] = brx->len[j];
+ btx->n_pkts++;
+
+ if (btx->n_pkts == MAX_BURST_TX) {
+ port_tx_burst(port_tx, btx);
+ btx->n_pkts = 0;
+ }
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Process
+ */
+static const struct bpool_params bpool_params_default = {
+ .n_buffers = 64 * 1024,
+ .buffer_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+ .mmap_flags = 0,
+
+ .n_users_max = 16,
+ .n_buffers_per_slab = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2,
+};
+
+static const struct xsk_umem_config umem_cfg_default = {
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2,
+ .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+ .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+ .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
+ .flags = 0,
+};
+
+static const struct port_params port_params_default = {
+ .xsk_cfg = {
+ .rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+ .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .libbpf_flags = 0,
+ .xdp_flags = XDP_FLAGS_DRV_MODE,
+ .bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY,
+ },
+
+ .bp = NULL,
+ .iface = NULL,
+ .iface_queue = 0,
+};
+
+#ifndef MAX_PORTS
+#define MAX_PORTS 64
+#endif
+
+#ifndef MAX_THREADS
+#define MAX_THREADS 64
+#endif
+
+static struct bpool_params bpool_params;
+static struct xsk_umem_config umem_cfg;
+static struct bpool *bp;
+
+static struct port_params port_params[MAX_PORTS];
+static struct port *ports[MAX_PORTS];
+static u64 n_pkts_rx[MAX_PORTS];
+static u64 n_pkts_tx[MAX_PORTS];
+static int n_ports;
+
+static pthread_t threads[MAX_THREADS];
+static struct thread_data thread_data[MAX_THREADS];
+static int n_threads;
+
+static void
+print_usage(char *prog_name)
+{
+ const char *usage =
+ "Usage:\n"
+ "\t%s [ -b SIZE ] -c CORE -i INTERFACE [ -q QUEUE ]\n"
+ "\n"
+ "-c CORE CPU core to run a packet forwarding thread\n"
+ " on. May be invoked multiple times.\n"
+ "\n"
+ "-b SIZE Number of buffers in the buffer pool shared\n"
+ " by all the forwarding threads. Default: %u.\n"
+ "\n"
+ "-i INTERFACE Network interface. Each (INTERFACE, QUEUE)\n"
+ " pair specifies one forwarding port. May be\n"
+ " invoked multiple times.\n"
+ "\n"
+ "-q QUEUE Network interface queue for RX and TX. Each\n"
+ " (INTERFACE, QUEUE) pair specified one\n"
+ " forwarding port. Default: %u. May be invoked\n"
+ " multiple times.\n"
+ "\n";
+ printf(usage,
+ prog_name,
+ bpool_params_default.n_buffers,
+ port_params_default.iface_queue);
+}
+
+static int
+parse_args(int argc, char **argv)
+{
+ struct option lgopts[] = {
+ { NULL, 0, 0, 0 }
+ };
+ int opt, option_index;
+
+ /* Parse the input arguments. */
+ for ( ; ;) {
+ opt = getopt_long(argc, argv, "c:i:q:", lgopts, &option_index);
+ if (opt == EOF)
+ break;
+
+ switch (opt) {
+ case 'b':
+ bpool_params.n_buffers = atoi(optarg);
+ break;
+
+ case 'c':
+ if (n_threads == MAX_THREADS) {
+ printf("Max number of threads (%d) reached.\n",
+ MAX_THREADS);
+ return -1;
+ }
+
+ thread_data[n_threads].cpu_core_id = atoi(optarg);
+ n_threads++;
+ break;
+
+ case 'i':
+ if (n_ports == MAX_PORTS) {
+ printf("Max number of ports (%d) reached.\n",
+ MAX_PORTS);
+ return -1;
+ }
+
+ port_params[n_ports].iface = optarg;
+ port_params[n_ports].iface_queue = 0;
+ n_ports++;
+ break;
+
+ case 'q':
+ if (n_ports == 0) {
+ printf("No port specified for queue.\n");
+ return -1;
+ }
+ port_params[n_ports - 1].iface_queue = atoi(optarg);
+ break;
+
+ default:
+ printf("Illegal argument.\n");
+ return -1;
+ }
+ }
+
+ optind = 1; /* reset getopt lib */
+
+ /* Check the input arguments. */
+ if (!n_ports) {
+ printf("No ports specified.\n");
+ return -1;
+ }
+
+ if (!n_threads) {
+ printf("No threads specified.\n");
+ return -1;
+ }
+
+ if (n_ports % n_threads) {
+ printf("Ports cannot be evenly distributed to threads.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static void
+print_port(u32 port_id)
+{
+ struct port *port = ports[port_id];
+
+ printf("Port %u: interface = %s, queue = %u\n",
+ port_id, port->params.iface, port->params.iface_queue);
+}
+
+static void
+print_thread(u32 thread_id)
+{
+ struct thread_data *t = &thread_data[thread_id];
+ u32 i;
+
+ printf("Thread %u (CPU core %u): ",
+ thread_id, t->cpu_core_id);
+
+ for (i = 0; i < t->n_ports_rx; i++) {
+ struct port *port_rx = t->ports_rx[i];
+ struct port *port_tx = t->ports_tx[i];
+
+ printf("(%s, %u) -> (%s, %u), ",
+ port_rx->params.iface,
+ port_rx->params.iface_queue,
+ port_tx->params.iface,
+ port_tx->params.iface_queue);
+ }
+
+ printf("\n");
+}
+
+static void
+print_port_stats_separator(void)
+{
+ printf("+-%4s-+-%12s-+-%13s-+-%12s-+-%13s-+\n",
+ "----",
+ "------------",
+ "-------------",
+ "------------",
+ "-------------");
+}
+
+static void
+print_port_stats_header(void)
+{
+ print_port_stats_separator();
+ printf("| %4s | %12s | %13s | %12s | %13s |\n",
+ "Port",
+ "RX packets",
+ "RX rate (pps)",
+ "TX packets",
+ "TX_rate (pps)");
+ print_port_stats_separator();
+}
+
+static void
+print_port_stats_trailer(void)
+{
+ print_port_stats_separator();
+ printf("\n");
+}
+
+static void
+print_port_stats(int port_id, u64 ns_diff)
+{
+ struct port *p = ports[port_id];
+ double rx_pps, tx_pps;
+
+ rx_pps = (p->n_pkts_rx - n_pkts_rx[port_id]) * 1000000000. / ns_diff;
+ tx_pps = (p->n_pkts_tx - n_pkts_tx[port_id]) * 1000000000. / ns_diff;
+
+ printf("| %4d | %12llu | %13.0f | %12llu | %13.0f |\n",
+ port_id,
+ p->n_pkts_rx,
+ rx_pps,
+ p->n_pkts_tx,
+ tx_pps);
+
+ n_pkts_rx[port_id] = p->n_pkts_rx;
+ n_pkts_tx[port_id] = p->n_pkts_tx;
+}
+
+static void
+print_port_stats_all(u64 ns_diff)
+{
+ int i;
+
+ print_port_stats_header();
+ for (i = 0; i < n_ports; i++)
+ print_port_stats(i, ns_diff);
+ print_port_stats_trailer();
+}
+
+static int quit;
+
+static void
+signal_handler(int sig)
+{
+ quit = 1;
+}
+
+static void remove_xdp_program(void)
+{
+ int i;
+
+ for (i = 0 ; i < n_ports; i++)
+ bpf_set_link_xdp_fd(if_nametoindex(port_params[i].iface), -1,
+ port_params[i].xsk_cfg.xdp_flags);
+}
+
+int main(int argc, char **argv)
+{
+ struct timespec time;
+ u64 ns0;
+ int i;
+
+ /* Parse args. */
+ memcpy(&bpool_params, &bpool_params_default,
+ sizeof(struct bpool_params));
+ memcpy(&umem_cfg, &umem_cfg_default,
+ sizeof(struct xsk_umem_config));
+ for (i = 0; i < MAX_PORTS; i++)
+ memcpy(&port_params[i], &port_params_default,
+ sizeof(struct port_params));
+
+ if (parse_args(argc, argv)) {
+ print_usage(argv[0]);
+ return -1;
+ }
+
+ /* Buffer pool initialization. */
+ bp = bpool_init(&bpool_params, &umem_cfg);
+ if (!bp) {
+ printf("Buffer pool initialization failed.\n");
+ return -1;
+ }
+ printf("Buffer pool created successfully.\n");
+
+ /* Ports initialization. */
+ for (i = 0; i < MAX_PORTS; i++)
+ port_params[i].bp = bp;
+
+ for (i = 0; i < n_ports; i++) {
+ ports[i] = port_init(&port_params[i]);
+ if (!ports[i]) {
+ printf("Port %d initialization failed.\n", i);
+ return -1;
+ }
+ print_port(i);
+ }
+ printf("All ports created successfully.\n");
+
+ /* Threads. */
+ for (i = 0; i < n_threads; i++) {
+ struct thread_data *t = &thread_data[i];
+ u32 n_ports_per_thread = n_ports / n_threads, j;
+
+ for (j = 0; j < n_ports_per_thread; j++) {
+ t->ports_rx[j] = ports[i * n_ports_per_thread + j];
+ t->ports_tx[j] = ports[i * n_ports_per_thread +
+ (j + 1) % n_ports_per_thread];
+ }
+
+ t->n_ports_rx = n_ports_per_thread;
+
+ print_thread(i);
+ }
+
+ for (i = 0; i < n_threads; i++) {
+ int status;
+
+ status = pthread_create(&threads[i],
+ NULL,
+ thread_func,
+ &thread_data[i]);
+ if (status) {
+ printf("Thread %d creation failed.\n", i);
+ return -1;
+ }
+ }
+ printf("All threads created successfully.\n");
+
+ /* Print statistics. */
+ signal(SIGINT, signal_handler);
+ signal(SIGTERM, signal_handler);
+ signal(SIGABRT, signal_handler);
+
+ clock_gettime(CLOCK_MONOTONIC, &time);
+ ns0 = time.tv_sec * 1000000000UL + time.tv_nsec;
+ for ( ; !quit; ) {
+ u64 ns1, ns_diff;
+
+ sleep(1);
+ clock_gettime(CLOCK_MONOTONIC, &time);
+ ns1 = time.tv_sec * 1000000000UL + time.tv_nsec;
+ ns_diff = ns1 - ns0;
+ ns0 = ns1;
+
+ print_port_stats_all(ns_diff);
+ }
+
+ /* Threads completion. */
+ printf("Quit.\n");
+ for (i = 0; i < n_threads; i++)
+ thread_data[i].quit = 1;
+
+ for (i = 0; i < n_threads; i++)
+ pthread_join(threads[i], NULL);
+
+ for (i = 0; i < n_ports; i++)
+ port_free(ports[i]);
+
+ bpool_free(bp);
+
+ remove_xdp_program();
+
+ return 0;
+}
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 5bfa448..0838817 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -432,6 +432,7 @@
'struct __sk_buff',
'struct sk_msg_md',
'struct xdp_md',
+ 'struct path',
]
known_types = {
'...',
@@ -472,6 +473,7 @@
'struct tcp_request_sock',
'struct udp6_sock',
'struct task_struct',
+ 'struct path',
}
mapped_types = {
'u8': '__u8',
diff --git a/security/bpf/hooks.c b/security/bpf/hooks.c
index 32d32d4..788667d 100644
--- a/security/bpf/hooks.c
+++ b/security/bpf/hooks.c
@@ -11,6 +11,7 @@ static struct security_hook_list bpf_lsm_hooks[] __lsm_ro_after_init = {
LSM_HOOK_INIT(NAME, bpf_lsm_##NAME),
#include <linux/lsm_hook_defs.h>
#undef LSM_HOOK
+ LSM_HOOK_INIT(inode_free_security, bpf_inode_storage_free),
};
static int __init bpf_lsm_init(void)
@@ -20,7 +21,12 @@ static int __init bpf_lsm_init(void)
return 0;
}
+struct lsm_blob_sizes bpf_lsm_blob_sizes __lsm_ro_after_init = {
+ .lbs_inode = sizeof(struct bpf_storage_blob),
+};
+
DEFINE_LSM(bpf) = {
.name = "bpf",
.init = bpf_lsm_init,
+ .blobs = &bpf_lsm_blob_sizes
};
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 41e2a74..083db6c 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -49,7 +49,7 @@
| | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps**
| | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
-| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** }
+| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** }
DESCRIPTION
===========
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 8462690..02c99bc 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -176,7 +176,11 @@
$(OUTPUT)%.o: %.c
$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
-clean: $(LIBBPF)-clean
+feature-detect-clean:
+ $(call QUIET_CLEAN, feature-detect)
+ $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
+
+clean: $(LIBBPF)-clean feature-detect-clean
$(call QUIET_CLEAN, bpftool)
$(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
$(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index f53ed2f..7b68e3c 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -704,7 +704,8 @@
lru_percpu_hash lpm_trie array_of_maps \
hash_of_maps devmap devmap_hash sockmap cpumap \
xskmap sockhash cgroup_storage reuseport_sockarray \
- percpu_cgroup_storage queue stack' -- \
+ percpu_cgroup_storage queue stack sk_storage \
+ struct_ops inode_storage' -- \
"$cur" ) )
return 0
;;
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index f611846..4033c46d 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -19,11 +19,9 @@
#include <sys/mman.h>
#include <bpf/btf.h>
-#include "bpf/libbpf_internal.h"
#include "json_writer.h"
#include "main.h"
-
#define MAX_OBJ_NAME_LEN 64
static void sanitize_identifier(char *name)
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index a89f09e..e77e152 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -77,6 +77,22 @@ static void show_link_attach_type_json(__u32 attach_type, json_writer_t *wtr)
jsonw_uint_field(wtr, "attach_type", attach_type);
}
+static bool is_iter_map_target(const char *target_name)
+{
+ return strcmp(target_name, "bpf_map_elem") == 0 ||
+ strcmp(target_name, "bpf_sk_storage_map") == 0;
+}
+
+static void show_iter_json(struct bpf_link_info *info, json_writer_t *wtr)
+{
+ const char *target_name = u64_to_ptr(info->iter.target_name);
+
+ jsonw_string_field(wtr, "target_name", target_name);
+
+ if (is_iter_map_target(target_name))
+ jsonw_uint_field(wtr, "map_id", info->iter.map.map_id);
+}
+
static int get_prog_info(int prog_id, struct bpf_prog_info *info)
{
__u32 len = sizeof(*info);
@@ -128,6 +144,9 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
info->cgroup.cgroup_id);
show_link_attach_type_json(info->cgroup.attach_type, json_wtr);
break;
+ case BPF_LINK_TYPE_ITER:
+ show_iter_json(info, json_wtr);
+ break;
case BPF_LINK_TYPE_NETNS:
jsonw_uint_field(json_wtr, "netns_ino",
info->netns.netns_ino);
@@ -175,6 +194,16 @@ static void show_link_attach_type_plain(__u32 attach_type)
printf("attach_type %u ", attach_type);
}
+static void show_iter_plain(struct bpf_link_info *info)
+{
+ const char *target_name = u64_to_ptr(info->iter.target_name);
+
+ printf("target_name %s ", target_name);
+
+ if (is_iter_map_target(target_name))
+ printf("map_id %u ", info->iter.map.map_id);
+}
+
static int show_link_close_plain(int fd, struct bpf_link_info *info)
{
struct bpf_prog_info prog_info;
@@ -204,6 +233,9 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
printf("\n\tcgroup_id %zu ", (size_t)info->cgroup.cgroup_id);
show_link_attach_type_plain(info->cgroup.attach_type);
break;
+ case BPF_LINK_TYPE_ITER:
+ show_iter_plain(info);
+ break;
case BPF_LINK_TYPE_NETNS:
printf("\n\tnetns_ino %u ", info->netns.netns_ino);
show_link_attach_type_plain(info->netns.attach_type);
@@ -231,7 +263,7 @@ static int do_show_link(int fd)
{
struct bpf_link_info info;
__u32 len = sizeof(info);
- char raw_tp_name[256];
+ char buf[256];
int err;
memset(&info, 0, sizeof(info));
@@ -245,8 +277,14 @@ static int do_show_link(int fd)
}
if (info.type == BPF_LINK_TYPE_RAW_TRACEPOINT &&
!info.raw_tracepoint.tp_name) {
- info.raw_tracepoint.tp_name = (unsigned long)&raw_tp_name;
- info.raw_tracepoint.tp_name_len = sizeof(raw_tp_name);
+ info.raw_tracepoint.tp_name = (unsigned long)&buf;
+ info.raw_tracepoint.tp_name_len = sizeof(buf);
+ goto again;
+ }
+ if (info.type == BPF_LINK_TYPE_ITER &&
+ !info.iter.target_name) {
+ info.iter.target_name = (unsigned long)&buf;
+ info.iter.target_name_len = sizeof(buf);
goto again;
}
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 3a27d31a..bc00712 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -50,6 +50,7 @@ const char * const map_type_name[] = {
[BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
[BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
[BPF_MAP_TYPE_RINGBUF] = "ringbuf",
+ [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
};
const size_t map_type_name_size = ARRAY_SIZE(map_type_name);
@@ -1442,7 +1443,7 @@ static int do_help(int argc, char **argv)
" lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n"
" devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
" cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
- " queue | stack | sk_storage | struct_ops | ringbuf }\n"
+ " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index 56c3a2b..910e7ba 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -6,22 +6,27 @@
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
+#include <time.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <net/if.h>
#include <linux/if.h>
#include <linux/rtnetlink.h>
+#include <linux/socket.h>
#include <linux/tc_act/tc_bpf.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "bpf/nlattr.h"
-#include "bpf/libbpf_internal.h"
#include "main.h"
#include "netlink_dumper.h"
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
struct ip_devname_ifindex {
char devname[64];
int ifindex;
@@ -85,6 +90,266 @@ static enum net_attach_type parse_attach_type(const char *str)
return net_attach_type_size;
}
+typedef int (*dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
+
+typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, dump_nlmsg_t, void *cookie);
+
+static int netlink_open(__u32 *nl_pid)
+{
+ struct sockaddr_nl sa;
+ socklen_t addrlen;
+ int one = 1, ret;
+ int sock;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.nl_family = AF_NETLINK;
+
+ sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (sock < 0)
+ return -errno;
+
+ if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,
+ &one, sizeof(one)) < 0) {
+ p_err("Netlink error reporting not supported");
+ }
+
+ if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ addrlen = sizeof(sa);
+ if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ if (addrlen != sizeof(sa)) {
+ ret = -LIBBPF_ERRNO__INTERNAL;
+ goto cleanup;
+ }
+
+ *nl_pid = sa.nl_pid;
+ return sock;
+
+cleanup:
+ close(sock);
+ return ret;
+}
+
+static int netlink_recv(int sock, __u32 nl_pid, __u32 seq,
+ __dump_nlmsg_t _fn, dump_nlmsg_t fn,
+ void *cookie)
+{
+ bool multipart = true;
+ struct nlmsgerr *err;
+ struct nlmsghdr *nh;
+ char buf[4096];
+ int len, ret;
+
+ while (multipart) {
+ multipart = false;
+ len = recv(sock, buf, sizeof(buf), 0);
+ if (len < 0) {
+ ret = -errno;
+ goto done;
+ }
+
+ if (len == 0)
+ break;
+
+ for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+ nh = NLMSG_NEXT(nh, len)) {
+ if (nh->nlmsg_pid != nl_pid) {
+ ret = -LIBBPF_ERRNO__WRNGPID;
+ goto done;
+ }
+ if (nh->nlmsg_seq != seq) {
+ ret = -LIBBPF_ERRNO__INVSEQ;
+ goto done;
+ }
+ if (nh->nlmsg_flags & NLM_F_MULTI)
+ multipart = true;
+ switch (nh->nlmsg_type) {
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(nh);
+ if (!err->error)
+ continue;
+ ret = err->error;
+ libbpf_nla_dump_errormsg(nh);
+ goto done;
+ case NLMSG_DONE:
+ return 0;
+ default:
+ break;
+ }
+ if (_fn) {
+ ret = _fn(nh, fn, cookie);
+ if (ret)
+ return ret;
+ }
+ }
+ }
+ ret = 0;
+done:
+ return ret;
+}
+
+static int __dump_class_nlmsg(struct nlmsghdr *nlh,
+ dump_nlmsg_t dump_class_nlmsg,
+ void *cookie)
+{
+ struct nlattr *tb[TCA_MAX + 1], *attr;
+ struct tcmsg *t = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+ attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+ if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_class_nlmsg(cookie, t, tb);
+}
+
+static int netlink_get_class(int sock, unsigned int nl_pid, int ifindex,
+ dump_nlmsg_t dump_class_nlmsg, void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct tcmsg t;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+ .nlh.nlmsg_type = RTM_GETTCLASS,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .t.tcm_family = AF_UNSPEC,
+ .t.tcm_ifindex = ifindex,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg,
+ dump_class_nlmsg, cookie);
+}
+
+static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh,
+ dump_nlmsg_t dump_qdisc_nlmsg,
+ void *cookie)
+{
+ struct nlattr *tb[TCA_MAX + 1], *attr;
+ struct tcmsg *t = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+ attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+ if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_qdisc_nlmsg(cookie, t, tb);
+}
+
+static int netlink_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
+ dump_nlmsg_t dump_qdisc_nlmsg, void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct tcmsg t;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+ .nlh.nlmsg_type = RTM_GETQDISC,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .t.tcm_family = AF_UNSPEC,
+ .t.tcm_ifindex = ifindex,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg,
+ dump_qdisc_nlmsg, cookie);
+}
+
+static int __dump_filter_nlmsg(struct nlmsghdr *nlh,
+ dump_nlmsg_t dump_filter_nlmsg,
+ void *cookie)
+{
+ struct nlattr *tb[TCA_MAX + 1], *attr;
+ struct tcmsg *t = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+ attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+ if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_filter_nlmsg(cookie, t, tb);
+}
+
+static int netlink_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
+ dump_nlmsg_t dump_filter_nlmsg, void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct tcmsg t;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+ .nlh.nlmsg_type = RTM_GETTFILTER,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .t.tcm_family = AF_UNSPEC,
+ .t.tcm_ifindex = ifindex,
+ .t.tcm_parent = handle,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg,
+ dump_filter_nlmsg, cookie);
+}
+
+static int __dump_link_nlmsg(struct nlmsghdr *nlh,
+ dump_nlmsg_t dump_link_nlmsg, void *cookie)
+{
+ struct nlattr *tb[IFLA_MAX + 1], *attr;
+ struct ifinfomsg *ifi = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
+ attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi)));
+ if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_link_nlmsg(cookie, ifi, tb);
+}
+
+static int netlink_get_link(int sock, unsigned int nl_pid,
+ dump_nlmsg_t dump_link_nlmsg, void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct ifinfomsg ifm;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+ .nlh.nlmsg_type = RTM_GETLINK,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .ifm.ifi_family = AF_PACKET,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg,
+ dump_link_nlmsg, cookie);
+}
+
static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb)
{
struct bpf_netdev_t *netinfo = cookie;
@@ -168,14 +433,14 @@ static int show_dev_tc_bpf(int sock, unsigned int nl_pid,
tcinfo.array_len = 0;
tcinfo.is_qdisc = false;
- ret = libbpf_nl_get_class(sock, nl_pid, dev->ifindex,
- dump_class_qdisc_nlmsg, &tcinfo);
+ ret = netlink_get_class(sock, nl_pid, dev->ifindex,
+ dump_class_qdisc_nlmsg, &tcinfo);
if (ret)
goto out;
tcinfo.is_qdisc = true;
- ret = libbpf_nl_get_qdisc(sock, nl_pid, dev->ifindex,
- dump_class_qdisc_nlmsg, &tcinfo);
+ ret = netlink_get_qdisc(sock, nl_pid, dev->ifindex,
+ dump_class_qdisc_nlmsg, &tcinfo);
if (ret)
goto out;
@@ -183,9 +448,9 @@ static int show_dev_tc_bpf(int sock, unsigned int nl_pid,
filter_info.ifindex = dev->ifindex;
for (i = 0; i < tcinfo.used_len; i++) {
filter_info.kind = tcinfo.handle_array[i].kind;
- ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex,
- tcinfo.handle_array[i].handle,
- dump_filter_nlmsg, &filter_info);
+ ret = netlink_get_filter(sock, nl_pid, dev->ifindex,
+ tcinfo.handle_array[i].handle,
+ dump_filter_nlmsg, &filter_info);
if (ret)
goto out;
}
@@ -193,22 +458,22 @@ static int show_dev_tc_bpf(int sock, unsigned int nl_pid,
/* root, ingress and egress handle */
handle = TC_H_ROOT;
filter_info.kind = "root";
- ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle,
- dump_filter_nlmsg, &filter_info);
+ ret = netlink_get_filter(sock, nl_pid, dev->ifindex, handle,
+ dump_filter_nlmsg, &filter_info);
if (ret)
goto out;
handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
filter_info.kind = "clsact/ingress";
- ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle,
- dump_filter_nlmsg, &filter_info);
+ ret = netlink_get_filter(sock, nl_pid, dev->ifindex, handle,
+ dump_filter_nlmsg, &filter_info);
if (ret)
goto out;
handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS);
filter_info.kind = "clsact/egress";
- ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle,
- dump_filter_nlmsg, &filter_info);
+ ret = netlink_get_filter(sock, nl_pid, dev->ifindex, handle,
+ dump_filter_nlmsg, &filter_info);
if (ret)
goto out;
@@ -386,7 +651,7 @@ static int do_show(int argc, char **argv)
struct bpf_attach_info attach_info = {};
int i, sock, ret, filter_idx = -1;
struct bpf_netdev_t dev_array;
- unsigned int nl_pid;
+ unsigned int nl_pid = 0;
char err_buf[256];
if (argc == 2) {
@@ -401,7 +666,7 @@ static int do_show(int argc, char **argv)
if (ret)
return -1;
- sock = libbpf_netlink_open(&nl_pid);
+ sock = netlink_open(&nl_pid);
if (sock < 0) {
fprintf(stderr, "failed to open netlink sock\n");
return -1;
@@ -416,7 +681,7 @@ static int do_show(int argc, char **argv)
jsonw_start_array(json_wtr);
NET_START_OBJECT;
NET_START_ARRAY("xdp", "%s:\n");
- ret = libbpf_nl_get_link(sock, nl_pid, dump_link_nlmsg, &dev_array);
+ ret = netlink_get_link(sock, nl_pid, dump_link_nlmsg, &dev_array);
NET_END_ARRAY("\n");
if (!ret) {
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 0def0bb..dfa540d 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -199,9 +199,16 @@ static char *get_id(const char *prefix_end)
/*
* __BTF_ID__func__vfs_truncate__0
* prefix_end = ^
+ * pos = ^
*/
- char *p, *id = strdup(prefix_end + sizeof("__") - 1);
+ int len = strlen(prefix_end);
+ int pos = sizeof("__") - 1;
+ char *p, *id;
+ if (pos >= len)
+ return NULL;
+
+ id = strdup(prefix_end + pos);
if (id) {
/*
* __BTF_ID__func__vfs_truncate__0
@@ -220,6 +227,24 @@ static char *get_id(const char *prefix_end)
return id;
}
+static struct btf_id *add_set(struct object *obj, char *name)
+{
+ /*
+ * __BTF_ID__set__name
+ * name = ^
+ * id = ^
+ */
+ char *id = name + sizeof(BTF_SET "__") - 1;
+ int len = strlen(name);
+
+ if (id >= name + len) {
+ pr_err("FAILED to parse set name: %s\n", name);
+ return NULL;
+ }
+
+ return btf_id__add(&obj->sets, id, true);
+}
+
static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size)
{
char *id;
@@ -412,7 +437,7 @@ static int symbols_collect(struct object *obj)
id = add_symbol(&obj->funcs, prefix, sizeof(BTF_FUNC) - 1);
/* set */
} else if (!strncmp(prefix, BTF_SET, sizeof(BTF_SET) - 1)) {
- id = add_symbol(&obj->sets, prefix, sizeof(BTF_SET) - 1);
+ id = add_set(obj, prefix);
/*
* SET objects store list's count, which is encoded
* in symbol's size, together with 'cnt' field hence
diff --git a/tools/build/Makefile b/tools/build/Makefile
index 727050c..722f170 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -38,6 +38,8 @@
$(call QUIET_CLEAN, fixdep)
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)rm -f $(OUTPUT)fixdep
+ $(call QUIET_CLEAN, feature-detect)
+ $(Q)$(MAKE) -C feature/ clean >/dev/null
$(OUTPUT)fixdep-in.o: FORCE
$(Q)$(MAKE) $(build)=fixdep
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index c1daf4d5..38415d2 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -46,7 +46,6 @@
libelf-getphdrnum \
libelf-gelf_getnote \
libelf-getshdrstrndx \
- libelf-mmap \
libnuma \
numa_num_possible_cpus \
libperl \
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index d220fe9..b2a2347 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -25,7 +25,6 @@
test-libelf-getphdrnum.bin \
test-libelf-gelf_getnote.bin \
test-libelf-getshdrstrndx.bin \
- test-libelf-mmap.bin \
test-libdebuginfod.bin \
test-libnuma.bin \
test-numa_num_possible_cpus.bin \
@@ -146,9 +145,6 @@
$(OUTPUT)test-dwarf_getlocations.bin:
$(BUILD) $(DWARFLIBS)
-$(OUTPUT)test-libelf-mmap.bin:
- $(BUILD) -lelf
-
$(OUTPUT)test-libelf-getphdrnum.bin:
$(BUILD) -lelf
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 5479e54..5284e6e 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -30,10 +30,6 @@
# include "test-libelf.c"
#undef main
-#define main main_test_libelf_mmap
-# include "test-libelf-mmap.c"
-#undef main
-
#define main main_test_get_current_dir_name
# include "test-get_current_dir_name.c"
#undef main
diff --git a/tools/build/feature/test-libelf-mmap.c b/tools/build/feature/test-libelf-mmap.c
deleted file mode 100644
index 2c3ef81..0000000
--- a/tools/build/feature/test-libelf-mmap.c
+++ /dev/null
@@ -1,9 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <libelf.h>
-
-int main(void)
-{
- Elf *elf = elf_begin(0, ELF_C_READ_MMAP, 0);
-
- return (long)elf;
-}
diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h
index 4867d54..210b086 100644
--- a/tools/include/linux/btf_ids.h
+++ b/tools/include/linux/btf_ids.h
@@ -3,6 +3,11 @@
#ifndef _LINUX_BTF_IDS_H
#define _LINUX_BTF_IDS_H
+struct btf_id_set {
+ u32 cnt;
+ u32 ids[];
+};
+
#ifdef CONFIG_DEBUG_INFO_BTF
#include <linux/compiler.h> /* for __PASTE */
@@ -62,7 +67,7 @@ asm( \
".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
"." #scope " " #name "; \n" \
#name ":; \n" \
-".popsection; \n"); \
+".popsection; \n");
#define BTF_ID_LIST(name) \
__BTF_ID_LIST(name, local) \
@@ -88,12 +93,56 @@ asm( \
".zero 4 \n" \
".popsection; \n");
+/*
+ * The BTF_SET_START/END macros pair defines sorted list of
+ * BTF IDs plus its members count, with following layout:
+ *
+ * BTF_SET_START(list)
+ * BTF_ID(type1, name1)
+ * BTF_ID(type2, name2)
+ * BTF_SET_END(list)
+ *
+ * __BTF_ID__set__list:
+ * .zero 4
+ * list:
+ * __BTF_ID__type1__name1__3:
+ * .zero 4
+ * __BTF_ID__type2__name2__4:
+ * .zero 4
+ *
+ */
+#define __BTF_SET_START(name, scope) \
+asm( \
+".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
+"." #scope " __BTF_ID__set__" #name "; \n" \
+"__BTF_ID__set__" #name ":; \n" \
+".zero 4 \n" \
+".popsection; \n");
+
+#define BTF_SET_START(name) \
+__BTF_ID_LIST(name, local) \
+__BTF_SET_START(name, local)
+
+#define BTF_SET_START_GLOBAL(name) \
+__BTF_ID_LIST(name, globl) \
+__BTF_SET_START(name, globl)
+
+#define BTF_SET_END(name) \
+asm( \
+".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
+".size __BTF_ID__set__" #name ", .-" #name " \n" \
+".popsection; \n"); \
+extern struct btf_id_set name;
+
#else
#define BTF_ID_LIST(name) static u32 name[5];
#define BTF_ID(prefix, name)
#define BTF_ID_UNUSED
#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
+#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
+#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
+#define BTF_SET_END(name)
#endif /* CONFIG_DEBUG_INFO_BTF */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b6238b2..8dda138 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -155,6 +155,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_DEVMAP_HASH,
BPF_MAP_TYPE_STRUCT_OPS,
BPF_MAP_TYPE_RINGBUF,
+ BPF_MAP_TYPE_INODE_STORAGE,
};
/* Note that tracing related programs such as
@@ -345,6 +346,14 @@ enum bpf_link_type {
/* The verifier internal test flag. Behavior is undefined */
#define BPF_F_TEST_STATE_FREQ (1U << 3)
+/* If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will
+ * restrict map and helper usage for such programs. Sleepable BPF programs can
+ * only be attached to hooks where kernel execution context allows sleeping.
+ * Such programs are allowed to use helpers that may sleep like
+ * bpf_copy_from_user().
+ */
+#define BPF_F_SLEEPABLE (1U << 4)
+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* two extensions:
*
@@ -2807,7 +2816,7 @@ union bpf_attr {
*
* **-ERANGE** if resulting value was out of range.
*
- * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags)
+ * void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags)
* Description
* Get a bpf-local-storage from a *sk*.
*
@@ -2823,6 +2832,9 @@ union bpf_attr {
* "type". The bpf-local-storage "type" (i.e. the *map*) is
* searched against all bpf-local-storages residing at *sk*.
*
+ * *sk* is a kernel **struct sock** pointer for LSM program.
+ * *sk* is a **struct bpf_sock** pointer for other program types.
+ *
* An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be
* used such that a new bpf-local-storage will be
* created if one does not exist. *value* can be used
@@ -2835,7 +2847,7 @@ union bpf_attr {
* **NULL** if not found or there was an error in adding
* a new bpf-local-storage.
*
- * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
+ * long bpf_sk_storage_delete(struct bpf_map *map, void *sk)
* Description
* Delete a bpf-local-storage from a *sk*.
* Return
@@ -3395,6 +3407,175 @@ union bpf_attr {
* A non-negative value equal to or less than *size* on success,
* or a negative error in case of failure.
*
+ * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags)
+ * Description
+ * Load header option. Support reading a particular TCP header
+ * option for bpf program (BPF_PROG_TYPE_SOCK_OPS).
+ *
+ * If *flags* is 0, it will search the option from the
+ * sock_ops->skb_data. The comment in "struct bpf_sock_ops"
+ * has details on what skb_data contains under different
+ * sock_ops->op.
+ *
+ * The first byte of the *searchby_res* specifies the
+ * kind that it wants to search.
+ *
+ * If the searching kind is an experimental kind
+ * (i.e. 253 or 254 according to RFC6994). It also
+ * needs to specify the "magic" which is either
+ * 2 bytes or 4 bytes. It then also needs to
+ * specify the size of the magic by using
+ * the 2nd byte which is "kind-length" of a TCP
+ * header option and the "kind-length" also
+ * includes the first 2 bytes "kind" and "kind-length"
+ * itself as a normal TCP header option also does.
+ *
+ * For example, to search experimental kind 254 with
+ * 2 byte magic 0xeB9F, the searchby_res should be
+ * [ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ].
+ *
+ * To search for the standard window scale option (3),
+ * the searchby_res should be [ 3, 0, 0, .... 0 ].
+ * Note, kind-length must be 0 for regular option.
+ *
+ * Searching for No-Op (0) and End-of-Option-List (1) are
+ * not supported.
+ *
+ * *len* must be at least 2 bytes which is the minimal size
+ * of a header option.
+ *
+ * Supported flags:
+ * * **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the
+ * saved_syn packet or the just-received syn packet.
+ *
+ * Return
+ * >0 when found, the header option is copied to *searchby_res*.
+ * The return value is the total length copied.
+ *
+ * **-EINVAL** If param is invalid
+ *
+ * **-ENOMSG** The option is not found
+ *
+ * **-ENOENT** No syn packet available when
+ * **BPF_LOAD_HDR_OPT_TCP_SYN** is used
+ *
+ * **-ENOSPC** Not enough space. Only *len* number of
+ * bytes are copied.
+ *
+ * **-EFAULT** Cannot parse the header options in the packet
+ *
+ * **-EPERM** This helper cannot be used under the
+ * current sock_ops->op.
+ *
+ * long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags)
+ * Description
+ * Store header option. The data will be copied
+ * from buffer *from* with length *len* to the TCP header.
+ *
+ * The buffer *from* should have the whole option that
+ * includes the kind, kind-length, and the actual
+ * option data. The *len* must be at least kind-length
+ * long. The kind-length does not have to be 4 byte
+ * aligned. The kernel will take care of the padding
+ * and setting the 4 bytes aligned value to th->doff.
+ *
+ * This helper will check for duplicated option
+ * by searching the same option in the outgoing skb.
+ *
+ * This helper can only be called during
+ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ *
+ * Return
+ * 0 on success, or negative error in case of failure:
+ *
+ * **-EINVAL** If param is invalid
+ *
+ * **-ENOSPC** Not enough space in the header.
+ * Nothing has been written
+ *
+ * **-EEXIST** The option has already existed
+ *
+ * **-EFAULT** Cannot parse the existing header options
+ *
+ * **-EPERM** This helper cannot be used under the
+ * current sock_ops->op.
+ *
+ * long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags)
+ * Description
+ * Reserve *len* bytes for the bpf header option. The
+ * space will be used by bpf_store_hdr_opt() later in
+ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ *
+ * If bpf_reserve_hdr_opt() is called multiple times,
+ * the total number of bytes will be reserved.
+ *
+ * This helper can only be called during
+ * BPF_SOCK_OPS_HDR_OPT_LEN_CB.
+ *
+ * Return
+ * 0 on success, or negative error in case of failure:
+ *
+ * **-EINVAL** if param is invalid
+ *
+ * **-ENOSPC** Not enough space in the header.
+ *
+ * **-EPERM** This helper cannot be used under the
+ * current sock_ops->op.
+ *
+ * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags)
+ * Description
+ * Get a bpf_local_storage from an *inode*.
+ *
+ * Logically, it could be thought of as getting the value from
+ * a *map* with *inode* as the **key**. From this
+ * perspective, the usage is not much different from
+ * **bpf_map_lookup_elem**\ (*map*, **&**\ *inode*) except this
+ * helper enforces the key must be an inode and the map must also
+ * be a **BPF_MAP_TYPE_INODE_STORAGE**.
+ *
+ * Underneath, the value is stored locally at *inode* instead of
+ * the *map*. The *map* is used as the bpf-local-storage
+ * "type". The bpf-local-storage "type" (i.e. the *map*) is
+ * searched against all bpf_local_storage residing at *inode*.
+ *
+ * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ * used such that a new bpf_local_storage will be
+ * created if one does not exist. *value* can be used
+ * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ * the initial value of a bpf_local_storage. If *value* is
+ * **NULL**, the new bpf_local_storage will be zero initialized.
+ * Return
+ * A bpf_local_storage pointer is returned on success.
+ *
+ * **NULL** if not found or there was an error in adding
+ * a new bpf_local_storage.
+ *
+ * int bpf_inode_storage_delete(struct bpf_map *map, void *inode)
+ * Description
+ * Delete a bpf_local_storage from an *inode*.
+ * Return
+ * 0 on success.
+ *
+ * **-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * long bpf_d_path(struct path *path, char *buf, u32 sz)
+ * Description
+ * Return full path for given 'struct path' object, which
+ * needs to be the kernel BTF 'path' object. The path is
+ * returned in the provided buffer 'buf' of size 'sz' and
+ * is zero terminated.
+ *
+ * Return
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ *
+ * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr)
+ * Description
+ * Read *size* bytes from user space address *user_ptr* and store
+ * the data in *dst*. This is a wrapper of copy_from_user().
+ * Return
+ * 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3539,6 +3720,13 @@ union bpf_attr {
FN(skc_to_tcp_request_sock), \
FN(skc_to_udp6_sock), \
FN(get_task_stack), \
+ FN(load_hdr_opt), \
+ FN(store_hdr_opt), \
+ FN(reserve_hdr_opt), \
+ FN(inode_storage_get), \
+ FN(inode_storage_delete), \
+ FN(d_path), \
+ FN(copy_from_user), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -3648,9 +3836,13 @@ enum {
BPF_F_SYSCTL_BASE_NAME = (1ULL << 0),
};
-/* BPF_FUNC_sk_storage_get flags */
+/* BPF_FUNC_<kernel_obj>_storage_get flags */
enum {
- BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0),
+ BPF_LOCAL_STORAGE_GET_F_CREATE = (1ULL << 0),
+ /* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility
+ * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead.
+ */
+ BPF_SK_STORAGE_GET_F_CREATE = BPF_LOCAL_STORAGE_GET_F_CREATE,
};
/* BPF_FUNC_read_branch_records flags. */
@@ -4071,6 +4263,15 @@ struct bpf_link_info {
__u64 cgroup_id;
__u32 attach_type;
} cgroup;
+ struct {
+ __aligned_u64 target_name; /* in/out: target_name buffer ptr */
+ __u32 target_name_len; /* in/out: target_name buffer len */
+ union {
+ struct {
+ __u32 map_id;
+ } map;
+ };
+ } iter;
struct {
__u32 netns_ino;
__u32 attach_type;
@@ -4158,6 +4359,36 @@ struct bpf_sock_ops {
__u64 bytes_received;
__u64 bytes_acked;
__bpf_md_ptr(struct bpf_sock *, sk);
+ /* [skb_data, skb_data_end) covers the whole TCP header.
+ *
+ * BPF_SOCK_OPS_PARSE_HDR_OPT_CB: The packet received
+ * BPF_SOCK_OPS_HDR_OPT_LEN_CB: Not useful because the
+ * header has not been written.
+ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB: The header and options have
+ * been written so far.
+ * BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: The SYNACK that concludes
+ * the 3WHS.
+ * BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: The ACK that concludes
+ * the 3WHS.
+ *
+ * bpf_load_hdr_opt() can also be used to read a particular option.
+ */
+ __bpf_md_ptr(void *, skb_data);
+ __bpf_md_ptr(void *, skb_data_end);
+ __u32 skb_len; /* The total length of a packet.
+ * It includes the header, options,
+ * and payload.
+ */
+ __u32 skb_tcp_flags; /* tcp_flags of the header. It provides
+ * an easy way to check for tcp_flags
+ * without parsing skb_data.
+ *
+ * In particular, the skb_tcp_flags
+ * will still be available in
+ * BPF_SOCK_OPS_HDR_OPT_LEN even though
+ * the outgoing header has not
+ * been written yet.
+ */
};
/* Definitions for bpf_sock_ops_cb_flags */
@@ -4166,8 +4397,51 @@ enum {
BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1),
BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2),
BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3),
+ /* Call bpf for all received TCP headers. The bpf prog will be
+ * called under sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+ *
+ * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+ * for the header option related helpers that will be useful
+ * to the bpf programs.
+ *
+ * It could be used at the client/active side (i.e. connect() side)
+ * when the server told it that the server was in syncookie
+ * mode and required the active side to resend the bpf-written
+ * options. The active side can keep writing the bpf-options until
+ * it received a valid packet from the server side to confirm
+ * the earlier packet (and options) has been received. The later
+ * example patch is using it like this at the active side when the
+ * server is in syncookie mode.
+ *
+ * The bpf prog will usually turn this off in the common cases.
+ */
+ BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4),
+ /* Call bpf when kernel has received a header option that
+ * the kernel cannot handle. The bpf prog will be called under
+ * sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB.
+ *
+ * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+ * for the header option related helpers that will be useful
+ * to the bpf programs.
+ */
+ BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5),
+ /* Call bpf when the kernel is writing header options for the
+ * outgoing packet. The bpf prog will first be called
+ * to reserve space in a skb under
+ * sock_ops->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB. Then
+ * the bpf prog will be called to write the header option(s)
+ * under sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ *
+ * Please refer to the comment in BPF_SOCK_OPS_HDR_OPT_LEN_CB
+ * and BPF_SOCK_OPS_WRITE_HDR_OPT_CB for the header option
+ * related helpers that will be useful to the bpf programs.
+ *
+ * The kernel gets its chance to reserve space and write
+ * options first before the BPF program does.
+ */
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6),
/* Mask of all currently supported cb flags */
- BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF,
+ BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F,
};
/* List of known BPF sock_ops operators.
@@ -4223,6 +4497,63 @@ enum {
*/
BPF_SOCK_OPS_RTT_CB, /* Called on every RTT.
*/
+ BPF_SOCK_OPS_PARSE_HDR_OPT_CB, /* Parse the header option.
+ * It will be called to handle
+ * the packets received at
+ * an already established
+ * connection.
+ *
+ * sock_ops->skb_data:
+ * Referring to the received skb.
+ * It covers the TCP header only.
+ *
+ * bpf_load_hdr_opt() can also
+ * be used to search for a
+ * particular option.
+ */
+ BPF_SOCK_OPS_HDR_OPT_LEN_CB, /* Reserve space for writing the
+ * header option later in
+ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ * Arg1: bool want_cookie. (in
+ * writing SYNACK only)
+ *
+ * sock_ops->skb_data:
+ * Not available because no header has
+ * been written yet.
+ *
+ * sock_ops->skb_tcp_flags:
+ * The tcp_flags of the
+ * outgoing skb. (e.g. SYN, ACK, FIN).
+ *
+ * bpf_reserve_hdr_opt() should
+ * be used to reserve space.
+ */
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB, /* Write the header options
+ * Arg1: bool want_cookie. (in
+ * writing SYNACK only)
+ *
+ * sock_ops->skb_data:
+ * Referring to the outgoing skb.
+ * It covers the TCP header
+ * that has already been written
+ * by the kernel and the
+ * earlier bpf-progs.
+ *
+ * sock_ops->skb_tcp_flags:
+ * The tcp_flags of the outgoing
+ * skb. (e.g. SYN, ACK, FIN).
+ *
+ * bpf_store_hdr_opt() should
+ * be used to write the
+ * option.
+ *
+ * bpf_load_hdr_opt() can also
+ * be used to search for a
+ * particular option that
+ * has already been written
+ * by the kernel or the
+ * earlier bpf-progs.
+ */
};
/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
@@ -4250,6 +4581,63 @@ enum {
enum {
TCP_BPF_IW = 1001, /* Set TCP initial congestion window */
TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */
+ TCP_BPF_DELACK_MAX = 1003, /* Max delay ack in usecs */
+ TCP_BPF_RTO_MIN = 1004, /* Min delay ack in usecs */
+ /* Copy the SYN pkt to optval
+ *
+ * BPF_PROG_TYPE_SOCK_OPS only. It is similar to the
+ * bpf_getsockopt(TCP_SAVED_SYN) but it does not limit
+ * to only getting from the saved_syn. It can either get the
+ * syn packet from:
+ *
+ * 1. the just-received SYN packet (only available when writing the
+ * SYNACK). It will be useful when it is not necessary to
+ * save the SYN packet for latter use. It is also the only way
+ * to get the SYN during syncookie mode because the syn
+ * packet cannot be saved during syncookie.
+ *
+ * OR
+ *
+ * 2. the earlier saved syn which was done by
+ * bpf_setsockopt(TCP_SAVE_SYN).
+ *
+ * The bpf_getsockopt(TCP_BPF_SYN*) option will hide where the
+ * SYN packet is obtained.
+ *
+ * If the bpf-prog does not need the IP[46] header, the
+ * bpf-prog can avoid parsing the IP header by using
+ * TCP_BPF_SYN. Otherwise, the bpf-prog can get both
+ * IP[46] and TCP header by using TCP_BPF_SYN_IP.
+ *
+ * >0: Total number of bytes copied
+ * -ENOSPC: Not enough space in optval. Only optlen number of
+ * bytes is copied.
+ * -ENOENT: The SYN skb is not available now and the earlier SYN pkt
+ * is not saved by setsockopt(TCP_SAVE_SYN).
+ */
+ TCP_BPF_SYN = 1005, /* Copy the TCP header */
+ TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */
+ TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
+};
+
+enum {
+ BPF_LOAD_HDR_OPT_TCP_SYN = (1ULL << 0),
+};
+
+/* args[0] value during BPF_SOCK_OPS_HDR_OPT_LEN_CB and
+ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ */
+enum {
+ BPF_WRITE_HDR_TCP_CURRENT_MSS = 1, /* Kernel is finding the
+ * total option spaces
+ * required for an established
+ * sk in order to calculate the
+ * MSS. No skb is actually
+ * sent.
+ */
+ BPF_WRITE_HDR_TCP_SYNACK_COOKIE = 2, /* Kernel is in syncookie mode
+ * when sending a SYN.
+ */
};
struct bpf_perf_event_value {
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index bf8ed13..adbe994 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -1,6 +1,9 @@
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
# Most of this file is copied from tools/lib/traceevent/Makefile
+RM ?= rm
+srctree = $(abs_srctree)
+
LIBBPF_VERSION := $(shell \
grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | \
sort -rV | head -n1 | cut -d'_' -f2)
@@ -56,7 +59,7 @@
endif
FEATURE_USER = .libbpf
-FEATURE_TESTS = libelf libelf-mmap zlib bpf reallocarray
+FEATURE_TESTS = libelf zlib bpf
FEATURE_DISPLAY = libelf zlib bpf
INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
@@ -98,16 +101,8 @@
CFLAGS := -g -Wall
endif
-ifeq ($(feature-libelf-mmap), 1)
- override CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT
-endif
-
-ifeq ($(feature-reallocarray), 0)
- override CFLAGS += -DCOMPAT_NEED_REALLOCARRAY
-endif
-
# Append required CFLAGS
-override CFLAGS += $(EXTRA_WARNINGS)
+override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum
override CFLAGS += -Werror -Wall
override CFLAGS += -fPIC
override CFLAGS += $(INCLUDES)
@@ -196,7 +191,7 @@
@ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)
$(OUTPUT)libbpf.a: $(BPF_IN_STATIC)
- $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
+ $(QUIET_LINK)$(RM) -f $@; $(AR) rcs $@ $^
$(OUTPUT)libbpf.pc:
$(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \
@@ -269,10 +264,10 @@
### Cleaning rules
config-clean:
- $(call QUIET_CLEAN, config)
+ $(call QUIET_CLEAN, feature-detect)
$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
-clean:
+clean: config-clean
$(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \
*~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \
$(SHARED_OBJDIR) $(STATIC_OBJDIR) \
@@ -299,7 +294,7 @@
cscope -b -q -I $(srctree)/include -f cscope.out
tags:
- rm -f TAGS tags
+ $(RM) -f TAGS tags
ls *.c *.h | xargs $(TAGS_PROG) -a
# Declare the contents of the .PHONY variable as phony. We keep that
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 07506810..82b983f 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -32,9 +32,6 @@
#include "libbpf.h"
#include "libbpf_internal.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
/*
* When building perf, unistd.h is overridden. __NR_bpf is
* required to be defined explicitly.
diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index eae5ccc..bbcefb3 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -19,32 +19,52 @@ enum bpf_field_info_kind {
BPF_FIELD_RSHIFT_U64 = 5,
};
+/* second argument to __builtin_btf_type_id() built-in */
+enum bpf_type_id_kind {
+ BPF_TYPE_ID_LOCAL = 0, /* BTF type ID in local program */
+ BPF_TYPE_ID_TARGET = 1, /* BTF type ID in target kernel */
+};
+
+/* second argument to __builtin_preserve_type_info() built-in */
+enum bpf_type_info_kind {
+ BPF_TYPE_EXISTS = 0, /* type existence in target kernel */
+ BPF_TYPE_SIZE = 1, /* type size in target kernel */
+};
+
+/* second argument to __builtin_preserve_enum_value() built-in */
+enum bpf_enum_value_kind {
+ BPF_ENUMVAL_EXISTS = 0, /* enum value existence in kernel */
+ BPF_ENUMVAL_VALUE = 1, /* enum value value relocation */
+};
+
#define __CORE_RELO(src, field, info) \
__builtin_preserve_field_info((src)->field, BPF_FIELD_##info)
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \
- bpf_probe_read((void *)dst, \
- __CORE_RELO(src, fld, BYTE_SIZE), \
- (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
+ bpf_probe_read_kernel( \
+ (void *)dst, \
+ __CORE_RELO(src, fld, BYTE_SIZE), \
+ (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
#else
/* semantics of LSHIFT_64 assumes loading values into low-ordered bytes, so
* for big-endian we need to adjust destination pointer accordingly, based on
* field byte size
*/
#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \
- bpf_probe_read((void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \
- __CORE_RELO(src, fld, BYTE_SIZE), \
- (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
+ bpf_probe_read_kernel( \
+ (void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \
+ __CORE_RELO(src, fld, BYTE_SIZE), \
+ (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
#endif
/*
* Extract bitfield, identified by s->field, and return its value as u64.
* All this is done in relocatable manner, so bitfield changes such as
* signedness, bit size, offset changes, this will be handled automatically.
- * This version of macro is using bpf_probe_read() to read underlying integer
- * storage. Macro functions as an expression and its return type is
- * bpf_probe_read()'s return value: 0, on success, <0 on error.
+ * This version of macro is using bpf_probe_read_kernel() to read underlying
+ * integer storage. Macro functions as an expression and its return type is
+ * bpf_probe_read_kernel()'s return value: 0, on success, <0 on error.
*/
#define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({ \
unsigned long long val = 0; \
@@ -92,15 +112,75 @@ enum bpf_field_info_kind {
__builtin_preserve_field_info(field, BPF_FIELD_EXISTS)
/*
- * Convenience macro to get byte size of a field. Works for integers,
+ * Convenience macro to get the byte size of a field. Works for integers,
* struct/unions, pointers, arrays, and enums.
*/
#define bpf_core_field_size(field) \
__builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE)
/*
- * bpf_core_read() abstracts away bpf_probe_read() call and captures offset
- * relocation for source address using __builtin_preserve_access_index()
+ * Convenience macro to get BTF type ID of a specified type, using a local BTF
+ * information. Return 32-bit unsigned integer with type ID from program's own
+ * BTF. Always succeeds.
+ */
+#define bpf_core_type_id_local(type) \
+ __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_LOCAL)
+
+/*
+ * Convenience macro to get BTF type ID of a target kernel's type that matches
+ * specified local type.
+ * Returns:
+ * - valid 32-bit unsigned type ID in kernel BTF;
+ * - 0, if no matching type was found in a target kernel BTF.
+ */
+#define bpf_core_type_id_kernel(type) \
+ __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_TARGET)
+
+/*
+ * Convenience macro to check that provided named type
+ * (struct/union/enum/typedef) exists in a target kernel.
+ * Returns:
+ * 1, if such type is present in target kernel's BTF;
+ * 0, if no matching type is found.
+ */
+#define bpf_core_type_exists(type) \
+ __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS)
+
+/*
+ * Convenience macro to get the byte size of a provided named type
+ * (struct/union/enum/typedef) in a target kernel.
+ * Returns:
+ * >= 0 size (in bytes), if type is present in target kernel's BTF;
+ * 0, if no matching type is found.
+ */
+#define bpf_core_type_size(type) \
+ __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_SIZE)
+
+/*
+ * Convenience macro to check that provided enumerator value is defined in
+ * a target kernel.
+ * Returns:
+ * 1, if specified enum type and its enumerator value are present in target
+ * kernel's BTF;
+ * 0, if no matching enum and/or enum value within that enum is found.
+ */
+#define bpf_core_enum_value_exists(enum_type, enum_value) \
+ __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_EXISTS)
+
+/*
+ * Convenience macro to get the integer value of an enumerator value in
+ * a target kernel.
+ * Returns:
+ * 64-bit value, if specified enum type and its enumerator value are
+ * present in target kernel's BTF;
+ * 0, if no matching enum and/or enum value within that enum is found.
+ */
+#define bpf_core_enum_value(enum_type, enum_value) \
+ __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_VALUE)
+
+/*
+ * bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures
+ * offset relocation for source address using __builtin_preserve_access_index()
* built-in, provided by Clang.
*
* __builtin_preserve_access_index() takes as an argument an expression of
@@ -115,8 +195,8 @@ enum bpf_field_info_kind {
* (local) BTF, used to record relocation.
*/
#define bpf_core_read(dst, sz, src) \
- bpf_probe_read(dst, sz, \
- (const void *)__builtin_preserve_access_index(src))
+ bpf_probe_read_kernel(dst, sz, \
+ (const void *)__builtin_preserve_access_index(src))
/*
* bpf_core_read_str() is a thin wrapper around bpf_probe_read_str()
@@ -124,8 +204,8 @@ enum bpf_field_info_kind {
* argument.
*/
#define bpf_core_read_str(dst, sz, src) \
- bpf_probe_read_str(dst, sz, \
- (const void *)__builtin_preserve_access_index(src))
+ bpf_probe_read_kernel_str(dst, sz, \
+ (const void *)__builtin_preserve_access_index(src))
#define ___concat(a, b) a ## b
#define ___apply(fn, n) ___concat(fn, n)
@@ -239,15 +319,17 @@ enum bpf_field_info_kind {
* int x = BPF_CORE_READ(s, a.b.c, d.e, f, g);
*
* BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF
- * CO-RE relocatable bpf_probe_read() wrapper) calls, logically equivalent to:
+ * CO-RE relocatable bpf_probe_read_kernel() wrapper) calls, logically
+ * equivalent to:
* 1. const void *__t = s->a.b.c;
* 2. __t = __t->d.e;
* 3. __t = __t->f;
* 4. return __t->g;
*
* Equivalence is logical, because there is a heavy type casting/preservation
- * involved, as well as all the reads are happening through bpf_probe_read()
- * calls using __builtin_preserve_access_index() to emit CO-RE relocations.
+ * involved, as well as all the reads are happening through
+ * bpf_probe_read_kernel() calls using __builtin_preserve_access_index() to
+ * emit CO-RE relocations.
*
* N.B. Only up to 9 "field accessors" are supported, which should be more
* than enough for any practical purpose.
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index e9a4ecd..1106777 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -32,6 +32,9 @@
#ifndef __always_inline
#define __always_inline __attribute__((always_inline))
#endif
+#ifndef __noinline
+#define __noinline __attribute__((noinline))
+#endif
#ifndef __weak
#define __weak __attribute__((weak))
#endif
diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c
index bafca49..3ed1a27 100644
--- a/tools/lib/bpf/bpf_prog_linfo.c
+++ b/tools/lib/bpf/bpf_prog_linfo.c
@@ -8,9 +8,6 @@
#include "libbpf.h"
#include "libbpf_internal.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
struct bpf_prog_linfo {
void *raw_linfo;
void *raw_jited_linfo;
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index eebf020..f9ef377 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -289,9 +289,9 @@ struct pt_regs;
#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
#else
#define BPF_KPROBE_READ_RET_IP(ip, ctx) \
- ({ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
+ ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \
- ({ bpf_probe_read(&(ip), sizeof(ip), \
+ ({ bpf_probe_read_kernel(&(ip), sizeof(ip), \
(void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
#endif
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 7dfca70..a3d259e 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -21,9 +21,6 @@
#include "libbpf_internal.h"
#include "hashmap.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
#define BTF_MAX_NR_TYPES 0x7fffffffU
#define BTF_MAX_STR_OFFSET 0x7fffffffU
@@ -61,7 +58,7 @@ static int btf_add_type(struct btf *btf, struct btf_type *t)
expand_by = max(btf->types_size >> 2, 16U);
new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by);
- new_types = realloc(btf->types, sizeof(*new_types) * new_size);
+ new_types = libbpf_reallocarray(btf->types, new_size, sizeof(*new_types));
if (!new_types)
return -ENOMEM;
@@ -1131,14 +1128,14 @@ static int btf_ext_setup_line_info(struct btf_ext *btf_ext)
return btf_ext_setup_info(btf_ext, ¶m);
}
-static int btf_ext_setup_field_reloc(struct btf_ext *btf_ext)
+static int btf_ext_setup_core_relos(struct btf_ext *btf_ext)
{
struct btf_ext_sec_setup_param param = {
- .off = btf_ext->hdr->field_reloc_off,
- .len = btf_ext->hdr->field_reloc_len,
- .min_rec_size = sizeof(struct bpf_field_reloc),
- .ext_info = &btf_ext->field_reloc_info,
- .desc = "field_reloc",
+ .off = btf_ext->hdr->core_relo_off,
+ .len = btf_ext->hdr->core_relo_len,
+ .min_rec_size = sizeof(struct bpf_core_relo),
+ .ext_info = &btf_ext->core_relo_info,
+ .desc = "core_relo",
};
return btf_ext_setup_info(btf_ext, ¶m);
@@ -1217,10 +1214,9 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
if (err)
goto done;
- if (btf_ext->hdr->hdr_len <
- offsetofend(struct btf_ext_header, field_reloc_len))
+ if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
goto done;
- err = btf_ext_setup_field_reloc(btf_ext);
+ err = btf_ext_setup_core_relos(btf_ext);
if (err)
goto done;
@@ -1575,7 +1571,7 @@ static int btf_dedup_hypot_map_add(struct btf_dedup *d,
__u32 *new_list;
d->hypot_cap += max((size_t)16, d->hypot_cap / 2);
- new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap);
+ new_list = libbpf_reallocarray(d->hypot_list, d->hypot_cap, sizeof(__u32));
if (!new_list)
return -ENOMEM;
d->hypot_list = new_list;
@@ -1871,8 +1867,7 @@ static int btf_dedup_strings(struct btf_dedup *d)
struct btf_str_ptr *new_ptrs;
strs.cap += max(strs.cnt / 2, 16U);
- new_ptrs = realloc(strs.ptrs,
- sizeof(strs.ptrs[0]) * strs.cap);
+ new_ptrs = libbpf_reallocarray(strs.ptrs, strs.cap, sizeof(strs.ptrs[0]));
if (!new_ptrs) {
err = -ENOMEM;
goto done;
@@ -2957,8 +2952,8 @@ static int btf_dedup_compact_types(struct btf_dedup *d)
d->btf->nr_types = next_type_id - 1;
d->btf->types_size = d->btf->nr_types;
d->btf->hdr->type_len = p - types_start;
- new_types = realloc(d->btf->types,
- (1 + d->btf->nr_types) * sizeof(struct btf_type *));
+ new_types = libbpf_reallocarray(d->btf->types, (1 + d->btf->nr_types),
+ sizeof(struct btf_type *));
if (!new_types)
return -ENOMEM;
d->btf->types = new_types;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 1ca1444..91f0ad0 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -24,44 +24,6 @@ struct btf_type;
struct bpf_object;
-/*
- * The .BTF.ext ELF section layout defined as
- * struct btf_ext_header
- * func_info subsection
- *
- * The func_info subsection layout:
- * record size for struct bpf_func_info in the func_info subsection
- * struct btf_sec_func_info for section #1
- * a list of bpf_func_info records for section #1
- * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h
- * but may not be identical
- * struct btf_sec_func_info for section #2
- * a list of bpf_func_info records for section #2
- * ......
- *
- * Note that the bpf_func_info record size in .BTF.ext may not
- * be the same as the one defined in include/uapi/linux/bpf.h.
- * The loader should ensure that record_size meets minimum
- * requirement and pass the record as is to the kernel. The
- * kernel will handle the func_info properly based on its contents.
- */
-struct btf_ext_header {
- __u16 magic;
- __u8 version;
- __u8 flags;
- __u32 hdr_len;
-
- /* All offsets are in bytes relative to the end of this header */
- __u32 func_info_off;
- __u32 func_info_len;
- __u32 line_info_off;
- __u32 line_info_len;
-
- /* optional part of .BTF.ext header */
- __u32 field_reloc_off;
- __u32 field_reloc_len;
-};
-
LIBBPF_API void btf__free(struct btf *btf);
LIBBPF_API struct btf *btf__new(const void *data, __u32 size);
LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext);
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 57c00fa..6c079b3 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -19,9 +19,6 @@
#include "libbpf.h"
#include "libbpf_internal.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t";
static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1;
@@ -323,8 +320,7 @@ static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
if (d->emit_queue_cnt >= d->emit_queue_cap) {
new_cap = max(16, d->emit_queue_cap * 3 / 2);
- new_queue = realloc(d->emit_queue,
- new_cap * sizeof(new_queue[0]));
+ new_queue = libbpf_reallocarray(d->emit_queue, new_cap, sizeof(new_queue[0]));
if (!new_queue)
return -ENOMEM;
d->emit_queue = new_queue;
@@ -1003,8 +999,7 @@ static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id)
if (d->decl_stack_cnt >= d->decl_stack_cap) {
new_cap = max(16, d->decl_stack_cap * 3 / 2);
- new_stack = realloc(d->decl_stack,
- new_cap * sizeof(new_stack[0]));
+ new_stack = libbpf_reallocarray(d->decl_stack, new_cap, sizeof(new_stack[0]));
if (!new_stack)
return -ENOMEM;
d->decl_stack = new_stack;
diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c
index a405dad..3c20b12 100644
--- a/tools/lib/bpf/hashmap.c
+++ b/tools/lib/bpf/hashmap.c
@@ -15,6 +15,9 @@
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+/* prevent accidental re-addition of reallocarray() */
+#pragma GCC poison reallocarray
+
/* start with 4 buckets */
#define HASHMAP_MIN_CAP_BITS 2
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 0ad0b04..b688aad 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -44,7 +44,6 @@
#include <sys/vfs.h>
#include <sys/utsname.h>
#include <sys/resource.h>
-#include <tools/libc_compat.h>
#include <libelf.h>
#include <gelf.h>
#include <zlib.h>
@@ -56,9 +55,6 @@
#include "libbpf_internal.h"
#include "hashmap.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
#ifndef EM_BPF
#define EM_BPF 247
#endif
@@ -67,6 +63,8 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
+#define BPF_INSN_SZ (sizeof(struct bpf_insn))
+
/* vsprintf() in __base_pr() uses nonliteral format string. It may break
* compilation if user enables corresponding warning. Disable it explicitly.
*/
@@ -154,34 +152,35 @@ static void pr_perm_msg(int err)
___err; })
#endif
-#ifdef HAVE_LIBELF_MMAP_SUPPORT
-# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ_MMAP
-#else
-# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ
-#endif
-
static inline __u64 ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
}
-struct bpf_capabilities {
+enum kern_feature_id {
/* v4.14: kernel support for program & map names. */
- __u32 name:1;
+ FEAT_PROG_NAME,
/* v5.2: kernel support for global data sections. */
- __u32 global_data:1;
+ FEAT_GLOBAL_DATA,
+ /* BTF support */
+ FEAT_BTF,
/* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
- __u32 btf_func:1;
+ FEAT_BTF_FUNC,
/* BTF_KIND_VAR and BTF_KIND_DATASEC support */
- __u32 btf_datasec:1;
- /* BPF_F_MMAPABLE is supported for arrays */
- __u32 array_mmap:1;
+ FEAT_BTF_DATASEC,
/* BTF_FUNC_GLOBAL is supported */
- __u32 btf_func_global:1;
+ FEAT_BTF_GLOBAL_FUNC,
+ /* BPF_F_MMAPABLE is supported for arrays */
+ FEAT_ARRAY_MMAP,
/* kernel support for expected_attach_type in BPF_PROG_LOAD */
- __u32 exp_attach_type:1;
+ FEAT_EXP_ATTACH_TYPE,
+ /* bpf_probe_read_{kernel,user}[_str] helpers */
+ FEAT_PROBE_READ_KERN,
+ __FEAT_CNT,
};
+static bool kernel_supports(enum kern_feature_id feat_id);
+
enum reloc_type {
RELO_LD64,
RELO_CALL,
@@ -209,6 +208,7 @@ struct bpf_sec_def {
bool is_exp_attach_type_optional;
bool is_attachable;
bool is_attach_btf;
+ bool is_sleepable;
attach_fn_t attach_fn;
};
@@ -253,8 +253,6 @@ struct bpf_program {
__u32 func_info_rec_size;
__u32 func_info_cnt;
- struct bpf_capabilities *caps;
-
void *line_info;
__u32 line_info_rec_size;
__u32 line_info_cnt;
@@ -403,6 +401,7 @@ struct bpf_object {
Elf_Data *rodata;
Elf_Data *bss;
Elf_Data *st_ops_data;
+ size_t shstrndx; /* section index for section name strings */
size_t strtabidx;
struct {
GElf_Shdr shdr;
@@ -436,12 +435,18 @@ struct bpf_object {
void *priv;
bpf_object_clear_priv_t clear_priv;
- struct bpf_capabilities caps;
-
char path[];
};
#define obj_elf_valid(o) ((o)->efile.elf)
+static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
+static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
+static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
+static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
+static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr);
+static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
+static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
+
void bpf_program__unload(struct bpf_program *prog)
{
int i;
@@ -503,7 +508,7 @@ static char *__bpf_program__pin_name(struct bpf_program *prog)
}
static int
-bpf_program__init(void *data, size_t size, char *section_name, int idx,
+bpf_program__init(void *data, size_t size, const char *section_name, int idx,
struct bpf_program *prog)
{
const size_t bpf_insn_sz = sizeof(struct bpf_insn);
@@ -552,7 +557,7 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,
static int
bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
- char *section_name, int idx)
+ const char *section_name, int idx)
{
struct bpf_program prog, *progs;
int nr_progs, err;
@@ -561,11 +566,10 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
if (err)
return err;
- prog.caps = &obj->caps;
progs = obj->programs;
nr_progs = obj->nr_programs;
- progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0]));
+ progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(progs[0]));
if (!progs) {
/*
* In this case the original obj->programs
@@ -578,7 +582,7 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
return -ENOMEM;
}
- pr_debug("found program %s\n", prog.section_name);
+ pr_debug("elf: found program '%s'\n", prog.section_name);
obj->programs = progs;
obj->nr_programs = nr_progs + 1;
prog.obj = obj;
@@ -598,8 +602,7 @@ bpf_object__init_prog_names(struct bpf_object *obj)
prog = &obj->programs[pi];
- for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name;
- si++) {
+ for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; si++) {
GElf_Sym sym;
if (!gelf_getsym(symbols, si, &sym))
@@ -609,11 +612,9 @@ bpf_object__init_prog_names(struct bpf_object *obj)
if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL)
continue;
- name = elf_strptr(obj->efile.elf,
- obj->efile.strtabidx,
- sym.st_name);
+ name = elf_sym_str(obj, sym.st_name);
if (!name) {
- pr_warn("failed to get sym name string for prog %s\n",
+ pr_warn("prog '%s': failed to get symbol name\n",
prog->section_name);
return -LIBBPF_ERRNO__LIBELF;
}
@@ -623,17 +624,14 @@ bpf_object__init_prog_names(struct bpf_object *obj)
name = ".text";
if (!name) {
- pr_warn("failed to find sym for prog %s\n",
+ pr_warn("prog '%s': failed to find program symbol\n",
prog->section_name);
return -EINVAL;
}
prog->name = strdup(name);
- if (!prog->name) {
- pr_warn("failed to allocate memory for prog sym %s\n",
- name);
+ if (!prog->name)
return -ENOMEM;
- }
}
return 0;
@@ -1066,13 +1064,18 @@ static void bpf_object__elf_finish(struct bpf_object *obj)
obj->efile.obj_buf_sz = 0;
}
+/* if libelf is old and doesn't support mmap(), fall back to read() */
+#ifndef ELF_C_READ_MMAP
+#define ELF_C_READ_MMAP ELF_C_READ
+#endif
+
static int bpf_object__elf_init(struct bpf_object *obj)
{
int err = 0;
GElf_Ehdr *ep;
if (obj_elf_valid(obj)) {
- pr_warn("elf init: internal error\n");
+ pr_warn("elf: init internal error\n");
return -LIBBPF_ERRNO__LIBELF;
}
@@ -1090,31 +1093,44 @@ static int bpf_object__elf_init(struct bpf_object *obj)
err = -errno;
cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
- pr_warn("failed to open %s: %s\n", obj->path, cp);
+ pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
return err;
}
- obj->efile.elf = elf_begin(obj->efile.fd,
- LIBBPF_ELF_C_READ_MMAP, NULL);
+ obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
}
if (!obj->efile.elf) {
- pr_warn("failed to open %s as ELF file\n", obj->path);
+ pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
err = -LIBBPF_ERRNO__LIBELF;
goto errout;
}
if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
- pr_warn("failed to get EHDR from %s\n", obj->path);
+ pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
err = -LIBBPF_ERRNO__FORMAT;
goto errout;
}
ep = &obj->efile.ehdr;
+ if (elf_getshdrstrndx(obj->efile.elf, &obj->efile.shstrndx)) {
+ pr_warn("elf: failed to get section names section index for %s: %s\n",
+ obj->path, elf_errmsg(-1));
+ err = -LIBBPF_ERRNO__FORMAT;
+ goto errout;
+ }
+
+ /* Elf is corrupted/truncated, avoid calling elf_strptr. */
+ if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) {
+ pr_warn("elf: failed to get section names strings from %s: %s\n",
+ obj->path, elf_errmsg(-1));
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+
/* Old LLVM set e_machine to EM_NONE */
if (ep->e_type != ET_REL ||
(ep->e_machine && ep->e_machine != EM_BPF)) {
- pr_warn("%s is not an eBPF object file\n", obj->path);
+ pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
err = -LIBBPF_ERRNO__FORMAT;
goto errout;
}
@@ -1136,7 +1152,7 @@ static int bpf_object__check_endianness(struct bpf_object *obj)
#else
# error "Unrecognized __BYTE_ORDER__"
#endif
- pr_warn("endianness mismatch.\n");
+ pr_warn("elf: endianness mismatch in %s.\n", obj->path);
return -LIBBPF_ERRNO__ENDIAN;
}
@@ -1171,55 +1187,10 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
return false;
}
-static int bpf_object_search_section_size(const struct bpf_object *obj,
- const char *name, size_t *d_size)
-{
- const GElf_Ehdr *ep = &obj->efile.ehdr;
- Elf *elf = obj->efile.elf;
- Elf_Scn *scn = NULL;
- int idx = 0;
-
- while ((scn = elf_nextscn(elf, scn)) != NULL) {
- const char *sec_name;
- Elf_Data *data;
- GElf_Shdr sh;
-
- idx++;
- if (gelf_getshdr(scn, &sh) != &sh) {
- pr_warn("failed to get section(%d) header from %s\n",
- idx, obj->path);
- return -EIO;
- }
-
- sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
- if (!sec_name) {
- pr_warn("failed to get section(%d) name from %s\n",
- idx, obj->path);
- return -EIO;
- }
-
- if (strcmp(name, sec_name))
- continue;
-
- data = elf_getdata(scn, 0);
- if (!data) {
- pr_warn("failed to get section(%d) data from %s(%s)\n",
- idx, name, obj->path);
- return -EIO;
- }
-
- *d_size = data->d_size;
- return 0;
- }
-
- return -ENOENT;
-}
-
int bpf_object__section_size(const struct bpf_object *obj, const char *name,
__u32 *size)
{
int ret = -ENOENT;
- size_t d_size;
*size = 0;
if (!name) {
@@ -1237,9 +1208,13 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,
if (obj->efile.st_ops_data)
*size = obj->efile.st_ops_data->d_size;
} else {
- ret = bpf_object_search_section_size(obj, name, &d_size);
- if (!ret)
- *size = d_size;
+ Elf_Scn *scn = elf_sec_by_name(obj, name);
+ Elf_Data *data = elf_sec_data(obj, scn);
+
+ if (data) {
+ ret = 0; /* found it */
+ *size = data->d_size;
+ }
}
return *size ? 0 : ret;
@@ -1264,8 +1239,7 @@ int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
continue;
- sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
- sym.st_name);
+ sname = elf_sym_str(obj, sym.st_name);
if (!sname) {
pr_warn("failed to get sym name string for var %s\n",
name);
@@ -1290,7 +1264,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
return &obj->maps[obj->nr_maps++];
new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
- new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps));
+ new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps));
if (!new_maps) {
pr_warn("alloc maps for object failed\n");
return ERR_PTR(-ENOMEM);
@@ -1742,12 +1716,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
if (!symbols)
return -EINVAL;
- scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx);
- if (scn)
- data = elf_getdata(scn, NULL);
+
+ scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
+ data = elf_sec_data(obj, scn);
if (!scn || !data) {
- pr_warn("failed to get Elf_Data from map section %d\n",
- obj->efile.maps_shndx);
+ pr_warn("elf: failed to get legacy map definitions for %s\n",
+ obj->path);
return -EINVAL;
}
@@ -1769,12 +1743,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
nr_maps++;
}
/* Assume equally sized map definitions */
- pr_debug("maps in %s: %d maps in %zd bytes\n",
- obj->path, nr_maps, data->d_size);
+ pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n",
+ nr_maps, data->d_size, obj->path);
if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
- pr_warn("unable to determine map definition size section %s, %d maps in %zd bytes\n",
- obj->path, nr_maps, data->d_size);
+ pr_warn("elf: unable to determine legacy map definition size in %s\n",
+ obj->path);
return -EINVAL;
}
map_def_sz = data->d_size / nr_maps;
@@ -1795,8 +1769,7 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
if (IS_ERR(map))
return PTR_ERR(map);
- map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
- sym.st_name);
+ map_name = elf_sym_str(obj, sym.st_name);
if (!map_name) {
pr_warn("failed to get map #%d name sym string for obj %s\n",
i, obj->path);
@@ -1884,6 +1857,29 @@ resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
return btf_is_func_proto(t) ? t : NULL;
}
+static const char *btf_kind_str(const struct btf_type *t)
+{
+ switch (btf_kind(t)) {
+ case BTF_KIND_UNKN: return "void";
+ case BTF_KIND_INT: return "int";
+ case BTF_KIND_PTR: return "ptr";
+ case BTF_KIND_ARRAY: return "array";
+ case BTF_KIND_STRUCT: return "struct";
+ case BTF_KIND_UNION: return "union";
+ case BTF_KIND_ENUM: return "enum";
+ case BTF_KIND_FWD: return "fwd";
+ case BTF_KIND_TYPEDEF: return "typedef";
+ case BTF_KIND_VOLATILE: return "volatile";
+ case BTF_KIND_CONST: return "const";
+ case BTF_KIND_RESTRICT: return "restrict";
+ case BTF_KIND_FUNC: return "func";
+ case BTF_KIND_FUNC_PROTO: return "func_proto";
+ case BTF_KIND_VAR: return "var";
+ case BTF_KIND_DATASEC: return "datasec";
+ default: return "unknown";
+ }
+}
+
/*
* Fetch integer attribute of BTF map definition. Such attributes are
* represented using a pointer to an array, in which dimensionality of array
@@ -1900,8 +1896,8 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
const struct btf_type *arr_t;
if (!btf_is_ptr(t)) {
- pr_warn("map '%s': attr '%s': expected PTR, got %u.\n",
- map_name, name, btf_kind(t));
+ pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
+ map_name, name, btf_kind_str(t));
return false;
}
@@ -1912,8 +1908,8 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
return false;
}
if (!btf_is_array(arr_t)) {
- pr_warn("map '%s': attr '%s': expected ARRAY, got %u.\n",
- map_name, name, btf_kind(arr_t));
+ pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
+ map_name, name, btf_kind_str(arr_t));
return false;
}
arr_info = btf_array(arr_t);
@@ -1924,7 +1920,7 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
static int build_map_pin_path(struct bpf_map *map, const char *path)
{
char buf[PATH_MAX];
- int err, len;
+ int len;
if (!path)
path = "/sys/fs/bpf";
@@ -1935,11 +1931,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path)
else if (len >= PATH_MAX)
return -ENAMETOOLONG;
- err = bpf_map__set_pin_path(map, buf);
- if (err)
- return err;
-
- return 0;
+ return bpf_map__set_pin_path(map, buf);
}
@@ -2007,8 +1999,8 @@ static int parse_btf_map_def(struct bpf_object *obj,
return -EINVAL;
}
if (!btf_is_ptr(t)) {
- pr_warn("map '%s': key spec is not PTR: %u.\n",
- map->name, btf_kind(t));
+ pr_warn("map '%s': key spec is not PTR: %s.\n",
+ map->name, btf_kind_str(t));
return -EINVAL;
}
sz = btf__resolve_size(obj->btf, t->type);
@@ -2049,8 +2041,8 @@ static int parse_btf_map_def(struct bpf_object *obj,
return -EINVAL;
}
if (!btf_is_ptr(t)) {
- pr_warn("map '%s': value spec is not PTR: %u.\n",
- map->name, btf_kind(t));
+ pr_warn("map '%s': value spec is not PTR: %s.\n",
+ map->name, btf_kind_str(t));
return -EINVAL;
}
sz = btf__resolve_size(obj->btf, t->type);
@@ -2107,14 +2099,14 @@ static int parse_btf_map_def(struct bpf_object *obj,
t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type,
NULL);
if (!btf_is_ptr(t)) {
- pr_warn("map '%s': map-in-map inner def is of unexpected kind %u.\n",
- map->name, btf_kind(t));
+ pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
+ map->name, btf_kind_str(t));
return -EINVAL;
}
t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
if (!btf_is_struct(t)) {
- pr_warn("map '%s': map-in-map inner def is of unexpected kind %u.\n",
- map->name, btf_kind(t));
+ pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
+ map->name, btf_kind_str(t));
return -EINVAL;
}
@@ -2205,8 +2197,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
return -EINVAL;
}
if (!btf_is_var(var)) {
- pr_warn("map '%s': unexpected var kind %u.\n",
- map_name, btf_kind(var));
+ pr_warn("map '%s': unexpected var kind %s.\n",
+ map_name, btf_kind_str(var));
return -EINVAL;
}
if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
@@ -2218,8 +2210,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
if (!btf_is_struct(def)) {
- pr_warn("map '%s': unexpected def kind %u.\n",
- map_name, btf_kind(var));
+ pr_warn("map '%s': unexpected def kind %s.\n",
+ map_name, btf_kind_str(var));
return -EINVAL;
}
if (def->size > vi->size) {
@@ -2259,12 +2251,11 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
if (obj->efile.btf_maps_shndx < 0)
return 0;
- scn = elf_getscn(obj->efile.elf, obj->efile.btf_maps_shndx);
- if (scn)
- data = elf_getdata(scn, NULL);
+ scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
+ data = elf_sec_data(obj, scn);
if (!scn || !data) {
- pr_warn("failed to get Elf_Data from map section %d (%s)\n",
- obj->efile.btf_maps_shndx, MAPS_ELF_SEC);
+ pr_warn("elf: failed to get %s map definitions for %s\n",
+ MAPS_ELF_SEC, obj->path);
return -EINVAL;
}
@@ -2322,36 +2313,28 @@ static int bpf_object__init_maps(struct bpf_object *obj,
static bool section_have_execinstr(struct bpf_object *obj, int idx)
{
- Elf_Scn *scn;
GElf_Shdr sh;
- scn = elf_getscn(obj->efile.elf, idx);
- if (!scn)
+ if (elf_sec_hdr(obj, elf_sec_by_idx(obj, idx), &sh))
return false;
- if (gelf_getshdr(scn, &sh) != &sh)
- return false;
-
- if (sh.sh_flags & SHF_EXECINSTR)
- return true;
-
- return false;
+ return sh.sh_flags & SHF_EXECINSTR;
}
static bool btf_needs_sanitization(struct bpf_object *obj)
{
- bool has_func_global = obj->caps.btf_func_global;
- bool has_datasec = obj->caps.btf_datasec;
- bool has_func = obj->caps.btf_func;
+ bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
+ bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
+ bool has_func = kernel_supports(FEAT_BTF_FUNC);
return !has_func || !has_datasec || !has_func_global;
}
static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
{
- bool has_func_global = obj->caps.btf_func_global;
- bool has_datasec = obj->caps.btf_datasec;
- bool has_func = obj->caps.btf_func;
+ bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
+ bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
+ bool has_func = kernel_supports(FEAT_BTF_FUNC);
struct btf_type *t;
int i, j, vlen;
@@ -2499,7 +2482,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
int err;
/* CO-RE relocations need kernel BTF */
- if (obj->btf_ext && obj->btf_ext->field_reloc_info.len)
+ if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
need_vmlinux_btf = true;
bpf_object__for_each_program(prog, obj) {
@@ -2533,6 +2516,15 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
if (!obj->btf)
return 0;
+ if (!kernel_supports(FEAT_BTF)) {
+ if (kernel_needs_btf(obj)) {
+ err = -EOPNOTSUPP;
+ goto report;
+ }
+ pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
+ return 0;
+ }
+
sanitize = btf_needs_sanitization(obj);
if (sanitize) {
const void *raw_data;
@@ -2558,6 +2550,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
}
btf__free(kern_btf);
}
+report:
if (err) {
btf_mandatory = kernel_needs_btf(obj);
pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
@@ -2569,61 +2562,199 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
return err;
}
+static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
+{
+ const char *name;
+
+ name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
+ if (!name) {
+ pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
+ off, obj->path, elf_errmsg(-1));
+ return NULL;
+ }
+
+ return name;
+}
+
+static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
+{
+ const char *name;
+
+ name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
+ if (!name) {
+ pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
+ off, obj->path, elf_errmsg(-1));
+ return NULL;
+ }
+
+ return name;
+}
+
+static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
+{
+ Elf_Scn *scn;
+
+ scn = elf_getscn(obj->efile.elf, idx);
+ if (!scn) {
+ pr_warn("elf: failed to get section(%zu) from %s: %s\n",
+ idx, obj->path, elf_errmsg(-1));
+ return NULL;
+ }
+ return scn;
+}
+
+static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
+{
+ Elf_Scn *scn = NULL;
+ Elf *elf = obj->efile.elf;
+ const char *sec_name;
+
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ sec_name = elf_sec_name(obj, scn);
+ if (!sec_name)
+ return NULL;
+
+ if (strcmp(sec_name, name) != 0)
+ continue;
+
+ return scn;
+ }
+ return NULL;
+}
+
+static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr)
+{
+ if (!scn)
+ return -EINVAL;
+
+ if (gelf_getshdr(scn, hdr) != hdr) {
+ pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
+ elf_ndxscn(scn), obj->path, elf_errmsg(-1));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
+{
+ const char *name;
+ GElf_Shdr sh;
+
+ if (!scn)
+ return NULL;
+
+ if (elf_sec_hdr(obj, scn, &sh))
+ return NULL;
+
+ name = elf_sec_str(obj, sh.sh_name);
+ if (!name) {
+ pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
+ elf_ndxscn(scn), obj->path, elf_errmsg(-1));
+ return NULL;
+ }
+
+ return name;
+}
+
+static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
+{
+ Elf_Data *data;
+
+ if (!scn)
+ return NULL;
+
+ data = elf_getdata(scn, 0);
+ if (!data) {
+ pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
+ elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
+ obj->path, elf_errmsg(-1));
+ return NULL;
+ }
+
+ return data;
+}
+
+static bool is_sec_name_dwarf(const char *name)
+{
+ /* approximation, but the actual list is too long */
+ return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0;
+}
+
+static bool ignore_elf_section(GElf_Shdr *hdr, const char *name)
+{
+ /* no special handling of .strtab */
+ if (hdr->sh_type == SHT_STRTAB)
+ return true;
+
+ /* ignore .llvm_addrsig section as well */
+ if (hdr->sh_type == 0x6FFF4C03 /* SHT_LLVM_ADDRSIG */)
+ return true;
+
+ /* no subprograms will lead to an empty .text section, ignore it */
+ if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
+ strcmp(name, ".text") == 0)
+ return true;
+
+ /* DWARF sections */
+ if (is_sec_name_dwarf(name))
+ return true;
+
+ if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) {
+ name += sizeof(".rel") - 1;
+ /* DWARF section relocations */
+ if (is_sec_name_dwarf(name))
+ return true;
+
+ /* .BTF and .BTF.ext don't need relocations */
+ if (strcmp(name, BTF_ELF_SEC) == 0 ||
+ strcmp(name, BTF_EXT_ELF_SEC) == 0)
+ return true;
+ }
+
+ return false;
+}
+
static int bpf_object__elf_collect(struct bpf_object *obj)
{
Elf *elf = obj->efile.elf;
- GElf_Ehdr *ep = &obj->efile.ehdr;
Elf_Data *btf_ext_data = NULL;
Elf_Data *btf_data = NULL;
Elf_Scn *scn = NULL;
int idx = 0, err = 0;
- /* Elf is corrupted/truncated, avoid calling elf_strptr. */
- if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
- pr_warn("failed to get e_shstrndx from %s\n", obj->path);
- return -LIBBPF_ERRNO__FORMAT;
- }
-
while ((scn = elf_nextscn(elf, scn)) != NULL) {
- char *name;
+ const char *name;
GElf_Shdr sh;
Elf_Data *data;
idx++;
- if (gelf_getshdr(scn, &sh) != &sh) {
- pr_warn("failed to get section(%d) header from %s\n",
- idx, obj->path);
- return -LIBBPF_ERRNO__FORMAT;
- }
- name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
- if (!name) {
- pr_warn("failed to get section(%d) name from %s\n",
- idx, obj->path);
+ if (elf_sec_hdr(obj, scn, &sh))
return -LIBBPF_ERRNO__FORMAT;
- }
- data = elf_getdata(scn, 0);
- if (!data) {
- pr_warn("failed to get section(%d) data from %s(%s)\n",
- idx, name, obj->path);
+ name = elf_sec_str(obj, sh.sh_name);
+ if (!name)
return -LIBBPF_ERRNO__FORMAT;
- }
- pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
+
+ if (ignore_elf_section(&sh, name))
+ continue;
+
+ data = elf_sec_data(obj, scn);
+ if (!data)
+ return -LIBBPF_ERRNO__FORMAT;
+
+ pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
idx, name, (unsigned long)data->d_size,
(int)sh.sh_link, (unsigned long)sh.sh_flags,
(int)sh.sh_type);
if (strcmp(name, "license") == 0) {
- err = bpf_object__init_license(obj,
- data->d_buf,
- data->d_size);
+ err = bpf_object__init_license(obj, data->d_buf, data->d_size);
if (err)
return err;
} else if (strcmp(name, "version") == 0) {
- err = bpf_object__init_kversion(obj,
- data->d_buf,
- data->d_size);
+ err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
if (err)
return err;
} else if (strcmp(name, "maps") == 0) {
@@ -2636,8 +2767,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
btf_ext_data = data;
} else if (sh.sh_type == SHT_SYMTAB) {
if (obj->efile.symbols) {
- pr_warn("bpf: multiple SYMTAB in %s\n",
- obj->path);
+ pr_warn("elf: multiple symbol tables in %s\n", obj->path);
return -LIBBPF_ERRNO__FORMAT;
}
obj->efile.symbols = data;
@@ -2650,16 +2780,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
err = bpf_object__add_program(obj, data->d_buf,
data->d_size,
name, idx);
- if (err) {
- char errmsg[STRERR_BUFSIZE];
- char *cp;
-
- cp = libbpf_strerror_r(-err, errmsg,
- sizeof(errmsg));
- pr_warn("failed to alloc program %s (%s): %s",
- name, obj->path, cp);
+ if (err)
return err;
- }
} else if (strcmp(name, DATA_SEC) == 0) {
obj->efile.data = data;
obj->efile.data_shndx = idx;
@@ -2670,7 +2792,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
obj->efile.st_ops_data = data;
obj->efile.st_ops_shndx = idx;
} else {
- pr_debug("skip section(%d) %s\n", idx, name);
+ pr_info("elf: skipping unrecognized data section(%d) %s\n",
+ idx, name);
}
} else if (sh.sh_type == SHT_REL) {
int nr_sects = obj->efile.nr_reloc_sects;
@@ -2681,34 +2804,33 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
if (!section_have_execinstr(obj, sec) &&
strcmp(name, ".rel" STRUCT_OPS_SEC) &&
strcmp(name, ".rel" MAPS_ELF_SEC)) {
- pr_debug("skip relo %s(%d) for section(%d)\n",
- name, idx, sec);
+ pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
+ idx, name, sec,
+ elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: "<?>");
continue;
}
- sects = reallocarray(sects, nr_sects + 1,
- sizeof(*obj->efile.reloc_sects));
- if (!sects) {
- pr_warn("reloc_sects realloc failed\n");
+ sects = libbpf_reallocarray(sects, nr_sects + 1,
+ sizeof(*obj->efile.reloc_sects));
+ if (!sects)
return -ENOMEM;
- }
obj->efile.reloc_sects = sects;
obj->efile.nr_reloc_sects++;
obj->efile.reloc_sects[nr_sects].shdr = sh;
obj->efile.reloc_sects[nr_sects].data = data;
- } else if (sh.sh_type == SHT_NOBITS &&
- strcmp(name, BSS_SEC) == 0) {
+ } else if (sh.sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {
obj->efile.bss = data;
obj->efile.bss_shndx = idx;
} else {
- pr_debug("skip section(%d) %s\n", idx, name);
+ pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
+ (size_t)sh.sh_size);
}
}
if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
- pr_warn("Corrupted ELF file: index of strtab invalid\n");
+ pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
return -LIBBPF_ERRNO__FORMAT;
}
return bpf_object__init_btf(obj, btf_data, btf_ext_data);
@@ -2869,14 +2991,13 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
if (!obj->efile.symbols)
return 0;
- scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx);
- if (!scn)
+ scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
+ if (elf_sec_hdr(obj, scn, &sh))
return -LIBBPF_ERRNO__FORMAT;
- if (gelf_getshdr(scn, &sh) != &sh)
- return -LIBBPF_ERRNO__FORMAT;
- n = sh.sh_size / sh.sh_entsize;
+ n = sh.sh_size / sh.sh_entsize;
pr_debug("looking for externs among %d symbols...\n", n);
+
for (i = 0; i < n; i++) {
GElf_Sym sym;
@@ -2884,13 +3005,12 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
return -LIBBPF_ERRNO__FORMAT;
if (!sym_is_extern(&sym))
continue;
- ext_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
- sym.st_name);
+ ext_name = elf_sym_str(obj, sym.st_name);
if (!ext_name || !ext_name[0])
continue;
ext = obj->externs;
- ext = reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
+ ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
if (!ext)
return -ENOMEM;
obj->externs = ext;
@@ -3109,7 +3229,7 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
static int bpf_program__record_reloc(struct bpf_program *prog,
struct reloc_desc *reloc_desc,
- __u32 insn_idx, const char *name,
+ __u32 insn_idx, const char *sym_name,
const GElf_Sym *sym, const GElf_Rel *rel)
{
struct bpf_insn *insn = &prog->insns[insn_idx];
@@ -3117,22 +3237,25 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
struct bpf_object *obj = prog->obj;
__u32 shdr_idx = sym->st_shndx;
enum libbpf_map_type type;
+ const char *sym_sec_name;
struct bpf_map *map;
/* sub-program call relocation */
if (insn->code == (BPF_JMP | BPF_CALL)) {
if (insn->src_reg != BPF_PSEUDO_CALL) {
- pr_warn("incorrect bpf_call opcode\n");
+ pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
return -LIBBPF_ERRNO__RELOC;
}
/* text_shndx can be 0, if no default "main" program exists */
if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
- pr_warn("bad call relo against section %u\n", shdr_idx);
+ sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
+ pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
+ prog->name, sym_name, sym_sec_name);
return -LIBBPF_ERRNO__RELOC;
}
- if (sym->st_value % 8) {
- pr_warn("bad call relo offset: %zu\n",
- (size_t)sym->st_value);
+ if (sym->st_value % BPF_INSN_SZ) {
+ pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
+ prog->name, sym_name, (size_t)sym->st_value);
return -LIBBPF_ERRNO__RELOC;
}
reloc_desc->type = RELO_CALL;
@@ -3143,8 +3266,8 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
}
if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
- pr_warn("invalid relo for insns[%d].code 0x%x\n",
- insn_idx, insn->code);
+ pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
+ prog->name, sym_name, insn_idx, insn->code);
return -LIBBPF_ERRNO__RELOC;
}
@@ -3159,12 +3282,12 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
break;
}
if (i >= n) {
- pr_warn("extern relo failed to find extern for sym %d\n",
- sym_idx);
+ pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
+ prog->name, sym_name, sym_idx);
return -LIBBPF_ERRNO__RELOC;
}
- pr_debug("found extern #%d '%s' (sym %d) for insn %u\n",
- i, ext->name, ext->sym_idx, insn_idx);
+ pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
+ prog->name, i, ext->name, ext->sym_idx, insn_idx);
reloc_desc->type = RELO_EXTERN;
reloc_desc->insn_idx = insn_idx;
reloc_desc->sym_off = i; /* sym_off stores extern index */
@@ -3172,18 +3295,19 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
}
if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
- pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n",
- name, shdr_idx);
+ pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
+ prog->name, sym_name, shdr_idx);
return -LIBBPF_ERRNO__RELOC;
}
type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
+ sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
/* generic map reference relocation */
if (type == LIBBPF_MAP_UNSPEC) {
if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
- pr_warn("bad map relo against section %u\n",
- shdr_idx);
+ pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
+ prog->name, sym_name, sym_sec_name);
return -LIBBPF_ERRNO__RELOC;
}
for (map_idx = 0; map_idx < nr_maps; map_idx++) {
@@ -3192,14 +3316,14 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
map->sec_idx != sym->st_shndx ||
map->sec_offset != sym->st_value)
continue;
- pr_debug("found map %zd (%s, sec %d, off %zu) for insn %u\n",
- map_idx, map->name, map->sec_idx,
+ pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
+ prog->name, map_idx, map->name, map->sec_idx,
map->sec_offset, insn_idx);
break;
}
if (map_idx >= nr_maps) {
- pr_warn("map relo failed to find map for sec %u, off %zu\n",
- shdr_idx, (size_t)sym->st_value);
+ pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
+ prog->name, sym_sec_name, (size_t)sym->st_value);
return -LIBBPF_ERRNO__RELOC;
}
reloc_desc->type = RELO_LD64;
@@ -3211,21 +3335,22 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
/* global data map relocation */
if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
- pr_warn("bad data relo against section %u\n", shdr_idx);
+ pr_warn("prog '%s': bad data relo against section '%s'\n",
+ prog->name, sym_sec_name);
return -LIBBPF_ERRNO__RELOC;
}
for (map_idx = 0; map_idx < nr_maps; map_idx++) {
map = &obj->maps[map_idx];
if (map->libbpf_type != type)
continue;
- pr_debug("found data map %zd (%s, sec %d, off %zu) for insn %u\n",
- map_idx, map->name, map->sec_idx, map->sec_offset,
- insn_idx);
+ pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
+ prog->name, map_idx, map->name, map->sec_idx,
+ map->sec_offset, insn_idx);
break;
}
if (map_idx >= nr_maps) {
- pr_warn("data relo failed to find map for sec %u\n",
- shdr_idx);
+ pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
+ prog->name, sym_sec_name);
return -LIBBPF_ERRNO__RELOC;
}
@@ -3241,9 +3366,17 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
Elf_Data *data, struct bpf_object *obj)
{
Elf_Data *symbols = obj->efile.symbols;
+ const char *relo_sec_name, *sec_name;
+ size_t sec_idx = shdr->sh_info;
int err, i, nrels;
- pr_debug("collecting relocating info for: '%s'\n", prog->section_name);
+ relo_sec_name = elf_sec_str(obj, shdr->sh_name);
+ sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+ if (!relo_sec_name || !sec_name)
+ return -EINVAL;
+
+ pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
+ relo_sec_name, sec_idx, sec_name);
nrels = shdr->sh_size / shdr->sh_entsize;
prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
@@ -3254,35 +3387,34 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
prog->nr_reloc = nrels;
for (i = 0; i < nrels; i++) {
- const char *name;
+ const char *sym_name;
__u32 insn_idx;
GElf_Sym sym;
GElf_Rel rel;
if (!gelf_getrel(data, i, &rel)) {
- pr_warn("relocation: failed to get %d reloc\n", i);
+ pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
return -LIBBPF_ERRNO__FORMAT;
}
if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
- pr_warn("relocation: symbol %"PRIx64" not found\n",
- GELF_R_SYM(rel.r_info));
+ pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n",
+ relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
return -LIBBPF_ERRNO__FORMAT;
}
- if (rel.r_offset % sizeof(struct bpf_insn))
+ if (rel.r_offset % BPF_INSN_SZ) {
+ pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
+ relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
return -LIBBPF_ERRNO__FORMAT;
+ }
- insn_idx = rel.r_offset / sizeof(struct bpf_insn);
- name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
- sym.st_name) ? : "<?>";
+ insn_idx = rel.r_offset / BPF_INSN_SZ;
+ sym_name = elf_sym_str(obj, sym.st_name) ?: "<?>";
- pr_debug("relo for shdr %u, symb %zu, value %zu, type %d, bind %d, name %d (\'%s\'), insn %u\n",
- (__u32)sym.st_shndx, (size_t)GELF_R_SYM(rel.r_info),
- (size_t)sym.st_value, GELF_ST_TYPE(sym.st_info),
- GELF_ST_BIND(sym.st_info), sym.st_name, name,
- insn_idx);
+ pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
+ relo_sec_name, i, insn_idx, sym_name);
err = bpf_program__record_reloc(prog, &prog->reloc_desc[i],
- insn_idx, name, &sym, &rel);
+ insn_idx, sym_name, &sym, &rel);
if (err)
return err;
}
@@ -3433,8 +3565,14 @@ bpf_object__probe_loading(struct bpf_object *obj)
return 0;
}
-static int
-bpf_object__probe_name(struct bpf_object *obj)
+static int probe_fd(int fd)
+{
+ if (fd >= 0)
+ close(fd);
+ return fd >= 0;
+}
+
+static int probe_kern_prog_name(void)
{
struct bpf_load_program_attr attr;
struct bpf_insn insns[] = {
@@ -3452,16 +3590,10 @@ bpf_object__probe_name(struct bpf_object *obj)
attr.license = "GPL";
attr.name = "test";
ret = bpf_load_program_xattr(&attr, NULL, 0);
- if (ret >= 0) {
- obj->caps.name = 1;
- close(ret);
- }
-
- return 0;
+ return probe_fd(ret);
}
-static int
-bpf_object__probe_global_data(struct bpf_object *obj)
+static int probe_kern_global_data(void)
{
struct bpf_load_program_attr prg_attr;
struct bpf_create_map_attr map_attr;
@@ -3498,16 +3630,23 @@ bpf_object__probe_global_data(struct bpf_object *obj)
prg_attr.license = "GPL";
ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
- if (ret >= 0) {
- obj->caps.global_data = 1;
- close(ret);
- }
-
close(map);
- return 0;
+ return probe_fd(ret);
}
-static int bpf_object__probe_btf_func(struct bpf_object *obj)
+static int probe_kern_btf(void)
+{
+ static const char strs[] = "\0int";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs)));
+}
+
+static int probe_kern_btf_func(void)
{
static const char strs[] = "\0int\0x\0a";
/* void x(int a) {} */
@@ -3520,20 +3659,12 @@ static int bpf_object__probe_btf_func(struct bpf_object *obj)
/* FUNC x */ /* [3] */
BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
};
- int btf_fd;
- btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs));
- if (btf_fd >= 0) {
- obj->caps.btf_func = 1;
- close(btf_fd);
- return 1;
- }
-
- return 0;
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs)));
}
-static int bpf_object__probe_btf_func_global(struct bpf_object *obj)
+static int probe_kern_btf_func_global(void)
{
static const char strs[] = "\0int\0x\0a";
/* static void x(int a) {} */
@@ -3546,20 +3677,12 @@ static int bpf_object__probe_btf_func_global(struct bpf_object *obj)
/* FUNC x BTF_FUNC_GLOBAL */ /* [3] */
BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
};
- int btf_fd;
- btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs));
- if (btf_fd >= 0) {
- obj->caps.btf_func_global = 1;
- close(btf_fd);
- return 1;
- }
-
- return 0;
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs)));
}
-static int bpf_object__probe_btf_datasec(struct bpf_object *obj)
+static int probe_kern_btf_datasec(void)
{
static const char strs[] = "\0x\0.data";
/* static int a; */
@@ -3573,20 +3696,12 @@ static int bpf_object__probe_btf_datasec(struct bpf_object *obj)
BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
BTF_VAR_SECINFO_ENC(2, 0, 4),
};
- int btf_fd;
- btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs));
- if (btf_fd >= 0) {
- obj->caps.btf_datasec = 1;
- close(btf_fd);
- return 1;
- }
-
- return 0;
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs)));
}
-static int bpf_object__probe_array_mmap(struct bpf_object *obj)
+static int probe_kern_array_mmap(void)
{
struct bpf_create_map_attr attr = {
.map_type = BPF_MAP_TYPE_ARRAY,
@@ -3595,27 +3710,17 @@ static int bpf_object__probe_array_mmap(struct bpf_object *obj)
.value_size = sizeof(int),
.max_entries = 1,
};
- int fd;
- fd = bpf_create_map_xattr(&attr);
- if (fd >= 0) {
- obj->caps.array_mmap = 1;
- close(fd);
- return 1;
- }
-
- return 0;
+ return probe_fd(bpf_create_map_xattr(&attr));
}
-static int
-bpf_object__probe_exp_attach_type(struct bpf_object *obj)
+static int probe_kern_exp_attach_type(void)
{
struct bpf_load_program_attr attr;
struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
- int fd;
memset(&attr, 0, sizeof(attr));
/* use any valid combination of program type and (optional)
@@ -3629,36 +3734,91 @@ bpf_object__probe_exp_attach_type(struct bpf_object *obj)
attr.insns_cnt = ARRAY_SIZE(insns);
attr.license = "GPL";
- fd = bpf_load_program_xattr(&attr, NULL, 0);
- if (fd >= 0) {
- obj->caps.exp_attach_type = 1;
- close(fd);
- return 1;
- }
- return 0;
+ return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
}
-static int
-bpf_object__probe_caps(struct bpf_object *obj)
+static int probe_kern_probe_read_kernel(void)
{
- int (*probe_fn[])(struct bpf_object *obj) = {
- bpf_object__probe_name,
- bpf_object__probe_global_data,
- bpf_object__probe_btf_func,
- bpf_object__probe_btf_func_global,
- bpf_object__probe_btf_datasec,
- bpf_object__probe_array_mmap,
- bpf_object__probe_exp_attach_type,
+ struct bpf_load_program_attr attr;
+ struct bpf_insn insns[] = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */
+ BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */
+ BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
+ BPF_EXIT_INSN(),
};
- int i, ret;
- for (i = 0; i < ARRAY_SIZE(probe_fn); i++) {
- ret = probe_fn[i](obj);
- if (ret < 0)
- pr_debug("Probe #%d failed with %d.\n", i, ret);
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_KPROBE;
+ attr.insns = insns;
+ attr.insns_cnt = ARRAY_SIZE(insns);
+ attr.license = "GPL";
+
+ return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
+}
+
+enum kern_feature_result {
+ FEAT_UNKNOWN = 0,
+ FEAT_SUPPORTED = 1,
+ FEAT_MISSING = 2,
+};
+
+typedef int (*feature_probe_fn)(void);
+
+static struct kern_feature_desc {
+ const char *desc;
+ feature_probe_fn probe;
+ enum kern_feature_result res;
+} feature_probes[__FEAT_CNT] = {
+ [FEAT_PROG_NAME] = {
+ "BPF program name", probe_kern_prog_name,
+ },
+ [FEAT_GLOBAL_DATA] = {
+ "global variables", probe_kern_global_data,
+ },
+ [FEAT_BTF] = {
+ "minimal BTF", probe_kern_btf,
+ },
+ [FEAT_BTF_FUNC] = {
+ "BTF functions", probe_kern_btf_func,
+ },
+ [FEAT_BTF_GLOBAL_FUNC] = {
+ "BTF global function", probe_kern_btf_func_global,
+ },
+ [FEAT_BTF_DATASEC] = {
+ "BTF data section and variable", probe_kern_btf_datasec,
+ },
+ [FEAT_ARRAY_MMAP] = {
+ "ARRAY map mmap()", probe_kern_array_mmap,
+ },
+ [FEAT_EXP_ATTACH_TYPE] = {
+ "BPF_PROG_LOAD expected_attach_type attribute",
+ probe_kern_exp_attach_type,
+ },
+ [FEAT_PROBE_READ_KERN] = {
+ "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
+ }
+};
+
+static bool kernel_supports(enum kern_feature_id feat_id)
+{
+ struct kern_feature_desc *feat = &feature_probes[feat_id];
+ int ret;
+
+ if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
+ ret = feat->probe();
+ if (ret > 0) {
+ WRITE_ONCE(feat->res, FEAT_SUPPORTED);
+ } else if (ret == 0) {
+ WRITE_ONCE(feat->res, FEAT_MISSING);
+ } else {
+ pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
+ WRITE_ONCE(feat->res, FEAT_MISSING);
+ }
}
- return 0;
+ return READ_ONCE(feat->res) == FEAT_SUPPORTED;
}
static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
@@ -3760,7 +3920,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
memset(&create_attr, 0, sizeof(create_attr));
- if (obj->caps.name)
+ if (kernel_supports(FEAT_PROG_NAME))
create_attr.name = map->name;
create_attr.map_ifindex = map->map_ifindex;
create_attr.map_type = def->type;
@@ -4011,6 +4171,10 @@ struct bpf_core_spec {
const struct btf *btf;
/* high-level spec: named fields and array indices only */
struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
+ /* original unresolved (no skip_mods_or_typedefs) root type ID */
+ __u32 root_type_id;
+ /* CO-RE relocation kind */
+ enum bpf_core_relo_kind relo_kind;
/* high-level spec length */
int len;
/* raw, low-level spec: 1-to-1 with accessor spec string */
@@ -4041,8 +4205,66 @@ static bool is_flex_arr(const struct btf *btf,
return acc->idx == btf_vlen(t) - 1;
}
+static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_FIELD_BYTE_OFFSET: return "byte_off";
+ case BPF_FIELD_BYTE_SIZE: return "byte_sz";
+ case BPF_FIELD_EXISTS: return "field_exists";
+ case BPF_FIELD_SIGNED: return "signed";
+ case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
+ case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
+ case BPF_TYPE_ID_LOCAL: return "local_type_id";
+ case BPF_TYPE_ID_TARGET: return "target_type_id";
+ case BPF_TYPE_EXISTS: return "type_exists";
+ case BPF_TYPE_SIZE: return "type_size";
+ case BPF_ENUMVAL_EXISTS: return "enumval_exists";
+ case BPF_ENUMVAL_VALUE: return "enumval_value";
+ default: return "unknown";
+ }
+}
+
+static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_FIELD_BYTE_OFFSET:
+ case BPF_FIELD_BYTE_SIZE:
+ case BPF_FIELD_EXISTS:
+ case BPF_FIELD_SIGNED:
+ case BPF_FIELD_LSHIFT_U64:
+ case BPF_FIELD_RSHIFT_U64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_TYPE_ID_LOCAL:
+ case BPF_TYPE_ID_TARGET:
+ case BPF_TYPE_EXISTS:
+ case BPF_TYPE_SIZE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_ENUMVAL_EXISTS:
+ case BPF_ENUMVAL_VALUE:
+ return true;
+ default:
+ return false;
+ }
+}
+
/*
- * Turn bpf_field_reloc into a low- and high-level spec representation,
+ * Turn bpf_core_relo into a low- and high-level spec representation,
* validating correctness along the way, as well as calculating resulting
* field bit offset, specified by accessor string. Low-level spec captures
* every single level of nestedness, including traversing anonymous
@@ -4071,10 +4293,17 @@ static bool is_flex_arr(const struct btf *btf,
* - field 'a' access (corresponds to '2' in low-level spec);
* - array element #3 access (corresponds to '3' in low-level spec).
*
+ * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
+ * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
+ * spec and raw_spec are kept empty.
+ *
+ * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
+ * string to specify enumerator's value index that need to be relocated.
*/
-static int bpf_core_spec_parse(const struct btf *btf,
+static int bpf_core_parse_spec(const struct btf *btf,
__u32 type_id,
const char *spec_str,
+ enum bpf_core_relo_kind relo_kind,
struct bpf_core_spec *spec)
{
int access_idx, parsed_len, i;
@@ -4089,6 +4318,15 @@ static int bpf_core_spec_parse(const struct btf *btf,
memset(spec, 0, sizeof(*spec));
spec->btf = btf;
+ spec->root_type_id = type_id;
+ spec->relo_kind = relo_kind;
+
+ /* type-based relocations don't have a field access string */
+ if (core_relo_is_type_based(relo_kind)) {
+ if (strcmp(spec_str, "0"))
+ return -EINVAL;
+ return 0;
+ }
/* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
while (*spec_str) {
@@ -4105,16 +4343,28 @@ static int bpf_core_spec_parse(const struct btf *btf,
if (spec->raw_len == 0)
return -EINVAL;
- /* first spec value is always reloc type array index */
t = skip_mods_and_typedefs(btf, type_id, &id);
if (!t)
return -EINVAL;
access_idx = spec->raw_spec[0];
- spec->spec[0].type_id = id;
- spec->spec[0].idx = access_idx;
+ acc = &spec->spec[0];
+ acc->type_id = id;
+ acc->idx = access_idx;
spec->len++;
+ if (core_relo_is_enumval_based(relo_kind)) {
+ if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
+ return -EINVAL;
+
+ /* record enumerator name in a first accessor */
+ acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
+ return 0;
+ }
+
+ if (!core_relo_is_field_based(relo_kind))
+ return -EINVAL;
+
sz = btf__resolve_size(btf, id);
if (sz < 0)
return sz;
@@ -4172,8 +4422,8 @@ static int bpf_core_spec_parse(const struct btf *btf,
return sz;
spec->bit_offset += access_idx * sz * 8;
} else {
- pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n",
- type_id, spec_str, i, id, btf_kind(t));
+ pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
+ type_id, spec_str, i, id, btf_kind_str(t));
return -EINVAL;
}
}
@@ -4223,16 +4473,16 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
{
size_t local_essent_len, targ_essent_len;
const char *local_name, *targ_name;
- const struct btf_type *t;
+ const struct btf_type *t, *local_t;
struct ids_vec *cand_ids;
__u32 *new_ids;
int i, err, n;
- t = btf__type_by_id(local_btf, local_type_id);
- if (!t)
+ local_t = btf__type_by_id(local_btf, local_type_id);
+ if (!local_t)
return ERR_PTR(-EINVAL);
- local_name = btf__name_by_offset(local_btf, t->name_off);
+ local_name = btf__name_by_offset(local_btf, local_t->name_off);
if (str_is_empty(local_name))
return ERR_PTR(-EINVAL);
local_essent_len = bpf_core_essential_name_len(local_name);
@@ -4244,12 +4494,11 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
n = btf__get_nr_types(targ_btf);
for (i = 1; i <= n; i++) {
t = btf__type_by_id(targ_btf, i);
- targ_name = btf__name_by_offset(targ_btf, t->name_off);
- if (str_is_empty(targ_name))
+ if (btf_kind(t) != btf_kind(local_t))
continue;
- t = skip_mods_and_typedefs(targ_btf, i, NULL);
- if (!btf_is_composite(t) && !btf_is_array(t))
+ targ_name = btf__name_by_offset(targ_btf, t->name_off);
+ if (str_is_empty(targ_name))
continue;
targ_essent_len = bpf_core_essential_name_len(targ_name);
@@ -4257,11 +4506,12 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
continue;
if (strncmp(local_name, targ_name, local_essent_len) == 0) {
- pr_debug("[%d] %s: found candidate [%d] %s\n",
- local_type_id, local_name, i, targ_name);
- new_ids = reallocarray(cand_ids->data,
- cand_ids->len + 1,
- sizeof(*cand_ids->data));
+ pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n",
+ local_type_id, btf_kind_str(local_t),
+ local_name, i, btf_kind_str(t), targ_name);
+ new_ids = libbpf_reallocarray(cand_ids->data,
+ cand_ids->len + 1,
+ sizeof(*cand_ids->data));
if (!new_ids) {
err = -ENOMEM;
goto err_out;
@@ -4276,8 +4526,9 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
return ERR_PTR(err);
}
-/* Check two types for compatibility, skipping const/volatile/restrict and
- * typedefs, to ensure we are relocating compatible entities:
+/* Check two types for compatibility for the purpose of field access
+ * relocation. const/volatile/restrict and typedefs are skipped to ensure we
+ * are relocating semantically compatible entities:
* - any two STRUCTs/UNIONs are compatible and can be mixed;
* - any two FWDs are compatible, if their names match (modulo flavor suffix);
* - any two PTRs are always compatible;
@@ -4432,6 +4683,100 @@ static int bpf_core_match_member(const struct btf *local_btf,
return 0;
}
+/* Check local and target types for compatibility. This check is used for
+ * type-based CO-RE relocations and follow slightly different rules than
+ * field-based relocations. This function assumes that root types were already
+ * checked for name match. Beyond that initial root-level name check, names
+ * are completely ignored. Compatibility rules are as follows:
+ * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
+ * kind should match for local and target types (i.e., STRUCT is not
+ * compatible with UNION);
+ * - for ENUMs, the size is ignored;
+ * - for INT, size and signedness are ignored;
+ * - for ARRAY, dimensionality is ignored, element types are checked for
+ * compatibility recursively;
+ * - CONST/VOLATILE/RESTRICT modifiers are ignored;
+ * - TYPEDEFs/PTRs are compatible if types they pointing to are compatible;
+ * - FUNC_PROTOs are compatible if they have compatible signature: same
+ * number of input args and compatible return and argument types.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ */
+static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id)
+{
+ const struct btf_type *local_type, *targ_type;
+ int depth = 32; /* max recursion depth */
+
+ /* caller made sure that names match (ignoring flavor suffix) */
+ local_type = btf__type_by_id(local_btf, local_id);
+ targ_type = btf__type_by_id(targ_btf, targ_id);
+ if (btf_kind(local_type) != btf_kind(targ_type))
+ return 0;
+
+recur:
+ depth--;
+ if (depth < 0)
+ return -EINVAL;
+
+ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!local_type || !targ_type)
+ return -EINVAL;
+
+ if (btf_kind(local_type) != btf_kind(targ_type))
+ return 0;
+
+ switch (btf_kind(local_type)) {
+ case BTF_KIND_UNKN:
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_FWD:
+ return 1;
+ case BTF_KIND_INT:
+ /* just reject deprecated bitfield-like integers; all other
+ * integers are by default compatible between each other
+ */
+ return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
+ case BTF_KIND_PTR:
+ local_id = local_type->type;
+ targ_id = targ_type->type;
+ goto recur;
+ case BTF_KIND_ARRAY:
+ local_id = btf_array(local_type)->type;
+ targ_id = btf_array(targ_type)->type;
+ goto recur;
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *local_p = btf_params(local_type);
+ struct btf_param *targ_p = btf_params(targ_type);
+ __u16 local_vlen = btf_vlen(local_type);
+ __u16 targ_vlen = btf_vlen(targ_type);
+ int i, err;
+
+ if (local_vlen != targ_vlen)
+ return 0;
+
+ for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
+ skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
+ skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
+ err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id);
+ if (err <= 0)
+ return err;
+ }
+
+ /* tail recurse for return type check */
+ skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
+ skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
+ goto recur;
+ }
+ default:
+ pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
+ btf_kind_str(local_type), local_id, targ_id);
+ return 0;
+ }
+}
+
/*
* Try to match local spec to a target type and, if successful, produce full
* target spec (high-level, low-level + bit offset).
@@ -4447,10 +4792,51 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
memset(targ_spec, 0, sizeof(*targ_spec));
targ_spec->btf = targ_btf;
+ targ_spec->root_type_id = targ_id;
+ targ_spec->relo_kind = local_spec->relo_kind;
+
+ if (core_relo_is_type_based(local_spec->relo_kind)) {
+ return bpf_core_types_are_compat(local_spec->btf,
+ local_spec->root_type_id,
+ targ_btf, targ_id);
+ }
local_acc = &local_spec->spec[0];
targ_acc = &targ_spec->spec[0];
+ if (core_relo_is_enumval_based(local_spec->relo_kind)) {
+ size_t local_essent_len, targ_essent_len;
+ const struct btf_enum *e;
+ const char *targ_name;
+
+ /* has to resolve to an enum */
+ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
+ if (!btf_is_enum(targ_type))
+ return 0;
+
+ local_essent_len = bpf_core_essential_name_len(local_acc->name);
+
+ for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
+ targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
+ targ_essent_len = bpf_core_essential_name_len(targ_name);
+ if (targ_essent_len != local_essent_len)
+ continue;
+ if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = i;
+ targ_acc->name = targ_name;
+ targ_spec->len++;
+ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+ targ_spec->raw_len++;
+ return 1;
+ }
+ }
+ return 0;
+ }
+
+ if (!core_relo_is_field_based(local_spec->relo_kind))
+ return -EINVAL;
+
for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
&targ_id);
@@ -4507,18 +4893,29 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
}
static int bpf_core_calc_field_relo(const struct bpf_program *prog,
- const struct bpf_field_reloc *relo,
+ const struct bpf_core_relo *relo,
const struct bpf_core_spec *spec,
__u32 *val, bool *validate)
{
- const struct bpf_core_accessor *acc = &spec->spec[spec->len - 1];
- const struct btf_type *t = btf__type_by_id(spec->btf, acc->type_id);
+ const struct bpf_core_accessor *acc;
+ const struct btf_type *t;
__u32 byte_off, byte_sz, bit_off, bit_sz;
const struct btf_member *m;
const struct btf_type *mt;
bool bitfield;
__s64 sz;
+ if (relo->kind == BPF_FIELD_EXISTS) {
+ *val = spec ? 1 : 0;
+ return 0;
+ }
+
+ if (!spec)
+ return -EUCLEAN; /* request instruction poisoning */
+
+ acc = &spec->spec[spec->len - 1];
+ t = btf__type_by_id(spec->btf, acc->type_id);
+
/* a[n] accessor needs special handling */
if (!acc->name) {
if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
@@ -4604,21 +5001,158 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
break;
case BPF_FIELD_EXISTS:
default:
- pr_warn("prog '%s': unknown relo %d at insn #%d\n",
- bpf_program__title(prog, false),
- relo->kind, relo->insn_off / 8);
- return -EINVAL;
+ return -EOPNOTSUPP;
}
return 0;
}
+static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
+ const struct bpf_core_spec *spec,
+ __u32 *val)
+{
+ __s64 sz;
+
+ /* type-based relos return zero when target type is not found */
+ if (!spec) {
+ *val = 0;
+ return 0;
+ }
+
+ switch (relo->kind) {
+ case BPF_TYPE_ID_TARGET:
+ *val = spec->root_type_id;
+ break;
+ case BPF_TYPE_EXISTS:
+ *val = 1;
+ break;
+ case BPF_TYPE_SIZE:
+ sz = btf__resolve_size(spec->btf, spec->root_type_id);
+ if (sz < 0)
+ return -EINVAL;
+ *val = sz;
+ break;
+ case BPF_TYPE_ID_LOCAL:
+ /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
+ const struct bpf_core_spec *spec,
+ __u32 *val)
+{
+ const struct btf_type *t;
+ const struct btf_enum *e;
+
+ switch (relo->kind) {
+ case BPF_ENUMVAL_EXISTS:
+ *val = spec ? 1 : 0;
+ break;
+ case BPF_ENUMVAL_VALUE:
+ if (!spec)
+ return -EUCLEAN; /* request instruction poisoning */
+ t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
+ e = btf_enum(t) + spec->spec[0].idx;
+ *val = e->val;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+struct bpf_core_relo_res
+{
+ /* expected value in the instruction, unless validate == false */
+ __u32 orig_val;
+ /* new value that needs to be patched up to */
+ __u32 new_val;
+ /* relocation unsuccessful, poison instruction, but don't fail load */
+ bool poison;
+ /* some relocations can't be validated against orig_val */
+ bool validate;
+};
+
+/* Calculate original and target relocation values, given local and target
+ * specs and relocation kind. These values are calculated for each candidate.
+ * If there are multiple candidates, resulting values should all be consistent
+ * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
+ * If instruction has to be poisoned, *poison will be set to true.
+ */
+static int bpf_core_calc_relo(const struct bpf_program *prog,
+ const struct bpf_core_relo *relo,
+ int relo_idx,
+ const struct bpf_core_spec *local_spec,
+ const struct bpf_core_spec *targ_spec,
+ struct bpf_core_relo_res *res)
+{
+ int err = -EOPNOTSUPP;
+
+ res->orig_val = 0;
+ res->new_val = 0;
+ res->poison = false;
+ res->validate = true;
+
+ if (core_relo_is_field_based(relo->kind)) {
+ err = bpf_core_calc_field_relo(prog, relo, local_spec, &res->orig_val, &res->validate);
+ err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, &res->new_val, NULL);
+ } else if (core_relo_is_type_based(relo->kind)) {
+ err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
+ err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
+ } else if (core_relo_is_enumval_based(relo->kind)) {
+ err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
+ err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
+ }
+
+ if (err == -EUCLEAN) {
+ /* EUCLEAN is used to signal instruction poisoning request */
+ res->poison = true;
+ err = 0;
+ } else if (err == -EOPNOTSUPP) {
+ /* EOPNOTSUPP means unknown/unsupported relocation */
+ pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
+ bpf_program__title(prog, false), relo_idx,
+ core_relo_kind_str(relo->kind), relo->kind, relo->insn_off / 8);
+ }
+
+ return err;
+}
+
+/*
+ * Turn instruction for which CO_RE relocation failed into invalid one with
+ * distinct signature.
+ */
+static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
+ int insn_idx, struct bpf_insn *insn)
+{
+ pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
+ bpf_program__title(prog, false), relo_idx, insn_idx);
+ insn->code = BPF_JMP | BPF_CALL;
+ insn->dst_reg = 0;
+ insn->src_reg = 0;
+ insn->off = 0;
+ /* if this instruction is reachable (not a dead code),
+ * verifier will complain with the following message:
+ * invalid func unknown#195896080
+ */
+ insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
+}
+
+static bool is_ldimm64(struct bpf_insn *insn)
+{
+ return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
/*
* Patch relocatable BPF instruction.
*
* Patched value is determined by relocation kind and target specification.
- * For field existence relocation target spec will be NULL if field is not
- * found.
+ * For existence relocations target spec will be NULL if field/type is not found.
* Expected insn->imm value is determined using relocation kind and local
* spec, and is checked before patching instruction. If actual insn->imm value
* is wrong, bail out with error.
@@ -4626,58 +5160,43 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
* Currently three kinds of BPF instructions are supported:
* 1. rX = <imm> (assignment with immediate operand);
* 2. rX += <imm> (arithmetic operations with immediate operand);
+ * 3. rX = <imm64> (load with 64-bit immediate value).
*/
-static int bpf_core_reloc_insn(struct bpf_program *prog,
- const struct bpf_field_reloc *relo,
+static int bpf_core_patch_insn(struct bpf_program *prog,
+ const struct bpf_core_relo *relo,
int relo_idx,
- const struct bpf_core_spec *local_spec,
- const struct bpf_core_spec *targ_spec)
+ const struct bpf_core_relo_res *res)
{
__u32 orig_val, new_val;
struct bpf_insn *insn;
- bool validate = true;
- int insn_idx, err;
+ int insn_idx;
__u8 class;
- if (relo->insn_off % sizeof(struct bpf_insn))
+ if (relo->insn_off % BPF_INSN_SZ)
return -EINVAL;
- insn_idx = relo->insn_off / sizeof(struct bpf_insn);
+ insn_idx = relo->insn_off / BPF_INSN_SZ;
insn = &prog->insns[insn_idx];
class = BPF_CLASS(insn->code);
- if (relo->kind == BPF_FIELD_EXISTS) {
- orig_val = 1; /* can't generate EXISTS relo w/o local field */
- new_val = targ_spec ? 1 : 0;
- } else if (!targ_spec) {
- pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
- bpf_program__title(prog, false), relo_idx, insn_idx);
- insn->code = BPF_JMP | BPF_CALL;
- insn->dst_reg = 0;
- insn->src_reg = 0;
- insn->off = 0;
- /* if this instruction is reachable (not a dead code),
- * verifier will complain with the following message:
- * invalid func unknown#195896080
+ if (res->poison) {
+ /* poison second part of ldimm64 to avoid confusing error from
+ * verifier about "unknown opcode 00"
*/
- insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
+ if (is_ldimm64(insn))
+ bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
+ bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
return 0;
- } else {
- err = bpf_core_calc_field_relo(prog, relo, local_spec,
- &orig_val, &validate);
- if (err)
- return err;
- err = bpf_core_calc_field_relo(prog, relo, targ_spec,
- &new_val, NULL);
- if (err)
- return err;
}
+ orig_val = res->orig_val;
+ new_val = res->new_val;
+
switch (class) {
case BPF_ALU:
case BPF_ALU64:
if (BPF_SRC(insn->code) != BPF_K)
return -EINVAL;
- if (validate && insn->imm != orig_val) {
+ if (res->validate && insn->imm != orig_val) {
pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
bpf_program__title(prog, false), relo_idx,
insn_idx, insn->imm, orig_val, new_val);
@@ -4692,8 +5211,8 @@ static int bpf_core_reloc_insn(struct bpf_program *prog,
case BPF_LDX:
case BPF_ST:
case BPF_STX:
- if (validate && insn->off != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n",
+ if (res->validate && insn->off != orig_val) {
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
bpf_program__title(prog, false), relo_idx,
insn_idx, insn->off, orig_val, new_val);
return -EINVAL;
@@ -4710,8 +5229,37 @@ static int bpf_core_reloc_insn(struct bpf_program *prog,
bpf_program__title(prog, false), relo_idx, insn_idx,
orig_val, new_val);
break;
+ case BPF_LD: {
+ __u64 imm;
+
+ if (!is_ldimm64(insn) ||
+ insn[0].src_reg != 0 || insn[0].off != 0 ||
+ insn_idx + 1 >= prog->insns_cnt ||
+ insn[1].code != 0 || insn[1].dst_reg != 0 ||
+ insn[1].src_reg != 0 || insn[1].off != 0) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
+ bpf_program__title(prog, false), relo_idx, insn_idx);
+ return -EINVAL;
+ }
+
+ imm = insn[0].imm + ((__u64)insn[1].imm << 32);
+ if (res->validate && imm != orig_val) {
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
+ bpf_program__title(prog, false), relo_idx,
+ insn_idx, (unsigned long long)imm,
+ orig_val, new_val);
+ return -EINVAL;
+ }
+
+ insn[0].imm = new_val;
+ insn[1].imm = 0; /* currently only 32-bit values are supported */
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
+ bpf_program__title(prog, false), relo_idx, insn_idx,
+ (unsigned long long)imm, new_val);
+ break;
+ }
default:
- pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n",
+ pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
bpf_program__title(prog, false), relo_idx,
insn_idx, insn->code, insn->src_reg, insn->dst_reg,
insn->off, insn->imm);
@@ -4728,29 +5276,48 @@ static int bpf_core_reloc_insn(struct bpf_program *prog,
static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
{
const struct btf_type *t;
+ const struct btf_enum *e;
const char *s;
__u32 type_id;
int i;
- type_id = spec->spec[0].type_id;
+ type_id = spec->root_type_id;
t = btf__type_by_id(spec->btf, type_id);
s = btf__name_by_offset(spec->btf, t->name_off);
- libbpf_print(level, "[%u] %s + ", type_id, s);
- for (i = 0; i < spec->raw_len; i++)
- libbpf_print(level, "%d%s", spec->raw_spec[i],
- i == spec->raw_len - 1 ? " => " : ":");
+ libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
- libbpf_print(level, "%u.%u @ &x",
- spec->bit_offset / 8, spec->bit_offset % 8);
+ if (core_relo_is_type_based(spec->relo_kind))
+ return;
- for (i = 0; i < spec->len; i++) {
- if (spec->spec[i].name)
- libbpf_print(level, ".%s", spec->spec[i].name);
- else
- libbpf_print(level, "[%u]", spec->spec[i].idx);
+ if (core_relo_is_enumval_based(spec->relo_kind)) {
+ t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
+ e = btf_enum(t) + spec->raw_spec[0];
+ s = btf__name_by_offset(spec->btf, e->name_off);
+
+ libbpf_print(level, "::%s = %u", s, e->val);
+ return;
}
+ if (core_relo_is_field_based(spec->relo_kind)) {
+ for (i = 0; i < spec->len; i++) {
+ if (spec->spec[i].name)
+ libbpf_print(level, ".%s", spec->spec[i].name);
+ else if (i > 0 || spec->spec[i].idx > 0)
+ libbpf_print(level, "[%u]", spec->spec[i].idx);
+ }
+
+ libbpf_print(level, " (");
+ for (i = 0; i < spec->raw_len; i++)
+ libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
+
+ if (spec->bit_offset % 8)
+ libbpf_print(level, " @ offset %u.%u)",
+ spec->bit_offset / 8, spec->bit_offset % 8);
+ else
+ libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
+ return;
+ }
}
static size_t bpf_core_hash_fn(const void *key, void *ctx)
@@ -4814,22 +5381,23 @@ static void *u32_as_hash_key(__u32 x)
* CPU-wise compared to prebuilding a map from all local type names to
* a list of candidate type names. It's also sped up by caching resolved
* list of matching candidates per each local "root" type ID, that has at
- * least one bpf_field_reloc associated with it. This list is shared
+ * least one bpf_core_relo associated with it. This list is shared
* between multiple relocations for the same type ID and is updated as some
* of the candidates are pruned due to structural incompatibility.
*/
-static int bpf_core_reloc_field(struct bpf_program *prog,
- const struct bpf_field_reloc *relo,
- int relo_idx,
- const struct btf *local_btf,
- const struct btf *targ_btf,
- struct hashmap *cand_cache)
+static int bpf_core_apply_relo(struct bpf_program *prog,
+ const struct bpf_core_relo *relo,
+ int relo_idx,
+ const struct btf *local_btf,
+ const struct btf *targ_btf,
+ struct hashmap *cand_cache)
{
const char *prog_name = bpf_program__title(prog, false);
- struct bpf_core_spec local_spec, cand_spec, targ_spec;
+ struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
const void *type_key = u32_as_hash_key(relo->type_id);
- const struct btf_type *local_type, *cand_type;
- const char *local_name, *cand_name;
+ struct bpf_core_relo_res cand_res, targ_res;
+ const struct btf_type *local_type;
+ const char *local_name;
struct ids_vec *cand_ids;
__u32 local_id, cand_id;
const char *spec_str;
@@ -4841,32 +5409,49 @@ static int bpf_core_reloc_field(struct bpf_program *prog,
return -EINVAL;
local_name = btf__name_by_offset(local_btf, local_type->name_off);
- if (str_is_empty(local_name))
+ if (!local_name)
return -EINVAL;
spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
if (str_is_empty(spec_str))
return -EINVAL;
- err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec);
+ err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
if (err) {
- pr_warn("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n",
- prog_name, relo_idx, local_id, local_name, spec_str,
- err);
+ pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
+ prog_name, relo_idx, local_id, btf_kind_str(local_type),
+ str_is_empty(local_name) ? "<anon>" : local_name,
+ spec_str, err);
return -EINVAL;
}
- pr_debug("prog '%s': relo #%d: kind %d, spec is ", prog_name, relo_idx,
- relo->kind);
+ pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name,
+ relo_idx, core_relo_kind_str(relo->kind), relo->kind);
bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
libbpf_print(LIBBPF_DEBUG, "\n");
+ /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
+ if (relo->kind == BPF_TYPE_ID_LOCAL) {
+ targ_res.validate = true;
+ targ_res.poison = false;
+ targ_res.orig_val = local_spec.root_type_id;
+ targ_res.new_val = local_spec.root_type_id;
+ goto patch_insn;
+ }
+
+ /* libbpf doesn't support candidate search for anonymous types */
+ if (str_is_empty(spec_str)) {
+ pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
+ prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+ return -EOPNOTSUPP;
+ }
+
if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
if (IS_ERR(cand_ids)) {
- pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld",
- prog_name, relo_idx, local_id, local_name,
- PTR_ERR(cand_ids));
+ pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld",
+ prog_name, relo_idx, local_id, btf_kind_str(local_type),
+ local_name, PTR_ERR(cand_ids));
return PTR_ERR(cand_ids);
}
err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
@@ -4878,36 +5463,51 @@ static int bpf_core_reloc_field(struct bpf_program *prog,
for (i = 0, j = 0; i < cand_ids->len; i++) {
cand_id = cand_ids->data[i];
- cand_type = btf__type_by_id(targ_btf, cand_id);
- cand_name = btf__name_by_offset(targ_btf, cand_type->name_off);
-
- err = bpf_core_spec_match(&local_spec, targ_btf,
- cand_id, &cand_spec);
- pr_debug("prog '%s': relo #%d: matching candidate #%d %s against spec ",
- prog_name, relo_idx, i, cand_name);
- bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
- libbpf_print(LIBBPF_DEBUG, ": %d\n", err);
+ err = bpf_core_spec_match(&local_spec, targ_btf, cand_id, &cand_spec);
if (err < 0) {
- pr_warn("prog '%s': relo #%d: matching error: %d\n",
- prog_name, relo_idx, err);
+ pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
+ prog_name, relo_idx, i);
+ bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
+ libbpf_print(LIBBPF_WARN, ": %d\n", err);
return err;
}
+
+ pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name,
+ relo_idx, err == 0 ? "non-matching" : "matching", i);
+ bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
+ libbpf_print(LIBBPF_DEBUG, "\n");
+
if (err == 0)
continue;
+ err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
+ if (err)
+ return err;
+
if (j == 0) {
+ targ_res = cand_res;
targ_spec = cand_spec;
} else if (cand_spec.bit_offset != targ_spec.bit_offset) {
- /* if there are many candidates, they should all
- * resolve to the same bit offset
+ /* if there are many field relo candidates, they
+ * should all resolve to the same bit offset
*/
- pr_warn("prog '%s': relo #%d: offset ambiguity: %u != %u\n",
+ pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
prog_name, relo_idx, cand_spec.bit_offset,
targ_spec.bit_offset);
return -EINVAL;
+ } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
+ /* all candidates should result in the same relocation
+ * decision and value, otherwise it's dangerous to
+ * proceed due to ambiguity
+ */
+ pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
+ prog_name, relo_idx,
+ cand_res.poison ? "failure" : "success", cand_res.new_val,
+ targ_res.poison ? "failure" : "success", targ_res.new_val);
+ return -EINVAL;
}
- cand_ids->data[j++] = cand_spec.spec[0].type_id;
+ cand_ids->data[j++] = cand_spec.root_type_id;
}
/*
@@ -4926,19 +5526,25 @@ static int bpf_core_reloc_field(struct bpf_program *prog,
* as well as expected case, depending whether instruction w/
* relocation is guarded in some way that makes it unreachable (dead
* code) if relocation can't be resolved. This is handled in
- * bpf_core_reloc_insn() uniformly by replacing that instruction with
+ * bpf_core_patch_insn() uniformly by replacing that instruction with
* BPF helper call insn (using invalid helper ID). If that instruction
* is indeed unreachable, then it will be ignored and eliminated by
* verifier. If it was an error, then verifier will complain and point
* to a specific instruction number in its log.
*/
- if (j == 0)
- pr_debug("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n",
- prog_name, relo_idx, local_id, local_name, spec_str);
+ if (j == 0) {
+ pr_debug("prog '%s': relo #%d: no matching targets found\n",
+ prog_name, relo_idx);
- /* bpf_core_reloc_insn should know how to handle missing targ_spec */
- err = bpf_core_reloc_insn(prog, relo, relo_idx, &local_spec,
- j ? &targ_spec : NULL);
+ /* calculate single target relo result explicitly */
+ err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
+ if (err)
+ return err;
+ }
+
+patch_insn:
+ /* bpf_core_patch_insn() should know how to handle missing targ_spec */
+ err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
if (err) {
pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
prog_name, relo_idx, relo->insn_off, err);
@@ -4949,10 +5555,10 @@ static int bpf_core_reloc_field(struct bpf_program *prog,
}
static int
-bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
+bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
{
const struct btf_ext_info_sec *sec;
- const struct bpf_field_reloc *rec;
+ const struct bpf_core_relo *rec;
const struct btf_ext_info *seg;
struct hashmap_entry *entry;
struct hashmap *cand_cache = NULL;
@@ -4961,6 +5567,9 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
const char *sec_name;
int i, err = 0;
+ if (obj->btf_ext->core_relo_info.len == 0)
+ return 0;
+
if (targ_btf_path)
targ_btf = btf__parse_elf(targ_btf_path, NULL);
else
@@ -4976,7 +5585,7 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
goto out;
}
- seg = &obj->btf_ext->field_reloc_info;
+ seg = &obj->btf_ext->core_relo_info;
for_each_btf_ext_sec(seg, sec) {
sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
if (str_is_empty(sec_name)) {
@@ -4997,15 +5606,15 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
goto out;
}
- pr_debug("prog '%s': performing %d CO-RE offset relocs\n",
+ pr_debug("sec '%s': found %d CO-RE relocations\n",
sec_name, sec->num_info);
for_each_btf_ext_rec(seg, sec, i, rec) {
- err = bpf_core_reloc_field(prog, rec, i, obj->btf,
- targ_btf, cand_cache);
+ err = bpf_core_apply_relo(prog, rec, i, obj->btf,
+ targ_btf, cand_cache);
if (err) {
pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
- sec_name, i, err);
+ prog->name, i, err);
goto out;
}
}
@@ -5025,17 +5634,6 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
}
static int
-bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
-{
- int err = 0;
-
- if (obj->btf_ext->field_reloc_info.len)
- err = bpf_core_reloc_fields(obj, targ_btf_path);
-
- return err;
-}
-
-static int
bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
struct reloc_desc *relo)
{
@@ -5051,7 +5649,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
return -LIBBPF_ERRNO__RELOC;
}
new_cnt = prog->insns_cnt + text->insns_cnt;
- new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn));
+ new_insn = libbpf_reallocarray(prog->insns, new_cnt, sizeof(*insn));
if (!new_insn) {
pr_warn("oom in prog realloc\n");
return -ENOMEM;
@@ -5136,7 +5734,8 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
return err;
break;
default:
- pr_warn("relo #%d: bad relo type %d\n", i, relo->type);
+ pr_warn("prog '%s': relo #%d: bad relo type %d\n",
+ prog->name, i, relo->type);
return -EINVAL;
}
}
@@ -5171,7 +5770,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
err = bpf_program__relocate(prog, obj);
if (err) {
- pr_warn("failed to relocate '%s'\n", prog->section_name);
+ pr_warn("prog '%s': failed to relocate data references: %d\n",
+ prog->name, err);
return err;
}
break;
@@ -5186,7 +5786,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
err = bpf_program__relocate(prog, obj);
if (err) {
- pr_warn("failed to relocate '%s'\n", prog->section_name);
+ pr_warn("prog '%s': failed to relocate calls: %d\n",
+ prog->name, err);
return err;
}
}
@@ -5230,8 +5831,7 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
i, (size_t)GELF_R_SYM(rel.r_info));
return -LIBBPF_ERRNO__FORMAT;
}
- name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
- sym.st_name) ? : "<?>";
+ name = elf_sym_str(obj, sym.st_name) ?: "<?>";
if (sym.st_shndx != obj->efile.btf_maps_shndx) {
pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
i, name);
@@ -5293,7 +5893,7 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
moff /= bpf_ptr_sz;
if (moff >= map->init_slots_sz) {
new_sz = moff + 1;
- tmp = realloc(map->init_slots, new_sz * host_ptr_sz);
+ tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
if (!tmp)
return -ENOMEM;
map->init_slots = tmp;
@@ -5348,6 +5948,51 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
return 0;
}
+static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
+{
+ if (BPF_CLASS(insn->code) == BPF_JMP &&
+ BPF_OP(insn->code) == BPF_CALL &&
+ BPF_SRC(insn->code) == BPF_K &&
+ insn->src_reg == 0 &&
+ insn->dst_reg == 0) {
+ *func_id = insn->imm;
+ return true;
+ }
+ return false;
+}
+
+static int bpf_object__sanitize_prog(struct bpf_object* obj, struct bpf_program *prog)
+{
+ struct bpf_insn *insn = prog->insns;
+ enum bpf_func_id func_id;
+ int i;
+
+ for (i = 0; i < prog->insns_cnt; i++, insn++) {
+ if (!insn_is_helper_call(insn, &func_id))
+ continue;
+
+ /* on kernels that don't yet support
+ * bpf_probe_read_{kernel,user}[_str] helpers, fall back
+ * to bpf_probe_read() which works well for old kernels
+ */
+ switch (func_id) {
+ case BPF_FUNC_probe_read_kernel:
+ case BPF_FUNC_probe_read_user:
+ if (!kernel_supports(FEAT_PROBE_READ_KERN))
+ insn->imm = BPF_FUNC_probe_read;
+ break;
+ case BPF_FUNC_probe_read_kernel_str:
+ case BPF_FUNC_probe_read_user_str:
+ if (!kernel_supports(FEAT_PROBE_READ_KERN))
+ insn->imm = BPF_FUNC_probe_read_str;
+ break;
+ default:
+ break;
+ }
+ }
+ return 0;
+}
+
static int
load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
char *license, __u32 kern_version, int *pfd)
@@ -5364,12 +6009,12 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
load_attr.prog_type = prog->type;
/* old kernels might not support specifying expected_attach_type */
- if (!prog->caps->exp_attach_type && prog->sec_def &&
+ if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
prog->sec_def->is_exp_attach_type_optional)
load_attr.expected_attach_type = 0;
else
load_attr.expected_attach_type = prog->expected_attach_type;
- if (prog->caps->name)
+ if (kernel_supports(FEAT_PROG_NAME))
load_attr.name = prog->name;
load_attr.insns = insns;
load_attr.insns_cnt = insns_cnt;
@@ -5387,7 +6032,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
}
/* specify func_info/line_info only if kernel supports them */
btf_fd = bpf_object__btf_fd(prog->obj);
- if (btf_fd >= 0 && prog->obj->caps.btf_func) {
+ if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) {
load_attr.prog_btf_fd = btf_fd;
load_attr.func_info = prog->func_info;
load_attr.func_info_rec_size = prog->func_info_rec_size;
@@ -5425,7 +6070,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
free(log_buf);
goto retry_load;
}
- ret = -errno;
+ ret = errno ? -errno : -LIBBPF_ERRNO__LOAD;
cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
pr_warn("load bpf program failed: %s\n", cp);
pr_perm_msg(ret);
@@ -5564,11 +6209,17 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
+ err = bpf_object__sanitize_prog(obj, prog);
+ if (err)
+ return err;
+ }
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->programs[i];
if (bpf_program__is_function_storage(prog, obj))
continue;
if (!prog->load) {
- pr_debug("prog '%s'('%s'): skipped loading\n",
- prog->name, prog->section_name);
+ pr_debug("prog '%s': skipped loading\n", prog->name);
continue;
}
prog->log_level |= log_level;
@@ -5641,6 +6292,8 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
/* couldn't guess, but user might manually specify */
continue;
+ if (prog->sec_def->is_sleepable)
+ prog->prog_flags |= BPF_F_SLEEPABLE;
bpf_program__set_type(prog, prog->sec_def->prog_type);
bpf_program__set_expected_attach_type(prog,
prog->sec_def->expected_attach_type);
@@ -5750,11 +6403,11 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)
bpf_object__for_each_map(m, obj) {
if (!bpf_map__is_internal(m))
continue;
- if (!obj->caps.global_data) {
+ if (!kernel_supports(FEAT_GLOBAL_DATA)) {
pr_warn("kernel doesn't support global data\n");
return -ENOTSUP;
}
- if (!obj->caps.array_mmap)
+ if (!kernel_supports(FEAT_ARRAY_MMAP))
m->def.map_flags ^= BPF_F_MMAPABLE;
}
@@ -5904,7 +6557,6 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
}
err = bpf_object__probe_loading(obj);
- err = err ? : bpf_object__probe_caps(obj);
err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
err = err ? : bpf_object__sanitize_and_load_btf(obj);
err = err ? : bpf_object__sanitize_maps(obj);
@@ -6713,7 +7365,7 @@ int bpf_program__fd(const struct bpf_program *prog)
size_t bpf_program__size(const struct bpf_program *prog)
{
- return prog->insns_cnt * sizeof(struct bpf_insn);
+ return prog->insns_cnt * BPF_INSN_SZ;
}
int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
@@ -6910,6 +7562,21 @@ static const struct bpf_sec_def section_defs[] = {
.expected_attach_type = BPF_TRACE_FEXIT,
.is_attach_btf = true,
.attach_fn = attach_trace),
+ SEC_DEF("fentry.s/", TRACING,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .is_attach_btf = true,
+ .is_sleepable = true,
+ .attach_fn = attach_trace),
+ SEC_DEF("fmod_ret.s/", TRACING,
+ .expected_attach_type = BPF_MODIFY_RETURN,
+ .is_attach_btf = true,
+ .is_sleepable = true,
+ .attach_fn = attach_trace),
+ SEC_DEF("fexit.s/", TRACING,
+ .expected_attach_type = BPF_TRACE_FEXIT,
+ .is_attach_btf = true,
+ .is_sleepable = true,
+ .attach_fn = attach_trace),
SEC_DEF("freplace/", EXT,
.is_attach_btf = true,
.attach_fn = attach_trace),
@@ -6917,6 +7584,11 @@ static const struct bpf_sec_def section_defs[] = {
.is_attach_btf = true,
.expected_attach_type = BPF_LSM_MAC,
.attach_fn = attach_lsm),
+ SEC_DEF("lsm.s/", LSM,
+ .is_attach_btf = true,
+ .is_sleepable = true,
+ .expected_attach_type = BPF_LSM_MAC,
+ .attach_fn = attach_lsm),
SEC_DEF("iter/", TRACING,
.expected_attach_type = BPF_TRACE_ITER,
.is_attach_btf = true,
@@ -7122,8 +7794,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
return -LIBBPF_ERRNO__FORMAT;
}
- name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
- sym.st_name) ? : "<?>";
+ name = elf_sym_str(obj, sym.st_name) ?: "<?>";
map = find_struct_ops_map_by_offset(obj, rel.r_offset);
if (!map) {
pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n",
@@ -7640,7 +8311,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
prog->prog_ifindex = attr->ifindex;
prog->log_level = attr->log_level;
- prog->prog_flags = attr->prog_flags;
+ prog->prog_flags |= attr->prog_flags;
if (!first_prog)
first_prog = prog;
}
@@ -8594,7 +9265,7 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
struct perf_buffer_params p = {};
struct perf_event_attr attr = { 0, };
- attr.config = PERF_COUNT_SW_BPF_OUTPUT,
+ attr.config = PERF_COUNT_SW_BPF_OUTPUT;
attr.type = PERF_TYPE_SOFTWARE;
attr.sample_type = PERF_SAMPLE_RAW;
attr.sample_period = 1;
@@ -8832,6 +9503,11 @@ static int perf_buffer__process_records(struct perf_buffer *pb,
return 0;
}
+int perf_buffer__epoll_fd(const struct perf_buffer *pb)
+{
+ return pb->epoll_fd;
+}
+
int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
{
int i, cnt, err;
@@ -8849,6 +9525,55 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
return cnt < 0 ? -errno : cnt;
}
+/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
+ * manager.
+ */
+size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
+{
+ return pb->cpu_cnt;
+}
+
+/*
+ * Return perf_event FD of a ring buffer in *buf_idx* slot of
+ * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
+ * select()/poll()/epoll() Linux syscalls.
+ */
+int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
+{
+ struct perf_cpu_buf *cpu_buf;
+
+ if (buf_idx >= pb->cpu_cnt)
+ return -EINVAL;
+
+ cpu_buf = pb->cpu_bufs[buf_idx];
+ if (!cpu_buf)
+ return -ENOENT;
+
+ return cpu_buf->fd;
+}
+
+/*
+ * Consume data from perf ring buffer corresponding to slot *buf_idx* in
+ * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
+ * consume, do nothing and return success.
+ * Returns:
+ * - 0 on success;
+ * - <0 on failure.
+ */
+int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
+{
+ struct perf_cpu_buf *cpu_buf;
+
+ if (buf_idx >= pb->cpu_cnt)
+ return -EINVAL;
+
+ cpu_buf = pb->cpu_bufs[buf_idx];
+ if (!cpu_buf)
+ return -ENOENT;
+
+ return perf_buffer__process_records(pb, cpu_buf);
+}
+
int perf_buffer__consume(struct perf_buffer *pb)
{
int i, err;
@@ -8861,7 +9586,7 @@ int perf_buffer__consume(struct perf_buffer *pb)
err = perf_buffer__process_records(pb, cpu_buf);
if (err) {
- pr_warn("error while processing records: %d\n", err);
+ pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
return err;
}
}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 5ecb406..308e0de 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -588,8 +588,12 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt,
const struct perf_buffer_raw_opts *opts);
LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
+LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb);
LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);
LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb);
+LIBBPF_API int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx);
+LIBBPF_API size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb);
+LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx);
typedef enum bpf_perf_event_ret
(*bpf_perf_event_print_t)(struct perf_event_header *hdr,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index e35bd6c..3fedcdc 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -299,3 +299,12 @@
btf__set_fd;
btf__set_pointer_size;
} LIBBPF_0.0.9;
+
+LIBBPF_0.2.0 {
+ global:
+ perf_buffer__buffer_cnt;
+ perf_buffer__buffer_fd;
+ perf_buffer__epoll_fd;
+ perf_buffer__consume_buffer;
+ xsk_socket__create_shared;
+} LIBBPF_0.1.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 50d70e9..4d1c366 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -9,6 +9,15 @@
#ifndef __LIBBPF_LIBBPF_INTERNAL_H
#define __LIBBPF_LIBBPF_INTERNAL_H
+#include <stdlib.h>
+#include <limits.h>
+
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+/* prevent accidental re-addition of reallocarray() */
+#pragma GCC poison reallocarray
+
#include "libbpf.h"
#define BTF_INFO_ENC(kind, kind_flag, vlen) \
@@ -23,6 +32,12 @@
#define BTF_PARAM_ENC(name, type) (name), (type)
#define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size)
+#ifndef likely
+#define likely(x) __builtin_expect(!!(x), 1)
+#endif
+#ifndef unlikely
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
#ifndef min
# define min(x, y) ((x) < (y) ? (x) : (y))
#endif
@@ -63,6 +78,33 @@ do { \
#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__)
#define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__)
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+/*
+ * Re-implement glibc's reallocarray() for libbpf internal-only use.
+ * reallocarray(), unfortunately, is not available in all versions of glibc,
+ * so requires extra feature detection and using reallocarray() stub from
+ * <tools/libc_compat.h> and COMPAT_NEED_REALLOCARRAY. All this complicates
+ * build of libbpf unnecessarily and is just a maintenance burden. Instead,
+ * it's trivial to implement libbpf-specific internal version and use it
+ * throughout libbpf.
+ */
+static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size)
+{
+ size_t total;
+
+#if __has_builtin(__builtin_mul_overflow)
+ if (unlikely(__builtin_mul_overflow(nmemb, size, &total)))
+ return NULL;
+#else
+ if (size == 0 || nmemb > ULONG_MAX / size)
+ return NULL;
+ total = nmemb * size;
+#endif
+ return realloc(ptr, total);
+}
+
static inline bool libbpf_validate_opts(const char *opts,
size_t opts_sz, size_t user_sz,
const char *type_name)
@@ -105,18 +147,6 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,
int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
__u32 *off);
-struct nlattr;
-typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
-int libbpf_netlink_open(unsigned int *nl_pid);
-int libbpf_nl_get_link(int sock, unsigned int nl_pid,
- libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
-int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
- libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie);
-int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
- libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie);
-int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
- libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie);
-
struct btf_ext_info {
/*
* info points to the individual info section (e.g. func_info and
@@ -138,6 +168,44 @@ struct btf_ext_info {
i < (sec)->num_info; \
i++, rec = (void *)rec + (seg)->rec_size)
+/*
+ * The .BTF.ext ELF section layout defined as
+ * struct btf_ext_header
+ * func_info subsection
+ *
+ * The func_info subsection layout:
+ * record size for struct bpf_func_info in the func_info subsection
+ * struct btf_sec_func_info for section #1
+ * a list of bpf_func_info records for section #1
+ * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h
+ * but may not be identical
+ * struct btf_sec_func_info for section #2
+ * a list of bpf_func_info records for section #2
+ * ......
+ *
+ * Note that the bpf_func_info record size in .BTF.ext may not
+ * be the same as the one defined in include/uapi/linux/bpf.h.
+ * The loader should ensure that record_size meets minimum
+ * requirement and pass the record as is to the kernel. The
+ * kernel will handle the func_info properly based on its contents.
+ */
+struct btf_ext_header {
+ __u16 magic;
+ __u8 version;
+ __u8 flags;
+ __u32 hdr_len;
+
+ /* All offsets are in bytes relative to the end of this header */
+ __u32 func_info_off;
+ __u32 func_info_len;
+ __u32 line_info_off;
+ __u32 line_info_len;
+
+ /* optional part of .BTF.ext header */
+ __u32 core_relo_off;
+ __u32 core_relo_len;
+};
+
struct btf_ext {
union {
struct btf_ext_header *hdr;
@@ -145,7 +213,7 @@ struct btf_ext {
};
struct btf_ext_info func_info;
struct btf_ext_info line_info;
- struct btf_ext_info field_reloc_info;
+ struct btf_ext_info core_relo_info;
__u32 data_size;
};
@@ -170,32 +238,40 @@ struct bpf_line_info_min {
__u32 line_col;
};
-/* bpf_field_info_kind encodes which aspect of captured field has to be
- * adjusted by relocations. Currently supported values are:
- * - BPF_FIELD_BYTE_OFFSET: field offset (in bytes);
- * - BPF_FIELD_EXISTS: field existence (1, if field exists; 0, otherwise);
+/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
+ * has to be adjusted by relocations.
*/
-enum bpf_field_info_kind {
+enum bpf_core_relo_kind {
BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */
- BPF_FIELD_BYTE_SIZE = 1,
+ BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */
BPF_FIELD_EXISTS = 2, /* field existence in target kernel */
- BPF_FIELD_SIGNED = 3,
- BPF_FIELD_LSHIFT_U64 = 4,
- BPF_FIELD_RSHIFT_U64 = 5,
+ BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */
+ BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */
+ BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */
+ BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */
+ BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */
+ BPF_TYPE_EXISTS = 8, /* type existence in target kernel */
+ BPF_TYPE_SIZE = 9, /* type size in bytes */
+ BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */
+ BPF_ENUMVAL_VALUE = 11, /* enum value integer value */
};
-/* The minimum bpf_field_reloc checked by the loader
+/* The minimum bpf_core_relo checked by the loader
*
- * Field relocation captures the following data:
+ * CO-RE relocation captures the following data:
* - insn_off - instruction offset (in bytes) within a BPF program that needs
* its insn->imm field to be relocated with actual field info;
* - type_id - BTF type ID of the "root" (containing) entity of a relocatable
- * field;
+ * type or field;
* - access_str_off - offset into corresponding .BTF string section. String
- * itself encodes an accessed field using a sequence of field and array
- * indicies, separated by colon (:). It's conceptually very close to LLVM's
- * getelementptr ([0]) instruction's arguments for identifying offset to
- * a field.
+ * interpretation depends on specific relocation kind:
+ * - for field-based relocations, string encodes an accessed field using
+ * a sequence of field and array indices, separated by colon (:). It's
+ * conceptually very close to LLVM's getelementptr ([0]) instruction's
+ * arguments for identifying offset to a field.
+ * - for type-based relocations, strings is expected to be just "0";
+ * - for enum value-based relocations, string contains an index of enum
+ * value within its enum type;
*
* Example to provide a better feel.
*
@@ -226,11 +302,11 @@ enum bpf_field_info_kind {
*
* [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
*/
-struct bpf_field_reloc {
+struct bpf_core_relo {
__u32 insn_off;
__u32 type_id;
__u32 access_str_off;
- enum bpf_field_info_kind kind;
+ enum bpf_core_relo_kind kind;
};
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 5a3d3f0..5482a9b 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -17,9 +17,6 @@
#include "libbpf.h"
#include "libbpf_internal.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
static bool grep(const char *buffer, const char *pattern)
{
return !!strstr(buffer, pattern);
@@ -173,7 +170,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
return btf_fd;
}
-static int load_sk_storage_btf(void)
+static int load_local_storage_btf(void)
{
const char strs[] = "\0bpf_spin_lock\0val\0cnt\0l";
/* struct bpf_spin_lock {
@@ -232,12 +229,13 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
key_size = 0;
break;
case BPF_MAP_TYPE_SK_STORAGE:
+ case BPF_MAP_TYPE_INODE_STORAGE:
btf_key_type_id = 1;
btf_value_type_id = 3;
value_size = 8;
max_entries = 0;
map_flags = BPF_F_NO_PREALLOC;
- btf_fd = load_sk_storage_btf();
+ btf_fd = load_local_storage_btf();
if (btf_fd < 0)
return false;
break;
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 312f887..4dd73de 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -15,13 +15,12 @@
#include "libbpf_internal.h"
#include "nlattr.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif
+typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
+
typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t,
void *cookie);
@@ -31,7 +30,7 @@ struct xdp_id_md {
struct xdp_link_info info;
};
-int libbpf_netlink_open(__u32 *nl_pid)
+static int libbpf_netlink_open(__u32 *nl_pid)
{
struct sockaddr_nl sa;
socklen_t addrlen;
@@ -283,6 +282,9 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
return 0;
}
+static int libbpf_nl_get_link(int sock, unsigned int nl_pid,
+ libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
+
int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
size_t info_size, __u32 flags)
{
@@ -368,121 +370,3 @@ int libbpf_nl_get_link(int sock, unsigned int nl_pid,
return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg,
dump_link_nlmsg, cookie);
}
-
-static int __dump_class_nlmsg(struct nlmsghdr *nlh,
- libbpf_dump_nlmsg_t dump_class_nlmsg,
- void *cookie)
-{
- struct nlattr *tb[TCA_MAX + 1], *attr;
- struct tcmsg *t = NLMSG_DATA(nlh);
- int len;
-
- len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
- attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
- if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
- return -LIBBPF_ERRNO__NLPARSE;
-
- return dump_class_nlmsg(cookie, t, tb);
-}
-
-int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
- libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie)
-{
- struct {
- struct nlmsghdr nlh;
- struct tcmsg t;
- } req = {
- .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
- .nlh.nlmsg_type = RTM_GETTCLASS,
- .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
- .t.tcm_family = AF_UNSPEC,
- .t.tcm_ifindex = ifindex,
- };
- int seq = time(NULL);
-
- req.nlh.nlmsg_seq = seq;
- if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
- return -errno;
-
- return bpf_netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg,
- dump_class_nlmsg, cookie);
-}
-
-static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh,
- libbpf_dump_nlmsg_t dump_qdisc_nlmsg,
- void *cookie)
-{
- struct nlattr *tb[TCA_MAX + 1], *attr;
- struct tcmsg *t = NLMSG_DATA(nlh);
- int len;
-
- len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
- attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
- if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
- return -LIBBPF_ERRNO__NLPARSE;
-
- return dump_qdisc_nlmsg(cookie, t, tb);
-}
-
-int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
- libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie)
-{
- struct {
- struct nlmsghdr nlh;
- struct tcmsg t;
- } req = {
- .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
- .nlh.nlmsg_type = RTM_GETQDISC,
- .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
- .t.tcm_family = AF_UNSPEC,
- .t.tcm_ifindex = ifindex,
- };
- int seq = time(NULL);
-
- req.nlh.nlmsg_seq = seq;
- if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
- return -errno;
-
- return bpf_netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg,
- dump_qdisc_nlmsg, cookie);
-}
-
-static int __dump_filter_nlmsg(struct nlmsghdr *nlh,
- libbpf_dump_nlmsg_t dump_filter_nlmsg,
- void *cookie)
-{
- struct nlattr *tb[TCA_MAX + 1], *attr;
- struct tcmsg *t = NLMSG_DATA(nlh);
- int len;
-
- len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
- attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
- if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
- return -LIBBPF_ERRNO__NLPARSE;
-
- return dump_filter_nlmsg(cookie, t, tb);
-}
-
-int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
- libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie)
-{
- struct {
- struct nlmsghdr nlh;
- struct tcmsg t;
- } req = {
- .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
- .nlh.nlmsg_type = RTM_GETTFILTER,
- .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
- .t.tcm_family = AF_UNSPEC,
- .t.tcm_ifindex = ifindex,
- .t.tcm_parent = handle,
- };
- int seq = time(NULL);
-
- req.nlh.nlmsg_seq = seq;
- if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
- return -errno;
-
- return bpf_netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg,
- dump_filter_nlmsg, cookie);
-}
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
index 0ad41df..b607fa9 100644
--- a/tools/lib/bpf/nlattr.c
+++ b/tools/lib/bpf/nlattr.c
@@ -7,14 +7,11 @@
*/
#include <errno.h>
-#include "nlattr.h"
-#include "libbpf_internal.h"
-#include <linux/rtnetlink.h>
#include <string.h>
#include <stdio.h>
-
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+#include <linux/rtnetlink.h>
+#include "nlattr.h"
+#include "libbpf_internal.h"
static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = {
[LIBBPF_NLA_U8] = sizeof(uint8_t),
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 4fc6c6c..5c6522c 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -16,15 +16,11 @@
#include <asm/barrier.h>
#include <sys/mman.h>
#include <sys/epoll.h>
-#include <tools/libc_compat.h>
#include "libbpf.h"
#include "libbpf_internal.h"
#include "bpf.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
struct ring {
ring_buffer_sample_fn sample_cb;
void *ctx;
@@ -82,12 +78,12 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
return -EINVAL;
}
- tmp = reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings));
+ tmp = libbpf_reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings));
if (!tmp)
return -ENOMEM;
rb->rings = tmp;
- tmp = reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events));
+ tmp = libbpf_reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events));
if (!tmp)
return -ENOMEM;
rb->events = tmp;
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index f7f4efb..49c3245 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -20,6 +20,7 @@
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_xdp.h>
+#include <linux/list.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <sys/ioctl.h>
@@ -32,9 +33,6 @@
#include "libbpf_internal.h"
#include "xsk.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
#ifndef SOL_XDP
#define SOL_XDP 283
#endif
@@ -48,26 +46,35 @@
#endif
struct xsk_umem {
- struct xsk_ring_prod *fill;
- struct xsk_ring_cons *comp;
+ struct xsk_ring_prod *fill_save;
+ struct xsk_ring_cons *comp_save;
char *umem_area;
struct xsk_umem_config config;
int fd;
int refcount;
+ struct list_head ctx_list;
+};
+
+struct xsk_ctx {
+ struct xsk_ring_prod *fill;
+ struct xsk_ring_cons *comp;
+ __u32 queue_id;
+ struct xsk_umem *umem;
+ int refcount;
+ int ifindex;
+ struct list_head list;
+ int prog_fd;
+ int xsks_map_fd;
+ char ifname[IFNAMSIZ];
};
struct xsk_socket {
struct xsk_ring_cons *rx;
struct xsk_ring_prod *tx;
__u64 outstanding_tx;
- struct xsk_umem *umem;
+ struct xsk_ctx *ctx;
struct xsk_socket_config config;
int fd;
- int ifindex;
- int prog_fd;
- int xsks_map_fd;
- __u32 queue_id;
- char ifname[IFNAMSIZ];
};
struct xsk_nl_info {
@@ -203,15 +210,73 @@ static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
return -EINVAL;
}
+static int xsk_create_umem_rings(struct xsk_umem *umem, int fd,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp)
+{
+ struct xdp_mmap_offsets off;
+ void *map;
+ int err;
+
+ err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
+ &umem->config.fill_size,
+ sizeof(umem->config.fill_size));
+ if (err)
+ return -errno;
+
+ err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
+ &umem->config.comp_size,
+ sizeof(umem->config.comp_size));
+ if (err)
+ return -errno;
+
+ err = xsk_get_mmap_offsets(fd, &off);
+ if (err)
+ return -errno;
+
+ map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+ XDP_UMEM_PGOFF_FILL_RING);
+ if (map == MAP_FAILED)
+ return -errno;
+
+ fill->mask = umem->config.fill_size - 1;
+ fill->size = umem->config.fill_size;
+ fill->producer = map + off.fr.producer;
+ fill->consumer = map + off.fr.consumer;
+ fill->flags = map + off.fr.flags;
+ fill->ring = map + off.fr.desc;
+ fill->cached_cons = umem->config.fill_size;
+
+ map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+ XDP_UMEM_PGOFF_COMPLETION_RING);
+ if (map == MAP_FAILED) {
+ err = -errno;
+ goto out_mmap;
+ }
+
+ comp->mask = umem->config.comp_size - 1;
+ comp->size = umem->config.comp_size;
+ comp->producer = map + off.cr.producer;
+ comp->consumer = map + off.cr.consumer;
+ comp->flags = map + off.cr.flags;
+ comp->ring = map + off.cr.desc;
+
+ return 0;
+
+out_mmap:
+ munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
+ return err;
+}
+
int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
__u64 size, struct xsk_ring_prod *fill,
struct xsk_ring_cons *comp,
const struct xsk_umem_config *usr_config)
{
- struct xdp_mmap_offsets off;
struct xdp_umem_reg mr;
struct xsk_umem *umem;
- void *map;
int err;
if (!umem_area || !umem_ptr || !fill || !comp)
@@ -230,6 +295,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
}
umem->umem_area = umem_area;
+ INIT_LIST_HEAD(&umem->ctx_list);
xsk_set_umem_config(&umem->config, usr_config);
memset(&mr, 0, sizeof(mr));
@@ -244,71 +310,16 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
err = -errno;
goto out_socket;
}
- err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_FILL_RING,
- &umem->config.fill_size,
- sizeof(umem->config.fill_size));
- if (err) {
- err = -errno;
+
+ err = xsk_create_umem_rings(umem, umem->fd, fill, comp);
+ if (err)
goto out_socket;
- }
- err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
- &umem->config.comp_size,
- sizeof(umem->config.comp_size));
- if (err) {
- err = -errno;
- goto out_socket;
- }
- err = xsk_get_mmap_offsets(umem->fd, &off);
- if (err) {
- err = -errno;
- goto out_socket;
- }
-
- map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
- XDP_UMEM_PGOFF_FILL_RING);
- if (map == MAP_FAILED) {
- err = -errno;
- goto out_socket;
- }
-
- umem->fill = fill;
- fill->mask = umem->config.fill_size - 1;
- fill->size = umem->config.fill_size;
- fill->producer = map + off.fr.producer;
- fill->consumer = map + off.fr.consumer;
- fill->flags = map + off.fr.flags;
- fill->ring = map + off.fr.desc;
- fill->cached_prod = *fill->producer;
- /* cached_cons is "size" bigger than the real consumer pointer
- * See xsk_prod_nb_free
- */
- fill->cached_cons = *fill->consumer + umem->config.fill_size;
-
- map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
- XDP_UMEM_PGOFF_COMPLETION_RING);
- if (map == MAP_FAILED) {
- err = -errno;
- goto out_mmap;
- }
-
- umem->comp = comp;
- comp->mask = umem->config.comp_size - 1;
- comp->size = umem->config.comp_size;
- comp->producer = map + off.cr.producer;
- comp->consumer = map + off.cr.consumer;
- comp->flags = map + off.cr.flags;
- comp->ring = map + off.cr.desc;
- comp->cached_prod = *comp->producer;
- comp->cached_cons = *comp->consumer;
-
+ umem->fill_save = fill;
+ umem->comp_save = comp;
*umem_ptr = umem;
return 0;
-out_mmap:
- munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
out_socket:
close(umem->fd);
out_umem_alloc:
@@ -342,6 +353,7 @@ DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)
static int xsk_load_xdp_prog(struct xsk_socket *xsk)
{
static const int log_buf_size = 16 * 1024;
+ struct xsk_ctx *ctx = xsk->ctx;
char log_buf[log_buf_size];
int err, prog_fd;
@@ -369,7 +381,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
/* *(u32 *)(r10 - 4) = r2 */
BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),
/* r1 = xskmap[] */
- BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
+ BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
/* r3 = XDP_PASS */
BPF_MOV64_IMM(BPF_REG_3, 2),
/* call bpf_redirect_map */
@@ -381,7 +393,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
/* r2 += -4 */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
/* r1 = xskmap[] */
- BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
+ BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
/* call bpf_map_lookup_elem */
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
/* r1 = r0 */
@@ -393,7 +405,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
/* r2 = *(u32 *)(r10 - 4) */
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
/* r1 = xskmap[] */
- BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
+ BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
/* r3 = 0 */
BPF_MOV64_IMM(BPF_REG_3, 0),
/* call bpf_redirect_map */
@@ -411,19 +423,21 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
return prog_fd;
}
- err = bpf_set_link_xdp_fd(xsk->ifindex, prog_fd, xsk->config.xdp_flags);
+ err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd,
+ xsk->config.xdp_flags);
if (err) {
close(prog_fd);
return err;
}
- xsk->prog_fd = prog_fd;
+ ctx->prog_fd = prog_fd;
return 0;
}
static int xsk_get_max_queues(struct xsk_socket *xsk)
{
struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
+ struct xsk_ctx *ctx = xsk->ctx;
struct ifreq ifr = {};
int fd, err, ret;
@@ -432,7 +446,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)
return -errno;
ifr.ifr_data = (void *)&channels;
- memcpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1);
+ memcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ - 1);
ifr.ifr_name[IFNAMSIZ - 1] = '\0';
err = ioctl(fd, SIOCETHTOOL, &ifr);
if (err && errno != EOPNOTSUPP) {
@@ -460,6 +474,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)
static int xsk_create_bpf_maps(struct xsk_socket *xsk)
{
+ struct xsk_ctx *ctx = xsk->ctx;
int max_queues;
int fd;
@@ -472,15 +487,17 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)
if (fd < 0)
return fd;
- xsk->xsks_map_fd = fd;
+ ctx->xsks_map_fd = fd;
return 0;
}
static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
{
- bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
- close(xsk->xsks_map_fd);
+ struct xsk_ctx *ctx = xsk->ctx;
+
+ bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id);
+ close(ctx->xsks_map_fd);
}
static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
@@ -488,10 +505,11 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
__u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
__u32 map_len = sizeof(struct bpf_map_info);
struct bpf_prog_info prog_info = {};
+ struct xsk_ctx *ctx = xsk->ctx;
struct bpf_map_info map_info;
int fd, err;
- err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
+ err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
if (err)
return err;
@@ -505,11 +523,11 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
prog_info.nr_map_ids = num_maps;
prog_info.map_ids = (__u64)(unsigned long)map_ids;
- err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
+ err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
if (err)
goto out_map_ids;
- xsk->xsks_map_fd = -1;
+ ctx->xsks_map_fd = -1;
for (i = 0; i < prog_info.nr_map_ids; i++) {
fd = bpf_map_get_fd_by_id(map_ids[i]);
@@ -523,7 +541,7 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
}
if (!strcmp(map_info.name, "xsks_map")) {
- xsk->xsks_map_fd = fd;
+ ctx->xsks_map_fd = fd;
continue;
}
@@ -531,7 +549,7 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
}
err = 0;
- if (xsk->xsks_map_fd == -1)
+ if (ctx->xsks_map_fd == -1)
err = -ENOENT;
out_map_ids:
@@ -541,16 +559,19 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
static int xsk_set_bpf_maps(struct xsk_socket *xsk)
{
- return bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id,
+ struct xsk_ctx *ctx = xsk->ctx;
+
+ return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id,
&xsk->fd, 0);
}
static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
{
+ struct xsk_ctx *ctx = xsk->ctx;
__u32 prog_id = 0;
int err;
- err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id,
+ err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id,
xsk->config.xdp_flags);
if (err)
return err;
@@ -566,12 +587,12 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
return err;
}
} else {
- xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
- if (xsk->prog_fd < 0)
+ ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ if (ctx->prog_fd < 0)
return -errno;
err = xsk_lookup_bpf_maps(xsk);
if (err) {
- close(xsk->prog_fd);
+ close(ctx->prog_fd);
return err;
}
}
@@ -580,25 +601,110 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
err = xsk_set_bpf_maps(xsk);
if (err) {
xsk_delete_bpf_maps(xsk);
- close(xsk->prog_fd);
+ close(ctx->prog_fd);
return err;
}
return 0;
}
-int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
- __u32 queue_id, struct xsk_umem *umem,
- struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
- const struct xsk_socket_config *usr_config)
+static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
+ __u32 queue_id)
+{
+ struct xsk_ctx *ctx;
+
+ if (list_empty(&umem->ctx_list))
+ return NULL;
+
+ list_for_each_entry(ctx, &umem->ctx_list, list) {
+ if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) {
+ ctx->refcount++;
+ return ctx;
+ }
+ }
+
+ return NULL;
+}
+
+static void xsk_put_ctx(struct xsk_ctx *ctx)
+{
+ struct xsk_umem *umem = ctx->umem;
+ struct xdp_mmap_offsets off;
+ int err;
+
+ if (--ctx->refcount == 0) {
+ err = xsk_get_mmap_offsets(umem->fd, &off);
+ if (!err) {
+ munmap(ctx->fill->ring - off.fr.desc,
+ off.fr.desc + umem->config.fill_size *
+ sizeof(__u64));
+ munmap(ctx->comp->ring - off.cr.desc,
+ off.cr.desc + umem->config.comp_size *
+ sizeof(__u64));
+ }
+
+ list_del(&ctx->list);
+ free(ctx);
+ }
+}
+
+static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
+ struct xsk_umem *umem, int ifindex,
+ const char *ifname, __u32 queue_id,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp)
+{
+ struct xsk_ctx *ctx;
+ int err;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx)
+ return NULL;
+
+ if (!umem->fill_save) {
+ err = xsk_create_umem_rings(umem, xsk->fd, fill, comp);
+ if (err) {
+ free(ctx);
+ return NULL;
+ }
+ } else if (umem->fill_save != fill || umem->comp_save != comp) {
+ /* Copy over rings to new structs. */
+ memcpy(fill, umem->fill_save, sizeof(*fill));
+ memcpy(comp, umem->comp_save, sizeof(*comp));
+ }
+
+ ctx->ifindex = ifindex;
+ ctx->refcount = 1;
+ ctx->umem = umem;
+ ctx->queue_id = queue_id;
+ memcpy(ctx->ifname, ifname, IFNAMSIZ - 1);
+ ctx->ifname[IFNAMSIZ - 1] = '\0';
+
+ umem->fill_save = NULL;
+ umem->comp_save = NULL;
+ ctx->fill = fill;
+ ctx->comp = comp;
+ list_add(&ctx->list, &umem->ctx_list);
+ return ctx;
+}
+
+int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+ const char *ifname,
+ __u32 queue_id, struct xsk_umem *umem,
+ struct xsk_ring_cons *rx,
+ struct xsk_ring_prod *tx,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_socket_config *usr_config)
{
void *rx_map = NULL, *tx_map = NULL;
struct sockaddr_xdp sxdp = {};
struct xdp_mmap_offsets off;
struct xsk_socket *xsk;
- int err;
+ struct xsk_ctx *ctx;
+ int err, ifindex;
- if (!umem || !xsk_ptr || !(rx || tx))
+ if (!umem || !xsk_ptr || !(rx || tx) || !fill || !comp)
return -EFAULT;
xsk = calloc(1, sizeof(*xsk));
@@ -609,10 +715,10 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
if (err)
goto out_xsk_alloc;
- if (umem->refcount &&
- !(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
- pr_warn("Error: shared umems not supported by libbpf supplied XDP program.\n");
- err = -EBUSY;
+ xsk->outstanding_tx = 0;
+ ifindex = if_nametoindex(ifname);
+ if (!ifindex) {
+ err = -errno;
goto out_xsk_alloc;
}
@@ -626,16 +732,16 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
xsk->fd = umem->fd;
}
- xsk->outstanding_tx = 0;
- xsk->queue_id = queue_id;
- xsk->umem = umem;
- xsk->ifindex = if_nametoindex(ifname);
- if (!xsk->ifindex) {
- err = -errno;
- goto out_socket;
+ ctx = xsk_get_ctx(umem, ifindex, queue_id);
+ if (!ctx) {
+ ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id,
+ fill, comp);
+ if (!ctx) {
+ err = -ENOMEM;
+ goto out_socket;
+ }
}
- memcpy(xsk->ifname, ifname, IFNAMSIZ - 1);
- xsk->ifname[IFNAMSIZ - 1] = '\0';
+ xsk->ctx = ctx;
if (rx) {
err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
@@ -643,7 +749,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
sizeof(xsk->config.rx_size));
if (err) {
err = -errno;
- goto out_socket;
+ goto out_put_ctx;
}
}
if (tx) {
@@ -652,14 +758,14 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
sizeof(xsk->config.tx_size));
if (err) {
err = -errno;
- goto out_socket;
+ goto out_put_ctx;
}
}
err = xsk_get_mmap_offsets(xsk->fd, &off);
if (err) {
err = -errno;
- goto out_socket;
+ goto out_put_ctx;
}
if (rx) {
@@ -669,7 +775,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
xsk->fd, XDP_PGOFF_RX_RING);
if (rx_map == MAP_FAILED) {
err = -errno;
- goto out_socket;
+ goto out_put_ctx;
}
rx->mask = xsk->config.rx_size - 1;
@@ -708,10 +814,10 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
xsk->tx = tx;
sxdp.sxdp_family = PF_XDP;
- sxdp.sxdp_ifindex = xsk->ifindex;
- sxdp.sxdp_queue_id = xsk->queue_id;
+ sxdp.sxdp_ifindex = ctx->ifindex;
+ sxdp.sxdp_queue_id = ctx->queue_id;
if (umem->refcount > 1) {
- sxdp.sxdp_flags = XDP_SHARED_UMEM;
+ sxdp.sxdp_flags |= XDP_SHARED_UMEM;
sxdp.sxdp_shared_umem_fd = umem->fd;
} else {
sxdp.sxdp_flags = xsk->config.bind_flags;
@@ -723,7 +829,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
goto out_mmap_tx;
}
- xsk->prog_fd = -1;
+ ctx->prog_fd = -1;
if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
err = xsk_setup_xdp_prog(xsk);
@@ -742,6 +848,8 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
if (rx)
munmap(rx_map, off.rx.desc +
xsk->config.rx_size * sizeof(struct xdp_desc));
+out_put_ctx:
+ xsk_put_ctx(ctx);
out_socket:
if (--umem->refcount)
close(xsk->fd);
@@ -750,25 +858,24 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
return err;
}
+int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
+ __u32 queue_id, struct xsk_umem *umem,
+ struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
+ const struct xsk_socket_config *usr_config)
+{
+ return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem,
+ rx, tx, umem->fill_save,
+ umem->comp_save, usr_config);
+}
+
int xsk_umem__delete(struct xsk_umem *umem)
{
- struct xdp_mmap_offsets off;
- int err;
-
if (!umem)
return 0;
if (umem->refcount)
return -EBUSY;
- err = xsk_get_mmap_offsets(umem->fd, &off);
- if (!err) {
- munmap(umem->fill->ring - off.fr.desc,
- off.fr.desc + umem->config.fill_size * sizeof(__u64));
- munmap(umem->comp->ring - off.cr.desc,
- off.cr.desc + umem->config.comp_size * sizeof(__u64));
- }
-
close(umem->fd);
free(umem);
@@ -778,15 +885,16 @@ int xsk_umem__delete(struct xsk_umem *umem)
void xsk_socket__delete(struct xsk_socket *xsk)
{
size_t desc_sz = sizeof(struct xdp_desc);
+ struct xsk_ctx *ctx = xsk->ctx;
struct xdp_mmap_offsets off;
int err;
if (!xsk)
return;
- if (xsk->prog_fd != -1) {
+ if (ctx->prog_fd != -1) {
xsk_delete_bpf_maps(xsk);
- close(xsk->prog_fd);
+ close(ctx->prog_fd);
}
err = xsk_get_mmap_offsets(xsk->fd, &off);
@@ -799,14 +907,15 @@ void xsk_socket__delete(struct xsk_socket *xsk)
munmap(xsk->tx->ring - off.tx.desc,
off.tx.desc + xsk->config.tx_size * desc_sz);
}
-
}
- xsk->umem->refcount--;
+ xsk_put_ctx(ctx);
+
+ ctx->umem->refcount--;
/* Do not close an fd that also has an associated umem connected
* to it.
*/
- if (xsk->fd != xsk->umem->fd)
+ if (xsk->fd != ctx->umem->fd)
close(xsk->fd);
free(xsk);
}
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index 584f682..1069c46 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -234,6 +234,15 @@ LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk,
struct xsk_ring_cons *rx,
struct xsk_ring_prod *tx,
const struct xsk_socket_config *config);
+LIBBPF_API int
+xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+ const char *ifname,
+ __u32 queue_id, struct xsk_umem *umem,
+ struct xsk_ring_cons *rx,
+ struct xsk_ring_prod *tx,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_socket_config *config);
/* Returns 0 for success and -EBUSY if the umem is still in use. */
LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem);
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 190be4f..81bb099 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -483,10 +483,6 @@
EXTLIBS += -lelf
$(call detected,CONFIG_LIBELF)
- ifeq ($(feature-libelf-mmap), 1)
- CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT
- endif
-
ifeq ($(feature-libelf-getphdrnum), 1)
CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT
endif
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index ff4f4c4..03e264a 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -28,7 +28,7 @@ struct option;
* libelf 0.8.x and earlier do not support ELF_C_READ_MMAP;
* for newer versions we can use mmap to reduce memory usage:
*/
-#ifdef HAVE_LIBELF_MMAP_SUPPORT
+#ifdef ELF_C_READ_MMAP
# define PERF_ELF_C_READ_MMAP ELF_C_READ_MMAP
#else
# define PERF_ELF_C_READ_MMAP ELF_C_READ
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index fc946b7..65d3d9a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -316,7 +316,7 @@
$(TRUNNER_BPF_PROGS_DIR)/%.c \
$(TRUNNER_BPF_PROGS_DIR)/*.h \
$$(INCLUDE_DIR)/vmlinux.h \
- $$(BPFOBJ) | $(TRUNNER_OUTPUT)
+ $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
$(TRUNNER_BPF_CFLAGS), \
$(TRUNNER_BPF_LDFLAGS))
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index e885d35..66acfcf 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -43,3 +43,24 @@
https://reviews.llvm.org/D78466
has been pushed to llvm 10.x release branch and will be
available in 10.0.1. The fix is available in llvm 11.0.0 trunk.
+
+BPF CO-RE-based tests and Clang version
+=======================================
+
+A set of selftests use BPF target-specific built-ins, which might require
+bleeding-edge Clang versions (Clang 12 nightly at this time).
+
+Few sub-tests of core_reloc test suit (part of test_progs test runner) require
+the following built-ins, listed with corresponding Clang diffs introducing
+them to Clang/LLVM. These sub-tests are going to be skipped if Clang is too
+old to support them, they shouldn't cause build failures or runtime test
+failures:
+
+ - __builtin_btf_type_id() ([0], [1], [2]);
+ - __builtin_preserve_type_info(), __builtin_preserve_enum_value() ([3], [4]).
+
+ [0] https://reviews.llvm.org/D74572
+ [1] https://reviews.llvm.org/D74668
+ [2] https://reviews.llvm.org/D85174
+ [3] https://reviews.llvm.org/D83878
+ [4] https://reviews.llvm.org/D83242
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 944ad472..1a42768 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -317,6 +317,7 @@ extern const struct bench bench_trig_tp;
extern const struct bench bench_trig_rawtp;
extern const struct bench bench_trig_kprobe;
extern const struct bench bench_trig_fentry;
+extern const struct bench bench_trig_fentry_sleep;
extern const struct bench bench_trig_fmodret;
extern const struct bench bench_rb_libbpf;
extern const struct bench bench_rb_custom;
@@ -338,6 +339,7 @@ static const struct bench *benchs[] = {
&bench_trig_rawtp,
&bench_trig_kprobe,
&bench_trig_fentry,
+ &bench_trig_fentry_sleep,
&bench_trig_fmodret,
&bench_rb_libbpf,
&bench_rb_custom,
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 49c2283..2a0b6c9 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -90,6 +90,12 @@ static void trigger_fentry_setup()
attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}
+static void trigger_fentry_sleep_setup()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep);
+}
+
static void trigger_fmodret_setup()
{
setup_ctx();
@@ -155,6 +161,17 @@ const struct bench bench_trig_fentry = {
.report_final = hits_drops_report_final,
};
+const struct bench bench_trig_fentry_sleep = {
+ .name = "trig-fentry-sleep",
+ .validate = trigger_validate,
+ .setup = trigger_fentry_sleep_setup,
+ .producer_thread = trigger_producer,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
const struct bench bench_trig_fmodret = {
.name = "trig-fmodret",
.validate = trigger_validate,
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index f566556..12ee4028 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -104,6 +104,43 @@ int start_server(int family, int type, const char *addr_str, __u16 port,
return -1;
}
+int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
+ int timeout_ms)
+{
+ struct sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ struct sockaddr_in *addr_in;
+ int fd, ret;
+
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
+ log_err("Failed to get server addr");
+ return -1;
+ }
+
+ addr_in = (struct sockaddr_in *)&addr;
+ fd = socket(addr_in->sin_family, SOCK_STREAM, 0);
+ if (fd < 0) {
+ log_err("Failed to create client socket");
+ return -1;
+ }
+
+ if (settimeo(fd, timeout_ms))
+ goto error_close;
+
+ ret = sendto(fd, data, data_len, MSG_FASTOPEN, (struct sockaddr *)&addr,
+ addrlen);
+ if (ret != data_len) {
+ log_err("sendto(data, %u) != %d\n", data_len, ret);
+ goto error_close;
+ }
+
+ return fd;
+
+error_close:
+ save_errno_close(fd);
+ return -1;
+}
+
static int connect_fd_to_addr(int fd,
const struct sockaddr_storage *addr,
socklen_t addrlen)
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index c3728f6..7205f8af 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -37,6 +37,8 @@ int start_server(int family, int type, const char *addr, __u16 port,
int timeout_ms);
int connect_to_fd(int server_fd, int timeout_ms);
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
+int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
+ int timeout_ms);
int make_sockaddr(int family, const char *addr_str, __u16 port,
struct sockaddr_storage *addr, socklen_t *len);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
index 6ccecbd..540fea4 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
@@ -53,7 +53,7 @@ static int kern_sync_rcu(void)
return err;
}
-void test_btf_map_in_map(void)
+static void test_lookup_update(void)
{
int err, key = 0, val, i;
struct test_btf_map_in_map *skel;
@@ -143,3 +143,36 @@ void test_btf_map_in_map(void)
cleanup:
test_btf_map_in_map__destroy(skel);
}
+
+static void test_diff_size(void)
+{
+ struct test_btf_map_in_map *skel;
+ int err, inner_map_fd, zero = 0;
+
+ skel = test_btf_map_in_map__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n"))
+ return;
+
+ inner_map_fd = bpf_map__fd(skel->maps.sockarr_sz2);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_sockarr), &zero,
+ &inner_map_fd, 0);
+ CHECK(err, "outer_sockarr inner map size check",
+ "cannot use a different size inner_map\n");
+
+ inner_map_fd = bpf_map__fd(skel->maps.inner_map_sz2);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &zero,
+ &inner_map_fd, 0);
+ CHECK(!err, "outer_arr inner map size check",
+ "incorrectly updated with a different size inner_map\n");
+
+ test_btf_map_in_map__destroy(skel);
+}
+
+void test_btf_map_in_map(void)
+{
+ if (test__start_subtest("lookup_update"))
+ test_lookup_update();
+
+ if (test__start_subtest("diff_size"))
+ test_diff_size();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index a54eafc..30e40ff 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -3,6 +3,9 @@
#include "progs/core_reloc_types.h"
#include <sys/mman.h>
#include <sys/syscall.h>
+#include <bpf/btf.h>
+
+static int duration = 0;
#define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name)
@@ -177,14 +180,13 @@
.fails = true, \
}
-#define EXISTENCE_CASE_COMMON(name) \
+#define FIELD_EXISTS_CASE_COMMON(name) \
.case_name = #name, \
.bpf_obj_file = "test_core_reloc_existence.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o", \
- .relaxed_core_relocs = true
+ .btf_src_file = "btf__core_reloc_" #name ".o" \
-#define EXISTENCE_ERR_CASE(name) { \
- EXISTENCE_CASE_COMMON(name), \
+#define FIELD_EXISTS_ERR_CASE(name) { \
+ FIELD_EXISTS_CASE_COMMON(name), \
.fails = true, \
}
@@ -253,6 +255,61 @@
.fails = true, \
}
+#define TYPE_BASED_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_type_based.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o" \
+
+#define TYPE_BASED_CASE(name, ...) { \
+ TYPE_BASED_CASE_COMMON(name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_type_based_output) \
+ __VA_ARGS__, \
+ .output_len = sizeof(struct core_reloc_type_based_output), \
+}
+
+#define TYPE_BASED_ERR_CASE(name) { \
+ TYPE_BASED_CASE_COMMON(name), \
+ .fails = true, \
+}
+
+#define TYPE_ID_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_type_id.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o" \
+
+#define TYPE_ID_CASE(name, setup_fn) { \
+ TYPE_ID_CASE_COMMON(name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_type_id_output) {}, \
+ .output_len = sizeof(struct core_reloc_type_id_output), \
+ .setup = setup_fn, \
+}
+
+#define TYPE_ID_ERR_CASE(name) { \
+ TYPE_ID_CASE_COMMON(name), \
+ .fails = true, \
+}
+
+#define ENUMVAL_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_enumval.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o" \
+
+#define ENUMVAL_CASE(name, ...) { \
+ ENUMVAL_CASE_COMMON(name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_enumval_output) \
+ __VA_ARGS__, \
+ .output_len = sizeof(struct core_reloc_enumval_output), \
+}
+
+#define ENUMVAL_ERR_CASE(name) { \
+ ENUMVAL_CASE_COMMON(name), \
+ .fails = true, \
+}
+
+struct core_reloc_test_case;
+
+typedef int (*setup_test_fn)(struct core_reloc_test_case *test);
+
struct core_reloc_test_case {
const char *case_name;
const char *bpf_obj_file;
@@ -264,8 +321,136 @@ struct core_reloc_test_case {
bool fails;
bool relaxed_core_relocs;
bool direct_raw_tp;
+ setup_test_fn setup;
};
+static int find_btf_type(const struct btf *btf, const char *name, __u32 kind)
+{
+ int id;
+
+ id = btf__find_by_name_kind(btf, name, kind);
+ if (CHECK(id <= 0, "find_type_id", "failed to find '%s', kind %d: %d\n", name, kind, id))
+ return -1;
+
+ return id;
+}
+
+static int setup_type_id_case_local(struct core_reloc_test_case *test)
+{
+ struct core_reloc_type_id_output *exp = (void *)test->output;
+ struct btf *local_btf = btf__parse(test->bpf_obj_file, NULL);
+ struct btf *targ_btf = btf__parse(test->btf_src_file, NULL);
+ const struct btf_type *t;
+ const char *name;
+ int i;
+
+ if (CHECK(IS_ERR(local_btf), "local_btf", "failed: %ld\n", PTR_ERR(local_btf)) ||
+ CHECK(IS_ERR(targ_btf), "targ_btf", "failed: %ld\n", PTR_ERR(targ_btf))) {
+ btf__free(local_btf);
+ btf__free(targ_btf);
+ return -EINVAL;
+ }
+
+ exp->local_anon_struct = -1;
+ exp->local_anon_union = -1;
+ exp->local_anon_enum = -1;
+ exp->local_anon_func_proto_ptr = -1;
+ exp->local_anon_void_ptr = -1;
+ exp->local_anon_arr = -1;
+
+ for (i = 1; i <= btf__get_nr_types(local_btf); i++)
+ {
+ t = btf__type_by_id(local_btf, i);
+ /* we are interested only in anonymous types */
+ if (t->name_off)
+ continue;
+
+ if (btf_is_struct(t) && btf_vlen(t) &&
+ (name = btf__name_by_offset(local_btf, btf_members(t)[0].name_off)) &&
+ strcmp(name, "marker_field") == 0) {
+ exp->local_anon_struct = i;
+ } else if (btf_is_union(t) && btf_vlen(t) &&
+ (name = btf__name_by_offset(local_btf, btf_members(t)[0].name_off)) &&
+ strcmp(name, "marker_field") == 0) {
+ exp->local_anon_union = i;
+ } else if (btf_is_enum(t) && btf_vlen(t) &&
+ (name = btf__name_by_offset(local_btf, btf_enum(t)[0].name_off)) &&
+ strcmp(name, "MARKER_ENUM_VAL") == 0) {
+ exp->local_anon_enum = i;
+ } else if (btf_is_ptr(t) && (t = btf__type_by_id(local_btf, t->type))) {
+ if (btf_is_func_proto(t) && (t = btf__type_by_id(local_btf, t->type)) &&
+ btf_is_int(t) && (name = btf__name_by_offset(local_btf, t->name_off)) &&
+ strcmp(name, "_Bool") == 0) {
+ /* ptr -> func_proto -> _Bool */
+ exp->local_anon_func_proto_ptr = i;
+ } else if (btf_is_void(t)) {
+ /* ptr -> void */
+ exp->local_anon_void_ptr = i;
+ }
+ } else if (btf_is_array(t) && (t = btf__type_by_id(local_btf, btf_array(t)->type)) &&
+ btf_is_int(t) && (name = btf__name_by_offset(local_btf, t->name_off)) &&
+ strcmp(name, "_Bool") == 0) {
+ /* _Bool[] */
+ exp->local_anon_arr = i;
+ }
+ }
+
+ exp->local_struct = find_btf_type(local_btf, "a_struct", BTF_KIND_STRUCT);
+ exp->local_union = find_btf_type(local_btf, "a_union", BTF_KIND_UNION);
+ exp->local_enum = find_btf_type(local_btf, "an_enum", BTF_KIND_ENUM);
+ exp->local_int = find_btf_type(local_btf, "int", BTF_KIND_INT);
+ exp->local_struct_typedef = find_btf_type(local_btf, "named_struct_typedef", BTF_KIND_TYPEDEF);
+ exp->local_func_proto_typedef = find_btf_type(local_btf, "func_proto_typedef", BTF_KIND_TYPEDEF);
+ exp->local_arr_typedef = find_btf_type(local_btf, "arr_typedef", BTF_KIND_TYPEDEF);
+
+ btf__free(local_btf);
+ btf__free(targ_btf);
+ return 0;
+}
+
+static int setup_type_id_case_success(struct core_reloc_test_case *test) {
+ struct core_reloc_type_id_output *exp = (void *)test->output;
+ struct btf *targ_btf = btf__parse(test->btf_src_file, NULL);
+ int err;
+
+ err = setup_type_id_case_local(test);
+ if (err)
+ return err;
+
+ targ_btf = btf__parse(test->btf_src_file, NULL);
+
+ exp->targ_struct = find_btf_type(targ_btf, "a_struct", BTF_KIND_STRUCT);
+ exp->targ_union = find_btf_type(targ_btf, "a_union", BTF_KIND_UNION);
+ exp->targ_enum = find_btf_type(targ_btf, "an_enum", BTF_KIND_ENUM);
+ exp->targ_int = find_btf_type(targ_btf, "int", BTF_KIND_INT);
+ exp->targ_struct_typedef = find_btf_type(targ_btf, "named_struct_typedef", BTF_KIND_TYPEDEF);
+ exp->targ_func_proto_typedef = find_btf_type(targ_btf, "func_proto_typedef", BTF_KIND_TYPEDEF);
+ exp->targ_arr_typedef = find_btf_type(targ_btf, "arr_typedef", BTF_KIND_TYPEDEF);
+
+ btf__free(targ_btf);
+ return 0;
+}
+
+static int setup_type_id_case_failure(struct core_reloc_test_case *test)
+{
+ struct core_reloc_type_id_output *exp = (void *)test->output;
+ int err;
+
+ err = setup_type_id_case_local(test);
+ if (err)
+ return err;
+
+ exp->targ_struct = 0;
+ exp->targ_union = 0;
+ exp->targ_enum = 0;
+ exp->targ_int = 0;
+ exp->targ_struct_typedef = 0;
+ exp->targ_func_proto_typedef = 0;
+ exp->targ_arr_typedef = 0;
+
+ return 0;
+}
+
static struct core_reloc_test_case test_cases[] = {
/* validate we can find kernel image and use its BTF for relocs */
{
@@ -364,7 +549,7 @@ static struct core_reloc_test_case test_cases[] = {
/* validate field existence checks */
{
- EXISTENCE_CASE_COMMON(existence),
+ FIELD_EXISTS_CASE_COMMON(existence),
.input = STRUCT_TO_CHAR_PTR(core_reloc_existence) {
.a = 1,
.b = 2,
@@ -388,7 +573,7 @@ static struct core_reloc_test_case test_cases[] = {
.output_len = sizeof(struct core_reloc_existence_output),
},
{
- EXISTENCE_CASE_COMMON(existence___minimal),
+ FIELD_EXISTS_CASE_COMMON(existence___minimal),
.input = STRUCT_TO_CHAR_PTR(core_reloc_existence___minimal) {
.a = 42,
},
@@ -408,12 +593,12 @@ static struct core_reloc_test_case test_cases[] = {
.output_len = sizeof(struct core_reloc_existence_output),
},
- EXISTENCE_ERR_CASE(existence__err_int_sz),
- EXISTENCE_ERR_CASE(existence__err_int_type),
- EXISTENCE_ERR_CASE(existence__err_int_kind),
- EXISTENCE_ERR_CASE(existence__err_arr_kind),
- EXISTENCE_ERR_CASE(existence__err_arr_value_type),
- EXISTENCE_ERR_CASE(existence__err_struct_type),
+ FIELD_EXISTS_ERR_CASE(existence__err_int_sz),
+ FIELD_EXISTS_ERR_CASE(existence__err_int_type),
+ FIELD_EXISTS_ERR_CASE(existence__err_int_kind),
+ FIELD_EXISTS_ERR_CASE(existence__err_arr_kind),
+ FIELD_EXISTS_ERR_CASE(existence__err_arr_value_type),
+ FIELD_EXISTS_ERR_CASE(existence__err_struct_type),
/* bitfield relocation checks */
BITFIELDS_CASE(bitfields, {
@@ -452,11 +637,117 @@ static struct core_reloc_test_case test_cases[] = {
/* size relocation checks */
SIZE_CASE(size),
SIZE_CASE(size___diff_sz),
+ SIZE_ERR_CASE(size___err_ambiguous),
+
+ /* validate type existence and size relocations */
+ TYPE_BASED_CASE(type_based, {
+ .struct_exists = 1,
+ .union_exists = 1,
+ .enum_exists = 1,
+ .typedef_named_struct_exists = 1,
+ .typedef_anon_struct_exists = 1,
+ .typedef_struct_ptr_exists = 1,
+ .typedef_int_exists = 1,
+ .typedef_enum_exists = 1,
+ .typedef_void_ptr_exists = 1,
+ .typedef_func_proto_exists = 1,
+ .typedef_arr_exists = 1,
+ .struct_sz = sizeof(struct a_struct),
+ .union_sz = sizeof(union a_union),
+ .enum_sz = sizeof(enum an_enum),
+ .typedef_named_struct_sz = sizeof(named_struct_typedef),
+ .typedef_anon_struct_sz = sizeof(anon_struct_typedef),
+ .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef),
+ .typedef_int_sz = sizeof(int_typedef),
+ .typedef_enum_sz = sizeof(enum_typedef),
+ .typedef_void_ptr_sz = sizeof(void_ptr_typedef),
+ .typedef_func_proto_sz = sizeof(func_proto_typedef),
+ .typedef_arr_sz = sizeof(arr_typedef),
+ }),
+ TYPE_BASED_CASE(type_based___all_missing, {
+ /* all zeros */
+ }),
+ TYPE_BASED_CASE(type_based___diff_sz, {
+ .struct_exists = 1,
+ .union_exists = 1,
+ .enum_exists = 1,
+ .typedef_named_struct_exists = 1,
+ .typedef_anon_struct_exists = 1,
+ .typedef_struct_ptr_exists = 1,
+ .typedef_int_exists = 1,
+ .typedef_enum_exists = 1,
+ .typedef_void_ptr_exists = 1,
+ .typedef_func_proto_exists = 1,
+ .typedef_arr_exists = 1,
+ .struct_sz = sizeof(struct a_struct___diff_sz),
+ .union_sz = sizeof(union a_union___diff_sz),
+ .enum_sz = sizeof(enum an_enum___diff_sz),
+ .typedef_named_struct_sz = sizeof(named_struct_typedef___diff_sz),
+ .typedef_anon_struct_sz = sizeof(anon_struct_typedef___diff_sz),
+ .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef___diff_sz),
+ .typedef_int_sz = sizeof(int_typedef___diff_sz),
+ .typedef_enum_sz = sizeof(enum_typedef___diff_sz),
+ .typedef_void_ptr_sz = sizeof(void_ptr_typedef___diff_sz),
+ .typedef_func_proto_sz = sizeof(func_proto_typedef___diff_sz),
+ .typedef_arr_sz = sizeof(arr_typedef___diff_sz),
+ }),
+ TYPE_BASED_CASE(type_based___incompat, {
+ .enum_exists = 1,
+ .enum_sz = sizeof(enum an_enum),
+ }),
+ TYPE_BASED_CASE(type_based___fn_wrong_args, {
+ .struct_exists = 1,
+ .struct_sz = sizeof(struct a_struct),
+ }),
+
+ /* BTF_TYPE_ID_LOCAL/BTF_TYPE_ID_TARGET tests */
+ TYPE_ID_CASE(type_id, setup_type_id_case_success),
+ TYPE_ID_CASE(type_id___missing_targets, setup_type_id_case_failure),
+
+ /* Enumerator value existence and value relocations */
+ ENUMVAL_CASE(enumval, {
+ .named_val1_exists = true,
+ .named_val2_exists = true,
+ .named_val3_exists = true,
+ .anon_val1_exists = true,
+ .anon_val2_exists = true,
+ .anon_val3_exists = true,
+ .named_val1 = 1,
+ .named_val2 = 2,
+ .anon_val1 = 0x10,
+ .anon_val2 = 0x20,
+ }),
+ ENUMVAL_CASE(enumval___diff, {
+ .named_val1_exists = true,
+ .named_val2_exists = true,
+ .named_val3_exists = true,
+ .anon_val1_exists = true,
+ .anon_val2_exists = true,
+ .anon_val3_exists = true,
+ .named_val1 = 101,
+ .named_val2 = 202,
+ .anon_val1 = 0x11,
+ .anon_val2 = 0x22,
+ }),
+ ENUMVAL_CASE(enumval___val3_missing, {
+ .named_val1_exists = true,
+ .named_val2_exists = true,
+ .named_val3_exists = false,
+ .anon_val1_exists = true,
+ .anon_val2_exists = true,
+ .anon_val3_exists = false,
+ .named_val1 = 111,
+ .named_val2 = 222,
+ .anon_val1 = 0x111,
+ .anon_val2 = 0x222,
+ }),
+ ENUMVAL_ERR_CASE(enumval___err_missing),
};
struct data {
char in[256];
char out[256];
+ bool skip;
uint64_t my_pid_tgid;
};
@@ -472,7 +763,7 @@ void test_core_reloc(void)
struct bpf_object_load_attr load_attr = {};
struct core_reloc_test_case *test_case;
const char *tp_name, *probe_name;
- int err, duration = 0, i, equal;
+ int err, i, equal;
struct bpf_link *link = NULL;
struct bpf_map *data_map;
struct bpf_program *prog;
@@ -488,11 +779,13 @@ void test_core_reloc(void)
if (!test__start_subtest(test_case->case_name))
continue;
- DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
- .relaxed_core_relocs = test_case->relaxed_core_relocs,
- );
+ if (test_case->setup) {
+ err = test_case->setup(test_case);
+ if (CHECK(err, "test_setup", "test #%d setup failed: %d\n", i, err))
+ continue;
+ }
- obj = bpf_object__open_file(test_case->bpf_obj_file, &opts);
+ obj = bpf_object__open_file(test_case->bpf_obj_file, NULL);
if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n",
test_case->bpf_obj_file, PTR_ERR(obj)))
continue;
@@ -515,15 +808,10 @@ void test_core_reloc(void)
load_attr.log_level = 0;
load_attr.target_btf_path = test_case->btf_src_file;
err = bpf_object__load_xattr(&load_attr);
- if (test_case->fails) {
- CHECK(!err, "obj_load_fail",
- "should fail to load prog '%s'\n", probe_name);
+ if (err) {
+ if (!test_case->fails)
+ CHECK(false, "obj_load", "failed to load prog '%s': %d\n", probe_name, err);
goto cleanup;
- } else {
- if (CHECK(err, "obj_load",
- "failed to load prog '%s': %d\n",
- probe_name, err))
- goto cleanup;
}
data_map = bpf_object__find_map_by_name(obj, "test_cor.bss");
@@ -551,6 +839,16 @@ void test_core_reloc(void)
/* trigger test run */
usleep(1);
+ if (data->skip) {
+ test__skip();
+ goto cleanup;
+ }
+
+ if (test_case->fails) {
+ CHECK(false, "obj_load_fail", "should fail to load prog '%s'\n", probe_name);
+ goto cleanup;
+ }
+
equal = memcmp(data->out, test_case->output,
test_case->output_len) == 0;
if (CHECK(!equal, "check_result",
diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c
new file mode 100644
index 0000000..fc12e0d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/d_path.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <sys/stat.h>
+#include <linux/sched.h>
+#include <sys/syscall.h>
+
+#define MAX_PATH_LEN 128
+#define MAX_FILES 7
+
+#include "test_d_path.skel.h"
+
+static int duration;
+
+static struct {
+ __u32 cnt;
+ char paths[MAX_FILES][MAX_PATH_LEN];
+} src;
+
+static int set_pathname(int fd, pid_t pid)
+{
+ char buf[MAX_PATH_LEN];
+
+ snprintf(buf, MAX_PATH_LEN, "/proc/%d/fd/%d", pid, fd);
+ return readlink(buf, src.paths[src.cnt++], MAX_PATH_LEN);
+}
+
+static int trigger_fstat_events(pid_t pid)
+{
+ int sockfd = -1, procfd = -1, devfd = -1;
+ int localfd = -1, indicatorfd = -1;
+ int pipefd[2] = { -1, -1 };
+ struct stat fileStat;
+ int ret = -1;
+
+ /* unmountable pseudo-filesystems */
+ if (CHECK(pipe(pipefd) < 0, "trigger", "pipe failed\n"))
+ return ret;
+ /* unmountable pseudo-filesystems */
+ sockfd = socket(AF_INET, SOCK_STREAM, 0);
+ if (CHECK(sockfd < 0, "trigger", "socket failed\n"))
+ goto out_close;
+ /* mountable pseudo-filesystems */
+ procfd = open("/proc/self/comm", O_RDONLY);
+ if (CHECK(procfd < 0, "trigger", "open /proc/self/comm failed\n"))
+ goto out_close;
+ devfd = open("/dev/urandom", O_RDONLY);
+ if (CHECK(devfd < 0, "trigger", "open /dev/urandom failed\n"))
+ goto out_close;
+ localfd = open("/tmp/d_path_loadgen.txt", O_CREAT | O_RDONLY, 0644);
+ if (CHECK(localfd < 0, "trigger", "open /tmp/d_path_loadgen.txt failed\n"))
+ goto out_close;
+ /* bpf_d_path will return path with (deleted) */
+ remove("/tmp/d_path_loadgen.txt");
+ indicatorfd = open("/tmp/", O_PATH);
+ if (CHECK(indicatorfd < 0, "trigger", "open /tmp/ failed\n"))
+ goto out_close;
+
+ ret = set_pathname(pipefd[0], pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for pipe[0]\n"))
+ goto out_close;
+ ret = set_pathname(pipefd[1], pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for pipe[1]\n"))
+ goto out_close;
+ ret = set_pathname(sockfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for socket\n"))
+ goto out_close;
+ ret = set_pathname(procfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for proc\n"))
+ goto out_close;
+ ret = set_pathname(devfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for dev\n"))
+ goto out_close;
+ ret = set_pathname(localfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for file\n"))
+ goto out_close;
+ ret = set_pathname(indicatorfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for dir\n"))
+ goto out_close;
+
+ /* triggers vfs_getattr */
+ fstat(pipefd[0], &fileStat);
+ fstat(pipefd[1], &fileStat);
+ fstat(sockfd, &fileStat);
+ fstat(procfd, &fileStat);
+ fstat(devfd, &fileStat);
+ fstat(localfd, &fileStat);
+ fstat(indicatorfd, &fileStat);
+
+out_close:
+ /* triggers filp_close */
+ close(pipefd[0]);
+ close(pipefd[1]);
+ close(sockfd);
+ close(procfd);
+ close(devfd);
+ close(localfd);
+ close(indicatorfd);
+ return ret;
+}
+
+void test_d_path(void)
+{
+ struct test_d_path__bss *bss;
+ struct test_d_path *skel;
+ int err;
+
+ skel = test_d_path__open_and_load();
+ if (CHECK(!skel, "setup", "d_path skeleton failed\n"))
+ goto cleanup;
+
+ err = test_d_path__attach(skel);
+ if (CHECK(err, "setup", "attach failed: %d\n", err))
+ goto cleanup;
+
+ bss = skel->bss;
+ bss->my_pid = getpid();
+
+ err = trigger_fstat_events(bss->my_pid);
+ if (err < 0)
+ goto cleanup;
+
+ for (int i = 0; i < MAX_FILES; i++) {
+ CHECK(strncmp(src.paths[i], bss->paths_stat[i], MAX_PATH_LEN),
+ "check",
+ "failed to get stat path[%d]: %s vs %s\n",
+ i, src.paths[i], bss->paths_stat[i]);
+ CHECK(strncmp(src.paths[i], bss->paths_close[i], MAX_PATH_LEN),
+ "check",
+ "failed to get close path[%d]: %s vs %s\n",
+ i, src.paths[i], bss->paths_close[i]);
+ /* The d_path helper returns size plus NUL char, hence + 1 */
+ CHECK(bss->rets_stat[i] != strlen(bss->paths_stat[i]) + 1,
+ "check",
+ "failed to match stat return [%d]: %d vs %zd [%s]\n",
+ i, bss->rets_stat[i], strlen(bss->paths_stat[i]) + 1,
+ bss->paths_stat[i]);
+ CHECK(bss->rets_close[i] != strlen(bss->paths_stat[i]) + 1,
+ "check",
+ "failed to match stat return [%d]: %d vs %zd [%s]\n",
+ i, bss->rets_close[i], strlen(bss->paths_close[i]) + 1,
+ bss->paths_stat[i]);
+ }
+
+cleanup:
+ test_d_path__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index 197d0d2..a550dab 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -123,6 +123,7 @@ static void test_func_replace(void)
"freplace/get_skb_len",
"freplace/get_skb_ifindex",
"freplace/get_constant",
+ "freplace/test_pkt_write_access_subprog",
};
test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
"./test_pkt_access.o",
@@ -141,10 +142,77 @@ static void test_func_replace_verify(void)
prog_name, false);
}
+static void test_func_sockmap_update(void)
+{
+ const char *prog_name[] = {
+ "freplace/cls_redirect",
+ };
+ test_fexit_bpf2bpf_common("./freplace_cls_redirect.o",
+ "./test_cls_redirect.o",
+ ARRAY_SIZE(prog_name),
+ prog_name, false);
+}
+
+static void test_obj_load_failure_common(const char *obj_file,
+ const char *target_obj_file)
+
+{
+ /*
+ * standalone test that asserts failure to load freplace prog
+ * because of invalid return code.
+ */
+ struct bpf_object *obj = NULL, *pkt_obj;
+ int err, pkt_fd;
+ __u32 duration = 0;
+
+ err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
+ &pkt_obj, &pkt_fd);
+ /* the target prog should load fine */
+ if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
+ target_obj_file, err, errno))
+ return;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+ .attach_prog_fd = pkt_fd,
+ );
+
+ obj = bpf_object__open_file(obj_file, &opts);
+ if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
+ "failed to open %s: %ld\n", obj_file,
+ PTR_ERR(obj)))
+ goto close_prog;
+
+ /* It should fail to load the program */
+ err = bpf_object__load(obj);
+ if (CHECK(!err, "bpf_obj_load should fail", "err %d\n", err))
+ goto close_prog;
+
+close_prog:
+ if (!IS_ERR_OR_NULL(obj))
+ bpf_object__close(obj);
+ bpf_object__close(pkt_obj);
+}
+
+static void test_func_replace_return_code(void)
+{
+ /* test invalid return code in the replaced program */
+ test_obj_load_failure_common("./freplace_connect_v4_prog.o",
+ "./connect4_prog.o");
+}
+
+static void test_func_map_prog_compatibility(void)
+{
+ /* test with spin lock map value in the replaced program */
+ test_obj_load_failure_common("./freplace_attach_probe.o",
+ "./test_attach_probe.o");
+}
+
void test_fexit_bpf2bpf(void)
{
test_target_no_callees();
test_target_yes_callees();
test_func_replace();
test_func_replace_verify();
+ test_func_sockmap_update();
+ test_func_replace_return_code();
+ test_func_map_prog_compatibility();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index c33ec18..ca9f089 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -7,6 +7,8 @@
#include "test_perf_buffer.skel.h"
#include "bpf/libbpf_internal.h"
+static int duration;
+
/* AddressSanitizer sometimes crashes due to data dereference below, due to
* this being mmap()'ed memory. Disable instrumentation with
* no_sanitize_address attribute
@@ -24,13 +26,31 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
CPU_SET(cpu, cpu_seen);
}
+int trigger_on_cpu(int cpu)
+{
+ cpu_set_t cpu_set;
+ int err;
+
+ CPU_ZERO(&cpu_set);
+ CPU_SET(cpu, &cpu_set);
+
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n", cpu, err))
+ return err;
+
+ usleep(1);
+
+ return 0;
+}
+
void test_perf_buffer(void)
{
- int err, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0;
+ int err, on_len, nr_on_cpus = 0, nr_cpus, i;
struct perf_buffer_opts pb_opts = {};
struct test_perf_buffer *skel;
- cpu_set_t cpu_set, cpu_seen;
+ cpu_set_t cpu_seen;
struct perf_buffer *pb;
+ int last_fd = -1, fd;
bool *online;
nr_cpus = libbpf_num_possible_cpus();
@@ -63,6 +83,9 @@ void test_perf_buffer(void)
if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
goto out_close;
+ CHECK(perf_buffer__epoll_fd(pb) < 0, "epoll_fd",
+ "bad fd: %d\n", perf_buffer__epoll_fd(pb));
+
/* trigger kprobe on every CPU */
CPU_ZERO(&cpu_seen);
for (i = 0; i < nr_cpus; i++) {
@@ -71,16 +94,8 @@ void test_perf_buffer(void)
continue;
}
- CPU_ZERO(&cpu_set);
- CPU_SET(i, &cpu_set);
-
- err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set),
- &cpu_set);
- if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n",
- i, err))
+ if (trigger_on_cpu(i))
goto out_close;
-
- usleep(1);
}
/* read perf buffer */
@@ -92,6 +107,34 @@ void test_perf_buffer(void)
"expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen)))
goto out_free_pb;
+ if (CHECK(perf_buffer__buffer_cnt(pb) != nr_cpus, "buf_cnt",
+ "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_cpus))
+ goto out_close;
+
+ for (i = 0; i < nr_cpus; i++) {
+ if (i >= on_len || !online[i])
+ continue;
+
+ fd = perf_buffer__buffer_fd(pb, i);
+ CHECK(fd < 0 || last_fd == fd, "fd_check", "last fd %d == fd %d\n", last_fd, fd);
+ last_fd = fd;
+
+ err = perf_buffer__consume_buffer(pb, i);
+ if (CHECK(err, "drain_buf", "cpu %d, err %d\n", i, err))
+ goto out_close;
+
+ CPU_CLR(i, &cpu_seen);
+ if (trigger_on_cpu(i))
+ goto out_close;
+
+ err = perf_buffer__consume_buffer(pb, i);
+ if (CHECK(err, "consume_buf", "cpu %d, err %d\n", i, err))
+ goto out_close;
+
+ if (CHECK(!CPU_ISSET(i, &cpu_seen), "cpu_seen", "cpu %d not seen\n", i))
+ goto out_close;
+ }
+
out_free_pb:
perf_buffer__free(pb);
out_close:
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index 3b127ca..8826c65 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -47,6 +47,15 @@ BTF_ID(struct, S)
BTF_ID(union, U)
BTF_ID(func, func)
+BTF_SET_START(test_set)
+BTF_ID(typedef, S)
+BTF_ID(typedef, T)
+BTF_ID(typedef, U)
+BTF_ID(struct, S)
+BTF_ID(union, U)
+BTF_ID(func, func)
+BTF_SET_END(test_set)
+
static int
__resolve_symbol(struct btf *btf, int type_id)
{
@@ -116,12 +125,40 @@ int test_resolve_btfids(void)
*/
for (j = 0; j < ARRAY_SIZE(test_lists); j++) {
test_list = test_lists[j];
- for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) {
+ for (i = 0; i < ARRAY_SIZE(test_symbols); i++) {
ret = CHECK(test_list[i] != test_symbols[i].id,
"id_check",
"wrong ID for %s (%d != %d)\n",
test_symbols[i].name,
test_list[i], test_symbols[i].id);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /* Check BTF_SET_START(test_set) IDs */
+ for (i = 0; i < test_set.cnt; i++) {
+ bool found = false;
+
+ for (j = 0; j < ARRAY_SIZE(test_symbols); j++) {
+ if (test_symbols[j].id != test_set.ids[i])
+ continue;
+ found = true;
+ break;
+ }
+
+ ret = CHECK(!found, "id_check",
+ "ID %d not found in test_symbols\n",
+ test_set.ids[i]);
+ if (ret)
+ break;
+
+ if (i > 0) {
+ ret = CHECK(test_set.ids[i - 1] > test_set.ids[i],
+ "sort_check",
+ "test_set is not sorted\n");
+ if (ret)
+ break;
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index 47fa04a..a49a26f 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -49,7 +49,7 @@ configure_stack(void)
sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf",
"direct-action object-file ./test_sk_assign.o",
"section classifier/sk_assign_test",
- (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "");
+ (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "verbose");
if (CHECK(system(tc_cmd), "BPF load failed;",
"run with -vv for more info\n"))
return false;
@@ -268,6 +268,7 @@ void test_sk_assign(void)
int server = -1;
int server_map;
int self_net;
+ int i;
self_net = open(NS_SELF, O_RDONLY);
if (CHECK_FAIL(self_net < 0)) {
@@ -286,7 +287,7 @@ void test_sk_assign(void)
goto cleanup;
}
- for (int i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) {
+ for (i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) {
struct test_sk_cfg *test = &tests[i];
const struct sockaddr *addr;
const int zero = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 96e7b7f..0b79d78 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -4,6 +4,8 @@
#include "test_progs.h"
#include "test_skmsg_load_helpers.skel.h"
+#include "test_sockmap_update.skel.h"
+#include "test_sockmap_invalid_update.skel.h"
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
@@ -101,6 +103,74 @@ static void test_skmsg_helpers(enum bpf_map_type map_type)
test_skmsg_load_helpers__destroy(skel);
}
+static void test_sockmap_update(enum bpf_map_type map_type)
+{
+ struct bpf_prog_test_run_attr tattr;
+ int err, prog, src, dst, duration = 0;
+ struct test_sockmap_update *skel;
+ __u64 src_cookie, dst_cookie;
+ const __u32 zero = 0;
+ char dummy[14] = {0};
+ __s64 sk;
+
+ sk = connected_socket_v4();
+ if (CHECK(sk == -1, "connected_socket_v4", "cannot connect\n"))
+ return;
+
+ skel = test_sockmap_update__open_and_load();
+ if (CHECK(!skel, "open_and_load", "cannot load skeleton\n"))
+ goto close_sk;
+
+ prog = bpf_program__fd(skel->progs.copy_sock_map);
+ src = bpf_map__fd(skel->maps.src);
+ if (map_type == BPF_MAP_TYPE_SOCKMAP)
+ dst = bpf_map__fd(skel->maps.dst_sock_map);
+ else
+ dst = bpf_map__fd(skel->maps.dst_sock_hash);
+
+ err = bpf_map_update_elem(src, &zero, &sk, BPF_NOEXIST);
+ if (CHECK(err, "update_elem(src)", "errno=%u\n", errno))
+ goto out;
+
+ err = bpf_map_lookup_elem(src, &zero, &src_cookie);
+ if (CHECK(err, "lookup_elem(src, cookie)", "errno=%u\n", errno))
+ goto out;
+
+ tattr = (struct bpf_prog_test_run_attr){
+ .prog_fd = prog,
+ .repeat = 1,
+ .data_in = dummy,
+ .data_size_in = sizeof(dummy),
+ };
+
+ err = bpf_prog_test_run_xattr(&tattr);
+ if (CHECK_ATTR(err || !tattr.retval, "bpf_prog_test_run",
+ "errno=%u retval=%u\n", errno, tattr.retval))
+ goto out;
+
+ err = bpf_map_lookup_elem(dst, &zero, &dst_cookie);
+ if (CHECK(err, "lookup_elem(dst, cookie)", "errno=%u\n", errno))
+ goto out;
+
+ CHECK(dst_cookie != src_cookie, "cookie mismatch", "%llu != %llu\n",
+ dst_cookie, src_cookie);
+
+out:
+ test_sockmap_update__destroy(skel);
+close_sk:
+ close(sk);
+}
+
+static void test_sockmap_invalid_update(void)
+{
+ struct test_sockmap_invalid_update *skel;
+ int duration = 0;
+
+ skel = test_sockmap_invalid_update__open_and_load();
+ if (CHECK(skel, "open_and_load", "verifier accepted map_update\n"))
+ test_sockmap_invalid_update__destroy(skel);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
@@ -111,4 +181,10 @@ void test_sockmap_basic(void)
test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash sk_msg load helpers"))
test_skmsg_helpers(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap update"))
+ test_sockmap_update(BPF_MAP_TYPE_SOCKMAP);
+ if (test__start_subtest("sockhash update"))
+ test_sockmap_update(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap update in unsafe context"))
+ test_sockmap_invalid_update();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
new file mode 100644
index 0000000..24ba0d2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -0,0 +1,622 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <linux/compiler.h>
+
+#include "test_progs.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "test_tcp_hdr_options.h"
+#include "test_tcp_hdr_options.skel.h"
+#include "test_misc_tcp_hdr_options.skel.h"
+
+#define LO_ADDR6 "::eB9F"
+#define CG_NAME "/tcpbpf-hdr-opt-test"
+
+struct bpf_test_option exp_passive_estab_in;
+struct bpf_test_option exp_active_estab_in;
+struct bpf_test_option exp_passive_fin_in;
+struct bpf_test_option exp_active_fin_in;
+struct hdr_stg exp_passive_hdr_stg;
+struct hdr_stg exp_active_hdr_stg = { .active = true, };
+
+static struct test_misc_tcp_hdr_options *misc_skel;
+static struct test_tcp_hdr_options *skel;
+static int lport_linum_map_fd;
+static int hdr_stg_map_fd;
+static __u32 duration;
+static int cg_fd;
+
+struct sk_fds {
+ int srv_fd;
+ int passive_fd;
+ int active_fd;
+ int passive_lport;
+ int active_lport;
+};
+
+static int add_lo_addr(void)
+{
+ char ip_addr_cmd[256];
+ int cmdlen;
+
+ cmdlen = snprintf(ip_addr_cmd, sizeof(ip_addr_cmd),
+ "ip -6 addr add %s/128 dev lo scope host",
+ LO_ADDR6);
+
+ if (CHECK(cmdlen >= sizeof(ip_addr_cmd), "compile ip cmd",
+ "failed to add host addr %s to lo. ip cmdlen is too long\n",
+ LO_ADDR6))
+ return -1;
+
+ if (CHECK(system(ip_addr_cmd), "run ip cmd",
+ "failed to add host addr %s to lo\n", LO_ADDR6))
+ return -1;
+
+ return 0;
+}
+
+static int create_netns(void)
+{
+ if (CHECK(unshare(CLONE_NEWNET), "create netns",
+ "unshare(CLONE_NEWNET): %s (%d)",
+ strerror(errno), errno))
+ return -1;
+
+ if (CHECK(system("ip link set dev lo up"), "run ip cmd",
+ "failed to bring lo link up\n"))
+ return -1;
+
+ if (add_lo_addr())
+ return -1;
+
+ return 0;
+}
+
+static int write_sysctl(const char *sysctl, const char *value)
+{
+ int fd, err, len;
+
+ fd = open(sysctl, O_WRONLY);
+ if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n",
+ sysctl, strerror(errno), errno))
+ return -1;
+
+ len = strlen(value);
+ err = write(fd, value, len);
+ close(fd);
+ if (CHECK(err != len, "write sysctl",
+ "write(%s, %s): err:%d %s (%d)\n",
+ sysctl, value, err, strerror(errno), errno))
+ return -1;
+
+ return 0;
+}
+
+static void print_hdr_stg(const struct hdr_stg *hdr_stg, const char *prefix)
+{
+ fprintf(stderr, "%s{active:%u, resend_syn:%u, syncookie:%u, fastopen:%u}\n",
+ prefix ? : "", hdr_stg->active, hdr_stg->resend_syn,
+ hdr_stg->syncookie, hdr_stg->fastopen);
+}
+
+static void print_option(const struct bpf_test_option *opt, const char *prefix)
+{
+ fprintf(stderr, "%s{flags:0x%x, max_delack_ms:%u, rand:0x%x}\n",
+ prefix ? : "", opt->flags, opt->max_delack_ms, opt->rand);
+}
+
+static void sk_fds_close(struct sk_fds *sk_fds)
+{
+ close(sk_fds->srv_fd);
+ close(sk_fds->passive_fd);
+ close(sk_fds->active_fd);
+}
+
+static int sk_fds_shutdown(struct sk_fds *sk_fds)
+{
+ int ret, abyte;
+
+ shutdown(sk_fds->active_fd, SHUT_WR);
+ ret = read(sk_fds->passive_fd, &abyte, sizeof(abyte));
+ if (CHECK(ret != 0, "read-after-shutdown(passive_fd):",
+ "ret:%d %s (%d)\n",
+ ret, strerror(errno), errno))
+ return -1;
+
+ shutdown(sk_fds->passive_fd, SHUT_WR);
+ ret = read(sk_fds->active_fd, &abyte, sizeof(abyte));
+ if (CHECK(ret != 0, "read-after-shutdown(active_fd):",
+ "ret:%d %s (%d)\n",
+ ret, strerror(errno), errno))
+ return -1;
+
+ return 0;
+}
+
+static int sk_fds_connect(struct sk_fds *sk_fds, bool fast_open)
+{
+ const char fast[] = "FAST!!!";
+ struct sockaddr_in6 addr6;
+ socklen_t len;
+
+ sk_fds->srv_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
+ if (CHECK(sk_fds->srv_fd == -1, "start_server", "%s (%d)\n",
+ strerror(errno), errno))
+ goto error;
+
+ if (fast_open)
+ sk_fds->active_fd = fastopen_connect(sk_fds->srv_fd, fast,
+ sizeof(fast), 0);
+ else
+ sk_fds->active_fd = connect_to_fd(sk_fds->srv_fd, 0);
+
+ if (CHECK_FAIL(sk_fds->active_fd == -1)) {
+ close(sk_fds->srv_fd);
+ goto error;
+ }
+
+ len = sizeof(addr6);
+ if (CHECK(getsockname(sk_fds->srv_fd, (struct sockaddr *)&addr6,
+ &len), "getsockname(srv_fd)", "%s (%d)\n",
+ strerror(errno), errno))
+ goto error_close;
+ sk_fds->passive_lport = ntohs(addr6.sin6_port);
+
+ len = sizeof(addr6);
+ if (CHECK(getsockname(sk_fds->active_fd, (struct sockaddr *)&addr6,
+ &len), "getsockname(active_fd)", "%s (%d)\n",
+ strerror(errno), errno))
+ goto error_close;
+ sk_fds->active_lport = ntohs(addr6.sin6_port);
+
+ sk_fds->passive_fd = accept(sk_fds->srv_fd, NULL, 0);
+ if (CHECK(sk_fds->passive_fd == -1, "accept(srv_fd)", "%s (%d)\n",
+ strerror(errno), errno))
+ goto error_close;
+
+ if (fast_open) {
+ char bytes_in[sizeof(fast)];
+ int ret;
+
+ ret = read(sk_fds->passive_fd, bytes_in, sizeof(bytes_in));
+ if (CHECK(ret != sizeof(fast), "read fastopen syn data",
+ "expected=%lu actual=%d\n", sizeof(fast), ret)) {
+ close(sk_fds->passive_fd);
+ goto error_close;
+ }
+ }
+
+ return 0;
+
+error_close:
+ close(sk_fds->active_fd);
+ close(sk_fds->srv_fd);
+
+error:
+ memset(sk_fds, -1, sizeof(*sk_fds));
+ return -1;
+}
+
+static int check_hdr_opt(const struct bpf_test_option *exp,
+ const struct bpf_test_option *act,
+ const char *hdr_desc)
+{
+ if (CHECK(memcmp(exp, act, sizeof(*exp)),
+ "expected-vs-actual", "unexpected %s\n", hdr_desc)) {
+ print_option(exp, "expected: ");
+ print_option(act, " actual: ");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int check_hdr_stg(const struct hdr_stg *exp, int fd,
+ const char *stg_desc)
+{
+ struct hdr_stg act;
+
+ if (CHECK(bpf_map_lookup_elem(hdr_stg_map_fd, &fd, &act),
+ "map_lookup(hdr_stg_map_fd)", "%s %s (%d)\n",
+ stg_desc, strerror(errno), errno))
+ return -1;
+
+ if (CHECK(memcmp(exp, &act, sizeof(*exp)),
+ "expected-vs-actual", "unexpected %s\n", stg_desc)) {
+ print_hdr_stg(exp, "expected: ");
+ print_hdr_stg(&act, " actual: ");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int check_error_linum(const struct sk_fds *sk_fds)
+{
+ unsigned int nr_errors = 0;
+ struct linum_err linum_err;
+ int lport;
+
+ lport = sk_fds->passive_lport;
+ if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) {
+ fprintf(stderr,
+ "bpf prog error out at lport:passive(%d), linum:%u err:%d\n",
+ lport, linum_err.linum, linum_err.err);
+ nr_errors++;
+ }
+
+ lport = sk_fds->active_lport;
+ if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) {
+ fprintf(stderr,
+ "bpf prog error out at lport:active(%d), linum:%u err:%d\n",
+ lport, linum_err.linum, linum_err.err);
+ nr_errors++;
+ }
+
+ return nr_errors;
+}
+
+static void check_hdr_and_close_fds(struct sk_fds *sk_fds)
+{
+ if (sk_fds_shutdown(sk_fds))
+ goto check_linum;
+
+ if (check_hdr_stg(&exp_passive_hdr_stg, sk_fds->passive_fd,
+ "passive_hdr_stg"))
+ goto check_linum;
+
+ if (check_hdr_stg(&exp_active_hdr_stg, sk_fds->active_fd,
+ "active_hdr_stg"))
+ goto check_linum;
+
+ if (check_hdr_opt(&exp_passive_estab_in, &skel->bss->passive_estab_in,
+ "passive_estab_in"))
+ goto check_linum;
+
+ if (check_hdr_opt(&exp_active_estab_in, &skel->bss->active_estab_in,
+ "active_estab_in"))
+ goto check_linum;
+
+ if (check_hdr_opt(&exp_passive_fin_in, &skel->bss->passive_fin_in,
+ "passive_fin_in"))
+ goto check_linum;
+
+ check_hdr_opt(&exp_active_fin_in, &skel->bss->active_fin_in,
+ "active_fin_in");
+
+check_linum:
+ CHECK_FAIL(check_error_linum(sk_fds));
+ sk_fds_close(sk_fds);
+}
+
+static void prepare_out(void)
+{
+ skel->bss->active_syn_out = exp_passive_estab_in;
+ skel->bss->passive_synack_out = exp_active_estab_in;
+
+ skel->bss->active_fin_out = exp_passive_fin_in;
+ skel->bss->passive_fin_out = exp_active_fin_in;
+}
+
+static void reset_test(void)
+{
+ size_t optsize = sizeof(struct bpf_test_option);
+ int lport, err;
+
+ memset(&skel->bss->passive_synack_out, 0, optsize);
+ memset(&skel->bss->passive_fin_out, 0, optsize);
+
+ memset(&skel->bss->passive_estab_in, 0, optsize);
+ memset(&skel->bss->passive_fin_in, 0, optsize);
+
+ memset(&skel->bss->active_syn_out, 0, optsize);
+ memset(&skel->bss->active_fin_out, 0, optsize);
+
+ memset(&skel->bss->active_estab_in, 0, optsize);
+ memset(&skel->bss->active_fin_in, 0, optsize);
+
+ skel->data->test_kind = TCPOPT_EXP;
+ skel->data->test_magic = 0xeB9F;
+
+ memset(&exp_passive_estab_in, 0, optsize);
+ memset(&exp_active_estab_in, 0, optsize);
+ memset(&exp_passive_fin_in, 0, optsize);
+ memset(&exp_active_fin_in, 0, optsize);
+
+ memset(&exp_passive_hdr_stg, 0, sizeof(exp_passive_hdr_stg));
+ memset(&exp_active_hdr_stg, 0, sizeof(exp_active_hdr_stg));
+ exp_active_hdr_stg.active = true;
+
+ err = bpf_map_get_next_key(lport_linum_map_fd, NULL, &lport);
+ while (!err) {
+ bpf_map_delete_elem(lport_linum_map_fd, &lport);
+ err = bpf_map_get_next_key(lport_linum_map_fd, &lport, &lport);
+ }
+}
+
+static void fastopen_estab(void)
+{
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+
+ hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+ lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+ exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_passive_estab_in.rand = 0xfa;
+ exp_passive_estab_in.max_delack_ms = 11;
+
+ exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_active_estab_in.rand = 0xce;
+ exp_active_estab_in.max_delack_ms = 22;
+
+ exp_passive_hdr_stg.fastopen = true;
+
+ prepare_out();
+
+ /* Allow fastopen without fastopen cookie */
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_fastopen", "1543"))
+ return;
+
+ link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+ if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
+ PTR_ERR(link)))
+ return;
+
+ if (sk_fds_connect(&sk_fds, true)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ check_hdr_and_close_fds(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+static void syncookie_estab(void)
+{
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+
+ hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+ lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+ exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_passive_estab_in.rand = 0xfa;
+ exp_passive_estab_in.max_delack_ms = 11;
+
+ exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS |
+ OPTION_F_RESEND;
+ exp_active_estab_in.rand = 0xce;
+ exp_active_estab_in.max_delack_ms = 22;
+
+ exp_passive_hdr_stg.syncookie = true;
+ exp_active_hdr_stg.resend_syn = true,
+
+ prepare_out();
+
+ /* Clear the RESEND to ensure the bpf prog can learn
+ * want_cookie and set the RESEND by itself.
+ */
+ skel->bss->passive_synack_out.flags &= ~OPTION_F_RESEND;
+
+ /* Enforce syncookie mode */
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2"))
+ return;
+
+ link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+ if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
+ PTR_ERR(link)))
+ return;
+
+ if (sk_fds_connect(&sk_fds, false)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ check_hdr_and_close_fds(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+static void fin(void)
+{
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+
+ hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+ lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+ exp_passive_fin_in.flags = OPTION_F_RAND;
+ exp_passive_fin_in.rand = 0xfa;
+
+ exp_active_fin_in.flags = OPTION_F_RAND;
+ exp_active_fin_in.rand = 0xce;
+
+ prepare_out();
+
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+ return;
+
+ link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+ if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
+ PTR_ERR(link)))
+ return;
+
+ if (sk_fds_connect(&sk_fds, false)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ check_hdr_and_close_fds(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+static void __simple_estab(bool exprm)
+{
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+
+ hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+ lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+ exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_passive_estab_in.rand = 0xfa;
+ exp_passive_estab_in.max_delack_ms = 11;
+
+ exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_active_estab_in.rand = 0xce;
+ exp_active_estab_in.max_delack_ms = 22;
+
+ prepare_out();
+
+ if (!exprm) {
+ skel->data->test_kind = 0xB9;
+ skel->data->test_magic = 0;
+ }
+
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+ return;
+
+ link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+ if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
+ PTR_ERR(link)))
+ return;
+
+ if (sk_fds_connect(&sk_fds, false)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ check_hdr_and_close_fds(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+static void no_exprm_estab(void)
+{
+ __simple_estab(false);
+}
+
+static void simple_estab(void)
+{
+ __simple_estab(true);
+}
+
+static void misc(void)
+{
+ const char send_msg[] = "MISC!!!";
+ char recv_msg[sizeof(send_msg)];
+ const unsigned int nr_data = 2;
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+ int i, ret;
+
+ lport_linum_map_fd = bpf_map__fd(misc_skel->maps.lport_linum_map);
+
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+ return;
+
+ link = bpf_program__attach_cgroup(misc_skel->progs.misc_estab, cg_fd);
+ if (CHECK(IS_ERR(link), "attach_cgroup(misc_estab)", "err: %ld\n",
+ PTR_ERR(link)))
+ return;
+
+ if (sk_fds_connect(&sk_fds, false)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ for (i = 0; i < nr_data; i++) {
+ /* MSG_EOR to ensure skb will not be combined */
+ ret = send(sk_fds.active_fd, send_msg, sizeof(send_msg),
+ MSG_EOR);
+ if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n",
+ ret))
+ goto check_linum;
+
+ ret = read(sk_fds.passive_fd, recv_msg, sizeof(recv_msg));
+ if (CHECK(ret != sizeof(send_msg), "read(msg)", "ret:%d\n",
+ ret))
+ goto check_linum;
+ }
+
+ if (sk_fds_shutdown(&sk_fds))
+ goto check_linum;
+
+ CHECK(misc_skel->bss->nr_syn != 1, "unexpected nr_syn",
+ "expected (1) != actual (%u)\n",
+ misc_skel->bss->nr_syn);
+
+ CHECK(misc_skel->bss->nr_data != nr_data, "unexpected nr_data",
+ "expected (%u) != actual (%u)\n",
+ nr_data, misc_skel->bss->nr_data);
+
+ /* The last ACK may have been delayed, so it is either 1 or 2. */
+ CHECK(misc_skel->bss->nr_pure_ack != 1 &&
+ misc_skel->bss->nr_pure_ack != 2,
+ "unexpected nr_pure_ack",
+ "expected (1 or 2) != actual (%u)\n",
+ misc_skel->bss->nr_pure_ack);
+
+ CHECK(misc_skel->bss->nr_fin != 1, "unexpected nr_fin",
+ "expected (1) != actual (%u)\n",
+ misc_skel->bss->nr_fin);
+
+check_linum:
+ CHECK_FAIL(check_error_linum(&sk_fds));
+ sk_fds_close(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+struct test {
+ const char *desc;
+ void (*run)(void);
+};
+
+#define DEF_TEST(name) { #name, name }
+static struct test tests[] = {
+ DEF_TEST(simple_estab),
+ DEF_TEST(no_exprm_estab),
+ DEF_TEST(syncookie_estab),
+ DEF_TEST(fastopen_estab),
+ DEF_TEST(fin),
+ DEF_TEST(misc),
+};
+
+void test_tcp_hdr_options(void)
+{
+ int i;
+
+ skel = test_tcp_hdr_options__open_and_load();
+ if (CHECK(!skel, "open and load skel", "failed"))
+ return;
+
+ misc_skel = test_misc_tcp_hdr_options__open_and_load();
+ if (CHECK(!misc_skel, "open and load misc test skel", "failed"))
+ goto skel_destroy;
+
+ cg_fd = test__join_cgroup(CG_NAME);
+ if (CHECK_FAIL(cg_fd < 0))
+ goto skel_destroy;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ if (!test__start_subtest(tests[i].desc))
+ continue;
+
+ if (create_netns())
+ break;
+
+ tests[i].run();
+
+ reset_test();
+ }
+
+ close(cg_fd);
+skel_destroy:
+ test_misc_tcp_hdr_options__destroy(misc_skel);
+ test_tcp_hdr_options__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
new file mode 100644
index 0000000..172c999
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <test_progs.h>
+
+#define TDIR "/sys/kernel/debug"
+
+static int read_iter(char *file)
+{
+ /* 1024 should be enough to get contiguous 4 "iter" letters at some point */
+ char buf[1024];
+ int fd, len;
+
+ fd = open(file, 0);
+ if (fd < 0)
+ return -1;
+ while ((len = read(fd, buf, sizeof(buf))) > 0)
+ if (strstr(buf, "iter")) {
+ close(fd);
+ return 0;
+ }
+ close(fd);
+ return -1;
+}
+
+static int fn(void)
+{
+ int err, duration = 0;
+
+ err = unshare(CLONE_NEWNS);
+ if (CHECK(err, "unshare", "failed: %d\n", errno))
+ goto out;
+
+ err = mount("", "/", "", MS_REC | MS_PRIVATE, NULL);
+ if (CHECK(err, "mount /", "failed: %d\n", errno))
+ goto out;
+
+ err = umount(TDIR);
+ if (CHECK(err, "umount " TDIR, "failed: %d\n", errno))
+ goto out;
+
+ err = mount("none", TDIR, "tmpfs", 0, NULL);
+ if (CHECK(err, "mount", "mount root failed: %d\n", errno))
+ goto out;
+
+ err = mkdir(TDIR "/fs1", 0777);
+ if (CHECK(err, "mkdir "TDIR"/fs1", "failed: %d\n", errno))
+ goto out;
+ err = mkdir(TDIR "/fs2", 0777);
+ if (CHECK(err, "mkdir "TDIR"/fs2", "failed: %d\n", errno))
+ goto out;
+
+ err = mount("bpf", TDIR "/fs1", "bpf", 0, NULL);
+ if (CHECK(err, "mount bpffs "TDIR"/fs1", "failed: %d\n", errno))
+ goto out;
+ err = mount("bpf", TDIR "/fs2", "bpf", 0, NULL);
+ if (CHECK(err, "mount bpffs " TDIR "/fs2", "failed: %d\n", errno))
+ goto out;
+
+ err = read_iter(TDIR "/fs1/maps.debug");
+ if (CHECK(err, "reading " TDIR "/fs1/maps.debug", "failed\n"))
+ goto out;
+ err = read_iter(TDIR "/fs2/progs.debug");
+ if (CHECK(err, "reading " TDIR "/fs2/progs.debug", "failed\n"))
+ goto out;
+out:
+ umount(TDIR "/fs1");
+ umount(TDIR "/fs2");
+ rmdir(TDIR "/fs1");
+ rmdir(TDIR "/fs2");
+ umount(TDIR);
+ exit(err);
+}
+
+void test_test_bpffs(void)
+{
+ int err, duration = 0, status = 0;
+ pid_t pid;
+
+ pid = fork();
+ if (CHECK(pid == -1, "clone", "clone failed %d", errno))
+ return;
+ if (pid == 0)
+ fn();
+ err = waitpid(pid, &status, 0);
+ if (CHECK(err == -1 && errno != ECHILD, "waitpid", "failed %d", errno))
+ return;
+ if (CHECK(WEXITSTATUS(status), "bpffs test ", "failed %d", WEXITSTATUS(status)))
+ return;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
new file mode 100644
index 0000000..91cd6f3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <linux/limits.h>
+
+#include "local_storage.skel.h"
+#include "network_helpers.h"
+
+int create_and_unlink_file(void)
+{
+ char fname[PATH_MAX] = "/tmp/fileXXXXXX";
+ int fd;
+
+ fd = mkstemp(fname);
+ if (fd < 0)
+ return fd;
+
+ close(fd);
+ unlink(fname);
+ return 0;
+}
+
+void test_test_local_storage(void)
+{
+ struct local_storage *skel = NULL;
+ int err, duration = 0, serv_sk = -1;
+
+ skel = local_storage__open_and_load();
+ if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
+ goto close_prog;
+
+ err = local_storage__attach(skel);
+ if (CHECK(err, "attach", "lsm attach failed: %d\n", err))
+ goto close_prog;
+
+ skel->bss->monitored_pid = getpid();
+
+ err = create_and_unlink_file();
+ if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ CHECK(skel->data->inode_storage_result != 0, "inode_storage_result",
+ "inode_local_storage not set\n");
+
+ serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (CHECK(serv_sk < 0, "start_server", "failed to start server\n"))
+ goto close_prog;
+
+ CHECK(skel->data->sk_storage_result != 0, "sk_storage_result",
+ "sk_local_storage not set\n");
+
+ close(serv_sk);
+
+close_prog:
+ local_storage__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
index b17eb20..6ab2922 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -10,6 +10,7 @@
#include <unistd.h>
#include <malloc.h>
#include <stdlib.h>
+#include <unistd.h>
#include "lsm.skel.h"
@@ -55,6 +56,7 @@ void test_test_lsm(void)
{
struct lsm *skel = NULL;
int err, duration = 0;
+ int buf = 1234;
skel = lsm__open_and_load();
if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
@@ -81,6 +83,13 @@ void test_test_lsm(void)
CHECK(skel->bss->mprotect_count != 1, "mprotect_count",
"mprotect_count = %d\n", skel->bss->mprotect_count);
+ syscall(__NR_setdomainname, &buf, -2L);
+ syscall(__NR_setdomainname, 0, -3L);
+ syscall(__NR_setdomainname, ~0L, -4L);
+
+ CHECK(skel->bss->copy_test != 3, "copy_test",
+ "copy_test = %d\n", skel->bss->copy_test);
+
close_prog:
lsm__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c
new file mode 100644
index 0000000..48e62f3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c
new file mode 100644
index 0000000..53e5e5a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval___diff x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c
new file mode 100644
index 0000000..d024fb2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval___err_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c
new file mode 100644
index 0000000..9de6595
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval___val3_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c
new file mode 100644
index 0000000..f3e9904
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c
@@ -0,0 +1,4 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_size___err_ambiguous1 x,
+ struct core_reloc_size___err_ambiguous2 y) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c
new file mode 100644
index 0000000..fc3f69e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c
new file mode 100644
index 0000000..5151164
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___all_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c
new file mode 100644
index 0000000..67db3dc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___diff_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c
new file mode 100644
index 0000000..b357fc6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___fn_wrong_args x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c
new file mode 100644
index 0000000..8ddf20d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___incompat x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c
new file mode 100644
index 0000000..abbe5bd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_id x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c
new file mode 100644
index 0000000..24e7caf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_id___missing_targets x) {}
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 69139ed..e6e616c 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -652,7 +652,7 @@ struct core_reloc_misc_extensible {
};
/*
- * EXISTENCE
+ * FIELD EXISTENCE
*/
struct core_reloc_existence_output {
int a_exists;
@@ -809,3 +809,353 @@ struct core_reloc_size___diff_sz {
void *ptr_field;
enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field;
};
+
+/* Error case of two candidates with the fields (int_field) at the same
+ * offset, but with differing final relocation values: size 4 vs size 1
+ */
+struct core_reloc_size___err_ambiguous1 {
+ /* int at offset 0 */
+ int int_field;
+
+ struct { int x; } struct_field;
+ union { int x; } union_field;
+ int arr_field[4];
+ void *ptr_field;
+ enum { VALUE___1 = 123 } enum_field;
+};
+
+struct core_reloc_size___err_ambiguous2 {
+ /* char at offset 0 */
+ char int_field;
+
+ struct { int x; } struct_field;
+ union { int x; } union_field;
+ int arr_field[4];
+ void *ptr_field;
+ enum { VALUE___2 = 123 } enum_field;
+};
+
+/*
+ * TYPE EXISTENCE & SIZE
+ */
+struct core_reloc_type_based_output {
+ bool struct_exists;
+ bool union_exists;
+ bool enum_exists;
+ bool typedef_named_struct_exists;
+ bool typedef_anon_struct_exists;
+ bool typedef_struct_ptr_exists;
+ bool typedef_int_exists;
+ bool typedef_enum_exists;
+ bool typedef_void_ptr_exists;
+ bool typedef_func_proto_exists;
+ bool typedef_arr_exists;
+
+ int struct_sz;
+ int union_sz;
+ int enum_sz;
+ int typedef_named_struct_sz;
+ int typedef_anon_struct_sz;
+ int typedef_struct_ptr_sz;
+ int typedef_int_sz;
+ int typedef_enum_sz;
+ int typedef_void_ptr_sz;
+ int typedef_func_proto_sz;
+ int typedef_arr_sz;
+};
+
+struct a_struct {
+ int x;
+};
+
+union a_union {
+ int y;
+ int z;
+};
+
+typedef struct a_struct named_struct_typedef;
+
+typedef struct { int x, y, z; } anon_struct_typedef;
+
+typedef struct {
+ int a, b, c;
+} *struct_ptr_typedef;
+
+enum an_enum {
+ AN_ENUM_VAL1 = 1,
+ AN_ENUM_VAL2 = 2,
+ AN_ENUM_VAL3 = 3,
+};
+
+typedef int int_typedef;
+
+typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef;
+
+typedef void *void_ptr_typedef;
+
+typedef int (*func_proto_typedef)(long);
+
+typedef char arr_typedef[20];
+
+struct core_reloc_type_based {
+ struct a_struct f1;
+ union a_union f2;
+ enum an_enum f3;
+ named_struct_typedef f4;
+ anon_struct_typedef f5;
+ struct_ptr_typedef f6;
+ int_typedef f7;
+ enum_typedef f8;
+ void_ptr_typedef f9;
+ func_proto_typedef f10;
+ arr_typedef f11;
+};
+
+/* no types in target */
+struct core_reloc_type_based___all_missing {
+};
+
+/* different type sizes, extra modifiers, anon vs named enums, etc */
+struct a_struct___diff_sz {
+ long x;
+ int y;
+ char z;
+};
+
+union a_union___diff_sz {
+ char yy;
+ char zz;
+};
+
+typedef struct a_struct___diff_sz named_struct_typedef___diff_sz;
+
+typedef struct { long xx, yy, zzz; } anon_struct_typedef___diff_sz;
+
+typedef struct {
+ char aa[1], bb[2], cc[3];
+} *struct_ptr_typedef___diff_sz;
+
+enum an_enum___diff_sz {
+ AN_ENUM_VAL1___diff_sz = 0x123412341234,
+ AN_ENUM_VAL2___diff_sz = 2,
+};
+
+typedef unsigned long int_typedef___diff_sz;
+
+typedef enum an_enum___diff_sz enum_typedef___diff_sz;
+
+typedef const void * const void_ptr_typedef___diff_sz;
+
+typedef int_typedef___diff_sz (*func_proto_typedef___diff_sz)(char);
+
+typedef int arr_typedef___diff_sz[2];
+
+struct core_reloc_type_based___diff_sz {
+ struct a_struct___diff_sz f1;
+ union a_union___diff_sz f2;
+ enum an_enum___diff_sz f3;
+ named_struct_typedef___diff_sz f4;
+ anon_struct_typedef___diff_sz f5;
+ struct_ptr_typedef___diff_sz f6;
+ int_typedef___diff_sz f7;
+ enum_typedef___diff_sz f8;
+ void_ptr_typedef___diff_sz f9;
+ func_proto_typedef___diff_sz f10;
+ arr_typedef___diff_sz f11;
+};
+
+/* incompatibilities between target and local types */
+union a_struct___incompat { /* union instead of struct */
+ int x;
+};
+
+struct a_union___incompat { /* struct instead of union */
+ int y;
+ int z;
+};
+
+/* typedef to union, not to struct */
+typedef union a_struct___incompat named_struct_typedef___incompat;
+
+/* typedef to void pointer, instead of struct */
+typedef void *anon_struct_typedef___incompat;
+
+/* extra pointer indirection */
+typedef struct {
+ int a, b, c;
+} **struct_ptr_typedef___incompat;
+
+/* typedef of a struct with int, instead of int */
+typedef struct { int x; } int_typedef___incompat;
+
+/* typedef to func_proto, instead of enum */
+typedef int (*enum_typedef___incompat)(void);
+
+/* pointer to char instead of void */
+typedef char *void_ptr_typedef___incompat;
+
+/* void return type instead of int */
+typedef void (*func_proto_typedef___incompat)(long);
+
+/* multi-dimensional array instead of a single-dimensional */
+typedef int arr_typedef___incompat[20][2];
+
+struct core_reloc_type_based___incompat {
+ union a_struct___incompat f1;
+ struct a_union___incompat f2;
+ /* the only valid one is enum, to check that something still succeeds */
+ enum an_enum f3;
+ named_struct_typedef___incompat f4;
+ anon_struct_typedef___incompat f5;
+ struct_ptr_typedef___incompat f6;
+ int_typedef___incompat f7;
+ enum_typedef___incompat f8;
+ void_ptr_typedef___incompat f9;
+ func_proto_typedef___incompat f10;
+ arr_typedef___incompat f11;
+};
+
+/* func_proto with incompatible signature */
+typedef void (*func_proto_typedef___fn_wrong_ret1)(long);
+typedef int * (*func_proto_typedef___fn_wrong_ret2)(long);
+typedef struct { int x; } int_struct_typedef;
+typedef int_struct_typedef (*func_proto_typedef___fn_wrong_ret3)(long);
+typedef int (*func_proto_typedef___fn_wrong_arg)(void *);
+typedef int (*func_proto_typedef___fn_wrong_arg_cnt1)(long, long);
+typedef int (*func_proto_typedef___fn_wrong_arg_cnt2)(void);
+
+struct core_reloc_type_based___fn_wrong_args {
+ /* one valid type to make sure relos still work */
+ struct a_struct f1;
+ func_proto_typedef___fn_wrong_ret1 f2;
+ func_proto_typedef___fn_wrong_ret2 f3;
+ func_proto_typedef___fn_wrong_ret3 f4;
+ func_proto_typedef___fn_wrong_arg f5;
+ func_proto_typedef___fn_wrong_arg_cnt1 f6;
+ func_proto_typedef___fn_wrong_arg_cnt2 f7;
+};
+
+/*
+ * TYPE ID MAPPING (LOCAL AND TARGET)
+ */
+struct core_reloc_type_id_output {
+ int local_anon_struct;
+ int local_anon_union;
+ int local_anon_enum;
+ int local_anon_func_proto_ptr;
+ int local_anon_void_ptr;
+ int local_anon_arr;
+
+ int local_struct;
+ int local_union;
+ int local_enum;
+ int local_int;
+ int local_struct_typedef;
+ int local_func_proto_typedef;
+ int local_arr_typedef;
+
+ int targ_struct;
+ int targ_union;
+ int targ_enum;
+ int targ_int;
+ int targ_struct_typedef;
+ int targ_func_proto_typedef;
+ int targ_arr_typedef;
+};
+
+struct core_reloc_type_id {
+ struct a_struct f1;
+ union a_union f2;
+ enum an_enum f3;
+ named_struct_typedef f4;
+ func_proto_typedef f5;
+ arr_typedef f6;
+};
+
+struct core_reloc_type_id___missing_targets {
+ /* nothing */
+};
+
+/*
+ * ENUMERATOR VALUE EXISTENCE AND VALUE RELOCATION
+ */
+struct core_reloc_enumval_output {
+ bool named_val1_exists;
+ bool named_val2_exists;
+ bool named_val3_exists;
+ bool anon_val1_exists;
+ bool anon_val2_exists;
+ bool anon_val3_exists;
+
+ int named_val1;
+ int named_val2;
+ int anon_val1;
+ int anon_val2;
+};
+
+enum named_enum {
+ NAMED_ENUM_VAL1 = 1,
+ NAMED_ENUM_VAL2 = 2,
+ NAMED_ENUM_VAL3 = 3,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1 = 0x10,
+ ANON_ENUM_VAL2 = 0x20,
+ ANON_ENUM_VAL3 = 0x30,
+} anon_enum;
+
+struct core_reloc_enumval {
+ enum named_enum f1;
+ anon_enum f2;
+};
+
+/* differing enumerator values */
+enum named_enum___diff {
+ NAMED_ENUM_VAL1___diff = 101,
+ NAMED_ENUM_VAL2___diff = 202,
+ NAMED_ENUM_VAL3___diff = 303,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1___diff = 0x11,
+ ANON_ENUM_VAL2___diff = 0x22,
+ ANON_ENUM_VAL3___diff = 0x33,
+} anon_enum___diff;
+
+struct core_reloc_enumval___diff {
+ enum named_enum___diff f1;
+ anon_enum___diff f2;
+};
+
+/* missing (optional) third enum value */
+enum named_enum___val3_missing {
+ NAMED_ENUM_VAL1___val3_missing = 111,
+ NAMED_ENUM_VAL2___val3_missing = 222,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1___val3_missing = 0x111,
+ ANON_ENUM_VAL2___val3_missing = 0x222,
+} anon_enum___val3_missing;
+
+struct core_reloc_enumval___val3_missing {
+ enum named_enum___val3_missing f1;
+ anon_enum___val3_missing f2;
+};
+
+/* missing (mandatory) second enum value, should fail */
+enum named_enum___err_missing {
+ NAMED_ENUM_VAL1___err_missing = 1,
+ NAMED_ENUM_VAL3___err_missing = 3,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1___err_missing = 0x111,
+ ANON_ENUM_VAL3___err_missing = 0x222,
+} anon_enum___err_missing;
+
+struct core_reloc_enumval___err_missing {
+ enum named_enum___err_missing f1;
+ anon_enum___err_missing f2;
+};
diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
index 98e1efe..49a84a3 100644
--- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
@@ -1,8 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <linux/stddef.h>
+#include <linux/if_ether.h>
#include <linux/ipv6.h>
#include <linux/bpf.h>
+#include <linux/tcp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_tracing.h>
@@ -151,4 +153,29 @@ int new_get_constant(long val)
test_get_constant = 1;
return test_get_constant; /* original get_constant() returns val - 122 */
}
+
+__u64 test_pkt_write_access_subprog = 0;
+SEC("freplace/test_pkt_write_access_subprog")
+int new_test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off)
+{
+
+ void *data = (void *)(long)skb->data;
+ void *data_end = (void *)(long)skb->data_end;
+ struct tcphdr *tcp;
+
+ if (off > sizeof(struct ethhdr) + sizeof(struct ipv6hdr))
+ return -1;
+
+ tcp = data + off;
+ if (tcp + 1 > data_end)
+ return -1;
+
+ /* make modifications to the packet data */
+ tcp->check++;
+ tcp->syn = 0;
+
+ test_pkt_write_access_subprog = 1;
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_attach_probe.c b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c
new file mode 100644
index 0000000..bb2a77c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define VAR_NUM 2
+
+struct hmap_elem {
+ struct bpf_spin_lock lock;
+ int var[VAR_NUM];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, struct hmap_elem);
+} hash_map SEC(".maps");
+
+SEC("freplace/handle_kprobe")
+int new_handle_kprobe(struct pt_regs *ctx)
+{
+ struct hmap_elem zero = {}, *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&hash_map, &key);
+ if (!val)
+ return 1;
+ /* spin_lock in hash map */
+ bpf_spin_lock(&val->lock);
+ val->var[0] = 99;
+ bpf_spin_unlock(&val->lock);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
new file mode 100644
index 0000000..68a5a9d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+struct bpf_map_def SEC("maps") sock_map = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 2,
+};
+
+SEC("freplace/cls_redirect")
+int freplace_cls_redirect_test(struct __sk_buff *skb)
+{
+ int ret = 0;
+ const int zero = 0;
+ struct bpf_sock *sk;
+
+ sk = bpf_map_lookup_elem(&sock_map, &zero);
+ if (!sk)
+ return TC_ACT_SHOT;
+
+ ret = bpf_map_update_elem(&sock_map, &zero, sk, 0);
+ bpf_sk_release(sk);
+
+ return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
new file mode 100644
index 0000000..544e5ac
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/stddef.h>
+#include <linux/ipv6.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <sys/socket.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+SEC("freplace/connect_v4_prog")
+int new_connect_v4_prog(struct bpf_sock_addr *ctx)
+{
+ // return value thats in invalid range
+ return 255;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
new file mode 100644
index 0000000..0758ba2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define DUMMY_STORAGE_VALUE 0xdeadbeef
+
+int monitored_pid = 0;
+int inode_storage_result = -1;
+int sk_storage_result = -1;
+
+struct dummy_storage {
+ __u32 value;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_INODE_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct dummy_storage);
+} inode_storage_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
+ __type(key, int);
+ __type(value, struct dummy_storage);
+} sk_storage_map SEC(".maps");
+
+/* TODO Use vmlinux.h once BTF pruning for embedded types is fixed.
+ */
+struct sock {} __attribute__((preserve_access_index));
+struct sockaddr {} __attribute__((preserve_access_index));
+struct socket {
+ struct sock *sk;
+} __attribute__((preserve_access_index));
+
+struct inode {} __attribute__((preserve_access_index));
+struct dentry {
+ struct inode *d_inode;
+} __attribute__((preserve_access_index));
+struct file {
+ struct inode *f_inode;
+} __attribute__((preserve_access_index));
+
+
+SEC("lsm/inode_unlink")
+int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct dummy_storage *storage;
+
+ if (pid != monitored_pid)
+ return 0;
+
+ storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0;
+
+ if (storage->value == DUMMY_STORAGE_VALUE)
+ inode_storage_result = -1;
+
+ inode_storage_result =
+ bpf_inode_storage_delete(&inode_storage_map, victim->d_inode);
+
+ return 0;
+}
+
+SEC("lsm/socket_bind")
+int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
+ int addrlen)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct dummy_storage *storage;
+
+ if (pid != monitored_pid)
+ return 0;
+
+ storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0;
+
+ if (storage->value == DUMMY_STORAGE_VALUE)
+ sk_storage_result = -1;
+
+ sk_storage_result = bpf_sk_storage_delete(&sk_storage_map, sock->sk);
+ return 0;
+}
+
+SEC("lsm/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
+ int protocol, int kern)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct dummy_storage *storage;
+
+ if (pid != monitored_pid)
+ return 0;
+
+ storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0;
+
+ storage->value = DUMMY_STORAGE_VALUE;
+
+ return 0;
+}
+
+SEC("lsm/file_open")
+int BPF_PROG(file_open, struct file *file)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct dummy_storage *storage;
+
+ if (pid != monitored_pid)
+ return 0;
+
+ if (!file->f_inode)
+ return 0;
+
+ storage = bpf_inode_storage_get(&inode_storage_map, file->f_inode, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0;
+
+ storage->value = DUMMY_STORAGE_VALUE;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c
index b4598d4..ff4d343 100644
--- a/tools/testing/selftests/bpf/progs/lsm.c
+++ b/tools/testing/selftests/bpf/progs/lsm.c
@@ -9,6 +9,27 @@
#include <bpf/bpf_tracing.h>
#include <errno.h>
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} hash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} lru_hash SEC(".maps");
+
char _license[] SEC("license") = "GPL";
int monitored_pid = 0;
@@ -36,13 +57,54 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
return ret;
}
-SEC("lsm/bprm_committed_creds")
+SEC("lsm.s/bprm_committed_creds")
int BPF_PROG(test_void_hook, struct linux_binprm *bprm)
{
__u32 pid = bpf_get_current_pid_tgid() >> 32;
+ char args[64];
+ __u32 key = 0;
+ __u64 *value;
if (monitored_pid == pid)
bprm_count++;
+ bpf_copy_from_user(args, sizeof(args), (void *)bprm->vma->vm_mm->arg_start);
+ bpf_copy_from_user(args, sizeof(args), (void *)bprm->mm->arg_start);
+
+ value = bpf_map_lookup_elem(&array, &key);
+ if (value)
+ *value = 0;
+ value = bpf_map_lookup_elem(&hash, &key);
+ if (value)
+ *value = 0;
+ value = bpf_map_lookup_elem(&lru_hash, &key);
+ if (value)
+ *value = 0;
+
+ return 0;
+}
+SEC("lsm/task_free") /* lsm/ is ok, lsm.s/ fails */
+int BPF_PROG(test_task_free, struct task_struct *task)
+{
+ return 0;
+}
+
+int copy_test = 0;
+
+SEC("fentry.s/__x64_sys_setdomainname")
+int BPF_PROG(test_sys_setdomainname, struct pt_regs *regs)
+{
+ void *ptr = (void *)PT_REGS_PARM1(regs);
+ int len = PT_REGS_PARM2(regs);
+ int buf = 0;
+ long ret;
+
+ ret = bpf_copy_from_user(&buf, sizeof(buf), ptr);
+ if (len == -2 && ret == 0 && buf == 1234)
+ copy_test++;
+ if (len == -3 && ret == -EFAULT)
+ copy_test++;
+ if (len == -4 && ret == -EFAULT)
+ copy_test++;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index 473665c..982a2d8 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -589,7 +589,7 @@ static inline int check_stack(void)
return 1;
}
-struct bpf_sk_storage_map {
+struct bpf_local_storage_map {
struct bpf_map map;
} __attribute__((preserve_access_index));
@@ -602,8 +602,8 @@ struct {
static inline int check_sk_storage(void)
{
- struct bpf_sk_storage_map *sk_storage =
- (struct bpf_sk_storage_map *)&m_sk_storage;
+ struct bpf_local_storage_map *sk_storage =
+ (struct bpf_local_storage_map *)&m_sk_storage;
struct bpf_map *map = (struct bpf_map *)&m_sk_storage;
VERIFY(check(&sk_storage->map, map, sizeof(__u32), sizeof(__u32), 0));
diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
index e509379..193fe01 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
@@ -11,6 +11,13 @@ struct inner_map {
} inner_map1 SEC(".maps"),
inner_map2 SEC(".maps");
+struct inner_map_sz2 {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, int);
+} inner_map_sz2 SEC(".maps");
+
struct outer_arr {
__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
__uint(max_entries, 3);
@@ -50,6 +57,30 @@ struct outer_hash {
},
};
+struct sockarr_sz1 {
+ __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} sockarr_sz1 SEC(".maps");
+
+struct sockarr_sz2 {
+ __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, int);
+} sockarr_sz2 SEC(".maps");
+
+struct outer_sockarr_sz1 {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __array(values, struct sockarr_sz1);
+} outer_sockarr SEC(".maps") = {
+ .values = { (void *)&sockarr_sz1 },
+};
+
int input = 0;
SEC("raw_tp/sys_enter")
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c b/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c
new file mode 100644
index 0000000..e7ef3da
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+ bool skip;
+} data = {};
+
+enum named_enum {
+ NAMED_ENUM_VAL1 = 1,
+ NAMED_ENUM_VAL2 = 2,
+ NAMED_ENUM_VAL3 = 3,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1 = 0x10,
+ ANON_ENUM_VAL2 = 0x20,
+ ANON_ENUM_VAL3 = 0x30,
+} anon_enum;
+
+struct core_reloc_enumval_output {
+ bool named_val1_exists;
+ bool named_val2_exists;
+ bool named_val3_exists;
+ bool anon_val1_exists;
+ bool anon_val2_exists;
+ bool anon_val3_exists;
+
+ int named_val1;
+ int named_val2;
+ int anon_val1;
+ int anon_val2;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_enumval(void *ctx)
+{
+#if __has_builtin(__builtin_preserve_enum_value)
+ struct core_reloc_enumval_output *out = (void *)&data.out;
+ enum named_enum named = 0;
+ anon_enum anon = 0;
+
+ out->named_val1_exists = bpf_core_enum_value_exists(named, NAMED_ENUM_VAL1);
+ out->named_val2_exists = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL2);
+ out->named_val3_exists = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL3);
+
+ out->anon_val1_exists = bpf_core_enum_value_exists(anon, ANON_ENUM_VAL1);
+ out->anon_val2_exists = bpf_core_enum_value_exists(anon_enum, ANON_ENUM_VAL2);
+ out->anon_val3_exists = bpf_core_enum_value_exists(anon_enum, ANON_ENUM_VAL3);
+
+ out->named_val1 = bpf_core_enum_value(named, NAMED_ENUM_VAL1);
+ out->named_val2 = bpf_core_enum_value(named, NAMED_ENUM_VAL2);
+ /* NAMED_ENUM_VAL3 value is optional */
+
+ out->anon_val1 = bpf_core_enum_value(anon, ANON_ENUM_VAL1);
+ out->anon_val2 = bpf_core_enum_value(anon, ANON_ENUM_VAL2);
+ /* ANON_ENUM_VAL3 value is optional */
+#else
+ data.skip = true;
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
index aba928f..145028b 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
@@ -3,6 +3,7 @@
#include <linux/bpf.h>
#include <stdint.h>
+#include <stdbool.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
@@ -11,6 +12,7 @@ char _license[] SEC("license") = "GPL";
struct {
char in[256];
char out[256];
+ bool skip;
uint64_t my_pid_tgid;
} data = {};
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
new file mode 100644
index 0000000..fb60f81
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+ bool skip;
+} data = {};
+
+struct a_struct {
+ int x;
+};
+
+union a_union {
+ int y;
+ int z;
+};
+
+typedef struct a_struct named_struct_typedef;
+
+typedef struct { int x, y, z; } anon_struct_typedef;
+
+typedef struct {
+ int a, b, c;
+} *struct_ptr_typedef;
+
+enum an_enum {
+ AN_ENUM_VAL1 = 1,
+ AN_ENUM_VAL2 = 2,
+ AN_ENUM_VAL3 = 3,
+};
+
+typedef int int_typedef;
+
+typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef;
+
+typedef void *void_ptr_typedef;
+
+typedef int (*func_proto_typedef)(long);
+
+typedef char arr_typedef[20];
+
+struct core_reloc_type_based_output {
+ bool struct_exists;
+ bool union_exists;
+ bool enum_exists;
+ bool typedef_named_struct_exists;
+ bool typedef_anon_struct_exists;
+ bool typedef_struct_ptr_exists;
+ bool typedef_int_exists;
+ bool typedef_enum_exists;
+ bool typedef_void_ptr_exists;
+ bool typedef_func_proto_exists;
+ bool typedef_arr_exists;
+
+ int struct_sz;
+ int union_sz;
+ int enum_sz;
+ int typedef_named_struct_sz;
+ int typedef_anon_struct_sz;
+ int typedef_struct_ptr_sz;
+ int typedef_int_sz;
+ int typedef_enum_sz;
+ int typedef_void_ptr_sz;
+ int typedef_func_proto_sz;
+ int typedef_arr_sz;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_type_based(void *ctx)
+{
+#if __has_builtin(__builtin_preserve_type_info)
+ struct core_reloc_type_based_output *out = (void *)&data.out;
+
+ out->struct_exists = bpf_core_type_exists(struct a_struct);
+ out->union_exists = bpf_core_type_exists(union a_union);
+ out->enum_exists = bpf_core_type_exists(enum an_enum);
+ out->typedef_named_struct_exists = bpf_core_type_exists(named_struct_typedef);
+ out->typedef_anon_struct_exists = bpf_core_type_exists(anon_struct_typedef);
+ out->typedef_struct_ptr_exists = bpf_core_type_exists(struct_ptr_typedef);
+ out->typedef_int_exists = bpf_core_type_exists(int_typedef);
+ out->typedef_enum_exists = bpf_core_type_exists(enum_typedef);
+ out->typedef_void_ptr_exists = bpf_core_type_exists(void_ptr_typedef);
+ out->typedef_func_proto_exists = bpf_core_type_exists(func_proto_typedef);
+ out->typedef_arr_exists = bpf_core_type_exists(arr_typedef);
+
+ out->struct_sz = bpf_core_type_size(struct a_struct);
+ out->union_sz = bpf_core_type_size(union a_union);
+ out->enum_sz = bpf_core_type_size(enum an_enum);
+ out->typedef_named_struct_sz = bpf_core_type_size(named_struct_typedef);
+ out->typedef_anon_struct_sz = bpf_core_type_size(anon_struct_typedef);
+ out->typedef_struct_ptr_sz = bpf_core_type_size(struct_ptr_typedef);
+ out->typedef_int_sz = bpf_core_type_size(int_typedef);
+ out->typedef_enum_sz = bpf_core_type_size(enum_typedef);
+ out->typedef_void_ptr_sz = bpf_core_type_size(void_ptr_typedef);
+ out->typedef_func_proto_sz = bpf_core_type_size(func_proto_typedef);
+ out->typedef_arr_sz = bpf_core_type_size(arr_typedef);
+#else
+ data.skip = true;
+#endif
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c
new file mode 100644
index 0000000..22aba3f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+ bool skip;
+} data = {};
+
+/* some types are shared with test_core_reloc_type_based.c */
+struct a_struct {
+ int x;
+};
+
+union a_union {
+ int y;
+ int z;
+};
+
+enum an_enum {
+ AN_ENUM_VAL1 = 1,
+ AN_ENUM_VAL2 = 2,
+ AN_ENUM_VAL3 = 3,
+};
+
+typedef struct a_struct named_struct_typedef;
+
+typedef int (*func_proto_typedef)(long);
+
+typedef char arr_typedef[20];
+
+struct core_reloc_type_id_output {
+ int local_anon_struct;
+ int local_anon_union;
+ int local_anon_enum;
+ int local_anon_func_proto_ptr;
+ int local_anon_void_ptr;
+ int local_anon_arr;
+
+ int local_struct;
+ int local_union;
+ int local_enum;
+ int local_int;
+ int local_struct_typedef;
+ int local_func_proto_typedef;
+ int local_arr_typedef;
+
+ int targ_struct;
+ int targ_union;
+ int targ_enum;
+ int targ_int;
+ int targ_struct_typedef;
+ int targ_func_proto_typedef;
+ int targ_arr_typedef;
+};
+
+/* preserve types even if Clang doesn't support built-in */
+struct a_struct t1 = {};
+union a_union t2 = {};
+enum an_enum t3 = 0;
+named_struct_typedef t4 = {};
+func_proto_typedef t5 = 0;
+arr_typedef t6 = {};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_type_id(void *ctx)
+{
+ /* We use __builtin_btf_type_id() in this tests, but up until the time
+ * __builtin_preserve_type_info() was added it contained a bug that
+ * would make this test fail. The bug was fixed ([0]) with addition of
+ * __builtin_preserve_type_info(), though, so that's what we are using
+ * to detect whether this test has to be executed, however strange
+ * that might look like.
+ *
+ * [0] https://reviews.llvm.org/D85174
+ */
+#if __has_builtin(__builtin_preserve_type_info)
+ struct core_reloc_type_id_output *out = (void *)&data.out;
+
+ out->local_anon_struct = bpf_core_type_id_local(struct { int marker_field; });
+ out->local_anon_union = bpf_core_type_id_local(union { int marker_field; });
+ out->local_anon_enum = bpf_core_type_id_local(enum { MARKER_ENUM_VAL = 123 });
+ out->local_anon_func_proto_ptr = bpf_core_type_id_local(_Bool(*)(int));
+ out->local_anon_void_ptr = bpf_core_type_id_local(void *);
+ out->local_anon_arr = bpf_core_type_id_local(_Bool[47]);
+
+ out->local_struct = bpf_core_type_id_local(struct a_struct);
+ out->local_union = bpf_core_type_id_local(union a_union);
+ out->local_enum = bpf_core_type_id_local(enum an_enum);
+ out->local_int = bpf_core_type_id_local(int);
+ out->local_struct_typedef = bpf_core_type_id_local(named_struct_typedef);
+ out->local_func_proto_typedef = bpf_core_type_id_local(func_proto_typedef);
+ out->local_arr_typedef = bpf_core_type_id_local(arr_typedef);
+
+ out->targ_struct = bpf_core_type_id_kernel(struct a_struct);
+ out->targ_union = bpf_core_type_id_kernel(union a_union);
+ out->targ_enum = bpf_core_type_id_kernel(enum an_enum);
+ out->targ_int = bpf_core_type_id_kernel(int);
+ out->targ_struct_typedef = bpf_core_type_id_kernel(named_struct_typedef);
+ out->targ_func_proto_typedef = bpf_core_type_id_kernel(func_proto_typedef);
+ out->targ_arr_typedef = bpf_core_type_id_kernel(arr_typedef);
+#else
+ data.skip = true;
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_d_path.c b/tools/testing/selftests/bpf/progs/test_d_path.c
new file mode 100644
index 0000000..61f0078
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_d_path.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define MAX_PATH_LEN 128
+#define MAX_FILES 7
+
+pid_t my_pid = 0;
+__u32 cnt_stat = 0;
+__u32 cnt_close = 0;
+char paths_stat[MAX_FILES][MAX_PATH_LEN] = {};
+char paths_close[MAX_FILES][MAX_PATH_LEN] = {};
+int rets_stat[MAX_FILES] = {};
+int rets_close[MAX_FILES] = {};
+
+SEC("fentry/vfs_getattr")
+int BPF_PROG(prog_stat, struct path *path, struct kstat *stat,
+ __u32 request_mask, unsigned int query_flags)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+ __u32 cnt = cnt_stat;
+ int ret;
+
+ if (pid != my_pid)
+ return 0;
+
+ if (cnt >= MAX_FILES)
+ return 0;
+ ret = bpf_d_path(path, paths_stat[cnt], MAX_PATH_LEN);
+
+ rets_stat[cnt] = ret;
+ cnt_stat++;
+ return 0;
+}
+
+SEC("fentry/filp_close")
+int BPF_PROG(prog_close, struct file *file, void *id)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+ __u32 cnt = cnt_close;
+ int ret;
+
+ if (pid != my_pid)
+ return 0;
+
+ if (cnt >= MAX_FILES)
+ return 0;
+ ret = bpf_d_path(&file->f_path,
+ paths_close[cnt], MAX_PATH_LEN);
+
+ rets_close[cnt] = ret;
+ cnt_close++;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
new file mode 100644
index 0000000..3a216d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <stddef.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#define BPF_PROG_TEST_TCP_HDR_OPTIONS
+#include "test_tcp_hdr_options.h"
+
+__u16 last_addr16_n = __bpf_htons(0xeB9F);
+__u16 active_lport_n = 0;
+__u16 active_lport_h = 0;
+__u16 passive_lport_n = 0;
+__u16 passive_lport_h = 0;
+
+/* options received at passive side */
+unsigned int nr_pure_ack = 0;
+unsigned int nr_data = 0;
+unsigned int nr_syn = 0;
+unsigned int nr_fin = 0;
+
+/* Check the header received from the active side */
+static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn)
+{
+ union {
+ struct tcphdr th;
+ struct ipv6hdr ip6;
+ struct tcp_exprm_opt exprm_opt;
+ struct tcp_opt reg_opt;
+ __u8 data[100]; /* IPv6 (40) + Max TCP hdr (60) */
+ } hdr = {};
+ __u64 load_flags = check_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;
+ struct tcphdr *pth;
+ int ret;
+
+ hdr.reg_opt.kind = 0xB9;
+
+ /* The option is 4 bytes long instead of 2 bytes */
+ ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, 2, load_flags);
+ if (ret != -ENOSPC)
+ RET_CG_ERR(ret);
+
+ /* Test searching magic with regular kind */
+ hdr.reg_opt.len = 4;
+ ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt),
+ load_flags);
+ if (ret != -EINVAL)
+ RET_CG_ERR(ret);
+
+ hdr.reg_opt.len = 0;
+ ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt),
+ load_flags);
+ if (ret != 4 || hdr.reg_opt.len != 4 || hdr.reg_opt.kind != 0xB9 ||
+ hdr.reg_opt.data[0] != 0xfa || hdr.reg_opt.data[1] != 0xce)
+ RET_CG_ERR(ret);
+
+ /* Test searching experimental option with invalid kind length */
+ hdr.exprm_opt.kind = TCPOPT_EXP;
+ hdr.exprm_opt.len = 5;
+ hdr.exprm_opt.magic = 0;
+ ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
+ load_flags);
+ if (ret != -EINVAL)
+ RET_CG_ERR(ret);
+
+ /* Test searching experimental option with 0 magic value */
+ hdr.exprm_opt.len = 4;
+ ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
+ load_flags);
+ if (ret != -ENOMSG)
+ RET_CG_ERR(ret);
+
+ hdr.exprm_opt.magic = __bpf_htons(0xeB9F);
+ ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
+ load_flags);
+ if (ret != 4 || hdr.exprm_opt.len != 4 ||
+ hdr.exprm_opt.kind != TCPOPT_EXP ||
+ hdr.exprm_opt.magic != __bpf_htons(0xeB9F))
+ RET_CG_ERR(ret);
+
+ if (!check_syn)
+ return CG_OK;
+
+ /* Test loading from skops->syn_skb if sk_state == TCP_NEW_SYN_RECV
+ *
+ * Test loading from tp->saved_syn for other sk_state.
+ */
+ ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr.ip6,
+ sizeof(hdr.ip6));
+ if (ret != -ENOSPC)
+ RET_CG_ERR(ret);
+
+ if (hdr.ip6.saddr.s6_addr16[7] != last_addr16_n ||
+ hdr.ip6.daddr.s6_addr16[7] != last_addr16_n)
+ RET_CG_ERR(0);
+
+ ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr, sizeof(hdr));
+ if (ret < 0)
+ RET_CG_ERR(ret);
+
+ pth = (struct tcphdr *)(&hdr.ip6 + 1);
+ if (pth->dest != passive_lport_n || pth->source != active_lport_n)
+ RET_CG_ERR(0);
+
+ ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN, &hdr, sizeof(hdr));
+ if (ret < 0)
+ RET_CG_ERR(ret);
+
+ if (hdr.th.dest != passive_lport_n || hdr.th.source != active_lport_n)
+ RET_CG_ERR(0);
+
+ return CG_OK;
+}
+
+static int check_active_syn_in(struct bpf_sock_ops *skops)
+{
+ return __check_active_hdr_in(skops, true);
+}
+
+static int check_active_hdr_in(struct bpf_sock_ops *skops)
+{
+ struct tcphdr *th;
+
+ if (__check_active_hdr_in(skops, false) == CG_ERR)
+ return CG_ERR;
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ if (tcp_hdrlen(th) < skops->skb_len)
+ nr_data++;
+
+ if (th->fin)
+ nr_fin++;
+
+ if (th->ack && !th->fin && tcp_hdrlen(th) == skops->skb_len)
+ nr_pure_ack++;
+
+ return CG_OK;
+}
+
+static int active_opt_len(struct bpf_sock_ops *skops)
+{
+ int err;
+
+ /* Reserve more than enough to allow the -EEXIST test in
+ * the write_active_opt().
+ */
+ err = bpf_reserve_hdr_opt(skops, 12, 0);
+ if (err)
+ RET_CG_ERR(err);
+
+ return CG_OK;
+}
+
+static int write_active_opt(struct bpf_sock_ops *skops)
+{
+ struct tcp_exprm_opt exprm_opt = {};
+ struct tcp_opt win_scale_opt = {};
+ struct tcp_opt reg_opt = {};
+ struct tcphdr *th;
+ int err, ret;
+
+ exprm_opt.kind = TCPOPT_EXP;
+ exprm_opt.len = 4;
+ exprm_opt.magic = __bpf_htons(0xeB9F);
+
+ reg_opt.kind = 0xB9;
+ reg_opt.len = 4;
+ reg_opt.data[0] = 0xfa;
+ reg_opt.data[1] = 0xce;
+
+ win_scale_opt.kind = TCPOPT_WINDOW;
+
+ err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
+ if (err)
+ RET_CG_ERR(err);
+
+ /* Store the same exprm option */
+ err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
+ if (err != -EEXIST)
+ RET_CG_ERR(err);
+
+ err = bpf_store_hdr_opt(skops, ®_opt, sizeof(reg_opt), 0);
+ if (err)
+ RET_CG_ERR(err);
+ err = bpf_store_hdr_opt(skops, ®_opt, sizeof(reg_opt), 0);
+ if (err != -EEXIST)
+ RET_CG_ERR(err);
+
+ /* Check the option has been written and can be searched */
+ ret = bpf_load_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
+ if (ret != 4 || exprm_opt.len != 4 || exprm_opt.kind != TCPOPT_EXP ||
+ exprm_opt.magic != __bpf_htons(0xeB9F))
+ RET_CG_ERR(ret);
+
+ reg_opt.len = 0;
+ ret = bpf_load_hdr_opt(skops, ®_opt, sizeof(reg_opt), 0);
+ if (ret != 4 || reg_opt.len != 4 || reg_opt.kind != 0xB9 ||
+ reg_opt.data[0] != 0xfa || reg_opt.data[1] != 0xce)
+ RET_CG_ERR(ret);
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ if (th->syn) {
+ active_lport_h = skops->local_port;
+ active_lport_n = th->source;
+
+ /* Search the win scale option written by kernel
+ * in the SYN packet.
+ */
+ ret = bpf_load_hdr_opt(skops, &win_scale_opt,
+ sizeof(win_scale_opt), 0);
+ if (ret != 3 || win_scale_opt.len != 3 ||
+ win_scale_opt.kind != TCPOPT_WINDOW)
+ RET_CG_ERR(ret);
+
+ /* Write the win scale option that kernel
+ * has already written.
+ */
+ err = bpf_store_hdr_opt(skops, &win_scale_opt,
+ sizeof(win_scale_opt), 0);
+ if (err != -EEXIST)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
+{
+ __u8 tcp_flags = skops_tcp_flags(skops);
+
+ if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
+ /* Check the SYN from bpf_sock_ops_kern->syn_skb */
+ return check_active_syn_in(skops);
+
+ /* Passive side should have cleared the write hdr cb by now */
+ if (skops->local_port == passive_lport_h)
+ RET_CG_ERR(0);
+
+ return active_opt_len(skops);
+}
+
+static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
+{
+ if (skops->local_port == passive_lport_h)
+ RET_CG_ERR(0);
+
+ return write_active_opt(skops);
+}
+
+static int handle_parse_hdr(struct bpf_sock_ops *skops)
+{
+ /* Passive side is not writing any non-standard/unknown
+ * option, so the active side should never be called.
+ */
+ if (skops->local_port == active_lport_h)
+ RET_CG_ERR(0);
+
+ return check_active_hdr_in(skops);
+}
+
+static int handle_passive_estab(struct bpf_sock_ops *skops)
+{
+ int err;
+
+ /* No more write hdr cb */
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags &
+ ~BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
+
+ /* Recheck the SYN but check the tp->saved_syn this time */
+ err = check_active_syn_in(skops);
+ if (err == CG_ERR)
+ return err;
+
+ nr_syn++;
+
+ /* The ack has header option written by the active side also */
+ return check_active_hdr_in(skops);
+}
+
+SEC("sockops/misc_estab")
+int misc_estab(struct bpf_sock_ops *skops)
+{
+ int true_val = 1;
+
+ switch (skops->op) {
+ case BPF_SOCK_OPS_TCP_LISTEN_CB:
+ passive_lport_h = skops->local_port;
+ passive_lport_n = __bpf_htons(passive_lport_h);
+ bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
+ &true_val, sizeof(true_val));
+ set_hdr_cb_flags(skops);
+ break;
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ set_hdr_cb_flags(skops);
+ break;
+ case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
+ return handle_parse_hdr(skops);
+ case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
+ return handle_hdr_opt_len(skops);
+ case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
+ return handle_write_hdr_opt(skops);
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ return handle_passive_estab(skops);
+ }
+
+ return CG_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c
index e72eba4..8520510 100644
--- a/tools/testing/selftests/bpf/progs/test_pkt_access.c
+++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c
@@ -79,6 +79,24 @@ int get_skb_ifindex(int val, struct __sk_buff *skb, int var)
return skb->ifindex * val * var;
}
+__attribute__ ((noinline))
+int test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off)
+{
+ void *data = (void *)(long)skb->data;
+ void *data_end = (void *)(long)skb->data_end;
+ struct tcphdr *tcp = NULL;
+
+ if (off > sizeof(struct ethhdr) + sizeof(struct ipv6hdr))
+ return -1;
+
+ tcp = data + off;
+ if (tcp + 1 > data_end)
+ return -1;
+ /* make modification to the packet data */
+ tcp->check++;
+ return 0;
+}
+
SEC("classifier/test_pkt_access")
int test_pkt_access(struct __sk_buff *skb)
{
@@ -117,6 +135,8 @@ int test_pkt_access(struct __sk_buff *skb)
if (test_pkt_access_subprog3(3, skb) != skb->len * 3 * skb->ifindex)
return TC_ACT_SHOT;
if (tcp) {
+ if (test_pkt_write_access_subprog(skb, (void *)tcp - data))
+ return TC_ACT_SHOT;
if (((void *)(tcp) + 20) > data_end || proto != 6)
return TC_ACT_SHOT;
barrier(); /* to force ordering of checks */
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c
new file mode 100644
index 0000000..02a59e2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} map SEC(".maps");
+
+SEC("sockops")
+int bpf_sockmap(struct bpf_sock_ops *skops)
+{
+ __u32 key = 0;
+
+ if (skops->sk)
+ bpf_map_update_elem(&map, &key, skops->sk, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_update.c
new file mode 100644
index 0000000..9d0c9f2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_update.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} src SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} dst_sock_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} dst_sock_hash SEC(".maps");
+
+SEC("classifier/copy_sock_map")
+int copy_sock_map(void *ctx)
+{
+ struct bpf_sock *sk;
+ bool failed = false;
+ __u32 key = 0;
+
+ sk = bpf_map_lookup_elem(&src, &key);
+ if (!sk)
+ return SK_DROP;
+
+ if (bpf_map_update_elem(&dst_sock_map, &key, sk, 0))
+ failed = true;
+
+ if (bpf_map_update_elem(&dst_sock_hash, &key, sk, 0))
+ failed = true;
+
+ bpf_sk_release(sk);
+ return failed ? SK_DROP : SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
new file mode 100644
index 0000000..9197a23d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
@@ -0,0 +1,623 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <stddef.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#define BPF_PROG_TEST_TCP_HDR_OPTIONS
+#include "test_tcp_hdr_options.h"
+
+#ifndef sizeof_field
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+#endif
+
+__u8 test_kind = TCPOPT_EXP;
+__u16 test_magic = 0xeB9F;
+
+struct bpf_test_option passive_synack_out = {};
+struct bpf_test_option passive_fin_out = {};
+
+struct bpf_test_option passive_estab_in = {};
+struct bpf_test_option passive_fin_in = {};
+
+struct bpf_test_option active_syn_out = {};
+struct bpf_test_option active_fin_out = {};
+
+struct bpf_test_option active_estab_in = {};
+struct bpf_test_option active_fin_in = {};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct hdr_stg);
+} hdr_stg_map SEC(".maps");
+
+static bool skops_want_cookie(const struct bpf_sock_ops *skops)
+{
+ return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE;
+}
+
+static bool skops_current_mss(const struct bpf_sock_ops *skops)
+{
+ return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS;
+}
+
+static __u8 option_total_len(__u8 flags)
+{
+ __u8 i, len = 1; /* +1 for flags */
+
+ if (!flags)
+ return 0;
+
+ /* RESEND bit does not use a byte */
+ for (i = OPTION_RESEND + 1; i < __NR_OPTION_FLAGS; i++)
+ len += !!TEST_OPTION_FLAGS(flags, i);
+
+ if (test_kind == TCPOPT_EXP)
+ return len + TCP_BPF_EXPOPT_BASE_LEN;
+ else
+ return len + 2; /* +1 kind, +1 kind-len */
+}
+
+static void write_test_option(const struct bpf_test_option *test_opt,
+ __u8 *data)
+{
+ __u8 offset = 0;
+
+ data[offset++] = test_opt->flags;
+ if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS))
+ data[offset++] = test_opt->max_delack_ms;
+
+ if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND))
+ data[offset++] = test_opt->rand;
+}
+
+static int store_option(struct bpf_sock_ops *skops,
+ const struct bpf_test_option *test_opt)
+{
+ union {
+ struct tcp_exprm_opt exprm;
+ struct tcp_opt regular;
+ } write_opt;
+ int err;
+
+ if (test_kind == TCPOPT_EXP) {
+ write_opt.exprm.kind = TCPOPT_EXP;
+ write_opt.exprm.len = option_total_len(test_opt->flags);
+ write_opt.exprm.magic = __bpf_htons(test_magic);
+ write_opt.exprm.data32 = 0;
+ write_test_option(test_opt, write_opt.exprm.data);
+ err = bpf_store_hdr_opt(skops, &write_opt.exprm,
+ sizeof(write_opt.exprm), 0);
+ } else {
+ write_opt.regular.kind = test_kind;
+ write_opt.regular.len = option_total_len(test_opt->flags);
+ write_opt.regular.data32 = 0;
+ write_test_option(test_opt, write_opt.regular.data);
+ err = bpf_store_hdr_opt(skops, &write_opt.regular,
+ sizeof(write_opt.regular), 0);
+ }
+
+ if (err)
+ RET_CG_ERR(err);
+
+ return CG_OK;
+}
+
+static int parse_test_option(struct bpf_test_option *opt, const __u8 *start)
+{
+ opt->flags = *start++;
+
+ if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS))
+ opt->max_delack_ms = *start++;
+
+ if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND))
+ opt->rand = *start++;
+
+ return 0;
+}
+
+static int load_option(struct bpf_sock_ops *skops,
+ struct bpf_test_option *test_opt, bool from_syn)
+{
+ union {
+ struct tcp_exprm_opt exprm;
+ struct tcp_opt regular;
+ } search_opt;
+ int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;
+
+ if (test_kind == TCPOPT_EXP) {
+ search_opt.exprm.kind = TCPOPT_EXP;
+ search_opt.exprm.len = 4;
+ search_opt.exprm.magic = __bpf_htons(test_magic);
+ search_opt.exprm.data32 = 0;
+ ret = bpf_load_hdr_opt(skops, &search_opt.exprm,
+ sizeof(search_opt.exprm), load_flags);
+ if (ret < 0)
+ return ret;
+ return parse_test_option(test_opt, search_opt.exprm.data);
+ } else {
+ search_opt.regular.kind = test_kind;
+ search_opt.regular.len = 0;
+ search_opt.regular.data32 = 0;
+ ret = bpf_load_hdr_opt(skops, &search_opt.regular,
+ sizeof(search_opt.regular), load_flags);
+ if (ret < 0)
+ return ret;
+ return parse_test_option(test_opt, search_opt.regular.data);
+ }
+}
+
+static int synack_opt_len(struct bpf_sock_ops *skops)
+{
+ struct bpf_test_option test_opt = {};
+ __u8 optlen;
+ int err;
+
+ if (!passive_synack_out.flags)
+ return CG_OK;
+
+ err = load_option(skops, &test_opt, true);
+
+ /* bpf_test_option is not found */
+ if (err == -ENOMSG)
+ return CG_OK;
+
+ if (err)
+ RET_CG_ERR(err);
+
+ optlen = option_total_len(passive_synack_out.flags);
+ if (optlen) {
+ err = bpf_reserve_hdr_opt(skops, optlen, 0);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int write_synack_opt(struct bpf_sock_ops *skops)
+{
+ struct bpf_test_option opt;
+
+ if (!passive_synack_out.flags)
+ /* We should not even be called since no header
+ * space has been reserved.
+ */
+ RET_CG_ERR(0);
+
+ opt = passive_synack_out;
+ if (skops_want_cookie(skops))
+ SET_OPTION_FLAGS(opt.flags, OPTION_RESEND);
+
+ return store_option(skops, &opt);
+}
+
+static int syn_opt_len(struct bpf_sock_ops *skops)
+{
+ __u8 optlen;
+ int err;
+
+ if (!active_syn_out.flags)
+ return CG_OK;
+
+ optlen = option_total_len(active_syn_out.flags);
+ if (optlen) {
+ err = bpf_reserve_hdr_opt(skops, optlen, 0);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int write_syn_opt(struct bpf_sock_ops *skops)
+{
+ if (!active_syn_out.flags)
+ RET_CG_ERR(0);
+
+ return store_option(skops, &active_syn_out);
+}
+
+static int fin_opt_len(struct bpf_sock_ops *skops)
+{
+ struct bpf_test_option *opt;
+ struct hdr_stg *hdr_stg;
+ __u8 optlen;
+ int err;
+
+ if (!skops->sk)
+ RET_CG_ERR(0);
+
+ hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+ if (!hdr_stg)
+ RET_CG_ERR(0);
+
+ if (hdr_stg->active)
+ opt = &active_fin_out;
+ else
+ opt = &passive_fin_out;
+
+ optlen = option_total_len(opt->flags);
+ if (optlen) {
+ err = bpf_reserve_hdr_opt(skops, optlen, 0);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int write_fin_opt(struct bpf_sock_ops *skops)
+{
+ struct bpf_test_option *opt;
+ struct hdr_stg *hdr_stg;
+
+ if (!skops->sk)
+ RET_CG_ERR(0);
+
+ hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+ if (!hdr_stg)
+ RET_CG_ERR(0);
+
+ if (hdr_stg->active)
+ opt = &active_fin_out;
+ else
+ opt = &passive_fin_out;
+
+ if (!opt->flags)
+ RET_CG_ERR(0);
+
+ return store_option(skops, opt);
+}
+
+static int resend_in_ack(struct bpf_sock_ops *skops)
+{
+ struct hdr_stg *hdr_stg;
+
+ if (!skops->sk)
+ return -1;
+
+ hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+ if (!hdr_stg)
+ return -1;
+
+ return !!hdr_stg->resend_syn;
+}
+
+static int nodata_opt_len(struct bpf_sock_ops *skops)
+{
+ int resend;
+
+ resend = resend_in_ack(skops);
+ if (resend < 0)
+ RET_CG_ERR(0);
+
+ if (resend)
+ return syn_opt_len(skops);
+
+ return CG_OK;
+}
+
+static int write_nodata_opt(struct bpf_sock_ops *skops)
+{
+ int resend;
+
+ resend = resend_in_ack(skops);
+ if (resend < 0)
+ RET_CG_ERR(0);
+
+ if (resend)
+ return write_syn_opt(skops);
+
+ return CG_OK;
+}
+
+static int data_opt_len(struct bpf_sock_ops *skops)
+{
+ /* Same as the nodata version. Mostly to show
+ * an example usage on skops->skb_len.
+ */
+ return nodata_opt_len(skops);
+}
+
+static int write_data_opt(struct bpf_sock_ops *skops)
+{
+ return write_nodata_opt(skops);
+}
+
+static int current_mss_opt_len(struct bpf_sock_ops *skops)
+{
+ /* Reserve maximum that may be needed */
+ int err;
+
+ err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0);
+ if (err)
+ RET_CG_ERR(err);
+
+ return CG_OK;
+}
+
+static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
+{
+ __u8 tcp_flags = skops_tcp_flags(skops);
+
+ if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
+ return synack_opt_len(skops);
+
+ if (tcp_flags & TCPHDR_SYN)
+ return syn_opt_len(skops);
+
+ if (tcp_flags & TCPHDR_FIN)
+ return fin_opt_len(skops);
+
+ if (skops_current_mss(skops))
+ /* The kernel is calculating the MSS */
+ return current_mss_opt_len(skops);
+
+ if (skops->skb_len)
+ return data_opt_len(skops);
+
+ return nodata_opt_len(skops);
+}
+
+static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
+{
+ __u8 tcp_flags = skops_tcp_flags(skops);
+ struct tcphdr *th;
+
+ if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
+ return write_synack_opt(skops);
+
+ if (tcp_flags & TCPHDR_SYN)
+ return write_syn_opt(skops);
+
+ if (tcp_flags & TCPHDR_FIN)
+ return write_fin_opt(skops);
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ if (skops->skb_len > tcp_hdrlen(th))
+ return write_data_opt(skops);
+
+ return write_nodata_opt(skops);
+}
+
+static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms)
+{
+ __u32 max_delack_us = max_delack_ms * 1000;
+
+ return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX,
+ &max_delack_us, sizeof(max_delack_us));
+}
+
+static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms)
+{
+ __u32 min_rto_us = peer_max_delack_ms * 1000;
+
+ return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &min_rto_us,
+ sizeof(min_rto_us));
+}
+
+static int handle_active_estab(struct bpf_sock_ops *skops)
+{
+ struct hdr_stg init_stg = {
+ .active = true,
+ };
+ int err;
+
+ err = load_option(skops, &active_estab_in, false);
+ if (err && err != -ENOMSG)
+ RET_CG_ERR(err);
+
+ init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags,
+ OPTION_RESEND);
+ if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk,
+ &init_stg,
+ BPF_SK_STORAGE_GET_F_CREATE))
+ RET_CG_ERR(0);
+
+ if (init_stg.resend_syn)
+ /* Don't clear the write_hdr cb now because
+ * the ACK may get lost and retransmit may
+ * be needed.
+ *
+ * PARSE_ALL_HDR cb flag is set to learn if this
+ * resend_syn option has received by the peer.
+ *
+ * The header option will be resent until a valid
+ * packet is received at handle_parse_hdr()
+ * and all hdr cb flags will be cleared in
+ * handle_parse_hdr().
+ */
+ set_parse_all_hdr_cb_flags(skops);
+ else if (!active_fin_out.flags)
+ /* No options will be written from now */
+ clear_hdr_cb_flags(skops);
+
+ if (active_syn_out.max_delack_ms) {
+ err = set_delack_max(skops, active_syn_out.max_delack_ms);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ if (active_estab_in.max_delack_ms) {
+ err = set_rto_min(skops, active_estab_in.max_delack_ms);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int handle_passive_estab(struct bpf_sock_ops *skops)
+{
+ struct hdr_stg init_stg = {};
+ struct tcphdr *th;
+ int err;
+
+ err = load_option(skops, &passive_estab_in, true);
+ if (err == -ENOENT) {
+ /* saved_syn is not found. It was in syncookie mode.
+ * We have asked the active side to resend the options
+ * in ACK, so try to find the bpf_test_option from ACK now.
+ */
+ err = load_option(skops, &passive_estab_in, false);
+ init_stg.syncookie = true;
+ }
+
+ /* ENOMSG: The bpf_test_option is not found which is fine.
+ * Bail out now for all other errors.
+ */
+ if (err && err != -ENOMSG)
+ RET_CG_ERR(err);
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ if (th->syn) {
+ /* Fastopen */
+
+ /* Cannot clear cb_flags to stop write_hdr cb.
+ * synack is not sent yet for fast open.
+ * Even it was, the synack may need to be retransmitted.
+ *
+ * PARSE_ALL_HDR cb flag is set to learn
+ * if synack has reached the peer.
+ * All cb_flags will be cleared in handle_parse_hdr().
+ */
+ set_parse_all_hdr_cb_flags(skops);
+ init_stg.fastopen = true;
+ } else if (!passive_fin_out.flags) {
+ /* No options will be written from now */
+ clear_hdr_cb_flags(skops);
+ }
+
+ if (!skops->sk ||
+ !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg,
+ BPF_SK_STORAGE_GET_F_CREATE))
+ RET_CG_ERR(0);
+
+ if (passive_synack_out.max_delack_ms) {
+ err = set_delack_max(skops, passive_synack_out.max_delack_ms);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ if (passive_estab_in.max_delack_ms) {
+ err = set_rto_min(skops, passive_estab_in.max_delack_ms);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int handle_parse_hdr(struct bpf_sock_ops *skops)
+{
+ struct hdr_stg *hdr_stg;
+ struct tcphdr *th;
+
+ if (!skops->sk)
+ RET_CG_ERR(0);
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+ if (!hdr_stg)
+ RET_CG_ERR(0);
+
+ if (hdr_stg->resend_syn || hdr_stg->fastopen)
+ /* The PARSE_ALL_HDR cb flag was turned on
+ * to ensure that the previously written
+ * options have reached the peer.
+ * Those previously written option includes:
+ * - Active side: resend_syn in ACK during syncookie
+ * or
+ * - Passive side: SYNACK during fastopen
+ *
+ * A valid packet has been received here after
+ * the 3WHS, so the PARSE_ALL_HDR cb flag
+ * can be cleared now.
+ */
+ clear_parse_all_hdr_cb_flags(skops);
+
+ if (hdr_stg->resend_syn && !active_fin_out.flags)
+ /* Active side resent the syn option in ACK
+ * because the server was in syncookie mode.
+ * A valid packet has been received, so
+ * clear header cb flags if there is no
+ * more option to send.
+ */
+ clear_hdr_cb_flags(skops);
+
+ if (hdr_stg->fastopen && !passive_fin_out.flags)
+ /* Passive side was in fastopen.
+ * A valid packet has been received, so
+ * the SYNACK has reached the peer.
+ * Clear header cb flags if there is no more
+ * option to send.
+ */
+ clear_hdr_cb_flags(skops);
+
+ if (th->fin) {
+ struct bpf_test_option *fin_opt;
+ int err;
+
+ if (hdr_stg->active)
+ fin_opt = &active_fin_in;
+ else
+ fin_opt = &passive_fin_in;
+
+ err = load_option(skops, fin_opt, false);
+ if (err && err != -ENOMSG)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+SEC("sockops/estab")
+int estab(struct bpf_sock_ops *skops)
+{
+ int true_val = 1;
+
+ switch (skops->op) {
+ case BPF_SOCK_OPS_TCP_LISTEN_CB:
+ bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
+ &true_val, sizeof(true_val));
+ set_hdr_cb_flags(skops);
+ break;
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ set_hdr_cb_flags(skops);
+ break;
+ case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
+ return handle_parse_hdr(skops);
+ case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
+ return handle_hdr_opt_len(skops);
+ case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
+ return handle_write_hdr_opt(skops);
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ return handle_passive_estab(skops);
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ return handle_active_estab(skops);
+ }
+
+ return CG_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c
index 29fa09d..e9dfa03 100644
--- a/tools/testing/selftests/bpf/progs/test_vmlinux.c
+++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c
@@ -19,12 +19,14 @@ SEC("tp/syscalls/sys_enter_nanosleep")
int handle__tp(struct trace_event_raw_sys_enter *args)
{
struct __kernel_timespec *ts;
+ long tv_nsec;
if (args->id != __NR_nanosleep)
return 0;
ts = (void *)args->args[0];
- if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+ if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+ tv_nsec != MY_TV_NSEC)
return 0;
tp_called = true;
@@ -35,12 +37,14 @@ SEC("raw_tp/sys_enter")
int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id)
{
struct __kernel_timespec *ts;
+ long tv_nsec;
if (id != __NR_nanosleep)
return 0;
ts = (void *)PT_REGS_PARM1_CORE(regs);
- if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+ if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+ tv_nsec != MY_TV_NSEC)
return 0;
raw_tp_called = true;
@@ -51,12 +55,14 @@ SEC("tp_btf/sys_enter")
int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id)
{
struct __kernel_timespec *ts;
+ long tv_nsec;
if (id != __NR_nanosleep)
return 0;
ts = (void *)PT_REGS_PARM1_CORE(regs);
- if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+ if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+ tv_nsec != MY_TV_NSEC)
return 0;
tp_btf_called = true;
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 8b36b66..9a4d095 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -39,6 +39,13 @@ int bench_trigger_fentry(void *ctx)
return 0;
}
+SEC("fentry.s/__x64_sys_getpgid")
+int bench_trigger_fentry_sleep(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
SEC("fmod_ret/__x64_sys_getpgid")
int bench_trigger_fmodret(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
index ed253f2..ec53b1e 100644
--- a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
+++ b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
@@ -156,4 +156,5 @@ int main(int argc, char **argv)
bpf_object__close(obj);
}
}
+ return 0;
}
diff --git a/tools/testing/selftests/bpf/test_tcp_hdr_options.h b/tools/testing/selftests/bpf/test_tcp_hdr_options.h
new file mode 100644
index 0000000..78a8cf9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcp_hdr_options.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+
+#ifndef _TEST_TCP_HDR_OPTIONS_H
+#define _TEST_TCP_HDR_OPTIONS_H
+
+struct bpf_test_option {
+ __u8 flags;
+ __u8 max_delack_ms;
+ __u8 rand;
+} __attribute__((packed));
+
+enum {
+ OPTION_RESEND,
+ OPTION_MAX_DELACK_MS,
+ OPTION_RAND,
+ __NR_OPTION_FLAGS,
+};
+
+#define OPTION_F_RESEND (1 << OPTION_RESEND)
+#define OPTION_F_MAX_DELACK_MS (1 << OPTION_MAX_DELACK_MS)
+#define OPTION_F_RAND (1 << OPTION_RAND)
+#define OPTION_MASK ((1 << __NR_OPTION_FLAGS) - 1)
+
+#define TEST_OPTION_FLAGS(flags, option) (1 & ((flags) >> (option)))
+#define SET_OPTION_FLAGS(flags, option) ((flags) |= (1 << (option)))
+
+/* Store in bpf_sk_storage */
+struct hdr_stg {
+ bool active;
+ bool resend_syn; /* active side only */
+ bool syncookie; /* passive side only */
+ bool fastopen; /* passive side only */
+};
+
+struct linum_err {
+ unsigned int linum;
+ int err;
+};
+
+#define TCPHDR_FIN 0x01
+#define TCPHDR_SYN 0x02
+#define TCPHDR_RST 0x04
+#define TCPHDR_PSH 0x08
+#define TCPHDR_ACK 0x10
+#define TCPHDR_URG 0x20
+#define TCPHDR_ECE 0x40
+#define TCPHDR_CWR 0x80
+#define TCPHDR_SYNACK (TCPHDR_SYN | TCPHDR_ACK)
+
+#define TCPOPT_EOL 0
+#define TCPOPT_NOP 1
+#define TCPOPT_WINDOW 3
+#define TCPOPT_EXP 254
+
+#define TCP_BPF_EXPOPT_BASE_LEN 4
+#define MAX_TCP_HDR_LEN 60
+#define MAX_TCP_OPTION_SPACE 40
+
+#ifdef BPF_PROG_TEST_TCP_HDR_OPTIONS
+
+#define CG_OK 1
+#define CG_ERR 0
+
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
+struct tcp_exprm_opt {
+ __u8 kind;
+ __u8 len;
+ __u16 magic;
+ union {
+ __u8 data[4];
+ __u32 data32;
+ };
+} __attribute__((packed));
+
+struct tcp_opt {
+ __u8 kind;
+ __u8 len;
+ union {
+ __u8 data[4];
+ __u32 data32;
+ };
+} __attribute__((packed));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, struct linum_err);
+} lport_linum_map SEC(".maps");
+
+static inline unsigned int tcp_hdrlen(const struct tcphdr *th)
+{
+ return th->doff << 2;
+}
+
+static inline __u8 skops_tcp_flags(const struct bpf_sock_ops *skops)
+{
+ return skops->skb_tcp_flags;
+}
+
+static inline void clear_hdr_cb_flags(struct bpf_sock_ops *skops)
+{
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags &
+ ~(BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG));
+}
+
+static inline void set_hdr_cb_flags(struct bpf_sock_ops *skops)
+{
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags |
+ BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
+}
+static inline void
+clear_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops)
+{
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags &
+ ~BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
+}
+
+static inline void
+set_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops)
+{
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags |
+ BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
+}
+
+#define RET_CG_ERR(__err) ({ \
+ struct linum_err __linum_err; \
+ int __lport; \
+ \
+ __linum_err.linum = __LINE__; \
+ __linum_err.err = __err; \
+ __lport = skops->local_port; \
+ bpf_map_update_elem(&lport_linum_map, &__lport, &__linum_err, BPF_NOEXIST); \
+ clear_hdr_cb_flags(skops); \
+ clear_parse_all_hdr_cb_flags(skops); \
+ return CG_ERR; \
+})
+
+#endif /* BPF_PROG_TEST_TCP_HDR_OPTIONS */
+
+#endif /* _TEST_TCP_HDR_OPTIONS_H */
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 78a6bae..9be395d 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -114,6 +114,7 @@ struct bpf_test {
bpf_testdata_struct_t retvals[MAX_TEST_RUNS];
};
enum bpf_attach_type expected_attach_type;
+ const char *kfunc;
};
/* Note we want this to be 64 bit aligned so that the end of our array is
@@ -984,8 +985,24 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
attr.log_level = 4;
attr.prog_flags = pflags;
+ if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) {
+ attr.attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc,
+ attr.expected_attach_type);
+ if (attr.attach_btf_id < 0) {
+ printf("FAIL\nFailed to find BTF ID for '%s'!\n",
+ test->kfunc);
+ (*errors)++;
+ return;
+ }
+ }
+
fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog));
- if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) {
+
+ /* BPF_PROG_TYPE_TRACING requires more setup and
+ * bpf_probe_prog_type won't give correct answer
+ */
+ if (fd_prog < 0 && prog_type != BPF_PROG_TYPE_TRACING &&
+ !bpf_probe_prog_type(prog_type, 0)) {
printf("SKIP (unsupported program type %d)\n", prog_type);
skips++;
goto close_fds;
diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
index 4d6645f..dac40de 100644
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ b/tools/testing/selftests/bpf/verifier/bounds.c
@@ -557,3 +557,149 @@
.result = ACCEPT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+{
+ "bounds check for reg = 0, reg xor 1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = ACCEPT,
+},
+{
+ "bounds check for reg32 = 0, reg32 xor 1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 1),
+ BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = ACCEPT,
+},
+{
+ "bounds check for reg = 2, reg xor 3",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_1, 2),
+ BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = ACCEPT,
+},
+{
+ "bounds check for reg = any, reg xor 3",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = REJECT,
+ .errstr = "invalid access to map value",
+ .errstr_unpriv = "invalid access to map value",
+},
+{
+ "bounds check for reg32 = any, reg32 xor 3",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 3),
+ BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = REJECT,
+ .errstr = "invalid access to map value",
+ .errstr_unpriv = "invalid access to map value",
+},
+{
+ "bounds check for reg > 0, reg xor 3",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_1, 0, 3),
+ BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = ACCEPT,
+},
+{
+ "bounds check for reg32 > 0, reg32 xor 3",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP32_IMM(BPF_JLE, BPF_REG_1, 0, 3),
+ BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 3),
+ BPF_JMP32_IMM(BPF_JGE, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/d_path.c b/tools/testing/selftests/bpf/verifier/d_path.c
new file mode 100644
index 0000000..b988396
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/d_path.c
@@ -0,0 +1,37 @@
+{
+ "d_path accept",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0),
+ BPF_LD_IMM64(BPF_REG_3, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_d_path),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .kfunc = "dentry_open",
+},
+{
+ "d_path reject",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0),
+ BPF_LD_IMM64(BPF_REG_3, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_d_path),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "helper call is not allowed in probe",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .kfunc = "d_path",
+},
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
index 47edf09..508a702 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
@@ -207,7 +207,7 @@
RET=0
- devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 16
+ devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512
devlink trap group set $DEVLINK_DEV group l3_drops policer $id
# Send packets at highest possible rate and make sure they are dropped
@@ -220,8 +220,8 @@
rate=$(trap_rate_get)
pct=$((100 * (rate - 1000) / 1000))
- ((-5 <= pct && pct <= 5))
- check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-5%"
+ ((-10 <= pct && pct <= 10))
+ check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-10%"
log_info "Expected rate 1000 pps, measured rate $rate pps"
drop_rate=$(policer_drop_rate_get $id)
@@ -288,35 +288,12 @@
RET=0
- devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 32
+ devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512
devlink trap group set $DEVLINK_DEV group l3_drops policer $id
- # Send a burst of 64 packets and make sure that about 32 are received
- # and the rest are dropped by the policer
- log_info "=== Tx burst size: 64, Policer burst size: 32 pps ==="
-
- t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
- t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
-
- start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 64
-
- t1_rx=$(devlink_trap_rx_packets_get blackhole_route)
- t1_drop=$(devlink_trap_policer_rx_dropped_get $id)
-
- rx=$((t1_rx - t0_rx))
- pct=$((100 * (rx - 32) / 32))
- ((-20 <= pct && pct <= 20))
- check_err $? "Expected burst size of 32 packets, got $rx packets, which is $pct% off. Required accuracy is +-20%"
- log_info "Expected burst size of 32 packets, measured burst size of $rx packets"
-
- drop=$((t1_drop - t0_drop))
- (( drop > 0 ))
- check_err $? "Expected non-zero policer drops, got 0"
- log_info "Measured policer drops of $drop packets"
-
# Send a burst of 16 packets and make sure that 16 are received
# and that none are dropped by the policer
- log_info "=== Tx burst size: 16, Policer burst size: 32 pps ==="
+ log_info "=== Tx burst size: 16, Policer burst size: 512 ==="
t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
index 4b96561..3e3e06e 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
@@ -24,6 +24,13 @@
simple_if_fini $swp1
}
+tc_police_addr()
+{
+ local num=$1; shift
+
+ printf "2001:db8:1::%x" $num
+}
+
tc_police_rules_create()
{
local count=$1; shift
@@ -34,8 +41,9 @@
for ((i = 0; i < count; ++i)); do
cat >> $TC_POLICE_BATCH_FILE <<-EOF
filter add dev $swp1 ingress \
- prot ip \
- flower skip_sw \
+ prot ipv6 \
+ pref 1000 \
+ flower skip_sw dst_ip $(tc_police_addr $i) \
action police rate 10mbit burst 100k \
conform-exceed drop/ok
EOF
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 22dc2f3..b74884d 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -739,6 +739,36 @@
run_cmd "$IP nexthop replace id 81 via 172.16.1.1 dev veth1"
log_test $? 2 "Nexthop replace of group entry - v6 route, v4 nexthop"
+ run_cmd "$IP nexthop add id 86 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP nexthop add id 87 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop add id 88 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop add id 124 group 86/87/88"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop del id 88"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop del id 87"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 0 "IPv6 route using a group after removing v4 gateways"
+
+ run_cmd "$IP ro delete 2001:db8:101::1/128"
+ run_cmd "$IP nexthop add id 87 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop add id 88 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop replace id 124 group 86/87/88"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop replace id 88 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop replace id 87 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 0 "IPv6 route using a group after replacing v4 gateways"
+
$IP nexthop flush >/dev/null 2>&1
#
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
index c33bfd7..13db1cb 100644
--- a/tools/testing/selftests/net/forwarding/mirror_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -31,7 +31,7 @@
local t0=$(tc_rule_stats_get $dev $pref)
$MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \
- -c 10 -d 100ms -t icmp type=8
+ -c 10 -d 100msec -t icmp type=8
sleep 0.5
local t1=$(tc_rule_stats_get $dev $pref)
local delta=$((t1 - t0))
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 090620c..a549665 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -65,8 +65,8 @@ static void die_usage(void)
fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
fprintf(stderr, "\t-p num -- use port num\n");
- fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n");
- fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n");
+ fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n");
+ fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
fprintf(stderr, "\t-u -- check mptcp ulp\n");
fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
exit(1);
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
index 6331d91..170be65 100755
--- a/tools/testing/selftests/net/psock_snd.sh
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -45,7 +45,7 @@
echo "raw csum_off"
./in_netns.sh ./psock_snd -v -c
-echo "raw csum_off with bad offset (fails)"
+echo "raw csum_off with bad offset (expected to fail)"
(! ./in_netns.sh ./psock_snd -v -c -C)
@@ -57,7 +57,7 @@
echo "raw mtu size"
./in_netns.sh ./psock_snd -l "${mss}"
-echo "raw mtu size + 1 (fails)"
+echo "raw mtu size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -l "${mss_exceeds}")
# fails due to ARPHRD_ETHER check in packet_extra_vlan_len_allowed
@@ -65,19 +65,19 @@
# echo "raw vlan mtu size"
# ./in_netns.sh ./psock_snd -V -l "${mss}"
-echo "raw vlan mtu size + 1 (fails)"
+echo "raw vlan mtu size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -V -l "${mss_exceeds}")
echo "dgram mtu size"
./in_netns.sh ./psock_snd -d -l "${mss}"
-echo "dgram mtu size + 1 (fails)"
+echo "dgram mtu size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -d -l "${mss_exceeds}")
-echo "raw truncate hlen (fails: does not arrive)"
+echo "raw truncate hlen (expected to fail: does not arrive)"
(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen}))")
-echo "raw truncate hlen - 1 (fails: EINVAL)"
+echo "raw truncate hlen - 1 (expected to fail: EINVAL)"
(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen} - 1))")
@@ -86,13 +86,13 @@
echo "raw gso min size"
./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
-echo "raw gso min size - 1 (fails)"
+echo "raw gso min size - 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
echo "raw gso max size"
./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
-echo "raw gso max size + 1 (fails)"
+echo "raw gso max size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss_exceeds}")
echo "OK. All tests passed"
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
index a61b7b3..00f837c 100644
--- a/tools/testing/selftests/net/tcp_mmap.c
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -123,6 +123,28 @@ void hash_zone(void *zone, unsigned int length)
#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
+
+static void *mmap_large_buffer(size_t need, size_t *allocated)
+{
+ void *buffer;
+ size_t sz;
+
+ /* Attempt to use huge pages if possible. */
+ sz = ALIGN_UP(need, map_align);
+ buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+
+ if (buffer == (void *)-1) {
+ sz = need;
+ buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (buffer != (void *)-1)
+ fprintf(stderr, "MAP_HUGETLB attempt failed, look at /sys/kernel/mm/hugepages for optimal performance\n");
+ }
+ *allocated = sz;
+ return buffer;
+}
+
void *child_thread(void *arg)
{
unsigned long total_mmap = 0, total = 0;
@@ -135,6 +157,7 @@ void *child_thread(void *arg)
void *addr = NULL;
double throughput;
struct rusage ru;
+ size_t buffer_sz;
int lu, fd;
fd = (int)(unsigned long)arg;
@@ -142,9 +165,9 @@ void *child_thread(void *arg)
gettimeofday(&t0, NULL);
fcntl(fd, F_SETFL, O_NDELAY);
- buffer = malloc(chunk_size);
- if (!buffer) {
- perror("malloc");
+ buffer = mmap_large_buffer(chunk_size, &buffer_sz);
+ if (buffer == (void *)-1) {
+ perror("mmap");
goto error;
}
if (zflg) {
@@ -179,6 +202,10 @@ void *child_thread(void *arg)
total_mmap += zc.length;
if (xflg)
hash_zone(addr, zc.length);
+ /* It is more efficient to unmap the pages right now,
+ * instead of doing this in next TCP_ZEROCOPY_RECEIVE.
+ */
+ madvise(addr, zc.length, MADV_DONTNEED);
total += zc.length;
}
if (zc.recv_skip_hint) {
@@ -230,7 +257,7 @@ void *child_thread(void *arg)
ru.ru_nvcsw);
}
error:
- free(buffer);
+ munmap(buffer, buffer_sz);
close(fd);
if (zflg)
munmap(raddr, chunk_size + map_align);
@@ -347,6 +374,7 @@ int main(int argc, char *argv[])
uint64_t total = 0;
char *host = NULL;
int fd, c, on = 1;
+ size_t buffer_sz;
char *buffer;
int sflg = 0;
int mss = 0;
@@ -437,8 +465,8 @@ int main(int argc, char *argv[])
}
do_accept(fdlisten);
}
- buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ buffer = mmap_large_buffer(chunk_size, &buffer_sz);
if (buffer == (char *)-1) {
perror("mmap");
exit(1);
@@ -484,6 +512,6 @@ int main(int argc, char *argv[])
total += wr;
}
close(fd);
- munmap(buffer, chunk_size);
+ munmap(buffer, buffer_sz);
return 0;
}