Merge tag 'media/v5.7-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
Pull media updates from Mauro Carvalho Chehab:
- New sensor driver: imx219
- Support for some new pixelformats
- Support for Sun8i SoC
- Added more codecs to meson vdec driver
- Prepare for removing the legacy usbvision driver by moving it to
staging. This driver has issues and use legacy core APIs. If nobody
steps up to address those, it is time for its retirement.
- Several cleanups and improvements on drivers, with the addition of
new supported boards
* tag 'media/v5.7-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media: (236 commits)
media: venus: firmware: Ignore secure call error on first resume
media: mtk-vpu: load vpu firmware from the new location
media: i2c: video-i2c: fix build errors due to 'imply hwmon'
media: MAINTAINERS: add myself to co-maintain Hantro G1/G2 for i.MX8MQ
media: hantro: add initial i.MX8MQ support
media: dt-bindings: Document i.MX8MQ VPU bindings
media: vivid: fix incorrect PA assignment to HDMI outputs
media: hantro: Add linux-rockchip mailing list to MAINTAINERS
media: cedrus: h264: Fix 4K decoding on H6
media: siano: Use scnprintf() for avoiding potential buffer overflow
media: rc: Use scnprintf() for avoiding potential buffer overflow
media: allegro: create new struct for channel parameters
media: allegro: move mail definitions to separate file
media: allegro: pass buffers through firmware
media: allegro: verify source and destination buffer in VCU response
media: allegro: handle dependency of bitrate and bitrate_peak
media: allegro: read bitrate mode directly from control
media: allegro: make QP configurable
media: allegro: make frame rate configurable
media: allegro: skip filler data if possible
...
diff --git a/.clang-format b/.clang-format
index 196ca31..6ec5558 100644
--- a/.clang-format
+++ b/.clang-format
@@ -86,6 +86,8 @@
- 'bio_for_each_segment_all'
- 'bio_list_for_each'
- 'bip_for_each_vec'
+ - 'bitmap_for_each_clear_region'
+ - 'bitmap_for_each_set_region'
- 'blkg_for_each_descendant_post'
- 'blkg_for_each_descendant_pre'
- 'blk_queue_for_each_rl'
@@ -115,6 +117,7 @@
- 'drm_client_for_each_connector_iter'
- 'drm_client_for_each_modeset'
- 'drm_connector_for_each_possible_encoder'
+ - 'drm_for_each_bridge_in_chain'
- 'drm_for_each_connector_iter'
- 'drm_for_each_crtc'
- 'drm_for_each_encoder'
@@ -136,9 +139,10 @@
- 'for_each_bio'
- 'for_each_board_func_rsrc'
- 'for_each_bvec'
+ - 'for_each_card_auxs'
+ - 'for_each_card_auxs_safe'
- 'for_each_card_components'
- - 'for_each_card_links'
- - 'for_each_card_links_safe'
+ - 'for_each_card_pre_auxs'
- 'for_each_card_prelinks'
- 'for_each_card_rtds'
- 'for_each_card_rtds_safe'
@@ -166,6 +170,7 @@
- 'for_each_dpcm_fe'
- 'for_each_drhd_unit'
- 'for_each_dss_dev'
+ - 'for_each_efi_handle'
- 'for_each_efi_memory_desc'
- 'for_each_efi_memory_desc_in_map'
- 'for_each_element'
@@ -190,6 +195,7 @@
- 'for_each_lru'
- 'for_each_matching_node'
- 'for_each_matching_node_and_match'
+ - 'for_each_member'
- 'for_each_memblock'
- 'for_each_memblock_type'
- 'for_each_memcg_cache_index'
@@ -200,9 +206,11 @@
- 'for_each_msi_entry'
- 'for_each_msi_entry_safe'
- 'for_each_net'
+ - 'for_each_net_continue_reverse'
- 'for_each_netdev'
- 'for_each_netdev_continue'
- 'for_each_netdev_continue_rcu'
+ - 'for_each_netdev_continue_reverse'
- 'for_each_netdev_feature'
- 'for_each_netdev_in_bond_rcu'
- 'for_each_netdev_rcu'
@@ -254,10 +262,10 @@
- 'for_each_reserved_mem_region'
- 'for_each_rtd_codec_dai'
- 'for_each_rtd_codec_dai_rollback'
- - 'for_each_rtdcom'
- - 'for_each_rtdcom_safe'
+ - 'for_each_rtd_components'
- 'for_each_set_bit'
- 'for_each_set_bit_from'
+ - 'for_each_set_clump8'
- 'for_each_sg'
- 'for_each_sg_dma_page'
- 'for_each_sg_page'
@@ -267,6 +275,7 @@
- 'for_each_subelement_id'
- '__for_each_thread'
- 'for_each_thread'
+ - 'for_each_wakeup_source'
- 'for_each_zone'
- 'for_each_zone_zonelist'
- 'for_each_zone_zonelist_nodemask'
@@ -330,6 +339,7 @@
- 'list_for_each'
- 'list_for_each_codec'
- 'list_for_each_codec_safe'
+ - 'list_for_each_continue'
- 'list_for_each_entry'
- 'list_for_each_entry_continue'
- 'list_for_each_entry_continue_rcu'
@@ -351,6 +361,7 @@
- 'llist_for_each_entry'
- 'llist_for_each_entry_safe'
- 'llist_for_each_safe'
+ - 'mci_for_each_dimm'
- 'media_device_for_each_entity'
- 'media_device_for_each_intf'
- 'media_device_for_each_link'
@@ -444,10 +455,16 @@
- 'virtio_device_for_each_vq'
- 'xa_for_each'
- 'xa_for_each_marked'
+ - 'xa_for_each_range'
- 'xa_for_each_start'
- 'xas_for_each'
- 'xas_for_each_conflict'
- 'xas_for_each_marked'
+ - 'xbc_array_for_each_value'
+ - 'xbc_for_each_key_value'
+ - 'xbc_node_for_each_array_value'
+ - 'xbc_node_for_each_child'
+ - 'xbc_node_for_each_key_value'
- 'zorro_for_each_dev'
#IncludeBlocks: Preserve # Unknown to clang-format-5.0
diff --git a/.mailmap b/.mailmap
index 5e650fb..9198a93 100644
--- a/.mailmap
+++ b/.mailmap
@@ -225,6 +225,7 @@
Praveen BP <praveenbp@ti.com>
Punit Agrawal <punitagrawal@gmail.com> <punit.agrawal@arm.com>
Qais Yousef <qsyousef@gmail.com> <qais.yousef@imgtec.com>
+Quentin Monnet <quentin@isovalent.com> <quentin.monnet@netronome.com>
Quentin Perret <qperret@qperret.net> <quentin.perret@arm.com>
Rafael J. Wysocki <rjw@rjwysocki.net> <rjw@sisk.pl>
Rajesh Shah <rajesh.shah@intel.com>
diff --git a/COPYING b/COPYING
index da4cb28..a635a38 100644
--- a/COPYING
+++ b/COPYING
@@ -16,3 +16,5 @@
Documentation/process/license-rules.rst
for more details.
+
+All contributions to the Linux Kernel are subject to this COPYING file.
diff --git a/CREDITS b/CREDITS
index a97d328..032b599 100644
--- a/CREDITS
+++ b/CREDITS
@@ -567,6 +567,11 @@
S: Orlando, Florida
S: USA
+N: Paul Burton
+E: paulburton@kernel.org
+W: https://pburton.com
+D: MIPS maintainer 2018-2020
+
N: Lennert Buytenhek
E: kernel@wantstofly.org
D: Original (2.4) rewrite of the ethernet bridging code
diff --git a/Documentation/ABI/testing/sysfs-kernel-uids b/Documentation/ABI/removed/sysfs-kernel-uids
similarity index 91%
rename from Documentation/ABI/testing/sysfs-kernel-uids
rename to Documentation/ABI/removed/sysfs-kernel-uids
index 4182b70..dc4463f 100644
--- a/Documentation/ABI/testing/sysfs-kernel-uids
+++ b/Documentation/ABI/removed/sysfs-kernel-uids
@@ -1,5 +1,5 @@
What: /sys/kernel/uids/<uid>/cpu_shares
-Date: December 2007
+Date: December 2007, finally removed in kernel v2.6.34-rc1
Contact: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Description:
diff --git a/Documentation/Makefile b/Documentation/Makefile
index d77bb60..79ecee6 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -13,7 +13,7 @@
SPHINXBUILD = sphinx-build
SPHINXOPTS =
SPHINXDIRS = .
-_SPHINXDIRS = $(patsubst $(srctree)/Documentation/%/index.rst,%,$(wildcard $(srctree)/Documentation/*/index.rst))
+_SPHINXDIRS = $(sort $(patsubst $(srctree)/Documentation/%/index.rst,%,$(wildcard $(srctree)/Documentation/*/index.rst)))
SPHINX_CONF = conf.py
PAPER =
BUILDDIR = $(obj)/output
diff --git a/Documentation/PCI/pci.rst b/Documentation/PCI/pci.rst
index 6864f9a..8c016d8 100644
--- a/Documentation/PCI/pci.rst
+++ b/Documentation/PCI/pci.rst
@@ -239,7 +239,7 @@
as the PCI "bus address" might have been remapped to a "host physical"
address by the arch/chip-set specific kernel support.
-See Documentation/io-mapping.txt for how to access device registers
+See Documentation/driver-api/io-mapping.rst for how to access device registers
or device memory.
The device driver needs to call pci_request_region() to verify
diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst
index 621111c..f2b3439 100644
--- a/Documentation/accounting/psi.rst
+++ b/Documentation/accounting/psi.rst
@@ -1,3 +1,5 @@
+.. _psi:
+
================================
PSI - Pressure Stall Information
================================
diff --git a/Documentation/admin-guide/acpi/fan_performance_states.rst b/Documentation/admin-guide/acpi/fan_performance_states.rst
index 21d233c..98fe5c3 100644
--- a/Documentation/admin-guide/acpi/fan_performance_states.rst
+++ b/Documentation/admin-guide/acpi/fan_performance_states.rst
@@ -18,7 +18,7 @@
$ ls -l /sys/bus/acpi/devices/INT3404:00/
total 0
-...
+ ...
-r--r--r-- 1 root root 4096 Dec 13 20:38 state0
-r--r--r-- 1 root root 4096 Dec 13 20:38 state1
-r--r--r-- 1 root root 4096 Dec 13 20:38 state10
@@ -38,7 +38,7 @@
and contains a colon-separated list of 5 integer numbers (fields) with the
following interpretation::
-control_percent:trip_point_index:speed_rpm:noise_level_mdb:power_mw
+ control_percent:trip_point_index:speed_rpm:noise_level_mdb:power_mw
* ``control_percent``: The percent value to be used to set the fan speed to a
specific level using the _FSL object (0-100).
diff --git a/Documentation/admin-guide/binfmt-misc.rst b/Documentation/admin-guide/binfmt-misc.rst
index 97b0d79..95c93bb 100644
--- a/Documentation/admin-guide/binfmt-misc.rst
+++ b/Documentation/admin-guide/binfmt-misc.rst
@@ -1,5 +1,5 @@
-Kernel Support for miscellaneous (your favourite) Binary Formats v1.1
-=====================================================================
+Kernel Support for miscellaneous Binary Formats (binfmt_misc)
+=============================================================
This Kernel feature allows you to invoke almost (for restrictions see below)
every program by simply typing its name in the shell.
diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index 27c77d8..a6fd1f9 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -251,8 +251,6 @@
================ =============================================================
orig_data_size uncompressed size of data stored in this disk.
- This excludes same-element-filled pages (same_pages) since
- no memory is allocated for them.
Unit: bytes
compr_data_size compressed size of data stored in this disk
mem_used_total the amount of memory allocated for this disk. This
diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst
index b342a67..d6b3b77 100644
--- a/Documentation/admin-guide/bootconfig.rst
+++ b/Documentation/admin-guide/bootconfig.rst
@@ -23,7 +23,7 @@
has to be terminated by semi-colon (``;``) or newline (``\n``).
For array value, array entries are separated by comma (``,``). ::
-KEY[.WORD[...]] = VALUE[, VALUE2[...]][;]
+ KEY[.WORD[...]] = VALUE[, VALUE2[...]][;]
Unlike the kernel command line syntax, spaces are OK around the comma and ``=``.
@@ -62,6 +62,30 @@
In both styles, same key words are automatically merged when parsing it
at boot time. So you can append similar trees or key-values.
+Same-key Values
+---------------
+
+It is prohibited that two or more values or arrays share a same-key.
+For example,::
+
+ foo = bar, baz
+ foo = qux # !ERROR! we can not re-define same key
+
+If you want to append the value to existing key as an array member,
+you can use ``+=`` operator. For example::
+
+ foo = bar, baz
+ foo += qux
+
+In this case, the key ``foo`` has ``bar``, ``baz`` and ``qux``.
+
+However, a sub-key and a value can not co-exist under a parent key.
+For example, following config is NOT allowed.::
+
+ foo = value1
+ foo.bar = value2 # !ERROR! subkey "bar" and value "value1" can NOT co-exist
+
+
Comments
--------
@@ -102,9 +126,13 @@
==============================
Since the boot configuration file is loaded with initrd, it will be added
-to the end of the initrd (initramfs) image file. The Linux kernel decodes
-the last part of the initrd image in memory to get the boot configuration
-data.
+to the end of the initrd (initramfs) image file with size, checksum and
+12-byte magic word as below.
+
+[initrd][bootconfig][size(u32)][checksum(u32)][#BOOTCONFIG\n]
+
+The Linux kernel decodes the last part of the initrd image in memory to
+get the boot configuration data.
Because of this "piggyback" method, there is no need to change or
update the boot loader and the kernel image itself.
diff --git a/Documentation/admin-guide/cgroup-v1/index.rst b/Documentation/admin-guide/cgroup-v1/index.rst
index 10bf48b..226f644 100644
--- a/Documentation/admin-guide/cgroup-v1/index.rst
+++ b/Documentation/admin-guide/cgroup-v1/index.rst
@@ -1,3 +1,5 @@
+.. _cgroup-v1:
+
========================
Control Groups version 1
========================
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 3f80146..fbb1116 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -9,7 +9,7 @@
conventions of cgroup v2. It describes all userland-visible aspects
of cgroup including core and specific controller behaviors. All
future changes must be reflected in this document. Documentation for
-v1 is available under Documentation/admin-guide/cgroup-v1/.
+v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst <cgroup-v1>`.
.. CONTENTS
@@ -1023,7 +1023,7 @@
A read-only nested-key file which exists on non-root cgroups.
Shows pressure stall information for CPU. See
- Documentation/accounting/psi.rst for details.
+ :ref:`Documentation/accounting/psi.rst <psi>` for details.
cpu.uclamp.min
A read-write single value file which exists on non-root cgroups.
@@ -1103,7 +1103,7 @@
proportionally to the overage, reducing reclaim pressure for
smaller overages.
- Effective min boundary is limited by memory.min values of
+ Effective min boundary is limited by memory.min values of
all ancestor cgroups. If there is memory.min overcommitment
(child cgroup or cgroups are requiring more protected memory
than parent will allow), then each child cgroup will get
@@ -1313,53 +1313,41 @@
Number of major page faults incurred
workingset_refault
-
Number of refaults of previously evicted pages
workingset_activate
-
Number of refaulted pages that were immediately activated
workingset_nodereclaim
-
Number of times a shadow node has been reclaimed
pgrefill
-
Amount of scanned pages (in an active LRU list)
pgscan
-
Amount of scanned pages (in an inactive LRU list)
pgsteal
-
Amount of reclaimed pages
pgactivate
-
Amount of pages moved to the active LRU list
pgdeactivate
-
Amount of pages moved to the inactive LRU list
pglazyfree
-
Amount of pages postponed to be freed under memory pressure
pglazyfreed
-
Amount of reclaimed lazyfree pages
thp_fault_alloc
-
Number of transparent hugepages which were allocated to satisfy
a page fault, including COW faults. This counter is not present
when CONFIG_TRANSPARENT_HUGEPAGE is not set.
thp_collapse_alloc
-
Number of transparent hugepages which were allocated to allow
collapsing an existing range of pages. This counter is not
present when CONFIG_TRANSPARENT_HUGEPAGE is not set.
@@ -1403,7 +1391,7 @@
A read-only nested-key file which exists on non-root cgroups.
Shows pressure stall information for memory. See
- Documentation/accounting/psi.rst for details.
+ :ref:`Documentation/accounting/psi.rst <psi>` for details.
Usage Guidelines
@@ -1478,7 +1466,7 @@
dios Number of discard IOs
====== =====================
- An example read output follows:
+ An example read output follows::
8:16 rbytes=1459200 wbytes=314773504 rios=192 wios=353 dbytes=0 dios=0
8:0 rbytes=90430464 wbytes=299008000 rios=8950 wios=1252 dbytes=50331648 dios=3021
@@ -1643,7 +1631,7 @@
A read-only nested-key file which exists on non-root cgroups.
Shows pressure stall information for IO. See
- Documentation/accounting/psi.rst for details.
+ :ref:`Documentation/accounting/psi.rst <psi>` for details.
Writeback
@@ -1853,7 +1841,7 @@
from the requested CPUs.
The CPU numbers are comma-separated numbers or ranges.
- For example:
+ For example::
# cat cpuset.cpus
0-4,6,8-10
@@ -1892,7 +1880,7 @@
from the requested memory nodes.
The memory node numbers are comma-separated numbers or ranges.
- For example:
+ For example::
# cat cpuset.mems
0-1,3
diff --git a/Documentation/driver-api/edid.rst b/Documentation/admin-guide/edid.rst
similarity index 97%
rename from Documentation/driver-api/edid.rst
rename to Documentation/admin-guide/edid.rst
index b1b5acd..80deeb2 100644
--- a/Documentation/driver-api/edid.rst
+++ b/Documentation/admin-guide/edid.rst
@@ -11,11 +11,13 @@
either correctly working because all components follow the standards -
or the computer is unusable, because the screen remains dark after
booting or it displays the wrong area. Cases when this happens are:
+
- The graphics board does not recognize the monitor.
- The graphics board is unable to detect any EDID data.
- The graphics board incorrectly forwards EDID data to the driver.
- The monitor sends no or bogus EDID data.
- A KVM sends its own EDID data instead of querying the connected monitor.
+
Adding the kernel parameter "nomodeset" helps in most cases, but causes
restrictions later on.
@@ -32,7 +34,7 @@
and a Makefile environment are given here.
To create binary EDID and C source code files from the existing data
-material, simply type "make".
+material, simply type "make" in tools/edid/.
If you want to create your own EDID file, copy the file 1024x768.S,
replace the settings with your own data and add a new target to the
diff --git a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
index af6865b..68d96f0 100644
--- a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
@@ -136,8 +136,6 @@
The mitigation can be controlled at boot time via a kernel command line option.
See :ref:`taa_mitigation_control_command_line`.
-.. _virt_mechanism:
-
Virtualization mitigation
^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index f1d0ccf..5a6269f 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -75,6 +75,7 @@
cputopology
dell_rbu
device-mapper/index
+ edid
efi-stub
ext4
nfs/index
diff --git a/Documentation/admin-guide/iostats.rst b/Documentation/admin-guide/iostats.rst
index df5b834..9b14b0c 100644
--- a/Documentation/admin-guide/iostats.rst
+++ b/Documentation/admin-guide/iostats.rst
@@ -100,7 +100,7 @@
Since 5.0 this field counts jiffies when at least one request was
started or completed. If request runs more than 2 jiffies then some
- I/O time will not be accounted unless there are other requests.
+ I/O time might be not accounted in case of concurrent requests.
Field 11 -- weighted # of milliseconds spent doing I/Os (unsigned int)
This field is incremented at each I/O start, I/O completion, I/O
@@ -143,6 +143,9 @@
summed to) and the result given to the user. There is no convenient
user interface for accessing the per-CPU counters themselves.
+Since 4.19 request times are measured with nanoseconds precision and
+truncated to milliseconds before showing in this interface.
+
Disks vs Partitions
-------------------
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index dbc22d6..d7da466 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -136,6 +136,10 @@
dynamic table installation which will install SSDT
tables to /sys/firmware/acpi/tables/dynamic.
+ acpi_no_watchdog [HW,ACPI,WDT]
+ Ignore the ACPI-based watchdog interface (WDAT) and let
+ a native driver control the watchdog device instead.
+
acpi_rsdp= [ACPI,EFI,KEXEC]
Pass the RSDP address to the kernel, mostly used
on machines running EFI runtime service to boot the
@@ -1095,6 +1099,12 @@
A valid base address must be provided, and the serial
port must already be setup and configured.
+ ec_imx21,<addr>
+ ec_imx6q,<addr>
+ Start an early, polled-mode, output-only console on the
+ Freescale i.MX UART at the specified address. The UART
+ must already be setup and configured.
+
ar3700_uart,<addr>
Start an early, polled-mode console on the
Armada 3700 serial port at the specified
@@ -1775,7 +1785,7 @@
provided by tboot because it makes the system
vulnerable to DMA attacks.
nobounce [Default off]
- Disable bounce buffer for unstrusted devices such as
+ Disable bounce buffer for untrusted devices such as
the Thunderbolt devices. This will treat the untrusted
devices as the trusted ones, hence might expose security
risks of DMA attacks.
@@ -1879,7 +1889,7 @@
No delay
ip= [IP_PNP]
- See Documentation/filesystems/nfs/nfsroot.txt.
+ See Documentation/admin-guide/nfs/nfsroot.rst.
ipcmni_extend [KNL] Extend the maximum number of unique System V
IPC identifiers from 32,768 to 16,777,216.
@@ -2791,7 +2801,7 @@
<name>,<region-number>[,<base>,<size>,<buswidth>,<altbuswidth>]
mtdparts= [MTD]
- See drivers/mtd/cmdlinepart.c.
+ See drivers/mtd/parsers/cmdlinepart.c
multitce=off [PPC] This parameter disables the use of the pSeries
firmware feature for updating multiple TCE entries
@@ -2849,13 +2859,13 @@
Default value is 0.
nfsaddrs= [NFS] Deprecated. Use ip= instead.
- See Documentation/filesystems/nfs/nfsroot.txt.
+ See Documentation/admin-guide/nfs/nfsroot.rst.
nfsroot= [NFS] nfs root filesystem for disk-less boxes.
- See Documentation/filesystems/nfs/nfsroot.txt.
+ See Documentation/admin-guide/nfs/nfsroot.rst.
nfsrootdebug [NFS] enable nfsroot debugging messages.
- See Documentation/filesystems/nfs/nfsroot.txt.
+ See Documentation/admin-guide/nfs/nfsroot.rst.
nfs.callback_nr_threads=
[NFSv4] set the total number of threads that the
@@ -4510,10 +4520,10 @@
Format: <integer>
A nonzero value instructs the soft-lockup detector
- to panic the machine when a soft-lockup occurs. This
- is also controlled by CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC
- which is the respective build-time switch to that
- functionality.
+ to panic the machine when a soft-lockup occurs. It is
+ also controlled by the kernel.softlockup_panic sysctl
+ and CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC, which is the
+ respective build-time switch to that functionality.
softlockup_all_cpu_backtrace=
[KNL] Should the soft-lockup detector generate
diff --git a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
index baeeba8..21818ac 100644
--- a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
+++ b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
@@ -234,7 +234,7 @@
Such a workqueue can be confined to a given subset of the
CPUs using the ``/sys/devices/virtual/workqueue/*/cpumask`` sysfs
files. The set of WQ_SYSFS workqueues can be displayed using
- "ls sys/devices/virtual/workqueue". That said, the workqueues
+ "ls /sys/devices/virtual/workqueue". That said, the workqueues
maintainer would like to caution people against indiscriminately
sprinkling WQ_SYSFS across all the workqueues. The reason for
caution is that it is easy to add WQ_SYSFS, but because sysfs is
diff --git a/Documentation/admin-guide/perf/imx-ddr.rst b/Documentation/admin-guide/perf/imx-ddr.rst
index 3726a10..f05f56c 100644
--- a/Documentation/admin-guide/perf/imx-ddr.rst
+++ b/Documentation/admin-guide/perf/imx-ddr.rst
@@ -43,7 +43,8 @@
AXI_ID and AXI_MASKING are mapped on DPCR1 register in performance counter.
When non-masked bits are matching corresponding AXI_ID bits then counter is
- incremented. Perf counter is incremented if
+ incremented. Perf counter is incremented if::
+
AxID && AXI_MASKING == AXI_ID && AXI_MASKING
This filter doesn't support filter different AXI ID for axid-read and axid-write
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index def0748..335696d 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -2,262 +2,197 @@
Documentation for /proc/sys/kernel/
===================================
-kernel version 2.2.10
+.. See scripts/check-sysctl-docs to keep this up to date
+
Copyright (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
Copyright (c) 2009, Shen Feng<shen@cn.fujitsu.com>
-For general info and legal blurb, please look in index.rst.
+For general info and legal blurb, please look in :doc:`index`.
------------------------------------------------------------------------------
This file contains documentation for the sysctl files in
-/proc/sys/kernel/ and is valid for Linux kernel version 2.2.
+``/proc/sys/kernel/`` and is valid for Linux kernel version 2.2.
The files in this directory can be used to tune and monitor
miscellaneous and general things in the operation of the Linux
-kernel. Since some of the files _can_ be used to screw up your
+kernel. Since some of the files *can* be used to screw up your
system, it is advisable to read both documentation and source
before actually making adjustments.
Currently, these files might (depending on your configuration)
-show up in /proc/sys/kernel:
+show up in ``/proc/sys/kernel``:
-- acct
-- acpi_video_flags
-- auto_msgmni
-- bootloader_type [ X86 only ]
-- bootloader_version [ X86 only ]
-- cap_last_cap
-- core_pattern
-- core_pipe_limit
-- core_uses_pid
-- ctrl-alt-del
-- dmesg_restrict
-- domainname
-- hostname
-- hotplug
-- hardlockup_all_cpu_backtrace
-- hardlockup_panic
-- hung_task_panic
-- hung_task_check_count
-- hung_task_timeout_secs
-- hung_task_check_interval_secs
-- hung_task_warnings
-- hyperv_record_panic_msg
-- kexec_load_disabled
-- kptr_restrict
-- l2cr [ PPC only ]
-- modprobe ==> Documentation/debugging-modules.txt
-- modules_disabled
-- msg_next_id [ sysv ipc ]
-- msgmax
-- msgmnb
-- msgmni
-- nmi_watchdog
-- osrelease
-- ostype
-- overflowgid
-- overflowuid
-- panic
-- panic_on_oops
-- panic_on_stackoverflow
-- panic_on_unrecovered_nmi
-- panic_on_warn
-- panic_print
-- panic_on_rcu_stall
-- perf_cpu_time_max_percent
-- perf_event_paranoid
-- perf_event_max_stack
-- perf_event_mlock_kb
-- perf_event_max_contexts_per_stack
-- pid_max
-- powersave-nap [ PPC only ]
-- printk
-- printk_delay
-- printk_ratelimit
-- printk_ratelimit_burst
-- pty ==> Documentation/filesystems/devpts.txt
-- randomize_va_space
-- real-root-dev ==> Documentation/admin-guide/initrd.rst
-- reboot-cmd [ SPARC only ]
-- rtsig-max
-- rtsig-nr
-- sched_energy_aware
-- seccomp/ ==> Documentation/userspace-api/seccomp_filter.rst
-- sem
-- sem_next_id [ sysv ipc ]
-- sg-big-buff [ generic SCSI device (sg) ]
-- shm_next_id [ sysv ipc ]
-- shm_rmid_forced
-- shmall
-- shmmax [ sysv ipc ]
-- shmmni
-- softlockup_all_cpu_backtrace
-- soft_watchdog
-- stack_erasing
-- stop-a [ SPARC only ]
-- sysrq ==> Documentation/admin-guide/sysrq.rst
-- sysctl_writes_strict
-- tainted ==> Documentation/admin-guide/tainted-kernels.rst
-- threads-max
-- unknown_nmi_panic
-- watchdog
-- watchdog_thresh
-- version
+.. contents:: :local:
-acct:
-=====
+acct
+====
-highwater lowwater frequency
+::
+
+ highwater lowwater frequency
If BSD-style process accounting is enabled these values control
its behaviour. If free space on filesystem where the log lives
-goes below <lowwater>% accounting suspends. If free space gets
-above <highwater>% accounting resumes. <Frequency> determines
+goes below ``lowwater``% accounting suspends. If free space gets
+above ``highwater``% accounting resumes. ``frequency`` determines
how often do we check the amount of free space (value is in
seconds). Default:
-4 2 30
-That is, suspend accounting if there left <= 2% free; resume it
-if we got >=4%; consider information about amount of free space
-valid for 30 seconds.
+
+::
+
+ 4 2 30
+
+That is, suspend accounting if free space drops below 2%; resume it
+if it increases to at least 4%; consider information about amount of
+free space valid for 30 seconds.
-acpi_video_flags:
-=================
+acpi_video_flags
+================
-flags
+See :doc:`/power/video`. This allows the video resume mode to be set,
+in a similar fashion to the ``acpi_sleep`` kernel parameter, by
+combining the following values:
-See Doc*/kernel/power/video.txt, it allows mode of video boot to be
-set during run time.
+= =======
+1 s3_bios
+2 s3_mode
+4 s3_beep
+= =======
-auto_msgmni:
-============
+auto_msgmni
+===========
This variable has no effect and may be removed in future kernel
releases. Reading it always returns 0.
-Up to Linux 3.17, it enabled/disabled automatic recomputing of msgmni
-upon memory add/remove or upon ipc namespace creation/removal.
+Up to Linux 3.17, it enabled/disabled automatic recomputing of
+`msgmni`_
+upon memory add/remove or upon IPC namespace creation/removal.
Echoing "1" into this file enabled msgmni automatic recomputing.
-Echoing "0" turned it off. auto_msgmni default value was 1.
+Echoing "0" turned it off. The default value was 1.
-bootloader_type:
-================
-
-x86 bootloader identification
+bootloader_type (x86 only)
+==========================
This gives the bootloader type number as indicated by the bootloader,
shifted left by 4, and OR'd with the low four bits of the bootloader
version. The reason for this encoding is that this used to match the
-type_of_loader field in the kernel header; the encoding is kept for
+``type_of_loader`` field in the kernel header; the encoding is kept for
backwards compatibility. That is, if the full bootloader type number
is 0x15 and the full version number is 0x234, this file will contain
the value 340 = 0x154.
-See the type_of_loader and ext_loader_type fields in
-Documentation/x86/boot.rst for additional information.
+See the ``type_of_loader`` and ``ext_loader_type`` fields in
+:doc:`/x86/boot` for additional information.
-bootloader_version:
-===================
-
-x86 bootloader version
+bootloader_version (x86 only)
+=============================
The complete bootloader version number. In the example above, this
file will contain the value 564 = 0x234.
-See the type_of_loader and ext_loader_ver fields in
-Documentation/x86/boot.rst for additional information.
+See the ``type_of_loader`` and ``ext_loader_ver`` fields in
+:doc:`/x86/boot` for additional information.
-cap_last_cap:
-=============
+cap_last_cap
+============
Highest valid capability of the running kernel. Exports
-CAP_LAST_CAP from the kernel.
+``CAP_LAST_CAP`` from the kernel.
-core_pattern:
-=============
+core_pattern
+============
-core_pattern is used to specify a core dumpfile pattern name.
+``core_pattern`` is used to specify a core dumpfile pattern name.
* max length 127 characters; default value is "core"
-* core_pattern is used as a pattern template for the output filename;
- certain string patterns (beginning with '%') are substituted with
- their actual values.
-* backward compatibility with core_uses_pid:
+* ``core_pattern`` is used as a pattern template for the output
+ filename; certain string patterns (beginning with '%') are
+ substituted with their actual values.
+* backward compatibility with ``core_uses_pid``:
- If core_pattern does not include "%p" (default does not)
- and core_uses_pid is set, then .PID will be appended to
+ If ``core_pattern`` does not include "%p" (default does not)
+ and ``core_uses_pid`` is set, then .PID will be appended to
the filename.
-* corename format specifiers::
+* corename format specifiers
- %<NUL> '%' is dropped
- %% output one '%'
- %p pid
- %P global pid (init PID namespace)
- %i tid
- %I global tid (init PID namespace)
- %u uid (in initial user namespace)
- %g gid (in initial user namespace)
- %d dump mode, matches PR_SET_DUMPABLE and
- /proc/sys/fs/suid_dumpable
- %s signal number
- %t UNIX time of dump
- %h hostname
- %e executable filename (may be shortened)
- %E executable path
- %<OTHER> both are dropped
+ ======== ==========================================
+ %<NUL> '%' is dropped
+ %% output one '%'
+ %p pid
+ %P global pid (init PID namespace)
+ %i tid
+ %I global tid (init PID namespace)
+ %u uid (in initial user namespace)
+ %g gid (in initial user namespace)
+ %d dump mode, matches ``PR_SET_DUMPABLE`` and
+ ``/proc/sys/fs/suid_dumpable``
+ %s signal number
+ %t UNIX time of dump
+ %h hostname
+ %e executable filename (may be shortened)
+ %E executable path
+ %c maximum size of core file by resource limit RLIMIT_CORE
+ %<OTHER> both are dropped
+ ======== ==========================================
* If the first character of the pattern is a '|', the kernel will treat
the rest of the pattern as a command to run. The core dump will be
written to the standard input of that program instead of to a file.
-core_pipe_limit:
-================
+core_pipe_limit
+===============
-This sysctl is only applicable when core_pattern is configured to pipe
-core files to a user space helper (when the first character of
-core_pattern is a '|', see above). When collecting cores via a pipe
-to an application, it is occasionally useful for the collecting
-application to gather data about the crashing process from its
-/proc/pid directory. In order to do this safely, the kernel must wait
-for the collecting process to exit, so as not to remove the crashing
-processes proc files prematurely. This in turn creates the
-possibility that a misbehaving userspace collecting process can block
-the reaping of a crashed process simply by never exiting. This sysctl
-defends against that. It defines how many concurrent crashing
-processes may be piped to user space applications in parallel. If
-this value is exceeded, then those crashing processes above that value
-are noted via the kernel log and their cores are skipped. 0 is a
-special value, indicating that unlimited processes may be captured in
-parallel, but that no waiting will take place (i.e. the collecting
-process is not guaranteed access to /proc/<crashing pid>/). This
-value defaults to 0.
+This sysctl is only applicable when `core_pattern`_ is configured to
+pipe core files to a user space helper (when the first character of
+``core_pattern`` is a '|', see above).
+When collecting cores via a pipe to an application, it is occasionally
+useful for the collecting application to gather data about the
+crashing process from its ``/proc/pid`` directory.
+In order to do this safely, the kernel must wait for the collecting
+process to exit, so as not to remove the crashing processes proc files
+prematurely.
+This in turn creates the possibility that a misbehaving userspace
+collecting process can block the reaping of a crashed process simply
+by never exiting.
+This sysctl defends against that.
+It defines how many concurrent crashing processes may be piped to user
+space applications in parallel.
+If this value is exceeded, then those crashing processes above that
+value are noted via the kernel log and their cores are skipped.
+0 is a special value, indicating that unlimited processes may be
+captured in parallel, but that no waiting will take place (i.e. the
+collecting process is not guaranteed access to ``/proc/<crashing
+pid>/``).
+This value defaults to 0.
-core_uses_pid:
-==============
+core_uses_pid
+=============
The default coredump filename is "core". By setting
-core_uses_pid to 1, the coredump filename becomes core.PID.
-If core_pattern does not include "%p" (default does not)
-and core_uses_pid is set, then .PID will be appended to
+``core_uses_pid`` to 1, the coredump filename becomes core.PID.
+If `core_pattern`_ does not include "%p" (default does not)
+and ``core_uses_pid`` is set, then .PID will be appended to
the filename.
-ctrl-alt-del:
-=============
+ctrl-alt-del
+============
When the value in this file is 0, ctrl-alt-del is trapped and
-sent to the init(1) program to handle a graceful restart.
+sent to the ``init(1)`` program to handle a graceful restart.
When, however, the value is > 0, Linux's reaction to a Vulcan
Nerve Pinch (tm) will be an immediate reboot, without even
syncing its dirty buffers.
@@ -269,21 +204,22 @@
to decide what to do with it.
-dmesg_restrict:
-===============
+dmesg_restrict
+==============
This toggle indicates whether unprivileged users are prevented
-from using dmesg(8) to view messages from the kernel's log buffer.
-When dmesg_restrict is set to (0) there are no restrictions. When
-dmesg_restrict is set set to (1), users must have CAP_SYSLOG to use
-dmesg(8).
+from using ``dmesg(8)`` to view messages from the kernel's log
+buffer.
+When ``dmesg_restrict`` is set to 0 there are no restrictions.
+When ``dmesg_restrict`` is set set to 1, users must have
+``CAP_SYSLOG`` to use ``dmesg(8)``.
-The kernel config option CONFIG_SECURITY_DMESG_RESTRICT sets the
-default value of dmesg_restrict.
+The kernel config option ``CONFIG_SECURITY_DMESG_RESTRICT`` sets the
+default value of ``dmesg_restrict``.
-domainname & hostname:
-======================
+domainname & hostname
+=====================
These files can be used to set the NIS/YP domainname and the
hostname of your box in exactly the same way as the commands
@@ -302,167 +238,206 @@
domainname "frop.org", not to be confused with the NIS (Network
Information Service) or YP (Yellow Pages) domainname. These two
domain names are in general different. For a detailed discussion
-see the hostname(1) man page.
+see the ``hostname(1)`` man page.
-hardlockup_all_cpu_backtrace:
-=============================
+hardlockup_all_cpu_backtrace
+============================
This value controls the hard lockup detector behavior when a hard
lockup condition is detected as to whether or not to gather further
debug information. If enabled, arch-specific all-CPU stack dumping
will be initiated.
-0: do nothing. This is the default behavior.
-
-1: on detection capture more debug information.
+= ============================================
+0 Do nothing. This is the default behavior.
+1 On detection capture more debug information.
+= ============================================
-hardlockup_panic:
-=================
+hardlockup_panic
+================
This parameter can be used to control whether the kernel panics
when a hard lockup is detected.
- 0 - don't panic on hard lockup
- 1 - panic on hard lockup
+= ===========================
+0 Don't panic on hard lockup.
+1 Panic on hard lockup.
+= ===========================
-See Documentation/admin-guide/lockup-watchdogs.rst for more information. This can
-also be set using the nmi_watchdog kernel parameter.
+See :doc:`/admin-guide/lockup-watchdogs` for more information.
+This can also be set using the nmi_watchdog kernel parameter.
-hotplug:
-========
+hotplug
+=======
Path for the hotplug policy agent.
-Default value is "/sbin/hotplug".
+Default value is "``/sbin/hotplug``".
-hung_task_panic:
-================
+hung_task_panic
+===============
Controls the kernel's behavior when a hung task is detected.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
-0: continue operation. This is the default behavior.
-
-1: panic immediately.
+= =================================================
+0 Continue operation. This is the default behavior.
+1 Panic immediately.
+= =================================================
-hung_task_check_count:
-======================
+hung_task_check_count
+=====================
The upper bound on the number of tasks that are checked.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
-hung_task_timeout_secs:
-=======================
+hung_task_timeout_secs
+======================
When a task in D state did not get scheduled
for more than this value report a warning.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
-0: means infinite timeout - no checking done.
+0 means infinite timeout, no checking is done.
-Possible values to set are in range {0..LONG_MAX/HZ}.
+Possible values to set are in range {0:``LONG_MAX``/``HZ``}.
-hung_task_check_interval_secs:
-==============================
+hung_task_check_interval_secs
+=============================
Hung task check interval. If hung task checking is enabled
-(see hung_task_timeout_secs), the check is done every
-hung_task_check_interval_secs seconds.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+(see `hung_task_timeout_secs`_), the check is done every
+``hung_task_check_interval_secs`` seconds.
+This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
-0 (default): means use hung_task_timeout_secs as checking interval.
-Possible values to set are in range {0..LONG_MAX/HZ}.
+0 (default) means use ``hung_task_timeout_secs`` as checking
+interval.
+
+Possible values to set are in range {0:``LONG_MAX``/``HZ``}.
-hung_task_warnings:
-===================
+hung_task_warnings
+==================
The maximum number of warnings to report. During a check interval
if a hung task is detected, this value is decreased by 1.
When this value reaches 0, no more warnings will be reported.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
-1: report an infinite number of warnings.
-hyperv_record_panic_msg:
-========================
+hyperv_record_panic_msg
+=======================
Controls whether the panic kmsg data should be reported to Hyper-V.
-0: do not report panic kmsg data.
-
-1: report the panic kmsg data. This is the default behavior.
+= =========================================================
+0 Do not report panic kmsg data.
+1 Report the panic kmsg data. This is the default behavior.
+= =========================================================
-kexec_load_disabled:
-====================
+kexec_load_disabled
+===================
-A toggle indicating if the kexec_load syscall has been disabled. This
-value defaults to 0 (false: kexec_load enabled), but can be set to 1
-(true: kexec_load disabled). Once true, kexec can no longer be used, and
-the toggle cannot be set back to false. This allows a kexec image to be
-loaded before disabling the syscall, allowing a system to set up (and
-later use) an image without it being altered. Generally used together
-with the "modules_disabled" sysctl.
+A toggle indicating if the ``kexec_load`` syscall has been disabled.
+This value defaults to 0 (false: ``kexec_load`` enabled), but can be
+set to 1 (true: ``kexec_load`` disabled).
+Once true, kexec can no longer be used, and the toggle cannot be set
+back to false.
+This allows a kexec image to be loaded before disabling the syscall,
+allowing a system to set up (and later use) an image without it being
+altered.
+Generally used together with the `modules_disabled`_ sysctl.
-kptr_restrict:
-==============
+kptr_restrict
+=============
This toggle indicates whether restrictions are placed on
-exposing kernel addresses via /proc and other interfaces.
+exposing kernel addresses via ``/proc`` and other interfaces.
-When kptr_restrict is set to 0 (the default) the address is hashed before
-printing. (This is the equivalent to %p.)
+When ``kptr_restrict`` is set to 0 (the default) the address is hashed
+before printing.
+(This is the equivalent to %p.)
-When kptr_restrict is set to (1), kernel pointers printed using the %pK
-format specifier will be replaced with 0's unless the user has CAP_SYSLOG
-and effective user and group ids are equal to the real ids. This is
-because %pK checks are done at read() time rather than open() time, so
-if permissions are elevated between the open() and the read() (e.g via
-a setuid binary) then %pK will not leak kernel pointers to unprivileged
-users. Note, this is a temporary solution only. The correct long-term
-solution is to do the permission checks at open() time. Consider removing
-world read permissions from files that use %pK, and using dmesg_restrict
-to protect against uses of %pK in dmesg(8) if leaking kernel pointer
-values to unprivileged users is a concern.
+When ``kptr_restrict`` is set to 1, kernel pointers printed using the
+%pK format specifier will be replaced with 0s unless the user has
+``CAP_SYSLOG`` and effective user and group ids are equal to the real
+ids.
+This is because %pK checks are done at read() time rather than open()
+time, so if permissions are elevated between the open() and the read()
+(e.g via a setuid binary) then %pK will not leak kernel pointers to
+unprivileged users.
+Note, this is a temporary solution only.
+The correct long-term solution is to do the permission checks at
+open() time.
+Consider removing world read permissions from files that use %pK, and
+using `dmesg_restrict`_ to protect against uses of %pK in ``dmesg(8)``
+if leaking kernel pointer values to unprivileged users is a concern.
-When kptr_restrict is set to (2), kernel pointers printed using
-%pK will be replaced with 0's regardless of privileges.
+When ``kptr_restrict`` is set to 2, kernel pointers printed using
+%pK will be replaced with 0s regardless of privileges.
-l2cr: (PPC only)
+modprobe
+========
+
+This gives the full path of the modprobe command which the kernel will
+use to load modules. This can be used to debug module loading
+requests::
+
+ echo '#! /bin/sh' > /tmp/modprobe
+ echo 'echo "$@" >> /tmp/modprobe.log' >> /tmp/modprobe
+ echo 'exec /sbin/modprobe "$@"' >> /tmp/modprobe
+ chmod a+x /tmp/modprobe
+ echo /tmp/modprobe > /proc/sys/kernel/modprobe
+
+This only applies when the *kernel* is requesting that the module be
+loaded; it won't have any effect if the module is being loaded
+explicitly using ``modprobe`` from userspace.
+
+
+modules_disabled
================
-This flag controls the L2 cache of G3 processor boards. If
-0, the cache is disabled. Enabled if nonzero.
-
-
-modules_disabled:
-=================
-
A toggle value indicating if modules are allowed to be loaded
in an otherwise modular kernel. This toggle defaults to off
(0), but can be set true (1). Once true, modules can be
neither loaded nor unloaded, and the toggle cannot be set back
-to false. Generally used with the "kexec_load_disabled" toggle.
+to false. Generally used with the `kexec_load_disabled`_ toggle.
-msg_next_id, sem_next_id, and shm_next_id:
-==========================================
+.. _msgmni:
+
+msgmax, msgmnb, and msgmni
+==========================
+
+``msgmax`` is the maximum size of an IPC message, in bytes. 8192 by
+default (``MSGMAX``).
+
+``msgmnb`` is the maximum size of an IPC queue, in bytes. 16384 by
+default (``MSGMNB``).
+
+``msgmni`` is the maximum number of IPC queues. 32000 by default
+(``MSGMNI``).
+
+
+msg_next_id, sem_next_id, and shm_next_id (System V IPC)
+========================================================
These three toggles allows to specify desired id for next allocated IPC
object: message, semaphore or shared memory respectively.
By default they are equal to -1, which means generic allocation logic.
-Possible values to set are in range {0..INT_MAX}.
+Possible values to set are in range {0:``INT_MAX``}.
Notes:
1) kernel doesn't guarantee, that new object will have desired id. So,
@@ -472,15 +447,16 @@
fails, it is undefined if the value remains unmodified or is reset to -1.
-nmi_watchdog:
-=============
+nmi_watchdog
+============
This parameter can be used to control the NMI watchdog
(i.e. the hard lockup detector) on x86 systems.
-0 - disable the hard lockup detector
-
-1 - enable the hard lockup detector
+= =================================
+0 Disable the hard lockup detector.
+1 Enable the hard lockup detector.
+= =================================
The hard lockup detector monitors each CPU for its ability to respond to
timer interrupts. The mechanism utilizes CPU performance counter registers
@@ -492,11 +468,11 @@
nmi_watchdog=1
-to the guest kernel command line (see Documentation/admin-guide/kernel-parameters.rst).
+to the guest kernel command line (see :doc:`/admin-guide/kernel-parameters`).
-numa_balancing:
-===============
+numa_balancing
+==============
Enables/disables automatic page fault based NUMA memory
balancing. Memory is moved automatically to nodes
@@ -514,9 +490,10 @@
guarantee. If the target workload is already bound to NUMA nodes then this
feature should be disabled. Otherwise, if the system overhead from the
feature is too high then the rate the kernel samples for NUMA hinting
-faults may be controlled by the numa_balancing_scan_period_min_ms,
+faults may be controlled by the `numa_balancing_scan_period_min_ms,
numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms,
-numa_balancing_scan_size_mb, and numa_balancing_settle_count sysctls.
+numa_balancing_scan_size_mb`_, and numa_balancing_settle_count sysctls.
+
numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb
===============================================================================================================================
@@ -542,23 +519,23 @@
memory accesses. These sysctls control the thresholds for scan delays and
the number of pages scanned.
-numa_balancing_scan_period_min_ms is the minimum time in milliseconds to
+``numa_balancing_scan_period_min_ms`` is the minimum time in milliseconds to
scan a tasks virtual memory. It effectively controls the maximum scanning
rate for each task.
-numa_balancing_scan_delay_ms is the starting "scan delay" used for a task
+``numa_balancing_scan_delay_ms`` is the starting "scan delay" used for a task
when it initially forks.
-numa_balancing_scan_period_max_ms is the maximum time in milliseconds to
+``numa_balancing_scan_period_max_ms`` is the maximum time in milliseconds to
scan a tasks virtual memory. It effectively controls the minimum scanning
rate for each task.
-numa_balancing_scan_size_mb is how many megabytes worth of pages are
+``numa_balancing_scan_size_mb`` is how many megabytes worth of pages are
scanned for a given scan.
-osrelease, ostype & version:
-============================
+osrelease, ostype & version
+===========================
::
@@ -569,15 +546,16 @@
# cat version
#5 Wed Feb 25 21:49:24 MET 1998
-The files osrelease and ostype should be clear enough. Version
+The files ``osrelease`` and ``ostype`` should be clear enough.
+``version``
needs a little more clarification however. The '#5' means that
this is the fifth kernel built from this source base and the
date behind it indicates the time the kernel was built.
The only way to tune these values is to rebuild the kernel :-)
-overflowgid & overflowuid:
-==========================
+overflowgid & overflowuid
+=========================
if your architecture did not always support 32-bit UIDs (i.e. arm,
i386, m68k, sh, and sparc32), a fixed UID and GID will be returned to
@@ -588,108 +566,119 @@
The default is 65534.
+panic
+=====
+
+The value in this file determines the behaviour of the kernel on a
panic:
-======
-The value in this file represents the number of seconds the kernel
-waits before rebooting on a panic. When you use the software watchdog,
-the recommended setting is 60.
+* if zero, the kernel will loop forever;
+* if negative, the kernel will reboot immediately;
+* if positive, the kernel will reboot after the corresponding number
+ of seconds.
+
+When you use the software watchdog, the recommended setting is 60.
-panic_on_io_nmi:
-================
+panic_on_io_nmi
+===============
Controls the kernel's behavior when a CPU receives an NMI caused by
an IO error.
-0: try to continue operation (default)
-
-1: panic immediately. The IO error triggered an NMI. This indicates a
- serious system condition which could result in IO data corruption.
- Rather than continuing, panicking might be a better choice. Some
- servers issue this sort of NMI when the dump button is pushed,
- and you can use this option to take a crash dump.
+= ==================================================================
+0 Try to continue operation (default).
+1 Panic immediately. The IO error triggered an NMI. This indicates a
+ serious system condition which could result in IO data corruption.
+ Rather than continuing, panicking might be a better choice. Some
+ servers issue this sort of NMI when the dump button is pushed,
+ and you can use this option to take a crash dump.
+= ==================================================================
-panic_on_oops:
-==============
+panic_on_oops
+=============
Controls the kernel's behaviour when an oops or BUG is encountered.
-0: try to continue operation
-
-1: panic immediately. If the `panic` sysctl is also non-zero then the
- machine will be rebooted.
+= ===================================================================
+0 Try to continue operation.
+1 Panic immediately. If the `panic` sysctl is also non-zero then the
+ machine will be rebooted.
+= ===================================================================
-panic_on_stackoverflow:
-=======================
+panic_on_stackoverflow
+======================
Controls the kernel's behavior when detecting the overflows of
kernel, IRQ and exception stacks except a user stack.
-This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled.
+This file shows up if ``CONFIG_DEBUG_STACKOVERFLOW`` is enabled.
-0: try to continue operation.
-
-1: panic immediately.
+= ==========================
+0 Try to continue operation.
+1 Panic immediately.
+= ==========================
-panic_on_unrecovered_nmi:
-=========================
+panic_on_unrecovered_nmi
+========================
The default Linux behaviour on an NMI of either memory or unknown is
to continue operation. For many environments such as scientific
computing it is preferable that the box is taken out and the error
dealt with than an uncorrected parity/ECC error get propagated.
-A small number of systems do generate NMI's for bizarre random reasons
+A small number of systems do generate NMIs for bizarre random reasons
such as power management so the default is off. That sysctl works like
the existing panic controls already in that directory.
-panic_on_warn:
-==============
+panic_on_warn
+=============
Calls panic() in the WARN() path when set to 1. This is useful to avoid
a kernel rebuild when attempting to kdump at the location of a WARN().
-0: only WARN(), default behaviour.
-
-1: call panic() after printing out WARN() location.
+= ================================================
+0 Only WARN(), default behaviour.
+1 Call panic() after printing out WARN() location.
+= ================================================
-panic_print:
-============
+panic_print
+===========
Bitmask for printing system info when panic happens. User can chose
combination of the following bits:
-===== ========================================
+===== ============================================
bit 0 print all tasks info
bit 1 print system memory info
bit 2 print timer info
-bit 3 print locks info if CONFIG_LOCKDEP is on
+bit 3 print locks info if ``CONFIG_LOCKDEP`` is on
bit 4 print ftrace buffer
-===== ========================================
+===== ============================================
So for example to print tasks and memory info on panic, user can::
echo 3 > /proc/sys/kernel/panic_print
-panic_on_rcu_stall:
-===================
+panic_on_rcu_stall
+==================
When set to 1, calls panic() after RCU stall detection messages. This
is useful to define the root cause of RCU stalls using a vmcore.
-0: do not panic() when RCU stall takes place, default behavior.
-
-1: panic() after printing RCU stall messages.
+= ============================================================
+0 Do not panic() when RCU stall takes place, default behavior.
+1 panic() after printing RCU stall messages.
+= ============================================================
-perf_cpu_time_max_percent:
-==========================
+perf_cpu_time_max_percent
+=========================
Hints to the kernel how much CPU time it should be allowed to
use to handle perf sampling events. If the perf subsystem
@@ -702,171 +691,179 @@
stacked up next to each other so much that nothing else is
allowed to execute.
-0:
- disable the mechanism. Do not monitor or correct perf's
- sampling rate no matter how CPU time it takes.
+===== ========================================================
+0 Disable the mechanism. Do not monitor or correct perf's
+ sampling rate no matter how CPU time it takes.
-1-100:
- attempt to throttle perf's sample rate to this
- percentage of CPU. Note: the kernel calculates an
- "expected" length of each sample event. 100 here means
- 100% of that expected length. Even if this is set to
- 100, you may still see sample throttling if this
- length is exceeded. Set to 0 if you truly do not care
- how much CPU is consumed.
+1-100 Attempt to throttle perf's sample rate to this
+ percentage of CPU. Note: the kernel calculates an
+ "expected" length of each sample event. 100 here means
+ 100% of that expected length. Even if this is set to
+ 100, you may still see sample throttling if this
+ length is exceeded. Set to 0 if you truly do not care
+ how much CPU is consumed.
+===== ========================================================
-perf_event_paranoid:
-====================
+perf_event_paranoid
+===================
Controls use of the performance events system by unprivileged
users (without CAP_SYS_ADMIN). The default value is 2.
=== ==================================================================
- -1 Allow use of (almost) all events by all users
+ -1 Allow use of (almost) all events by all users.
- Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK
+ Ignore mlock limit after perf_event_mlock_kb without
+ ``CAP_IPC_LOCK``.
->=0 Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN
+>=0 Disallow ftrace function tracepoint by users without
+ ``CAP_SYS_ADMIN``.
- Disallow raw tracepoint access by users without CAP_SYS_ADMIN
+ Disallow raw tracepoint access by users without ``CAP_SYS_ADMIN``.
->=1 Disallow CPU event access by users without CAP_SYS_ADMIN
+>=1 Disallow CPU event access by users without ``CAP_SYS_ADMIN``.
->=2 Disallow kernel profiling by users without CAP_SYS_ADMIN
+>=2 Disallow kernel profiling by users without ``CAP_SYS_ADMIN``.
=== ==================================================================
-perf_event_max_stack:
-=====================
+perf_event_max_stack
+====================
-Controls maximum number of stack frames to copy for (attr.sample_type &
-PERF_SAMPLE_CALLCHAIN) configured events, for instance, when using
-'perf record -g' or 'perf trace --call-graph fp'.
+Controls maximum number of stack frames to copy for (``attr.sample_type &
+PERF_SAMPLE_CALLCHAIN``) configured events, for instance, when using
+'``perf record -g``' or '``perf trace --call-graph fp``'.
This can only be done when no events are in use that have callchains
-enabled, otherwise writing to this file will return -EBUSY.
+enabled, otherwise writing to this file will return ``-EBUSY``.
The default value is 127.
-perf_event_mlock_kb:
-====================
+perf_event_mlock_kb
+===================
Control size of per-cpu ring buffer not counted agains mlock limit.
The default value is 512 + 1 page
-perf_event_max_contexts_per_stack:
-==================================
+perf_event_max_contexts_per_stack
+=================================
Controls maximum number of stack frame context entries for
-(attr.sample_type & PERF_SAMPLE_CALLCHAIN) configured events, for
-instance, when using 'perf record -g' or 'perf trace --call-graph fp'.
+(``attr.sample_type & PERF_SAMPLE_CALLCHAIN``) configured events, for
+instance, when using '``perf record -g``' or '``perf trace --call-graph fp``'.
This can only be done when no events are in use that have callchains
-enabled, otherwise writing to this file will return -EBUSY.
+enabled, otherwise writing to this file will return ``-EBUSY``.
The default value is 8.
-pid_max:
-========
+pid_max
+=======
PID allocation wrap value. When the kernel's next PID value
reaches this value, it wraps back to a minimum PID value.
-PIDs of value pid_max or larger are not allocated.
+PIDs of value ``pid_max`` or larger are not allocated.
-ns_last_pid:
-============
+ns_last_pid
+===========
The last pid allocated in the current (the one task using this sysctl
lives in) pid namespace. When selecting a pid for a next task on fork
kernel tries to allocate a number starting from this one.
-powersave-nap: (PPC only)
-=========================
+powersave-nap (PPC only)
+========================
If set, Linux-PPC will use the 'nap' mode of powersaving,
otherwise the 'doze' mode will be used.
+
==============================================================
-printk:
-=======
+printk
+======
-The four values in printk denote: console_loglevel,
-default_message_loglevel, minimum_console_loglevel and
-default_console_loglevel respectively.
+The four values in printk denote: ``console_loglevel``,
+``default_message_loglevel``, ``minimum_console_loglevel`` and
+``default_console_loglevel`` respectively.
These values influence printk() behavior when printing or
-logging error messages. See 'man 2 syslog' for more info on
+logging error messages. See '``man 2 syslog``' for more info on
the different loglevels.
-- console_loglevel:
- messages with a higher priority than
- this will be printed to the console
-- default_message_loglevel:
- messages without an explicit priority
- will be printed with this priority
-- minimum_console_loglevel:
- minimum (highest) value to which
- console_loglevel can be set
-- default_console_loglevel:
- default value for console_loglevel
+======================== =====================================
+console_loglevel messages with a higher priority than
+ this will be printed to the console
+default_message_loglevel messages without an explicit priority
+ will be printed with this priority
+minimum_console_loglevel minimum (highest) value to which
+ console_loglevel can be set
+default_console_loglevel default value for console_loglevel
+======================== =====================================
-printk_delay:
-=============
+printk_delay
+============
-Delay each printk message in printk_delay milliseconds
+Delay each printk message in ``printk_delay`` milliseconds
Value from 0 - 10000 is allowed.
-printk_ratelimit:
-=================
+printk_ratelimit
+================
-Some warning messages are rate limited. printk_ratelimit specifies
+Some warning messages are rate limited. ``printk_ratelimit`` specifies
the minimum length of time between these messages (in seconds).
The default value is 5 seconds.
A value of 0 will disable rate limiting.
-printk_ratelimit_burst:
-=======================
+printk_ratelimit_burst
+======================
-While long term we enforce one message per printk_ratelimit
+While long term we enforce one message per `printk_ratelimit`_
seconds, we do allow a burst of messages to pass through.
-printk_ratelimit_burst specifies the number of messages we can
+``printk_ratelimit_burst`` specifies the number of messages we can
send before ratelimiting kicks in.
The default value is 10 messages.
-printk_devkmsg:
-===============
+printk_devkmsg
+==============
-Control the logging to /dev/kmsg from userspace:
+Control the logging to ``/dev/kmsg`` from userspace:
-ratelimit:
- default, ratelimited
+========= =============================================
+ratelimit default, ratelimited
+on unlimited logging to /dev/kmsg from userspace
+off logging to /dev/kmsg disabled
+========= =============================================
-on: unlimited logging to /dev/kmsg from userspace
-
-off: logging to /dev/kmsg disabled
-
-The kernel command line parameter printk.devkmsg= overrides this and is
+The kernel command line parameter ``printk.devkmsg=`` overrides this and is
a one-time setting until next reboot: once set, it cannot be changed by
this sysctl interface anymore.
+==============================================================
-randomize_va_space:
-===================
+
+pty
+===
+
+See Documentation/filesystems/devpts.txt.
+
+
+randomize_va_space
+==================
This option can be used to select the type of process address
space randomization that is used in the system, for architectures
@@ -881,10 +878,10 @@
This, among other things, implies that shared libraries will be
loaded to random addresses. Also for PIE-linked binaries, the
location of code start is randomized. This is the default if the
- CONFIG_COMPAT_BRK option is enabled.
+ ``CONFIG_COMPAT_BRK`` option is enabled.
2 Additionally enable heap randomization. This is the default if
- CONFIG_COMPAT_BRK is disabled.
+ ``CONFIG_COMPAT_BRK`` is disabled.
There are a few legacy applications out there (such as some ancient
versions of libc.so.5 from 1996) that assume that brk area starts
@@ -894,31 +891,27 @@
systems it is safe to choose full randomization.
Systems with ancient and/or broken binaries should be configured
- with CONFIG_COMPAT_BRK enabled, which excludes the heap from process
+ with ``CONFIG_COMPAT_BRK`` enabled, which excludes the heap from process
address space randomization.
== ===========================================================================
-reboot-cmd: (Sparc only)
-========================
+real-root-dev
+=============
+
+See :doc:`/admin-guide/initrd`.
+
+
+reboot-cmd (SPARC only)
+=======================
??? This seems to be a way to give an argument to the Sparc
ROM/Flash boot loader. Maybe to tell it what to do after
rebooting. ???
-rtsig-max & rtsig-nr:
-=====================
-
-The file rtsig-max can be used to tune the maximum number
-of POSIX realtime (queued) signals that can be outstanding
-in the system.
-
-rtsig-nr shows the number of RT signals currently queued.
-
-
-sched_energy_aware:
-===================
+sched_energy_aware
+==================
Enables/disables Energy Aware Scheduling (EAS). EAS starts
automatically on platforms where it can run (that is,
@@ -928,75 +921,88 @@
this value to 0.
-sched_schedstats:
-=================
+sched_schedstats
+================
Enables/disables scheduler statistics. Enabling this feature
incurs a small amount of overhead in the scheduler but is
useful for debugging and performance tuning.
-sg-big-buff:
-============
+seccomp
+=======
+
+See :doc:`/userspace-api/seccomp_filter`.
+
+
+sg-big-buff
+===========
This file shows the size of the generic SCSI (sg) buffer.
You can't tune it just yet, but you could change it on
-compile time by editing include/scsi/sg.h and changing
-the value of SG_BIG_BUFF.
+compile time by editing ``include/scsi/sg.h`` and changing
+the value of ``SG_BIG_BUFF``.
There shouldn't be any reason to change this value. If
you can come up with one, you probably know what you
are doing anyway :)
-shmall:
-=======
+shmall
+======
This parameter sets the total amount of shared memory pages that
-can be used system wide. Hence, SHMALL should always be at least
-ceil(shmmax/PAGE_SIZE).
+can be used system wide. Hence, ``shmall`` should always be at least
+``ceil(shmmax/PAGE_SIZE)``.
-If you are not sure what the default PAGE_SIZE is on your Linux
-system, you can run the following command:
+If you are not sure what the default ``PAGE_SIZE`` is on your Linux
+system, you can run the following command::
# getconf PAGE_SIZE
-shmmax:
-=======
+shmmax
+======
This value can be used to query and set the run time limit
on the maximum shared memory segment size that can be created.
Shared memory segments up to 1Gb are now supported in the
-kernel. This value defaults to SHMMAX.
+kernel. This value defaults to ``SHMMAX``.
-shm_rmid_forced:
-================
+shmmni
+======
+
+This value determines the maximum number of shared memory segments.
+4096 by default (``SHMMNI``).
+
+
+shm_rmid_forced
+===============
Linux lets you set resource limits, including how much memory one
-process can consume, via setrlimit(2). Unfortunately, shared memory
+process can consume, via ``setrlimit(2)``. Unfortunately, shared memory
segments are allowed to exist without association with any process, and
thus might not be counted against any resource limits. If enabled,
shared memory segments are automatically destroyed when their attach
count becomes zero after a detach or a process termination. It will
also destroy segments that were created, but never attached to, on exit
-from the process. The only use left for IPC_RMID is to immediately
+from the process. The only use left for ``IPC_RMID`` is to immediately
destroy an unattached segment. Of course, this breaks the way things are
defined, so some applications might stop working. Note that this
feature will do you no good unless you also configure your resource
-limits (in particular, RLIMIT_AS and RLIMIT_NPROC). Most systems don't
+limits (in particular, ``RLIMIT_AS`` and ``RLIMIT_NPROC``). Most systems don't
need this.
Note that if you change this from 0 to 1, already created segments
without users and with a dead originative process will be destroyed.
-sysctl_writes_strict:
-=====================
+sysctl_writes_strict
+====================
Control how file position affects the behavior of updating sysctl values
-via the /proc/sys interface:
+via the ``/proc/sys`` interface:
== ======================================================================
-1 Legacy per-write sysctl value handling, with no printk warnings.
@@ -1013,8 +1019,8 @@
== ======================================================================
-softlockup_all_cpu_backtrace:
-=============================
+softlockup_all_cpu_backtrace
+============================
This value controls the soft lockup detector thread's behavior
when a soft lockup condition is detected as to whether or not
@@ -1024,43 +1030,80 @@
This feature is only applicable for architectures which support
NMI.
-0: do nothing. This is the default behavior.
-
-1: on detection capture more debug information.
+= ============================================
+0 Do nothing. This is the default behavior.
+1 On detection capture more debug information.
+= ============================================
-soft_watchdog:
-==============
+softlockup_panic
+=================
+
+This parameter can be used to control whether the kernel panics
+when a soft lockup is detected.
+
+= ============================================
+0 Don't panic on soft lockup.
+1 Panic on soft lockup.
+= ============================================
+
+This can also be set using the softlockup_panic kernel parameter.
+
+
+soft_watchdog
+=============
This parameter can be used to control the soft lockup detector.
- 0 - disable the soft lockup detector
-
- 1 - enable the soft lockup detector
+= =================================
+0 Disable the soft lockup detector.
+1 Enable the soft lockup detector.
+= =================================
The soft lockup detector monitors CPUs for threads that are hogging the CPUs
without rescheduling voluntarily, and thus prevent the 'watchdog/N' threads
from running. The mechanism depends on the CPUs ability to respond to timer
interrupts which are needed for the 'watchdog/N' threads to be woken up by
-the watchdog timer function, otherwise the NMI watchdog - if enabled - can
+the watchdog timer function, otherwise the NMI watchdog — if enabled — can
detect a hard lockup condition.
-stack_erasing:
-==============
+stack_erasing
+=============
This parameter can be used to control kernel stack erasing at the end
-of syscalls for kernels built with CONFIG_GCC_PLUGIN_STACKLEAK.
+of syscalls for kernels built with ``CONFIG_GCC_PLUGIN_STACKLEAK``.
That erasing reduces the information which kernel stack leak bugs
can reveal and blocks some uninitialized stack variable attacks.
The tradeoff is the performance impact: on a single CPU system kernel
compilation sees a 1% slowdown, other systems and workloads may vary.
- 0: kernel stack erasing is disabled, STACKLEAK_METRICS are not updated.
+= ====================================================================
+0 Kernel stack erasing is disabled, STACKLEAK_METRICS are not updated.
+1 Kernel stack erasing is enabled (default), it is performed before
+ returning to the userspace at the end of syscalls.
+= ====================================================================
- 1: kernel stack erasing is enabled (default), it is performed before
- returning to the userspace at the end of syscalls.
+
+stop-a (SPARC only)
+===================
+
+Controls Stop-A:
+
+= ====================================
+0 Stop-A has no effect.
+1 Stop-A breaks to the PROM (default).
+= ====================================
+
+Stop-A is always enabled on a panic, so that the user can return to
+the boot PROM.
+
+
+sysrq
+=====
+
+See :doc:`/admin-guide/sysrq`.
tainted
@@ -1090,30 +1133,30 @@
131072 `(T)` The kernel was built with the struct randomization plugin
====== ===== ==============================================================
-See Documentation/admin-guide/tainted-kernels.rst for more information.
+See :doc:`/admin-guide/tainted-kernels` for more information.
-threads-max:
-============
+threads-max
+===========
This value controls the maximum number of threads that can be created
-using fork().
+using ``fork()``.
During initialization the kernel sets this value such that even if the
maximum number of threads is created, the thread structures occupy only
a part (1/8th) of the available RAM pages.
-The minimum value that can be written to threads-max is 1.
+The minimum value that can be written to ``threads-max`` is 1.
-The maximum value that can be written to threads-max is given by the
-constant FUTEX_TID_MASK (0x3fffffff).
+The maximum value that can be written to ``threads-max`` is given by the
+constant ``FUTEX_TID_MASK`` (0x3fffffff).
-If a value outside of this range is written to threads-max an error
-EINVAL occurs.
+If a value outside of this range is written to ``threads-max`` an
+``EINVAL`` error occurs.
-unknown_nmi_panic:
-==================
+unknown_nmi_panic
+=================
The value in this file affects behavior of handling NMI. When the
value is non-zero, unknown NMI is trapped and then panic occurs. At
@@ -1123,37 +1166,39 @@
example. If a system hangs up, try pressing the NMI switch.
-watchdog:
-=========
+watchdog
+========
This parameter can be used to disable or enable the soft lockup detector
-_and_ the NMI watchdog (i.e. the hard lockup detector) at the same time.
+*and* the NMI watchdog (i.e. the hard lockup detector) at the same time.
- 0 - disable both lockup detectors
-
- 1 - enable both lockup detectors
+= ==============================
+0 Disable both lockup detectors.
+1 Enable both lockup detectors.
+= ==============================
The soft lockup detector and the NMI watchdog can also be disabled or
-enabled individually, using the soft_watchdog and nmi_watchdog parameters.
-If the watchdog parameter is read, for example by executing::
+enabled individually, using the ``soft_watchdog`` and ``nmi_watchdog``
+parameters.
+If the ``watchdog`` parameter is read, for example by executing::
cat /proc/sys/kernel/watchdog
-the output of this command (0 or 1) shows the logical OR of soft_watchdog
-and nmi_watchdog.
+the output of this command (0 or 1) shows the logical OR of
+``soft_watchdog`` and ``nmi_watchdog``.
-watchdog_cpumask:
-=================
+watchdog_cpumask
+================
This value can be used to control on which cpus the watchdog may run.
-The default cpumask is all possible cores, but if NO_HZ_FULL is
+The default cpumask is all possible cores, but if ``NO_HZ_FULL`` is
enabled in the kernel config, and cores are specified with the
-nohz_full= boot argument, those cores are excluded by default.
+``nohz_full=`` boot argument, those cores are excluded by default.
Offline cores can be included in this mask, and if the core is later
brought online, the watchdog will be started based on the mask value.
-Typically this value would only be touched in the nohz_full case
+Typically this value would only be touched in the ``nohz_full`` case
to re-enable cores that by default were not running the watchdog,
if a kernel lockup was suspected on those cores.
@@ -1164,12 +1209,12 @@
echo 0,2-4 > /proc/sys/kernel/watchdog_cpumask
-watchdog_thresh:
-================
+watchdog_thresh
+===============
This value can be used to control the frequency of hrtimer and NMI
events and the soft and hard lockup thresholds. The default threshold
is 10 seconds.
-The softlockup threshold is (2 * watchdog_thresh). Setting this
+The softlockup threshold is (``2 * watchdog_thresh``). Setting this
tunable to zero will disable lockup detection altogether.
diff --git a/Documentation/arm/tcm.rst b/Documentation/arm/tcm.rst
index effd9c7..b256f97 100644
--- a/Documentation/arm/tcm.rst
+++ b/Documentation/arm/tcm.rst
@@ -4,18 +4,18 @@
Written by Linus Walleij <linus.walleij@stericsson.com>
-Some ARM SoC:s have a so-called TCM (Tightly-Coupled Memory).
+Some ARM SoCs have a so-called TCM (Tightly-Coupled Memory).
This is usually just a few (4-64) KiB of RAM inside the ARM
processor.
-Due to being embedded inside the CPU The TCM has a
+Due to being embedded inside the CPU, the TCM has a
Harvard-architecture, so there is an ITCM (instruction TCM)
and a DTCM (data TCM). The DTCM can not contain any
instructions, but the ITCM can actually contain data.
The size of DTCM or ITCM is minimum 4KiB so the typical
minimum configuration is 4KiB ITCM and 4KiB DTCM.
-ARM CPU:s have special registers to read out status, physical
+ARM CPUs have special registers to read out status, physical
location and size of TCM memories. arch/arm/include/asm/cputype.h
defines a CPUID_TCM register that you can read out from the
system control coprocessor. Documentation from ARM can be found
diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
index 02e0217..cf03b32 100644
--- a/Documentation/arm64/memory.rst
+++ b/Documentation/arm64/memory.rst
@@ -129,7 +129,7 @@
As a single binary will need to support both 48-bit and 52-bit VA
spaces, the VMEMMAP must be sized large enough for 52-bit VAs and
-also must be sized large enought to accommodate a fixed PAGE_OFFSET.
+also must be sized large enough to accommodate a fixed PAGE_OFFSET.
Most code in the kernel should not need to consider the VA_BITS, for
code that does need to know the VA size the variables are
diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index 9120e59..2c08c62 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -110,6 +110,8 @@
+----------------+-----------------+-----------------+-----------------------------+
| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 |
+----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX GICv3 | #38539 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
| Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 |
+----------------+-----------------+-----------------+-----------------------------+
| Cavium | ThunderX Core | #30115 | CAVIUM_ERRATUM_30115 |
diff --git a/Documentation/arm64/tagged-address-abi.rst b/Documentation/arm64/tagged-address-abi.rst
index d4a85d5..4a9d9c7 100644
--- a/Documentation/arm64/tagged-address-abi.rst
+++ b/Documentation/arm64/tagged-address-abi.rst
@@ -44,8 +44,15 @@
how the user addresses are used by the kernel:
1. User addresses not accessed by the kernel but used for address space
- management (e.g. ``mmap()``, ``mprotect()``, ``madvise()``). The use
- of valid tagged pointers in this context is always allowed.
+ management (e.g. ``mprotect()``, ``madvise()``). The use of valid
+ tagged pointers in this context is allowed with the exception of
+ ``brk()``, ``mmap()`` and the ``new_address`` argument to
+ ``mremap()`` as these have the potential to alias with existing
+ user addresses.
+
+ NOTE: This behaviour changed in v5.6 and so some earlier kernels may
+ incorrectly accept valid tagged pointers for the ``brk()``,
+ ``mmap()`` and ``mremap()`` system calls.
2. User addresses accessed by the kernel (e.g. ``write()``). This ABI
relaxation is disabled by default and the application thread needs to
diff --git a/Documentation/block/capability.rst b/Documentation/block/capability.rst
index 2cf258d..160a514 100644
--- a/Documentation/block/capability.rst
+++ b/Documentation/block/capability.rst
@@ -2,17 +2,9 @@
Generic Block Device Capability
===============================
-This file documents the sysfs file block/<disk>/capability
+This file documents the sysfs file ``block/<disk>/capability``.
-capability is a hex word indicating which capabilities a specific disk
-supports. For more information on bits not listed here, see
-include/linux/genhd.h
+``capability`` is a bitfield, printed in hexadecimal, indicating which
+capabilities a specific block device supports:
-GENHD_FL_MEDIA_CHANGE_NOTIFY
-----------------------------
-
-Value: 4
-
-When this bit is set, the disk supports Asynchronous Notification
-of media change events. These events will be broadcast to user
-space via kernel uevent.
+.. kernel-doc:: include/linux/genhd.h
diff --git a/Documentation/conf.py b/Documentation/conf.py
index 3c7bdf4..9ae8e9a 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -38,7 +38,11 @@
# ones.
extensions = ['kerneldoc', 'rstFlatTable', 'kernel_include', 'cdomain',
'kfigure', 'sphinx.ext.ifconfig', 'automarkup',
- 'maintainers_include']
+ 'maintainers_include', 'sphinx.ext.autosectionlabel' ]
+
+# Ensure that autosectionlabel will produce unique names
+autosectionlabel_prefix_document = True
+autosectionlabel_maxdepth = 2
# The name of the math extension changed on Sphinx 1.4
if (major == 1 and minor > 3) or (major > 1):
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index a501dc1..0897ad1 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -8,41 +8,81 @@
Core utilities
==============
+This section has general and "core core" documentation. The first is a
+massive grab-bag of kerneldoc info left over from the docbook days; it
+should really be broken up someday when somebody finds the energy to do
+it.
+
.. toctree::
:maxdepth: 1
kernel-api
- assoc_array
- atomic_ops
- cachetlb
- refcount-vs-atomic
- cpu_hotplug
- idr
- local_ops
workqueue
- genericirq
- xarray
- librs
- genalloc
- errseq
- packing
printk-formats
+ symbol-namespaces
+
+Data structures and low-level utilities
+=======================================
+
+Library functionality that is used throughout the kernel.
+
+.. toctree::
+ :maxdepth: 1
+
+ kobject
+ assoc_array
+ xarray
+ idr
circular-buffers
generic-radix-tree
+ packing
+ timekeeping
+ errseq
+
+Concurrency primitives
+======================
+
+How Linux keeps everything from happening at the same time. See
+:doc:`/locking/index` for more related documentation.
+
+.. toctree::
+ :maxdepth: 1
+
+ atomic_ops
+ refcount-vs-atomic
+ local_ops
+ padata
+ ../RCU/index
+
+Low-level hardware management
+=============================
+
+Cache management, managing CPU hotplug, etc.
+
+.. toctree::
+ :maxdepth: 1
+
+ cachetlb
+ cpu_hotplug
+ memory-hotplug
+ genericirq
+ protection-keys
+
+Memory management
+=================
+
+How to allocate and use memory in the kernel. Note that there is a lot
+more memory-management documentation in :doc:`/vm/index`.
+
+.. toctree::
+ :maxdepth: 1
+
memory-allocation
mm-api
+ genalloc
pin_user_pages
- gfp_mask-from-fs-io
- timekeeping
boot-time-mm
- memory-hotplug
- protection-keys
- ../RCU/index
- gcc-plugins
- symbol-namespaces
- padata
- ioctl
-
+ gfp_mask-from-fs-io
Interfaces for kernel debugging
===============================
@@ -53,6 +93,16 @@
debug-objects
tracepoint
+Everything else
+===============
+
+Documents that don't fit elsewhere or which have yet to be categorized.
+
+.. toctree::
+ :maxdepth: 1
+
+ librs
+
.. only:: subproject and html
Indices
diff --git a/Documentation/kobject.txt b/Documentation/core-api/kobject.rst
similarity index 86%
rename from Documentation/kobject.txt
rename to Documentation/core-api/kobject.rst
index ff4c250..1f62d4d 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/core-api/kobject.rst
@@ -25,7 +25,7 @@
usually embedded within some other structure which contains the stuff
the code is really interested in.
- No structure should EVER have more than one kobject embedded within it.
+ No structure should **EVER** have more than one kobject embedded within it.
If it does, the reference counting for the object is sure to be messed
up and incorrect, and your code will be buggy. So do not do this.
@@ -55,7 +55,7 @@
embedded in other structures. If you are used to thinking of things in
object-oriented terms, kobjects can be seen as a top-level, abstract class
from which other classes are derived. A kobject implements a set of
-capabilities which are not particularly useful by themselves, but which are
+capabilities which are not particularly useful by themselves, but are
nice to have in other objects. The C language does not allow for the
direct expression of inheritance, so other techniques - such as structure
embedding - must be used.
@@ -65,12 +65,12 @@
their own, but are invariably found embedded in the larger objects of
interest.)
-So, for example, the UIO code in drivers/uio/uio.c has a structure that
+So, for example, the UIO code in ``drivers/uio/uio.c`` has a structure that
defines the memory region associated with a uio device::
struct uio_map {
- struct kobject kobj;
- struct uio_mem *mem;
+ struct kobject kobj;
+ struct uio_mem *mem;
};
If you have a struct uio_map structure, finding its embedded kobject is
@@ -78,30 +78,30 @@
often have the opposite problem, however: given a struct kobject pointer,
what is the pointer to the containing structure? You must avoid tricks
(such as assuming that the kobject is at the beginning of the structure)
-and, instead, use the container_of() macro, found in <linux/kernel.h>::
+and, instead, use the container_of() macro, found in ``<linux/kernel.h>``::
container_of(pointer, type, member)
where:
- * "pointer" is the pointer to the embedded kobject,
- * "type" is the type of the containing structure, and
- * "member" is the name of the structure field to which "pointer" points.
+ * ``pointer`` is the pointer to the embedded kobject,
+ * ``type`` is the type of the containing structure, and
+ * ``member`` is the name of the structure field to which ``pointer`` points.
The return value from container_of() is a pointer to the corresponding
-container type. So, for example, a pointer "kp" to a struct kobject
-embedded *within* a struct uio_map could be converted to a pointer to the
-*containing* uio_map structure with::
+container type. So, for example, a pointer ``kp`` to a struct kobject
+embedded **within** a struct uio_map could be converted to a pointer to the
+**containing** uio_map structure with::
struct uio_map *u_map = container_of(kp, struct uio_map, kobj);
-For convenience, programmers often define a simple macro for "back-casting"
+For convenience, programmers often define a simple macro for **back-casting**
kobject pointers to the containing type. Exactly this happens in the
-earlier drivers/uio/uio.c, as you can see here::
+earlier ``drivers/uio/uio.c``, as you can see here::
struct uio_map {
- struct kobject kobj;
- struct uio_mem *mem;
+ struct kobject kobj;
+ struct uio_mem *mem;
};
#define to_map(map) container_of(map, struct uio_map, kobj)
@@ -125,7 +125,7 @@
register the kobject with sysfs, the function kobject_add() must be called::
int kobject_add(struct kobject *kobj, struct kobject *parent,
- const char *fmt, ...);
+ const char *fmt, ...);
This sets up the parent of the kobject and the name for the kobject
properly. If the kobject is to be associated with a specific kset,
@@ -172,13 +172,13 @@
int kobject_uevent(struct kobject *kobj, enum kobject_action action);
-Use the KOBJ_ADD action for when the kobject is first added to the kernel.
+Use the **KOBJ_ADD** action for when the kobject is first added to the kernel.
This should be done only after any attributes or children of the kobject
have been initialized properly, as userspace will instantly start to look
for them when this call happens.
When the kobject is removed from the kernel (details on how to do that are
-below), the uevent for KOBJ_REMOVE will be automatically created by the
+below), the uevent for **KOBJ_REMOVE** will be automatically created by the
kobject core, so the caller does not have to worry about doing that by
hand.
@@ -238,7 +238,7 @@
with the kobject_create_and_add(), can be of type kobj_attribute, so no
special custom attribute is needed to be created.
-See the example module, samples/kobject/kobject-example.c for an
+See the example module, ``samples/kobject/kobject-example.c`` for an
implementation of a simple kobject and attributes.
@@ -270,10 +270,10 @@
void my_object_release(struct kobject *kobj)
{
- struct my_object *mine = container_of(kobj, struct my_object, kobj);
+ struct my_object *mine = container_of(kobj, struct my_object, kobj);
- /* Perform any additional cleanup on this object, then... */
- kfree(mine);
+ /* Perform any additional cleanup on this object, then... */
+ kfree(mine);
}
One important point cannot be overstated: every kobject must have a
@@ -297,11 +297,11 @@
kobj_type::
struct kobj_type {
- void (*release)(struct kobject *kobj);
- const struct sysfs_ops *sysfs_ops;
- struct attribute **default_attrs;
- const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
- const void *(*namespace)(struct kobject *kobj);
+ void (*release)(struct kobject *kobj);
+ const struct sysfs_ops *sysfs_ops;
+ struct attribute **default_attrs;
+ const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
+ const void *(*namespace)(struct kobject *kobj);
};
This structure is used to describe a particular type of kobject (or, more
@@ -352,8 +352,8 @@
kset use::
struct kset *kset_create_and_add(const char *name,
- struct kset_uevent_ops *u,
- struct kobject *parent);
+ struct kset_uevent_ops *u,
+ struct kobject *parent);
When you are finished with the kset, call::
@@ -365,16 +365,16 @@
after kset_unregister() returns.
An example of using a kset can be seen in the
-samples/kobject/kset-example.c file in the kernel tree.
+``samples/kobject/kset-example.c`` file in the kernel tree.
If a kset wishes to control the uevent operations of the kobjects
associated with it, it can use the struct kset_uevent_ops to handle it::
struct kset_uevent_ops {
- int (*filter)(struct kset *kset, struct kobject *kobj);
- const char *(*name)(struct kset *kset, struct kobject *kobj);
- int (*uevent)(struct kset *kset, struct kobject *kobj,
- struct kobj_uevent_env *env);
+ int (*filter)(struct kset *kset, struct kobject *kobj);
+ const char *(*name)(struct kset *kset, struct kobject *kobj);
+ int (*uevent)(struct kset *kset, struct kobject *kobj,
+ struct kobj_uevent_env *env);
};
@@ -408,8 +408,8 @@
After a kobject has been registered with the kobject core successfully, it
must be cleaned up when the code is finished with it. To do that, call
kobject_put(). By doing this, the kobject core will automatically clean up
-all of the memory allocated by this kobject. If a KOBJ_ADD uevent has been
-sent for the object, a corresponding KOBJ_REMOVE uevent will be sent, and
+all of the memory allocated by this kobject. If a ``KOBJ_ADD`` uevent has been
+sent for the object, a corresponding ``KOBJ_REMOVE`` uevent will be sent, and
any other sysfs housekeeping will be handled for the caller properly.
If you need to do a two-stage delete of the kobject (say you are not
@@ -430,5 +430,5 @@
=========================
For a more complete example of using ksets and kobjects properly, see the
-example programs samples/kobject/{kobject-example.c,kset-example.c},
-which will be built as loadable modules if you select CONFIG_SAMPLE_KOBJECT.
+example programs ``samples/kobject/{kobject-example.c,kset-example.c}``,
+which will be built as loadable modules if you select ``CONFIG_SAMPLE_KOBJECT``.
diff --git a/Documentation/debugging-modules.txt b/Documentation/debugging-modules.txt
deleted file mode 100644
index 172ad4a..0000000
--- a/Documentation/debugging-modules.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Debugging Modules after 2.6.3
------------------------------
-
-In almost all distributions, the kernel asks for modules which don't
-exist, such as "net-pf-10" or whatever. Changing "modprobe -q" to
-"succeed" in this case is hacky and breaks some setups, and also we
-want to know if it failed for the fallback code for old aliases in
-fs/char_dev.c, for example.
-
-In the past a debugging message which would fill people's logs was
-emitted. This debugging message has been removed. The correct way
-of debugging module problems is something like this:
-
-echo '#! /bin/sh' > /tmp/modprobe
-echo 'echo "$@" >> /tmp/modprobe.log' >> /tmp/modprobe
-echo 'exec /sbin/modprobe "$@"' >> /tmp/modprobe
-chmod a+x /tmp/modprobe
-echo /tmp/modprobe > /proc/sys/kernel/modprobe
-
-Note that the above applies only when the *kernel* is requesting
-that the module be loaded -- it won't have any effect if that module
-is being loaded explicitly using "modprobe" from userspace.
diff --git a/Documentation/dev-tools/gcov.rst b/Documentation/dev-tools/gcov.rst
index 46aae52..7bd0135 100644
--- a/Documentation/dev-tools/gcov.rst
+++ b/Documentation/dev-tools/gcov.rst
@@ -203,7 +203,7 @@
may not correctly copy files from sysfs.
Solution
- Use ``cat``' to read ``.gcda`` files and ``cp -d`` to copy links.
+ Use ``cat`` to read ``.gcda`` files and ``cp -d`` to copy links.
Alternatively use the mechanism shown in Appendix B.
diff --git a/Documentation/dev-tools/kmemleak.rst b/Documentation/dev-tools/kmemleak.rst
index 3a289e8..fce2628 100644
--- a/Documentation/dev-tools/kmemleak.rst
+++ b/Documentation/dev-tools/kmemleak.rst
@@ -8,7 +8,8 @@
reported via /sys/kernel/debug/kmemleak. A similar method is used by the
Valgrind tool (``memcheck --leak-check``) to detect the memory leaks in
user-space applications.
-Kmemleak is supported on x86, arm, powerpc, sparc, sh, microblaze, ppc, mips, s390 and tile.
+Kmemleak is supported on x86, arm, arm64, powerpc, sparc, sh, microblaze, mips,
+s390, nds32, arc and xtensa.
Usage
-----
diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst
index 7cd56a1..607758a 100644
--- a/Documentation/dev-tools/kunit/usage.rst
+++ b/Documentation/dev-tools/kunit/usage.rst
@@ -551,6 +551,7 @@
Once the kernel is built and installed, a simple
.. code-block:: bash
+
modprobe example-test
...will run the tests.
diff --git a/Documentation/devicetree/bindings/arm/arm,scmi.txt b/Documentation/devicetree/bindings/arm/arm,scmi.txt
index f493d69..dc102c4e 100644
--- a/Documentation/devicetree/bindings/arm/arm,scmi.txt
+++ b/Documentation/devicetree/bindings/arm/arm,scmi.txt
@@ -102,7 +102,7 @@
[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
[2] Documentation/devicetree/bindings/power/power-domain.yaml
[3] Documentation/devicetree/bindings/thermal/thermal.txt
-[4] Documentation/devicetree/bindings/sram/sram.txt
+[4] Documentation/devicetree/bindings/sram/sram.yaml
[5] Documentation/devicetree/bindings/reset/reset.txt
Example:
diff --git a/Documentation/devicetree/bindings/arm/arm,scpi.txt b/Documentation/devicetree/bindings/arm/arm,scpi.txt
index 7b83ef4..dd04d9d 100644
--- a/Documentation/devicetree/bindings/arm/arm,scpi.txt
+++ b/Documentation/devicetree/bindings/arm/arm,scpi.txt
@@ -109,7 +109,7 @@
[0] http://infocenter.arm.com/help/topic/com.arm.doc.dui0922b/index.html
[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
[2] Documentation/devicetree/bindings/thermal/thermal.txt
-[3] Documentation/devicetree/bindings/sram/sram.txt
+[3] Documentation/devicetree/bindings/sram/sram.yaml
[4] Documentation/devicetree/bindings/power/power-domain.yaml
Example:
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm63138.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm63138.txt
index b82b6a0..8c7a490 100644
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm63138.txt
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm63138.txt
@@ -62,7 +62,7 @@
Syscon reboot node:
-See Documentation/devicetree/bindings/power/reset/syscon-reboot.txt for the
+See Documentation/devicetree/bindings/power/reset/syscon-reboot.yaml for the
detailed list of properties, the two values defined below are specific to the
BCM6328-style timer:
diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml
index 7a9c3ce..0d5b610 100644
--- a/Documentation/devicetree/bindings/arm/cpus.yaml
+++ b/Documentation/devicetree/bindings/arm/cpus.yaml
@@ -216,7 +216,7 @@
$ref: '/schemas/types.yaml#/definitions/phandle-array'
description: |
List of phandles to idle state nodes supported
- by this cpu (see ./idle-states.txt).
+ by this cpu (see ./idle-states.yaml).
capacity-dmips-mhz:
$ref: '/schemas/types.yaml#/definitions/uint32'
diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml
index a8e0b4a..0e17e1f 100644
--- a/Documentation/devicetree/bindings/arm/fsl.yaml
+++ b/Documentation/devicetree/bindings/arm/fsl.yaml
@@ -160,7 +160,7 @@
items:
- enum:
- armadeus,imx6dl-apf6 # APF6 (Solo) SoM
- - armadeus,imx6dl-apf6dldev # APF6 (Solo) SoM on APF6Dev board
+ - armadeus,imx6dl-apf6dev # APF6 (Solo) SoM on APF6Dev board
- eckelmann,imx6dl-ci4x10
- emtrion,emcon-mx6 # emCON-MX6S or emCON-MX6DL SoM
- emtrion,emcon-mx6-avari # emCON-MX6S or emCON-MX6DL SoM on Avari Base
diff --git a/Documentation/devicetree/bindings/arm/hisilicon/hi3519-sysctrl.txt b/Documentation/devicetree/bindings/arm/hisilicon/hi3519-sysctrl.txt
index 115c5be..8defacc 100644
--- a/Documentation/devicetree/bindings/arm/hisilicon/hi3519-sysctrl.txt
+++ b/Documentation/devicetree/bindings/arm/hisilicon/hi3519-sysctrl.txt
@@ -1,7 +1,7 @@
* Hisilicon Hi3519 System Controller Block
This bindings use the following binding:
-Documentation/devicetree/bindings/mfd/syscon.txt
+Documentation/devicetree/bindings/mfd/syscon.yaml
Required properties:
- compatible: "hisilicon,hi3519-sysctrl".
diff --git a/Documentation/devicetree/bindings/arm/msm/qcom,idle-state.txt b/Documentation/devicetree/bindings/arm/msm/qcom,idle-state.txt
index 06df04c..6ce0b21 100644
--- a/Documentation/devicetree/bindings/arm/msm/qcom,idle-state.txt
+++ b/Documentation/devicetree/bindings/arm/msm/qcom,idle-state.txt
@@ -81,4 +81,4 @@
};
};
-[1]. Documentation/devicetree/bindings/arm/idle-states.txt
+[1]. Documentation/devicetree/bindings/arm/idle-states.yaml
diff --git a/Documentation/devicetree/bindings/arm/omap/mpu.txt b/Documentation/devicetree/bindings/arm/omap/mpu.txt
index f301e63..e41490e 100644
--- a/Documentation/devicetree/bindings/arm/omap/mpu.txt
+++ b/Documentation/devicetree/bindings/arm/omap/mpu.txt
@@ -17,7 +17,7 @@
- pm-sram: Phandles to ocmcram nodes to be used for power management.
First should be type 'protect-exec' for the driver to use to copy
and run PM functions, second should be regular pool to be used for
- data region for code. See Documentation/devicetree/bindings/sram/sram.txt
+ data region for code. See Documentation/devicetree/bindings/sram/sram.yaml
for more details.
Examples:
diff --git a/Documentation/devicetree/bindings/arm/psci.yaml b/Documentation/devicetree/bindings/arm/psci.yaml
index 8ef8542..5e66934 100644
--- a/Documentation/devicetree/bindings/arm/psci.yaml
+++ b/Documentation/devicetree/bindings/arm/psci.yaml
@@ -100,13 +100,14 @@
bindings in [1]) must specify this property.
[1] Kernel documentation - ARM idle states bindings
- Documentation/devicetree/bindings/arm/idle-states.txt
+ Documentation/devicetree/bindings/arm/idle-states.yaml
- "#power-domain-cells":
- description:
- The number of cells in a PM domain specifier as per binding in [3].
- Must be 0 as to represent a single PM domain.
-
+patternProperties:
+ "^power-domain-":
+ allOf:
+ - $ref: "../power/power-domain.yaml#"
+ type: object
+ description: |
ARM systems can have multiple cores, sometimes in an hierarchical
arrangement. This often, but not always, maps directly to the processor
power topology of the system. Individual nodes in a topology have their
@@ -122,14 +123,8 @@
helps to implement support for OSI mode and OS implementations may choose
to mandate it.
- [3] Documentation/devicetree/bindings/power/power_domain.txt
- [4] Documentation/devicetree/bindings/power/domain-idle-state.txt
-
- power-domains:
- $ref: '/schemas/types.yaml#/definitions/phandle-array'
- description:
- List of phandles and PM domain specifiers, as defined by bindings of the
- PM domain provider.
+ [3] Documentation/devicetree/bindings/power/power-domain.yaml
+ [4] Documentation/devicetree/bindings/power/domain-idle-state.yaml
required:
- compatible
@@ -199,7 +194,7 @@
CPU0: cpu@0 {
device_type = "cpu";
- compatible = "arm,cortex-a53", "arm,armv8";
+ compatible = "arm,cortex-a53";
reg = <0x0>;
enable-method = "psci";
power-domains = <&CPU_PD0>;
@@ -208,7 +203,7 @@
CPU1: cpu@1 {
device_type = "cpu";
- compatible = "arm,cortex-a57", "arm,armv8";
+ compatible = "arm,cortex-a53";
reg = <0x100>;
enable-method = "psci";
power-domains = <&CPU_PD1>;
@@ -224,6 +219,9 @@
exit-latency-us = <10>;
min-residency-us = <100>;
};
+ };
+
+ domain-idle-states {
CLUSTER_RET: cluster-retention {
compatible = "domain-idle-state";
@@ -247,19 +245,19 @@
compatible = "arm,psci-1.0";
method = "smc";
- CPU_PD0: cpu-pd0 {
+ CPU_PD0: power-domain-cpu0 {
#power-domain-cells = <0>;
domain-idle-states = <&CPU_PWRDN>;
power-domains = <&CLUSTER_PD>;
};
- CPU_PD1: cpu-pd1 {
+ CPU_PD1: power-domain-cpu1 {
#power-domain-cells = <0>;
domain-idle-states = <&CPU_PWRDN>;
power-domains = <&CLUSTER_PD>;
};
- CLUSTER_PD: cluster-pd {
+ CLUSTER_PD: power-domain-cluster {
#power-domain-cells = <0>;
domain-idle-states = <&CLUSTER_RET>, <&CLUSTER_PWRDN>;
};
diff --git a/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml b/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
index 68917bb..55f7938 100644
--- a/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
+++ b/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
@@ -52,7 +52,7 @@
examples:
- |
- mlahb: ahb {
+ mlahb: ahb@38000000 {
compatible = "st,mlahb", "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml b/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
index 9fe11ce..8097361 100644
--- a/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
+++ b/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
@@ -70,7 +70,6 @@
#size-cells = <0>;
pmic@3e3 {
- compatible = "...";
reg = <0x3e3>;
/* ... */
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-osc-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-osc-clk.yaml
index 69cfa4a..c604822 100644
--- a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-osc-clk.yaml
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-osc-clk.yaml
@@ -40,7 +40,7 @@
examples:
- |
- osc24M: clk@01c20050 {
+ osc24M: clk@1c20050 {
#clock-cells = <0>;
compatible = "allwinner,sun4i-a10-osc-clk";
reg = <0x01c20050 0x4>;
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-gt-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-gt-clk.yaml
index 07f38de..43963c3 100644
--- a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-gt-clk.yaml
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-gt-clk.yaml
@@ -41,7 +41,7 @@
examples:
- |
- clk@0600005c {
+ clk@600005c {
#clock-cells = <0>;
compatible = "allwinner,sun9i-a80-gt-clk";
reg = <0x0600005c 0x4>;
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-apq8064.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-apq8064.yaml
index 17f8717..3647007 100644
--- a/Documentation/devicetree/bindings/clock/qcom,gcc-apq8064.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-apq8064.yaml
@@ -42,7 +42,7 @@
be part of GCC and hence the TSENS properties can also be part
of the GCC/clock-controller node.
For more details on the TSENS properties please refer
- Documentation/devicetree/bindings/thermal/qcom-tsens.txt
+ Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
nvmem-cell-names:
minItems: 1
diff --git a/Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml b/Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml
index 33c7842..8b9a8f3 100644
--- a/Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml
+++ b/Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml
@@ -23,6 +23,8 @@
- items:
- const: allwinner,sun7i-a20-crypto
- const: allwinner,sun4i-a10-crypto
+ - items:
+ - const: allwinner,sun8i-a33-crypto
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml
index 86ad617..5ff9cf2 100644
--- a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml
+++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml
@@ -43,9 +43,13 @@
- enum:
- allwinner,sun8i-h3-tcon-tv
- allwinner,sun50i-a64-tcon-tv
- - allwinner,sun50i-h6-tcon-tv
- const: allwinner,sun8i-a83t-tcon-tv
+ - items:
+ - enum:
+ - allwinner,sun50i-h6-tcon-tv
+ - const: allwinner,sun8i-r40-tcon-tv
+
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tv-encoder.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tv-encoder.yaml
index 5d5d3966..6009324b 100644
--- a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tv-encoder.yaml
+++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tv-encoder.yaml
@@ -49,11 +49,7 @@
resets = <&tcon_ch0_clk 0>;
port {
- #address-cells = <1>;
- #size-cells = <0>;
-
- tve0_in_tcon0: endpoint@0 {
- reg = <0>;
+ tve0_in_tcon0: endpoint {
remote-endpoint = <&tcon0_out_tve0>;
};
};
diff --git a/Documentation/devicetree/bindings/display/bridge/anx6345.yaml b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml
index 6d72b3d..c211038 100644
--- a/Documentation/devicetree/bindings/display/bridge/anx6345.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml
@@ -79,21 +79,15 @@
#size-cells = <0>;
anx6345_in: port@0 {
- #address-cells = <1>;
- #size-cells = <0>;
reg = <0>;
- anx6345_in_tcon0: endpoint@0 {
- reg = <0>;
+ anx6345_in_tcon0: endpoint {
remote-endpoint = <&tcon0_out_anx6345>;
};
};
anx6345_out: port@1 {
- #address-cells = <1>;
- #size-cells = <0>;
reg = <1>;
- anx6345_out_panel: endpoint@0 {
- reg = <0>;
+ anx6345_out_panel: endpoint {
remote-endpoint = <&panel_in_edp>;
};
};
diff --git a/Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml b/Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml
index 4ebcea7..a614644 100644
--- a/Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml
+++ b/Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml
@@ -37,6 +37,8 @@
dsi@ff450000 {
#address-cells = <1>;
#size-cells = <0>;
+ reg = <0xff450000 0x1000>;
+
panel@0 {
compatible = "leadtek,ltk500hd1829";
reg = <0>;
diff --git a/Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml b/Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml
index 186e5e1..22c91be 100644
--- a/Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml
+++ b/Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml
@@ -37,6 +37,8 @@
dsi@ff450000 {
#address-cells = <1>;
#size-cells = <0>;
+ reg = <0xff450000 0x1000>;
+
panel@0 {
compatible = "xinpeng,xpp055c272";
reg = <0>;
diff --git a/Documentation/devicetree/bindings/display/simple-framebuffer.yaml b/Documentation/devicetree/bindings/display/simple-framebuffer.yaml
index 678776b..1db608c 100644
--- a/Documentation/devicetree/bindings/display/simple-framebuffer.yaml
+++ b/Documentation/devicetree/bindings/display/simple-framebuffer.yaml
@@ -174,10 +174,6 @@
};
};
- soc@1c00000 {
- lcdc0: lcdc@1c0c000 {
- compatible = "allwinner,sun4i-a10-lcdc";
- };
- };
+ lcdc0: lcdc { };
...
diff --git a/Documentation/devicetree/bindings/display/tilcdc/tilcdc.txt b/Documentation/devicetree/bindings/display/tilcdc/tilcdc.txt
index 7bf1bb4..aac617a 100644
--- a/Documentation/devicetree/bindings/display/tilcdc/tilcdc.txt
+++ b/Documentation/devicetree/bindings/display/tilcdc/tilcdc.txt
@@ -37,7 +37,7 @@
supports a single port with a single endpoint.
- See also Documentation/devicetree/bindings/display/tilcdc/panel.txt and
- Documentation/devicetree/bindings/display/tilcdc/tfp410.txt for connecting
+ Documentation/devicetree/bindings/display/bridge/ti,tfp410.txt for connecting
tfp410 DVI encoder or lcd panel to lcdc
[1] There is an errata about AM335x color wiring. For 16-bit color mode
diff --git a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml
index 8b5c346..34780d7 100644
--- a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml
+++ b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml
@@ -143,7 +143,7 @@
#size-cells = <2>;
dma-coherent;
dma-ranges;
- ranges;
+ ranges = <0x0 0x30800000 0x0 0x30800000 0x0 0x05000000>;
ti,sci-dev-id = <118>;
@@ -169,16 +169,4 @@
ti,sci-rm-range-rflow = <0x6>; /* GP RFLOW */
};
};
-
- mcasp0: mcasp@02B00000 {
- dmas = <&main_udmap 0xc400>, <&main_udmap 0x4400>;
- dma-names = "tx", "rx";
- };
-
- crypto: crypto@4E00000 {
- compatible = "ti,sa2ul-crypto";
-
- dmas = <&main_udmap 0xc000>, <&main_udmap 0x4000>, <&main_udmap 0x4001>;
- dma-names = "tx", "rx1", "rx2";
- };
};
diff --git a/Documentation/devicetree/bindings/edac/dmc-520.yaml b/Documentation/devicetree/bindings/edac/dmc-520.yaml
new file mode 100644
index 0000000..9272d2b
--- /dev/null
+++ b/Documentation/devicetree/bindings/edac/dmc-520.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/edac/dmc-520.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM DMC-520 EDAC bindings
+
+maintainers:
+ - Lei Wang <lewan@microsoft.com>
+
+description: |+
+ DMC-520 node is defined to describe DRAM error detection and correction.
+
+ https://static.docs.arm.com/100000/0200/corelink_dmc520_trm_100000_0200_01_en.pdf
+
+properties:
+ compatible:
+ items:
+ - const: brcm,dmc-520
+ - const: arm,dmc-520
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ minItems: 1
+ maxItems: 10
+
+ interrupt-names:
+ minItems: 1
+ maxItems: 10
+ items:
+ enum:
+ - ram_ecc_errc
+ - ram_ecc_errd
+ - dram_ecc_errc
+ - dram_ecc_errd
+ - failed_access
+ - failed_prog
+ - link_err
+ - temperature_event
+ - arch_fsm
+ - phy_request
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - interrupt-names
+
+examples:
+ - |
+ dmc0: dmc@200000 {
+ compatible = "brcm,dmc-520", "arm,dmc-520";
+ reg = <0x200000 0x80000>;
+ interrupts = <0x0 0x349 0x4>, <0x0 0x34B 0x4>;
+ interrupt-names = "dram_ecc_errc", "dram_ecc_errd";
+ };
diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml b/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml
index 4ea6a87..e8b99ad 100644
--- a/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml
+++ b/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml
@@ -84,31 +84,31 @@
gpu_opp_table: opp_table0 {
compatible = "operating-points-v2";
- opp@533000000 {
+ opp-533000000 {
opp-hz = /bits/ 64 <533000000>;
opp-microvolt = <1250000>;
};
- opp@450000000 {
+ opp-450000000 {
opp-hz = /bits/ 64 <450000000>;
opp-microvolt = <1150000>;
};
- opp@400000000 {
+ opp-400000000 {
opp-hz = /bits/ 64 <400000000>;
opp-microvolt = <1125000>;
};
- opp@350000000 {
+ opp-350000000 {
opp-hz = /bits/ 64 <350000000>;
opp-microvolt = <1075000>;
};
- opp@266000000 {
+ opp-266000000 {
opp-hz = /bits/ 64 <266000000>;
opp-microvolt = <1025000>;
};
- opp@160000000 {
+ opp-160000000 {
opp-hz = /bits/ 64 <160000000>;
opp-microvolt = <925000>;
};
- opp@100000000 {
+ opp-100000000 {
opp-hz = /bits/ 64 <100000000>;
opp-microvolt = <912500>;
};
diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml b/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml
index 36f59b3..8d966f3 100644
--- a/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml
+++ b/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml
@@ -138,31 +138,31 @@
gpu_opp_table: opp_table0 {
compatible = "operating-points-v2";
- opp@533000000 {
+ opp-533000000 {
opp-hz = /bits/ 64 <533000000>;
opp-microvolt = <1250000>;
};
- opp@450000000 {
+ opp-450000000 {
opp-hz = /bits/ 64 <450000000>;
opp-microvolt = <1150000>;
};
- opp@400000000 {
+ opp-400000000 {
opp-hz = /bits/ 64 <400000000>;
opp-microvolt = <1125000>;
};
- opp@350000000 {
+ opp-350000000 {
opp-hz = /bits/ 64 <350000000>;
opp-microvolt = <1075000>;
};
- opp@266000000 {
+ opp-266000000 {
opp-hz = /bits/ 64 <266000000>;
opp-microvolt = <1025000>;
};
- opp@160000000 {
+ opp-160000000 {
opp-hz = /bits/ 64 <160000000>;
opp-microvolt = <925000>;
};
- opp@100000000 {
+ opp-100000000 {
opp-hz = /bits/ 64 <100000000>;
opp-microvolt = <912500>;
};
diff --git a/Documentation/devicetree/bindings/hwmon/adi,axi-fan-control.yaml b/Documentation/devicetree/bindings/hwmon/adi,axi-fan-control.yaml
new file mode 100644
index 0000000..57a240d
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/adi,axi-fan-control.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright 2019 Analog Devices Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bindings/hwmon/adi,axi-fan-control.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices AXI FAN Control Device Tree Bindings
+
+maintainers:
+ - Nuno Sá <nuno.sa@analog.com>
+
+description: |+
+ Bindings for the Analog Devices AXI FAN Control driver. Spefications of the
+ core can be found in:
+
+ https://wiki.analog.com/resources/fpga/docs/axi_fan_control
+
+properties:
+ compatible:
+ enum:
+ - adi,axi-fan-control-1.00.a
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ pulses-per-revolution:
+ description:
+ Value specifying the number of pulses per revolution of the controlled
+ FAN.
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [1, 2, 4]
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - interrupts
+ - pulses-per-revolution
+
+examples:
+ - |
+ fpga_axi: fpga-axi@0 {
+ #address-cells = <0x2>;
+ #size-cells = <0x1>;
+
+ axi_fan_control: axi-fan-control@80000000 {
+ compatible = "adi,axi-fan-control-1.00.a";
+ reg = <0x0 0x80000000 0x10000>;
+ clocks = <&clk 71>;
+ interrupts = <0 110 0>;
+ pulses-per-revolution = <2>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/hwmon/adt7475.yaml b/Documentation/devicetree/bindings/hwmon/adt7475.yaml
new file mode 100644
index 0000000..7698503
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/adt7475.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/adt7475.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ADT7475 hwmon sensor
+
+maintainers:
+ - Jean Delvare <jdelvare@suse.com>
+
+description: |
+ The ADT7473, ADT7475, ADT7476, and ADT7490 are thermal monitors and multiple
+ PWN fan controllers.
+
+ They support monitoring and controlling up to four fans (the ADT7490 can only
+ control up to three). They support reading a single on chip temperature
+ sensor and two off chip temperature sensors (the ADT7490 additionally
+ supports measuring up to three current external temperature sensors with
+ series resistance cancellation (SRC)).
+
+ Datasheets:
+ https://www.onsemi.com/pub/Collateral/ADT7473-D.PDF
+ https://www.onsemi.com/pub/Collateral/ADT7475-D.PDF
+ https://www.onsemi.com/pub/Collateral/ADT7476-D.PDF
+ https://www.onsemi.com/pub/Collateral/ADT7490-D.PDF
+
+ Description taken from onsemiconductors specification sheets, with minor
+ rephrasing.
+
+properties:
+ compatible:
+ enum:
+ - adi,adt7473
+ - adi,adt7475
+ - adi,adt7476
+ - adi,adt7490
+
+ reg:
+ maxItems: 1
+
+patternProperties:
+ "^adi,bypass-attenuator-in[0-4]$":
+ description: |
+ Configures bypassing the individual voltage input attenuator. If
+ set to 1 the attenuator is bypassed if set to 0 the attenuator is
+ not bypassed. If the property is absent then the attenuator
+ retains it's configuration from the bios/bootloader.
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32
+ - enum: [0, 1]
+
+ "^adi,pwm-active-state$":
+ description: |
+ Integer array, represents the active state of the pwm outputs If set to 0
+ the pwm uses a logic low output for 100% duty cycle. If set to 1 the pwm
+ uses a logic high output for 100% duty cycle.
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32-array
+ - minItems: 3
+ maxItems: 3
+ items:
+ enum: [0, 1]
+ default: 1
+
+required:
+ - compatible
+ - reg
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ hwmon@2e {
+ compatible = "adi,adt7476";
+ reg = <0x2e>;
+ adi,bypass-attenuator-in0 = <1>;
+ adi,bypass-attenuator-in1 = <0>;
+ adi,pwm-active-state = <1 0 1>;
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/hwmon/ltc2978.txt b/Documentation/devicetree/bindings/hwmon/ltc2978.txt
index b428a70..4e7f621 100644
--- a/Documentation/devicetree/bindings/hwmon/ltc2978.txt
+++ b/Documentation/devicetree/bindings/hwmon/ltc2978.txt
@@ -2,20 +2,30 @@
Required properties:
- compatible: should contain one of:
+ * "lltc,ltc2972"
* "lltc,ltc2974"
* "lltc,ltc2975"
* "lltc,ltc2977"
* "lltc,ltc2978"
+ * "lltc,ltc2979"
* "lltc,ltc2980"
* "lltc,ltc3880"
* "lltc,ltc3882"
* "lltc,ltc3883"
+ * "lltc,ltc3884"
* "lltc,ltc3886"
* "lltc,ltc3887"
+ * "lltc,ltc3889"
+ * "lltc,ltc7880"
* "lltc,ltm2987"
+ * "lltc,ltm4664"
* "lltc,ltm4675"
* "lltc,ltm4676"
+ * "lltc,ltm4677"
+ * "lltc,ltm4678"
+ * "lltc,ltm4680"
* "lltc,ltm4686"
+ * "lltc,ltm4700"
- reg: I2C slave address
Optional properties:
@@ -25,13 +35,17 @@
standard binding for regulators; see regulator.txt.
Valid names of regulators depend on number of supplies supported per device:
+ * ltc2972 vout0 - vout1
* ltc2974, ltc2975 : vout0 - vout3
- * ltc2977, ltc2980, ltm2987 : vout0 - vout7
+ * ltc2977, ltc2979, ltc2980, ltm2987 : vout0 - vout7
* ltc2978 : vout0 - vout7
- * ltc3880, ltc3882, ltc3886 : vout0 - vout1
+ * ltc3880, ltc3882, ltc3884, ltc3886, ltc3887, ltc3889 : vout0 - vout1
+ * ltc7880 : vout0 - vout1
* ltc3883 : vout0
- * ltm4676 : vout0 - vout1
- * ltm4686 : vout0 - vout1
+ * ltm4664 : vout0 - vout1
+ * ltm4675, ltm4676, ltm4677, ltm4678 : vout0 - vout1
+ * ltm4680, ltm4686 : vout0 - vout1
+ * ltm4700 : vout0 - vout1
Example:
ltc2978@5e {
diff --git a/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml b/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml
index f46de17..cc3c8ea 100644
--- a/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml
@@ -123,7 +123,7 @@
samsung,syscon-phandle = <&pmu_system_controller>;
/* NTC thermistor is a hwmon device */
- ncp15wb473@0 {
+ ncp15wb473 {
compatible = "murata,ncp15wb473";
pullup-uv = <1800000>;
pullup-ohm = <47000>;
diff --git a/Documentation/devicetree/bindings/input/cypress,tm2-touchkey.txt b/Documentation/devicetree/bindings/input/cypress,tm2-touchkey.txt
index ef2ae72..921172f 100644
--- a/Documentation/devicetree/bindings/input/cypress,tm2-touchkey.txt
+++ b/Documentation/devicetree/bindings/input/cypress,tm2-touchkey.txt
@@ -5,6 +5,7 @@
* "cypress,tm2-touchkey" - for the touchkey found on the tm2 board
* "cypress,midas-touchkey" - for the touchkey found on midas boards
* "cypress,aries-touchkey" - for the touchkey found on aries boards
+ * "coreriver,tc360-touchkey" - for the Coreriver TouchCore 360 touchkey
- reg: I2C address of the chip.
- interrupts: interrupt to which the chip is connected (see interrupt
binding[0]).
diff --git a/Documentation/devicetree/bindings/input/ilitek,ili2xxx.txt b/Documentation/devicetree/bindings/input/ilitek,ili2xxx.txt
index dc194b2..cdcaa3f 100644
--- a/Documentation/devicetree/bindings/input/ilitek,ili2xxx.txt
+++ b/Documentation/devicetree/bindings/input/ilitek,ili2xxx.txt
@@ -1,9 +1,10 @@
-Ilitek ILI210x/ILI2117/ILI251x touchscreen controller
+Ilitek ILI210x/ILI2117/ILI2120/ILI251x touchscreen controller
Required properties:
- compatible:
ilitek,ili210x for ILI210x
ilitek,ili2117 for ILI2117
+ ilitek,ili2120 for ILI2120
ilitek,ili251x for ILI251x
- reg: The I2C address of the device
diff --git a/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml b/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml
index d7c3262..c99ed39 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml
@@ -62,7 +62,7 @@
examples:
- |
- i2c@00000000 {
+ i2c {
#address-cells = <1>;
#size-cells = <0>;
gt928@5d {
diff --git a/Documentation/devicetree/bindings/input/twl4030-pwrbutton.txt b/Documentation/devicetree/bindings/input/twl4030-pwrbutton.txt
index c864a46..f502121 100644
--- a/Documentation/devicetree/bindings/input/twl4030-pwrbutton.txt
+++ b/Documentation/devicetree/bindings/input/twl4030-pwrbutton.txt
@@ -1,7 +1,7 @@
Texas Instruments TWL family (twl4030) pwrbutton module
This module is part of the TWL4030. For more details about the whole
-chip see Documentation/devicetree/bindings/mfd/twl-familly.txt.
+chip see Documentation/devicetree/bindings/mfd/twl-family.txt.
This module provides a simple power button event via an Interrupt.
diff --git a/Documentation/devicetree/bindings/leds/common.yaml b/Documentation/devicetree/bindings/leds/common.yaml
index d97d099..c60b994 100644
--- a/Documentation/devicetree/bindings/leds/common.yaml
+++ b/Documentation/devicetree/bindings/leds/common.yaml
@@ -85,7 +85,7 @@
# LED will act as a back-light, controlled by the framebuffer system
- backlight
# LED will turn on (but for leds-gpio see "default-state" property in
- # Documentation/devicetree/bindings/leds/leds-gpio.txt)
+ # Documentation/devicetree/bindings/leds/leds-gpio.yaml)
- default-on
# LED "double" flashes at a load average based rate
- heartbeat
diff --git a/Documentation/devicetree/bindings/leds/register-bit-led.txt b/Documentation/devicetree/bindings/leds/register-bit-led.txt
index cf1ea40..c7af6f7 100644
--- a/Documentation/devicetree/bindings/leds/register-bit-led.txt
+++ b/Documentation/devicetree/bindings/leds/register-bit-led.txt
@@ -5,7 +5,7 @@
single LED. The register bit LEDs appear as children to the
syscon device, with the proper compatible string. For the
syscon bindings see:
-Documentation/devicetree/bindings/mfd/syscon.txt
+Documentation/devicetree/bindings/mfd/syscon.yaml
Each LED is represented as a sub-node of the syscon device. Each
node's name represents the name of the corresponding LED.
diff --git a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml
index 9af873b..8453ee3 100644
--- a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml
+++ b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml
@@ -33,24 +33,40 @@
maxItems: 1
clocks:
- minItems: 2
- maxItems: 3
- items:
- - description: The CSI interface clock
- - description: The CSI ISP clock
- - description: The CSI DRAM clock
+ oneOf:
+ - items:
+ - description: The CSI interface clock
+ - description: The CSI DRAM clock
+
+ - items:
+ - description: The CSI interface clock
+ - description: The CSI ISP clock
+ - description: The CSI DRAM clock
clock-names:
- minItems: 2
- maxItems: 3
- items:
- - const: bus
- - const: isp
- - const: ram
+ oneOf:
+ - items:
+ - const: bus
+ - const: ram
+
+ - items:
+ - const: bus
+ - const: isp
+ - const: ram
resets:
maxItems: 1
+ # FIXME: This should be made required eventually once every SoC will
+ # have the MBUS declared.
+ interconnects:
+ maxItems: 1
+
+ # FIXME: This should be made required eventually once every SoC will
+ # have the MBUS declared.
+ interconnect-names:
+ const: dma-mem
+
# See ./video-interfaces.txt for details
port:
type: object
diff --git a/Documentation/devicetree/bindings/media/ti,cal.yaml b/Documentation/devicetree/bindings/media/ti,cal.yaml
index 1ea7841..5e06662 100644
--- a/Documentation/devicetree/bindings/media/ti,cal.yaml
+++ b/Documentation/devicetree/bindings/media/ti,cal.yaml
@@ -177,7 +177,7 @@
};
};
- i2c5: i2c@4807c000 {
+ i2c {
clock-frequency = <400000>;
#address-cells = <1>;
#size-cells = <0>;
diff --git a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra124-emc.yaml b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra124-emc.yaml
index dd184348..3e0a8a9 100644
--- a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra124-emc.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra124-emc.yaml
@@ -347,6 +347,7 @@
interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
#iommu-cells = <1>;
+ #reset-cells = <1>;
};
external-memory-controller@7001b000 {
@@ -363,20 +364,23 @@
timing-0 {
clock-frequency = <12750000>;
- nvidia,emc-zcal-cnt-long = <0x00000042>;
- nvidia,emc-auto-cal-interval = <0x001fffff>;
- nvidia,emc-ctt-term-ctrl = <0x00000802>;
- nvidia,emc-cfg = <0x73240000>;
- nvidia,emc-cfg-2 = <0x000008c5>;
- nvidia,emc-sel-dpd-ctrl = <0x00040128>;
- nvidia,emc-bgbias-ctl0 = <0x00000008>;
nvidia,emc-auto-cal-config = <0xa1430000>;
nvidia,emc-auto-cal-config2 = <0x00000000>;
nvidia,emc-auto-cal-config3 = <0x00000000>;
- nvidia,emc-mode-reset = <0x80001221>;
+ nvidia,emc-auto-cal-interval = <0x001fffff>;
+ nvidia,emc-bgbias-ctl0 = <0x00000008>;
+ nvidia,emc-cfg = <0x73240000>;
+ nvidia,emc-cfg-2 = <0x000008c5>;
+ nvidia,emc-ctt-term-ctrl = <0x00000802>;
nvidia,emc-mode-1 = <0x80100003>;
nvidia,emc-mode-2 = <0x80200008>;
nvidia,emc-mode-4 = <0x00000000>;
+ nvidia,emc-mode-reset = <0x80001221>;
+ nvidia,emc-mrs-wait-cnt = <0x000e000e>;
+ nvidia,emc-sel-dpd-ctrl = <0x00040128>;
+ nvidia,emc-xm2dqspadctrl2 = <0x0130b118>;
+ nvidia,emc-zcal-cnt-long = <0x00000042>;
+ nvidia,emc-zcal-interval = <0x00000000>;
nvidia,emc-configuration = <
0x00000000 /* EMC_RC */
diff --git a/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt b/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt
index 44d7146..63f674ff 100644
--- a/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt
+++ b/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt
@@ -32,7 +32,7 @@
- sram : Phandles for generic sram driver nodes,
first should be type 'protect-exec' for the driver to use to copy
and run PM functions, second should be regular pool to be used for
- data region for code. See Documentation/devicetree/bindings/sram/sram.txt
+ data region for code. See Documentation/devicetree/bindings/sram/sram.yaml
for more details.
Optional properties:
diff --git a/Documentation/devicetree/bindings/mfd/max77650.yaml b/Documentation/devicetree/bindings/mfd/max77650.yaml
index 4a70f87..4803857 100644
--- a/Documentation/devicetree/bindings/mfd/max77650.yaml
+++ b/Documentation/devicetree/bindings/mfd/max77650.yaml
@@ -97,14 +97,14 @@
regulators {
compatible = "maxim,max77650-regulator";
- max77650_ldo: regulator@0 {
+ max77650_ldo: regulator-ldo {
regulator-compatible = "ldo";
regulator-name = "max77650-ldo";
regulator-min-microvolt = <1350000>;
regulator-max-microvolt = <2937500>;
};
- max77650_sbb0: regulator@1 {
+ max77650_sbb0: regulator-sbb0 {
regulator-compatible = "sbb0";
regulator-name = "max77650-sbb0";
regulator-min-microvolt = <800000>;
diff --git a/Documentation/devicetree/bindings/mfd/tps65910.txt b/Documentation/devicetree/bindings/mfd/tps65910.txt
index 4f62143..a5ced46 100644
--- a/Documentation/devicetree/bindings/mfd/tps65910.txt
+++ b/Documentation/devicetree/bindings/mfd/tps65910.txt
@@ -26,8 +26,8 @@
ldo6, ldo7, ldo8
- xxx-supply: Input voltage supply regulator.
- These entries are require if regulators are enabled for a device. Missing of these
- properties can cause the regulator registration fails.
+ These entries are required if regulators are enabled for a device. Missing these
+ properties can cause the regulator registration to fail.
If some of input supply is powered through battery or always-on supply then
also it is require to have these parameters with proper node handle of always
on power supply.
diff --git a/Documentation/devicetree/bindings/mfd/twl-familly.txt b/Documentation/devicetree/bindings/mfd/twl-family.txt
similarity index 100%
rename from Documentation/devicetree/bindings/mfd/twl-familly.txt
rename to Documentation/devicetree/bindings/mfd/twl-family.txt
diff --git a/Documentation/devicetree/bindings/mfd/zii,rave-sp.txt b/Documentation/devicetree/bindings/mfd/zii,rave-sp.txt
index 088eff9..e0f901e 100644
--- a/Documentation/devicetree/bindings/mfd/zii,rave-sp.txt
+++ b/Documentation/devicetree/bindings/mfd/zii,rave-sp.txt
@@ -20,7 +20,7 @@
Device Description
------ -----------
rave-sp-wdt : Watchdog
-rave-sp-nvmem : Interface to onborad EEPROM
+rave-sp-nvmem : Interface to onboard EEPROM
rave-sp-backlight : Display backlight
rave-sp-hwmon : Interface to onboard hardware sensors
rave-sp-leds : Interface to onboard LEDs
diff --git a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
index bb7e896..9134e9b 100644
--- a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
+++ b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
@@ -26,7 +26,7 @@
Documentation/devicetree/bindings/iommu/iommu.txt.
For arm-smmu binding, see:
-Documentation/devicetree/bindings/iommu/arm,smmu.txt.
+Documentation/devicetree/bindings/iommu/arm,smmu.yaml.
Required properties:
diff --git a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
index 3c0df40..8fded83 100644
--- a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
+++ b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
@@ -370,6 +370,7 @@
mmc3: mmc@1c12000 {
#address-cells = <1>;
#size-cells = <0>;
+ reg = <0x1c12000 0x200>;
pinctrl-names = "default";
pinctrl-0 = <&mmc3_pins_a>;
vmmc-supply = <®_vmmc3>;
diff --git a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt
index 19f5508..4a9145e 100644
--- a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt
+++ b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt
@@ -124,7 +124,7 @@
pinctrl-1 = <&mmc1_idle>;
pinctrl-2 = <&mmc1_sleep>;
...
- interrupts-extended = <&intc 64 &gpio2 28 GPIO_ACTIVE_LOW>;
+ interrupts-extended = <&intc 64 &gpio2 28 IRQ_TYPE_LEVEL_LOW>;
};
mmc1_idle : pinmux_cirq_pin {
diff --git a/Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt b/Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt
index f3893c4..d2eada5 100644
--- a/Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt
+++ b/Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt
@@ -27,7 +27,7 @@
- reg: shall contain the native Chip Select ids from 0 to max supported by
the cadence nand flash controller
-See Documentation/devicetree/bindings/mtd/nand.txt for more details on
+See Documentation/devicetree/bindings/mtd/nand-controller.yaml for more details on
generic bindings.
Example:
diff --git a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
index 48a7f91..88b57b0 100644
--- a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
+++ b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
@@ -45,7 +45,7 @@
switch queue
- resets: a single phandle and reset identifier pair. See
- Documentation/devicetree/binding/reset/reset.txt for details.
+ Documentation/devicetree/bindings/reset/reset.txt for details.
- reset-names: If the "reset" property is specified, this property should have
the value "switch" to denote the switch reset line.
diff --git a/Documentation/devicetree/bindings/net/fsl-fman.txt b/Documentation/devicetree/bindings/net/fsl-fman.txt
index 250f8d8..c00fb0d 100644
--- a/Documentation/devicetree/bindings/net/fsl-fman.txt
+++ b/Documentation/devicetree/bindings/net/fsl-fman.txt
@@ -110,6 +110,13 @@
Usage: required
Definition: See soc/fsl/qman.txt and soc/fsl/bman.txt
+- fsl,erratum-a050385
+ Usage: optional
+ Value type: boolean
+ Definition: A boolean property. Indicates the presence of the
+ erratum A050385 which indicates that DMA transactions that are
+ split can result in a FMan lock.
+
=============================================================================
FMan MURAM Node
diff --git a/Documentation/devicetree/bindings/net/mdio.yaml b/Documentation/devicetree/bindings/net/mdio.yaml
index 5d08d2f..50c3397 100644
--- a/Documentation/devicetree/bindings/net/mdio.yaml
+++ b/Documentation/devicetree/bindings/net/mdio.yaml
@@ -56,7 +56,6 @@
examples:
- |
davinci_mdio: mdio@5c030000 {
- compatible = "ti,davinci_mdio";
reg = <0x5c030000 0x1000>;
#address-cells = <1>;
#size-cells = <0>;
diff --git a/Documentation/devicetree/bindings/nvmem/nvmem.yaml b/Documentation/devicetree/bindings/nvmem/nvmem.yaml
index b43c6c6..6598022 100644
--- a/Documentation/devicetree/bindings/nvmem/nvmem.yaml
+++ b/Documentation/devicetree/bindings/nvmem/nvmem.yaml
@@ -76,6 +76,8 @@
qfprom: eeprom@700000 {
#address-cells = <1>;
#size-cells = <1>;
+ reg = <0x00700000 0x100000>;
+
wp-gpios = <&gpio1 3 GPIO_ACTIVE_HIGH>;
/* ... */
diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun4i-a10-usb-phy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun4i-a10-usb-phy.yaml
index 020ef9e..94ac236 100644
--- a/Documentation/devicetree/bindings/phy/allwinner,sun4i-a10-usb-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/allwinner,sun4i-a10-usb-phy.yaml
@@ -86,7 +86,7 @@
#include <dt-bindings/clock/sun4i-a10-ccu.h>
#include <dt-bindings/reset/sun4i-a10-ccu.h>
- usbphy: phy@01c13400 {
+ usbphy: phy@1c13400 {
#phy-cells = <1>;
compatible = "allwinner,sun4i-a10-usb-phy";
reg = <0x01c13400 0x10>, <0x01c14800 0x4>, <0x01c1c800 0x4>;
diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml
index bb690e2..135c7df 100644
--- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml
@@ -17,7 +17,7 @@
"aspeed,ast2400-scu", "syscon", "simple-mfd"
Refer to the the bindings described in
- Documentation/devicetree/bindings/mfd/syscon.txt
+ Documentation/devicetree/bindings/mfd/syscon.yaml
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml
index f7f5d57..824f7fd1 100644
--- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml
@@ -18,7 +18,7 @@
"aspeed,g5-scu", "syscon", "simple-mfd"
Refer to the the bindings described in
- Documentation/devicetree/bindings/mfd/syscon.txt
+ Documentation/devicetree/bindings/mfd/syscon.yaml
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml
index 3749fa2..ac8d1c3 100644
--- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml
@@ -17,7 +17,7 @@
"aspeed,ast2600-scu", "syscon", "simple-mfd"
Refer to the the bindings described in
- Documentation/devicetree/bindings/mfd/syscon.txt
+ Documentation/devicetree/bindings/mfd/syscon.yaml
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
index 754ea7a..ef4de32 100644
--- a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
@@ -248,7 +248,7 @@
};
//Example 3 pin groups
- pinctrl@60020000 {
+ pinctrl {
usart1_pins_a: usart1-0 {
pins1 {
pinmux = <STM32_PINMUX('A', 9, AF7)>;
diff --git a/Documentation/devicetree/bindings/power/amlogic,meson-ee-pwrc.yaml b/Documentation/devicetree/bindings/power/amlogic,meson-ee-pwrc.yaml
index aab70e8..d3098c9 100644
--- a/Documentation/devicetree/bindings/power/amlogic,meson-ee-pwrc.yaml
+++ b/Documentation/devicetree/bindings/power/amlogic,meson-ee-pwrc.yaml
@@ -18,7 +18,7 @@
"amlogic,meson-gx-hhi-sysctrl", "simple-mfd", "syscon"
Refer to the the bindings described in
- Documentation/devicetree/bindings/mfd/syscon.txt
+ Documentation/devicetree/bindings/mfd/syscon.yaml
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/power/domain-idle-state.txt b/Documentation/devicetree/bindings/power/domain-idle-state.txt
deleted file mode 100644
index eefc7ed..0000000
--- a/Documentation/devicetree/bindings/power/domain-idle-state.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-PM Domain Idle State Node:
-
-A domain idle state node represents the state parameters that will be used to
-select the state when there are no active components in the domain.
-
-The state node has the following parameters -
-
-- compatible:
- Usage: Required
- Value type: <string>
- Definition: Must be "domain-idle-state".
-
-- entry-latency-us
- Usage: Required
- Value type: <prop-encoded-array>
- Definition: u32 value representing worst case latency in
- microseconds required to enter the idle state.
- The exit-latency-us duration may be guaranteed
- only after entry-latency-us has passed.
-
-- exit-latency-us
- Usage: Required
- Value type: <prop-encoded-array>
- Definition: u32 value representing worst case latency
- in microseconds required to exit the idle state.
-
-- min-residency-us
- Usage: Required
- Value type: <prop-encoded-array>
- Definition: u32 value representing minimum residency duration
- in microseconds after which the idle state will yield
- power benefits after overcoming the overhead in entering
-i the idle state.
diff --git a/Documentation/devicetree/bindings/power/domain-idle-state.yaml b/Documentation/devicetree/bindings/power/domain-idle-state.yaml
new file mode 100644
index 0000000..dfba1af
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/domain-idle-state.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/domain-idle-state.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: PM Domain Idle States binding description
+
+maintainers:
+ - Ulf Hansson <ulf.hansson@linaro.org>
+
+description:
+ A domain idle state node represents the state parameters that will be used to
+ select the state when there are no active components in the PM domain.
+
+properties:
+ $nodename:
+ const: domain-idle-states
+
+patternProperties:
+ "^(cpu|cluster|domain)-":
+ type: object
+ description:
+ Each state node represents a domain idle state description.
+
+ properties:
+ compatible:
+ const: domain-idle-state
+
+ entry-latency-us:
+ description:
+ The worst case latency in microseconds required to enter the idle
+ state. Note that, the exit-latency-us duration may be guaranteed only
+ after the entry-latency-us has passed.
+
+ exit-latency-us:
+ description:
+ The worst case latency in microseconds required to exit the idle
+ state.
+
+ min-residency-us:
+ description:
+ The minimum residency duration in microseconds after which the idle
+ state will yield power benefits, after overcoming the overhead while
+ entering the idle state.
+
+ required:
+ - compatible
+ - entry-latency-us
+ - exit-latency-us
+ - min-residency-us
+
+examples:
+ - |
+
+ domain-idle-states {
+ domain_retention: domain-retention {
+ compatible = "domain-idle-state";
+ entry-latency-us = <20>;
+ exit-latency-us = <40>;
+ min-residency-us = <80>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/power/power-domain.yaml b/Documentation/devicetree/bindings/power/power-domain.yaml
index 455b573..6047aac 100644
--- a/Documentation/devicetree/bindings/power/power-domain.yaml
+++ b/Documentation/devicetree/bindings/power/power-domain.yaml
@@ -25,22 +25,20 @@
properties:
$nodename:
- pattern: "^(power-controller|power-domain)(@.*)?$"
+ pattern: "^(power-controller|power-domain)([@-].*)?$"
domain-idle-states:
$ref: /schemas/types.yaml#/definitions/phandle-array
- description:
- A phandle of an idle-state that shall be soaked into a generic domain
- power state. The idle state definitions are compatible with
- domain-idle-state specified in
- Documentation/devicetree/bindings/power/domain-idle-state.txt
- phandles that are not compatible with domain-idle-state will be ignored.
- The domain-idle-state property reflects the idle state of this PM domain
- and not the idle states of the devices or sub-domains in the PM domain.
- Devices and sub-domains have their own idle-states independent
- of the parent domain's idle states. In the absence of this property,
- the domain would be considered as capable of being powered-on
- or powered-off.
+ description: |
+ Phandles of idle states that defines the available states for the
+ power-domain provider. The idle state definitions are compatible with the
+ domain-idle-state bindings, specified in ./domain-idle-state.yaml.
+
+ Note that, the domain-idle-state property reflects the idle states of this
+ PM domain and not the idle states of the devices or sub-domains in the PM
+ domain. Devices and sub-domains have their own idle states independent of
+ the parent domain's idle states. In the absence of this property, the
+ domain would be considered as capable of being powered-on or powered-off.
operating-points-v2:
$ref: /schemas/types.yaml#/definitions/phandle-array
diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt
index 5b09b2d..08497ef 100644
--- a/Documentation/devicetree/bindings/power/power_domain.txt
+++ b/Documentation/devicetree/bindings/power/power_domain.txt
@@ -109,4 +109,4 @@
required-opps = <&domain1_opp_1>;
};
-[1]. Documentation/devicetree/bindings/power/domain-idle-state.txt
+[1]. Documentation/devicetree/bindings/power/domain-idle-state.yaml
diff --git a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
index f5cdac8..8b00519 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
@@ -161,7 +161,7 @@
sub-node is identified using the node's name, with valid values listed for each
of the PMICs below.
-pm8005:
+pm8004:
s2, s5
pm8005:
diff --git a/Documentation/devicetree/bindings/regulator/regulator.yaml b/Documentation/devicetree/bindings/regulator/regulator.yaml
index 92ff2e8..91a39a3 100644
--- a/Documentation/devicetree/bindings/regulator/regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/regulator.yaml
@@ -191,7 +191,7 @@
examples:
- |
- xyzreg: regulator@0 {
+ xyzreg: regulator {
regulator-min-microvolt = <1000000>;
regulator-max-microvolt = <2500000>;
regulator-always-on;
diff --git a/Documentation/devicetree/bindings/reset/intel,rcu-gw.yaml b/Documentation/devicetree/bindings/reset/intel,rcu-gw.yaml
index 246dea8..8ac4372 100644
--- a/Documentation/devicetree/bindings/reset/intel,rcu-gw.yaml
+++ b/Documentation/devicetree/bindings/reset/intel,rcu-gw.yaml
@@ -23,7 +23,11 @@
description: Global reset register offset and bit offset.
allOf:
- $ref: /schemas/types.yaml#/definitions/uint32-array
- - maxItems: 2
+ items:
+ - description: Register offset
+ - description: Register bit offset
+ minimum: 0
+ maximum: 31
"#reset-cells":
minimum: 2
diff --git a/Documentation/devicetree/bindings/reset/st,stm32mp1-rcc.txt b/Documentation/devicetree/bindings/reset/st,stm32mp1-rcc.txt
index b4edaf7..2880d5d 100644
--- a/Documentation/devicetree/bindings/reset/st,stm32mp1-rcc.txt
+++ b/Documentation/devicetree/bindings/reset/st,stm32mp1-rcc.txt
@@ -3,4 +3,4 @@
The RCC IP is both a reset and a clock controller.
-Please see Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.txt
+Please see Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml
diff --git a/Documentation/devicetree/bindings/sound/st,stm32-sai.txt b/Documentation/devicetree/bindings/sound/st,stm32-sai.txt
index 944743dd..c42b91e 100644
--- a/Documentation/devicetree/bindings/sound/st,stm32-sai.txt
+++ b/Documentation/devicetree/bindings/sound/st,stm32-sai.txt
@@ -36,7 +36,7 @@
- clock-names: Must contain "sai_ck".
Must also contain "MCLK", if SAI shares a master clock,
with a SAI set as MCLK clock provider.
- - dmas: see Documentation/devicetree/bindings/dma/stm32-dma.txt
+ - dmas: see Documentation/devicetree/bindings/dma/st,stm32-dma.yaml
- dma-names: identifier string for each DMA request line
"tx": if sai sub-block is configured as playback DAI
"rx": if sai sub-block is configured as capture DAI
diff --git a/Documentation/devicetree/bindings/sound/st,stm32-spdifrx.txt b/Documentation/devicetree/bindings/sound/st,stm32-spdifrx.txt
index 33826f2..ca91017 100644
--- a/Documentation/devicetree/bindings/sound/st,stm32-spdifrx.txt
+++ b/Documentation/devicetree/bindings/sound/st,stm32-spdifrx.txt
@@ -10,7 +10,7 @@
- clock-names: must contain "kclk"
- interrupts: cpu DAI interrupt line
- dmas: DMA specifiers for audio data DMA and iec control flow DMA
- See STM32 DMA bindings, Documentation/devicetree/bindings/dma/stm32-dma.txt
+ See STM32 DMA bindings, Documentation/devicetree/bindings/dma/st,stm32-dma.yaml
- dma-names: two dmas have to be defined, "rx" and "rx-ctrl"
Optional properties:
diff --git a/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml b/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml
index f0d9796..e49ecbf 100644
--- a/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml
+++ b/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml
@@ -49,7 +49,7 @@
dmas:
description: |
DMA specifiers for tx and rx dma. DMA fifo mode must be used. See
- the STM32 DMA bindings Documentation/devicetree/bindings/dma/stm32-dma.txt.
+ the STM32 DMA bindings Documentation/devicetree/bindings/dma/st,stm32-dma.yaml.
items:
- description: rx DMA channel
- description: tx DMA channel
diff --git a/Documentation/devicetree/bindings/sram/allwinner,sun4i-a10-system-control.yaml b/Documentation/devicetree/bindings/sram/allwinner,sun4i-a10-system-control.yaml
index 80bac7a..4b55094 100644
--- a/Documentation/devicetree/bindings/sram/allwinner,sun4i-a10-system-control.yaml
+++ b/Documentation/devicetree/bindings/sram/allwinner,sun4i-a10-system-control.yaml
@@ -125,7 +125,7 @@
#size-cells = <1>;
ranges;
- sram_a: sram@00000000 {
+ sram_a: sram@0 {
compatible = "mmio-sram";
reg = <0x00000000 0xc000>;
#address-cells = <1>;
diff --git a/Documentation/devicetree/bindings/thermal/brcm,avs-ro-thermal.yaml b/Documentation/devicetree/bindings/thermal/brcm,avs-ro-thermal.yaml
index d9fdf48..f3e68ed 100644
--- a/Documentation/devicetree/bindings/thermal/brcm,avs-ro-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/brcm,avs-ro-thermal.yaml
@@ -17,7 +17,7 @@
"brcm,bcm2711-avs-monitor", "syscon", "simple-mfd"
Refer to the the bindings described in
- Documentation/devicetree/bindings/mfd/syscon.txt
+ Documentation/devicetree/bindings/mfd/syscon.yaml
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml b/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml
index 23e989e..d918cee 100644
--- a/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml
+++ b/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml
@@ -87,7 +87,7 @@
examples:
- |
- timer {
+ timer@1c20c00 {
compatible = "allwinner,sun4i-a10-timer";
reg = <0x01c20c00 0x400>;
interrupts = <22>,
diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml
index 978de7d..330cab2 100644
--- a/Documentation/devicetree/bindings/trivial-devices.yaml
+++ b/Documentation/devicetree/bindings/trivial-devices.yaml
@@ -34,14 +34,6 @@
- adi,adt7461
# +/-1C TDM Extended Temp Range I.C
- adt7461
- # +/-1C TDM Extended Temp Range I.C
- - adi,adt7473
- # +/-1C TDM Extended Temp Range I.C
- - adi,adt7475
- # +/-1C TDM Extended Temp Range I.C
- - adi,adt7476
- # +/-1C TDM Extended Temp Range I.C
- - adi,adt7490
# Three-Axis Digital Accelerometer
- adi,adxl345
# Three-Axis Digital Accelerometer (backward-compatibility value "adi,adxl345" must be listed too)
@@ -350,6 +342,8 @@
- ti,ads7830
# Temperature Monitoring and Fan Control
- ti,amc6821
+ # Temperature sensor with 2-wire interface
+ - ti,lm73
# Temperature sensor with integrated fan control
- ti,lm96000
# I2C Touch-Screen Controller
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 9e67944..b3c8c62 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -205,6 +205,8 @@
description: Colorful GRP, Shenzhen Xueyushi Technology Ltd.
"^compulab,.*":
description: CompuLab Ltd.
+ "^coreriver,.*":
+ description: CORERIVER Semiconductor Co.,Ltd.
"^corpro,.*":
description: Chengdu Corpro Technology Co., Ltd.
"^cortina,.*":
diff --git a/Documentation/driver-api/80211/mac80211-advanced.rst b/Documentation/driver-api/80211/mac80211-advanced.rst
index 9f1c5bb..24cb64b 100644
--- a/Documentation/driver-api/80211/mac80211-advanced.rst
+++ b/Documentation/driver-api/80211/mac80211-advanced.rst
@@ -272,8 +272,8 @@
.. kernel-doc:: net/mac80211/sta_info.c
:doc: STA information lifetime rules
-Aggregation
-===========
+Aggregation Functions
+=====================
.. kernel-doc:: net/mac80211/sta_info.h
:functions: sta_ampdu_mlme
@@ -284,8 +284,8 @@
.. kernel-doc:: net/mac80211/sta_info.h
:functions: tid_ampdu_rx
-Synchronisation
-===============
+Synchronisation Functions
+=========================
TBD
diff --git a/Documentation/driver-api/dmaengine/client.rst b/Documentation/driver-api/dmaengine/client.rst
index e5953e7..2104830 100644
--- a/Documentation/driver-api/dmaengine/client.rst
+++ b/Documentation/driver-api/dmaengine/client.rst
@@ -151,8 +151,8 @@
Note that callbacks will always be invoked from the DMA
engines tasklet, never from interrupt context.
-Optional: per descriptor metadata
----------------------------------
+ **Optional: per descriptor metadata**
+
DMAengine provides two ways for metadata support.
DESC_METADATA_CLIENT
@@ -199,12 +199,15 @@
DESC_METADATA_CLIENT
- DMA_MEM_TO_DEV / DEV_MEM_TO_MEM:
+
1. prepare the descriptor (dmaengine_prep_*)
construct the metadata in the client's buffer
2. use dmaengine_desc_attach_metadata() to attach the buffer to the
descriptor
3. submit the transfer
+
- DMA_DEV_TO_MEM:
+
1. prepare the descriptor (dmaengine_prep_*)
2. use dmaengine_desc_attach_metadata() to attach the buffer to the
descriptor
@@ -215,6 +218,7 @@
DESC_METADATA_ENGINE
- DMA_MEM_TO_DEV / DEV_MEM_TO_MEM:
+
1. prepare the descriptor (dmaengine_prep_*)
2. use dmaengine_desc_get_metadata_ptr() to get the pointer to the
engine's metadata area
@@ -222,7 +226,9 @@
4. use dmaengine_desc_set_metadata_len() to tell the DMA engine the
amount of data the client has placed into the metadata buffer
5. submit the transfer
+
- DMA_DEV_TO_MEM:
+
1. prepare the descriptor (dmaengine_prep_*)
2. submit the transfer
3. on transfer completion, use dmaengine_desc_get_metadata_ptr() to get
@@ -278,8 +284,8 @@
void dma_async_issue_pending(struct dma_chan *chan);
-Further APIs:
--------------
+Further APIs
+------------
1. Terminate APIs
diff --git a/Documentation/driver-api/dmaengine/index.rst b/Documentation/driver-api/dmaengine/index.rst
index b9df904..bdc45d8 100644
--- a/Documentation/driver-api/dmaengine/index.rst
+++ b/Documentation/driver-api/dmaengine/index.rst
@@ -5,8 +5,8 @@
DMAEngine documentation provides documents for various aspects of DMAEngine
framework.
-DMAEngine documentation
------------------------
+DMAEngine development documentation
+-----------------------------------
This book helps with DMAengine internal APIs and guide for DMAEngine device
driver writers.
diff --git a/Documentation/driver-api/dmaengine/provider.rst b/Documentation/driver-api/dmaengine/provider.rst
index 790a150..56e5833 100644
--- a/Documentation/driver-api/dmaengine/provider.rst
+++ b/Documentation/driver-api/dmaengine/provider.rst
@@ -266,11 +266,15 @@
attached (via the dmaengine_desc_attach_metadata() helper to the descriptor.
From the DMA driver the following is expected for this mode:
+
- DMA_MEM_TO_DEV / DEV_MEM_TO_MEM
+
The data from the provided metadata buffer should be prepared for the DMA
controller to be sent alongside of the payload data. Either by copying to a
hardware descriptor, or highly coupled packet.
+
- DMA_DEV_TO_MEM
+
On transfer completion the DMA driver must copy the metadata to the client
provided metadata buffer before notifying the client about the completion.
After the transfer completion, DMA drivers must not touch the metadata
@@ -284,10 +288,14 @@
and dmaengine_desc_set_metadata_len() is provided as helper functions.
From the DMA driver the following is expected for this mode:
- - get_metadata_ptr
+
+ - get_metadata_ptr()
+
Should return a pointer for the metadata buffer, the maximum size of the
metadata buffer and the currently used / valid (if any) bytes in the buffer.
- - set_metadata_len
+
+ - set_metadata_len()
+
It is called by the clients after it have placed the metadata to the buffer
to let the DMA driver know the number of valid bytes provided.
diff --git a/Documentation/driver-api/driver-model/driver.rst b/Documentation/driver-api/driver-model/driver.rst
index baa6a85c..63887b8 100644
--- a/Documentation/driver-api/driver-model/driver.rst
+++ b/Documentation/driver-api/driver-model/driver.rst
@@ -210,7 +210,7 @@
While the typical use case for sync_state() is to have the kernel cleanly take
over management of devices from the bootloader, the usage of sync_state() is
not restricted to that. Use it whenever it makes sense to take an action after
-all the consumers of a device have probed.
+all the consumers of a device have probed::
int (*remove) (struct device *dev);
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 0ebe205..d4e78cb 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -17,6 +17,7 @@
driver-model/index
basics
infrastructure
+ ioctl
early-userspace/index
pm/index
clk
@@ -74,11 +75,12 @@
connector
console
dcdbas
- edid
eisa
ipmb
isa
isapnp
+ io-mapping
+ io_ordering
generic-counter
lightnvm-pblk
memory-devices/index
diff --git a/Documentation/io-mapping.txt b/Documentation/driver-api/io-mapping.rst
similarity index 100%
rename from Documentation/io-mapping.txt
rename to Documentation/driver-api/io-mapping.rst
diff --git a/Documentation/io_ordering.txt b/Documentation/driver-api/io_ordering.rst
similarity index 100%
rename from Documentation/io_ordering.txt
rename to Documentation/driver-api/io_ordering.rst
diff --git a/Documentation/core-api/ioctl.rst b/Documentation/driver-api/ioctl.rst
similarity index 100%
rename from Documentation/core-api/ioctl.rst
rename to Documentation/driver-api/ioctl.rst
diff --git a/Documentation/driver-api/ipmb.rst b/Documentation/driver-api/ipmb.rst
index 3ec3bae..209c49e 100644
--- a/Documentation/driver-api/ipmb.rst
+++ b/Documentation/driver-api/ipmb.rst
@@ -71,9 +71,13 @@
ipmb@10 {
compatible = "ipmb-dev";
reg = <0x10>;
+ i2c-protocol;
};
};
+If xmit of data to be done using raw i2c block vs smbus
+then "i2c-protocol" needs to be defined as above.
+
2) Manually from Linux::
modprobe ipmb-dev-int
diff --git a/Documentation/features/vm/pte_special/arch-support.txt b/Documentation/features/vm/pte_special/arch-support.txt
index 2dc5df6..3d492a3 100644
--- a/Documentation/features/vm/pte_special/arch-support.txt
+++ b/Documentation/features/vm/pte_special/arch-support.txt
@@ -23,7 +23,7 @@
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
- | riscv: | TODO |
+ | riscv: | ok |
| s390: | ok |
| sh: | ok |
| sparc: | ok |
diff --git a/Documentation/filesystems/9p.txt b/Documentation/filesystems/9p.rst
similarity index 63%
rename from Documentation/filesystems/9p.txt
rename to Documentation/filesystems/9p.rst
index fec7144..f054d1c 100644
--- a/Documentation/filesystems/9p.txt
+++ b/Documentation/filesystems/9p.rst
@@ -1,7 +1,10 @@
- v9fs: Plan 9 Resource Sharing for Linux
- =======================================
+.. SPDX-License-Identifier: GPL-2.0
-ABOUT
+=======================================
+v9fs: Plan 9 Resource Sharing for Linux
+=======================================
+
+About
=====
v9fs is a Unix implementation of the Plan 9 9p remote filesystem protocol.
@@ -14,32 +17,34 @@
The best detailed explanation of the Linux implementation and applications of
the 9p client is available in the form of a USENIX paper:
+
http://www.usenix.org/events/usenix05/tech/freenix/hensbergen.html
Other applications are described in the following papers:
- * XCPU & Clustering
- http://xcpu.org/papers/xcpu-talk.pdf
- * KVMFS: control file system for KVM
- http://xcpu.org/papers/kvmfs.pdf
- * CellFS: A New Programming Model for the Cell BE
- http://xcpu.org/papers/cellfs-talk.pdf
- * PROSE I/O: Using 9p to enable Application Partitions
- http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf
- * VirtFS: A Virtualization Aware File System pass-through
- http://goo.gl/3WPDg
-USAGE
+ * XCPU & Clustering
+ http://xcpu.org/papers/xcpu-talk.pdf
+ * KVMFS: control file system for KVM
+ http://xcpu.org/papers/kvmfs.pdf
+ * CellFS: A New Programming Model for the Cell BE
+ http://xcpu.org/papers/cellfs-talk.pdf
+ * PROSE I/O: Using 9p to enable Application Partitions
+ http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf
+ * VirtFS: A Virtualization Aware File System pass-through
+ http://goo.gl/3WPDg
+
+Usage
=====
-For remote file server:
+For remote file server::
mount -t 9p 10.10.1.2 /mnt/9
-For Plan 9 From User Space applications (http://swtch.com/plan9)
+For Plan 9 From User Space applications (http://swtch.com/plan9)::
mount -t 9p `namespace`/acme /mnt/9 -o trans=unix,uname=$USER
-For server running on QEMU host with virtio transport:
+For server running on QEMU host with virtio transport::
mount -t 9p -o trans=virtio <mount_tag> /mnt/9
@@ -48,18 +53,22 @@
associated "mount_tag" property. Available mount tags can be
seen by reading /sys/bus/virtio/drivers/9pnet_virtio/virtio<n>/mount_tag files.
-OPTIONS
+Options
=======
+ ============= ===============================================================
trans=name select an alternative transport. Valid options are
currently:
- unix - specifying a named pipe mount point
- tcp - specifying a normal TCP/IP connection
- fd - used passed file descriptors for connection
- (see rfdno and wfdno)
- virtio - connect to the next virtio channel available
- (from QEMU with trans_virtio module)
- rdma - connect to a specified RDMA channel
+
+ ======== ============================================
+ unix specifying a named pipe mount point
+ tcp specifying a normal TCP/IP connection
+ fd used passed file descriptors for connection
+ (see rfdno and wfdno)
+ virtio connect to the next virtio channel available
+ (from QEMU with trans_virtio module)
+ rdma connect to a specified RDMA channel
+ ======== ============================================
uname=name user name to attempt mount as on the remote server. The
server may override or ignore this value. Certain user
@@ -69,28 +78,36 @@
offering several exported file systems.
cache=mode specifies a caching policy. By default, no caches are used.
- none = default no cache policy, metadata and data
+
+ none
+ default no cache policy, metadata and data
alike are synchronous.
- loose = no attempts are made at consistency,
+ loose
+ no attempts are made at consistency,
intended for exclusive, read-only mounts
- fscache = use FS-Cache for a persistent, read-only
+ fscache
+ use FS-Cache for a persistent, read-only
cache backend.
- mmap = minimal cache that is only used for read-write
+ mmap
+ minimal cache that is only used for read-write
mmap. Northing else is cached, like cache=none
debug=n specifies debug level. The debug level is a bitmask.
- 0x01 = display verbose error messages
- 0x02 = developer debug (DEBUG_CURRENT)
- 0x04 = display 9p trace
- 0x08 = display VFS trace
- 0x10 = display Marshalling debug
- 0x20 = display RPC debug
- 0x40 = display transport debug
- 0x80 = display allocation debug
- 0x100 = display protocol message debug
- 0x200 = display Fid debug
- 0x400 = display packet debug
- 0x800 = display fscache tracing debug
+
+ ===== ================================
+ 0x01 display verbose error messages
+ 0x02 developer debug (DEBUG_CURRENT)
+ 0x04 display 9p trace
+ 0x08 display VFS trace
+ 0x10 display Marshalling debug
+ 0x20 display RPC debug
+ 0x40 display transport debug
+ 0x80 display allocation debug
+ 0x100 display protocol message debug
+ 0x200 display Fid debug
+ 0x400 display packet debug
+ 0x800 display fscache tracing debug
+ ===== ================================
rfdno=n the file descriptor for reading with trans=fd
@@ -103,9 +120,12 @@
noextend force legacy mode (no 9p2000.u or 9p2000.L semantics)
version=name Select 9P protocol version. Valid options are:
- 9p2000 - Legacy mode (same as noextend)
- 9p2000.u - Use 9P2000.u protocol
- 9p2000.L - Use 9P2000.L protocol
+
+ ======== ==============================
+ 9p2000 Legacy mode (same as noextend)
+ 9p2000.u Use 9P2000.u protocol
+ 9p2000.L Use 9P2000.L protocol
+ ======== ==============================
dfltuid attempt to mount as a particular uid
@@ -118,22 +138,27 @@
hosts. This functionality will be expanded in later versions.
access there are four access modes.
- user = if a user tries to access a file on v9fs
+ user
+ if a user tries to access a file on v9fs
filesystem for the first time, v9fs sends an
attach command (Tattach) for that user.
This is the default mode.
- <uid> = allows only user with uid=<uid> to access
+ <uid>
+ allows only user with uid=<uid> to access
the files on the mounted filesystem
- any = v9fs does single attach and performs all
+ any
+ v9fs does single attach and performs all
operations as one user
- client = ACL based access check on the 9p client
+ clien
+ ACL based access check on the 9p client
side for access validation
cachetag cache tag to use the specified persistent cache.
cache tags for existing cache sessions can be listed at
/sys/fs/9p/caches. (applies only to cache=fscache)
+ ============= ===============================================================
-RESOURCES
+Resources
=========
Protocol specifications are maintained on github:
@@ -158,4 +183,3 @@
For information on Plan 9 from User Space (Plan 9 applications and libraries
ported to Linux/BSD/OSX/etc) check out http://swtch.com/plan9
-
diff --git a/Documentation/filesystems/adfs.txt b/Documentation/filesystems/adfs.rst
similarity index 85%
rename from Documentation/filesystems/adfs.txt
rename to Documentation/filesystems/adfs.rst
index 0baa8e8..5b22cae 100644
--- a/Documentation/filesystems/adfs.txt
+++ b/Documentation/filesystems/adfs.rst
@@ -1,3 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================
+Acorn Disc Filing System - ADFS
+===============================
+
Filesystems supported by ADFS
-----------------------------
@@ -25,6 +31,7 @@
Mount options for ADFS
----------------------
+ ============ ======================================================
uid=nnn All files in the partition will be owned by
user id nnn. Default 0 (root).
gid=nnn All files in the partition will be in group
@@ -36,22 +43,23 @@
ftsuffix=n When ftsuffix=0, no file type suffix will be applied.
When ftsuffix=1, a hexadecimal suffix corresponding to
the RISC OS file type will be added. Default 0.
+ ============ ======================================================
Mapping of ADFS permissions to Linux permissions
------------------------------------------------
ADFS permissions consist of the following:
- Owner read
- Owner write
- Other read
- Other write
+ - Owner read
+ - Owner write
+ - Other read
+ - Other write
(In older versions, an 'execute' permission did exist, but this
- does not hold the same meaning as the Linux 'execute' permission
- and is now obsolete).
+ does not hold the same meaning as the Linux 'execute' permission
+ and is now obsolete).
- The mapping is performed as follows:
+ The mapping is performed as follows::
Owner read -> -r--r--r--
Owner write -> --w--w---w
@@ -66,17 +74,18 @@
Possible other mode permissions -> ----rwxrwx
Hence, with the default masks, if a file is owner read/write, and
- not a UnixExec filetype, then the permissions will be:
+ not a UnixExec filetype, then the permissions will be::
-rw-------
However, if the masks were ownmask=0770,othmask=0007, then this would
- be modified to:
+ be modified to::
+
-rw-rw----
There is no restriction on what you can do with these masks. You may
wish that either read bits give read access to the file for all, but
- keep the default write protection (ownmask=0755,othmask=0577):
+ keep the default write protection (ownmask=0755,othmask=0577)::
-rw-r--r--
diff --git a/Documentation/filesystems/affs.txt b/Documentation/filesystems/affs.rst
similarity index 85%
rename from Documentation/filesystems/affs.txt
rename to Documentation/filesystems/affs.rst
index 71b63c2..7f1a40d 100644
--- a/Documentation/filesystems/affs.txt
+++ b/Documentation/filesystems/affs.rst
@@ -1,9 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============================
Overview of Amiga Filesystems
=============================
Not all varieties of the Amiga filesystems are supported for reading and
writing. The Amiga currently knows six different filesystems:
+============== ===============================================================
DOS\0 The old or original filesystem, not really suited for
hard disks and normally not used on them, either.
Supported read/write.
@@ -23,6 +27,7 @@
sense on hard disks. Supported read only.
DOS\5 The Fast File System with directory cache. Supported read only.
+============== ===============================================================
All of the above filesystems allow block sizes from 512 to 32K bytes.
Supported block sizes are: 512, 1024, 2048 and 4096 bytes. Larger blocks
@@ -36,14 +41,18 @@
Mount options for the AFFS
==========================
-protect If this option is set, the protection bits cannot be altered.
+protect
+ If this option is set, the protection bits cannot be altered.
-setuid[=uid] This sets the owner of all files and directories in the file
+setuid[=uid]
+ This sets the owner of all files and directories in the file
system to uid or the uid of the current user, respectively.
-setgid[=gid] Same as above, but for gid.
+setgid[=gid]
+ Same as above, but for gid.
-mode=mode Sets the mode flags to the given (octal) value, regardless
+mode=mode
+ Sets the mode flags to the given (octal) value, regardless
of the original permissions. Directories will get an x
permission if the corresponding r bit is set.
This is useful since most of the plain AmigaOS files
@@ -53,33 +62,41 @@
The file system will return an error when filename exceeds
standard maximum filename length (30 characters).
-reserved=num Sets the number of reserved blocks at the start of the
+reserved=num
+ Sets the number of reserved blocks at the start of the
partition to num. You should never need this option.
Default is 2.
-root=block Sets the block number of the root block. This should never
+root=block
+ Sets the block number of the root block. This should never
be necessary.
-bs=blksize Sets the blocksize to blksize. Valid block sizes are 512,
+bs=blksize
+ Sets the blocksize to blksize. Valid block sizes are 512,
1024, 2048 and 4096. Like the root option, this should
never be necessary, as the affs can figure it out itself.
-quiet The file system will not return an error for disallowed
+quiet
+ The file system will not return an error for disallowed
mode changes.
-verbose The volume name, file system type and block size will
+verbose
+ The volume name, file system type and block size will
be written to the syslog when the filesystem is mounted.
-mufs The filesystem is really a muFS, also it doesn't
+mufs
+ The filesystem is really a muFS, also it doesn't
identify itself as one. This option is necessary if
the filesystem wasn't formatted as muFS, but is used
as one.
-prefix=path Path will be prefixed to every absolute path name of
+prefix=path
+ Path will be prefixed to every absolute path name of
symbolic links on an AFFS partition. Default = "/".
(See below.)
-volume=name When symbolic links with an absolute path are created
+volume=name
+ When symbolic links with an absolute path are created
on an AFFS partition, name will be prepended as the
volume name. Default = "" (empty string).
(See below.)
@@ -119,7 +136,7 @@
- All other flags (suid, sgid, ...) are ignored and will
not be retained.
-
+
Newly created files and directories will get the user and group ID
of the current user and a mode according to the umask.
@@ -148,11 +165,13 @@
Examples
========
-Command line:
+Command line::
+
mount Archive/Amiga/Workbench3.1.adf /mnt -t affs -o loop,verbose
mount /dev/sda3 /Amiga -t affs
-/etc/fstab entry:
+/etc/fstab entry::
+
/dev/sdb5 /amiga/Workbench affs noauto,user,exec,verbose 0 0
IMPORTANT NOTE
@@ -170,7 +189,8 @@
If the damage is already done, the following should fix the RDB
(where <disk> is the device name).
-DO AT YOUR OWN RISK:
+
+DO AT YOUR OWN RISK::
dd if=/dev/<disk> of=rdb.tmp count=1
cp rdb.tmp rdb.fixed
@@ -189,10 +209,14 @@
'nofilenametruncate' mount option can change that behavior.
Case is ignored by the affs in filename matching, but Linux shells
-do care about the case. Example (with /wb being an affs mounted fs):
+do care about the case. Example (with /wb being an affs mounted fs)::
+
rm /wb/WRONGCASE
-will remove /mnt/wrongcase, but
+
+will remove /mnt/wrongcase, but::
+
rm /wb/WR*
+
will not since the names are matched by the shell.
The block allocation is designed for hard disk partitions. If more
@@ -219,4 +243,4 @@
If you are interested in an Amiga Emulator for Linux, look at
-http://web.archive.org/web/*/http://www.freiburg.linux.de/~uae/
+http://web.archive.org/web/%2E/http://www.freiburg.linux.de/~uae/
diff --git a/Documentation/filesystems/afs.txt b/Documentation/filesystems/afs.rst
similarity index 89%
rename from Documentation/filesystems/afs.txt
rename to Documentation/filesystems/afs.rst
index 8c6ea7b..c4ec39a 100644
--- a/Documentation/filesystems/afs.txt
+++ b/Documentation/filesystems/afs.rst
@@ -1,8 +1,10 @@
- ====================
- kAFS: AFS FILESYSTEM
- ====================
+.. SPDX-License-Identifier: GPL-2.0
-Contents:
+====================
+kAFS: AFS FILESYSTEM
+====================
+
+.. Contents:
- Overview.
- Usage.
@@ -14,8 +16,7 @@
- The @sys substitution.
-========
-OVERVIEW
+Overview
========
This filesystem provides a fairly simple secure AFS filesystem driver. It is
@@ -35,35 +36,33 @@
(*) pioctl() system call.
-===========
-COMPILATION
+Compilation
===========
The filesystem should be enabled by turning on the kernel configuration
-options:
+options::
CONFIG_AF_RXRPC - The RxRPC protocol transport
CONFIG_RXKAD - The RxRPC Kerberos security handler
CONFIG_AFS - The AFS filesystem
-Additionally, the following can be turned on to aid debugging:
+Additionally, the following can be turned on to aid debugging::
CONFIG_AF_RXRPC_DEBUG - Permit AF_RXRPC debugging to be enabled
CONFIG_AFS_DEBUG - Permit AFS debugging to be enabled
They permit the debugging messages to be turned on dynamically by manipulating
-the masks in the following files:
+the masks in the following files::
/sys/module/af_rxrpc/parameters/debug
/sys/module/kafs/parameters/debug
-=====
-USAGE
+Usage
=====
When inserting the driver modules the root cell must be specified along with a
-list of volume location server IP addresses:
+list of volume location server IP addresses::
modprobe rxrpc
modprobe kafs rootcell=cambridge.redhat.com:172.16.18.73:172.16.18.91
@@ -77,14 +76,14 @@
is the actual filesystem driver for the AFS filesystem.
Once the module has been loaded, more modules can be added by the following
-procedure:
+procedure::
echo add grand.central.org 18.9.48.14:128.2.203.61:130.237.48.87 >/proc/fs/afs/cells
Where the parameters to the "add" command are the name of a cell and a list of
volume location servers within that cell, with the latter separated by colons.
-Filesystems can be mounted anywhere by commands similar to the following:
+Filesystems can be mounted anywhere by commands similar to the following::
mount -t afs "%cambridge.redhat.com:root.afs." /afs
mount -t afs "#cambridge.redhat.com:root.cell." /afs/cambridge
@@ -104,8 +103,7 @@
Additional cells can be added through /proc (see later section).
-===========
-MOUNTPOINTS
+Mountpoints
===========
AFS has a concept of mountpoints. In AFS terms, these are specially formatted
@@ -123,42 +121,40 @@
unmounted, otherwise error EBUSY will be returned.
This can be used by the administrator to attempt to unmount the whole AFS tree
-mounted on /afs in one go by doing:
+mounted on /afs in one go by doing::
umount /afs
-============
-DYNAMIC ROOT
+Dynamic Root
============
A mount option is available to create a serverless mount that is only usable
-for dynamic lookup. Creating such a mount can be done by, for example:
+for dynamic lookup. Creating such a mount can be done by, for example::
mount -t afs none /afs -o dyn
This creates a mount that just has an empty directory at the root. Attempting
to look up a name in this directory will cause a mountpoint to be created that
-looks up a cell of the same name, for example:
+looks up a cell of the same name, for example::
ls /afs/grand.central.org/
-===============
-PROC FILESYSTEM
+Proc Filesystem
===============
The AFS modules creates a "/proc/fs/afs/" directory and populates it:
(*) A "cells" file that lists cells currently known to the afs module and
- their usage counts:
+ their usage counts::
[root@andromeda ~]# cat /proc/fs/afs/cells
USE NAME
3 cambridge.redhat.com
(*) A directory per cell that contains files that list volume location
- servers, volumes, and active servers known within that cell.
+ servers, volumes, and active servers known within that cell::
[root@andromeda ~]# cat /proc/fs/afs/cambridge.redhat.com/servers
USE ADDR STATE
@@ -171,8 +167,7 @@
1 Val 20000000 20000001 20000002 root.afs
-=================
-THE CELL DATABASE
+The Cell Database
=================
The filesystem maintains an internal database of all the cells it knows and the
@@ -181,7 +176,7 @@
"rootcell=" argument or, if compiled in, using a "kafs.rootcell=" argument on
the kernel command line.
-Further cells can be added by commands similar to the following:
+Further cells can be added by commands similar to the following::
echo add CELLNAME VLADDR[:VLADDR][:VLADDR]... >/proc/fs/afs/cells
echo add grand.central.org 18.9.48.14:128.2.203.61:130.237.48.87 >/proc/fs/afs/cells
@@ -189,8 +184,7 @@
No other cell database operations are available at this time.
-========
-SECURITY
+Security
========
Secure operations are initiated by acquiring a key using the klog program. A
@@ -198,17 +192,17 @@
http://people.redhat.com/~dhowells/rxrpc/klog.c
-This should be compiled by:
+This should be compiled by::
make klog LDLIBS="-lcrypto -lcrypt -lkrb4 -lkeyutils"
-And then run as:
+And then run as::
./klog
Assuming it's successful, this adds a key of type RxRPC, named for the service
and cell, eg: "afs@<cellname>". This can be viewed with the keyctl program or
-by cat'ing /proc/keys:
+by cat'ing /proc/keys::
[root@andromeda ~]# keyctl show
Session Keyring
@@ -232,20 +226,19 @@
open the file.
-=====================
-THE @SYS SUBSTITUTION
+The @sys Substitution
=====================
The list of up to 16 @sys substitutions for the current network namespace can
-be configured by writing a list to /proc/fs/afs/sysname:
+be configured by writing a list to /proc/fs/afs/sysname::
[root@andromeda ~]# echo foo amd64_linux_26 >/proc/fs/afs/sysname
-or cleared entirely by writing an empty list:
+or cleared entirely by writing an empty list::
[root@andromeda ~]# echo >/proc/fs/afs/sysname
-The current list for current network namespace can be retrieved by:
+The current list for current network namespace can be retrieved by::
[root@andromeda ~]# cat /proc/fs/afs/sysname
foo
diff --git a/Documentation/filesystems/autofs-mount-control.txt b/Documentation/filesystems/autofs-mount-control.rst
similarity index 89%
rename from Documentation/filesystems/autofs-mount-control.txt
rename to Documentation/filesystems/autofs-mount-control.rst
index acc02fc..2903aed 100644
--- a/Documentation/filesystems/autofs-mount-control.txt
+++ b/Documentation/filesystems/autofs-mount-control.rst
@@ -1,4 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+====================================================================
Miscellaneous Device control operations for the autofs kernel module
====================================================================
@@ -36,24 +38,24 @@
module source you will see a third type called an offset, which is just
a direct mount in disguise) and indirect.
-Here is a master map with direct and indirect map entries:
+Here is a master map with direct and indirect map entries::
-/- /etc/auto.direct
-/test /etc/auto.indirect
+ /- /etc/auto.direct
+ /test /etc/auto.indirect
-and the corresponding map files:
+and the corresponding map files::
-/etc/auto.direct:
+ /etc/auto.direct:
-/automount/dparse/g6 budgie:/autofs/export1
-/automount/dparse/g1 shark:/autofs/export1
-and so on.
+ /automount/dparse/g6 budgie:/autofs/export1
+ /automount/dparse/g1 shark:/autofs/export1
+ and so on.
-/etc/auto.indirect:
+/etc/auto.indirect::
-g1 shark:/autofs/export1
-g6 budgie:/autofs/export1
-and so on.
+ g1 shark:/autofs/export1
+ g6 budgie:/autofs/export1
+ and so on.
For the above indirect map an autofs file system is mounted on /test and
mounts are triggered for each sub-directory key by the inode lookup
@@ -69,23 +71,23 @@
But, each entry in direct and indirect maps can have offsets (making
them multi-mount map entries).
-For example, an indirect mount map entry could also be:
+For example, an indirect mount map entry could also be::
-g1 \
- / shark:/autofs/export5/testing/test \
- /s1 shark:/autofs/export/testing/test/s1 \
- /s2 shark:/autofs/export5/testing/test/s2 \
- /s1/ss1 shark:/autofs/export1 \
- /s2/ss2 shark:/autofs/export2
+ g1 \
+ / shark:/autofs/export5/testing/test \
+ /s1 shark:/autofs/export/testing/test/s1 \
+ /s2 shark:/autofs/export5/testing/test/s2 \
+ /s1/ss1 shark:/autofs/export1 \
+ /s2/ss2 shark:/autofs/export2
-and a similarly a direct mount map entry could also be:
+and a similarly a direct mount map entry could also be::
-/automount/dparse/g1 \
- / shark:/autofs/export5/testing/test \
- /s1 shark:/autofs/export/testing/test/s1 \
- /s2 shark:/autofs/export5/testing/test/s2 \
- /s1/ss1 shark:/autofs/export2 \
- /s2/ss2 shark:/autofs/export2
+ /automount/dparse/g1 \
+ / shark:/autofs/export5/testing/test \
+ /s1 shark:/autofs/export/testing/test/s1 \
+ /s2 shark:/autofs/export5/testing/test/s2 \
+ /s1/ss1 shark:/autofs/export2 \
+ /s2/ss2 shark:/autofs/export2
One of the issues with version 4 of autofs was that, when mounting an
entry with a large number of offsets, possibly with nesting, we needed
@@ -170,32 +172,32 @@
The control interface is opening a device node, typically /dev/autofs.
All the ioctls use a common structure to pass the needed parameter
-information and return operation results:
+information and return operation results::
-struct autofs_dev_ioctl {
- __u32 ver_major;
- __u32 ver_minor;
- __u32 size; /* total size of data passed in
- * including this struct */
- __s32 ioctlfd; /* automount command fd */
+ struct autofs_dev_ioctl {
+ __u32 ver_major;
+ __u32 ver_minor;
+ __u32 size; /* total size of data passed in
+ * including this struct */
+ __s32 ioctlfd; /* automount command fd */
- /* Command parameters */
- union {
- struct args_protover protover;
- struct args_protosubver protosubver;
- struct args_openmount openmount;
- struct args_ready ready;
- struct args_fail fail;
- struct args_setpipefd setpipefd;
- struct args_timeout timeout;
- struct args_requester requester;
- struct args_expire expire;
- struct args_askumount askumount;
- struct args_ismountpoint ismountpoint;
- };
+ /* Command parameters */
+ union {
+ struct args_protover protover;
+ struct args_protosubver protosubver;
+ struct args_openmount openmount;
+ struct args_ready ready;
+ struct args_fail fail;
+ struct args_setpipefd setpipefd;
+ struct args_timeout timeout;
+ struct args_requester requester;
+ struct args_expire expire;
+ struct args_askumount askumount;
+ struct args_ismountpoint ismountpoint;
+ };
- char path[0];
-};
+ char path[0];
+ };
The ioctlfd field is a mount point file descriptor of an autofs mount
point. It is returned by the open call and is used by all calls except
@@ -212,7 +214,7 @@
structure sent from user space.
This structure can be initialized before setting specific fields by using
-the void function call init_autofs_dev_ioctl(struct autofs_dev_ioctl *).
+the void function call init_autofs_dev_ioctl(``struct autofs_dev_ioctl *``).
All of the ioctls perform a copy of this structure from user space to
kernel space and return -EINVAL if the size parameter is smaller than
diff --git a/Documentation/filesystems/befs.txt b/Documentation/filesystems/befs.rst
similarity index 83%
rename from Documentation/filesystems/befs.txt
rename to Documentation/filesystems/befs.rst
index da45e6c..79f9740 100644
--- a/Documentation/filesystems/befs.txt
+++ b/Documentation/filesystems/befs.rst
@@ -1,48 +1,54 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
BeOS filesystem for Linux
+=========================
Document last updated: Dec 6, 2001
-WARNING
+Warning
=======
Make sure you understand that this is alpha software. This means that the
-implementation is neither complete nor well-tested.
+implementation is neither complete nor well-tested.
I DISCLAIM ALL RESPONSIBILITY FOR ANY POSSIBLE BAD EFFECTS OF THIS CODE!
-LICENSE
-=====
-This software is covered by the GNU General Public License.
+License
+=======
+This software is covered by the GNU General Public License.
See the file COPYING for the complete text of the license.
Or the GNU website: <http://www.gnu.org/licenses/licenses.html>
-AUTHOR
-=====
+Author
+======
The largest part of the code written by Will Dyson <will_dyson@pobox.com>
He has been working on the code since Aug 13, 2001. See the changelog for
details.
Original Author: Makoto Kato <m_kato@ga2.so-net.ne.jp>
+
His original code can still be found at:
<http://hp.vector.co.jp/authors/VA008030/bfs/>
+
Does anyone know of a more current email address for Makoto? He doesn't
respond to the address given above...
This filesystem doesn't have a maintainer.
-WHAT IS THIS DRIVER?
-==================
-This module implements the native filesystem of BeOS http://www.beincorporated.com/
+What is this Driver?
+====================
+This module implements the native filesystem of BeOS http://www.beincorporated.com/
for the linux 2.4.1 and later kernels. Currently it is a read-only
implementation.
Which is it, BFS or BEFS?
-================
-Be, Inc said, "BeOS Filesystem is officially called BFS, not BeFS".
+=========================
+Be, Inc said, "BeOS Filesystem is officially called BFS, not BeFS".
But Unixware Boot Filesystem is called bfs, too. And they are already in
the kernel. Because of this naming conflict, on Linux the BeOS
filesystem is called befs.
-HOW TO INSTALL
+How to Install
==============
step 1. Install the BeFS patch into the source code tree of linux.
@@ -54,16 +60,16 @@
patch -p1 < /path/to/patch-befs-xxx
if the patching step fails (i.e. there are rejected hunks), you can try to
-figure it out yourself (it shouldn't be hard), or mail the maintainer
+figure it out yourself (it shouldn't be hard), or mail the maintainer
(Will Dyson <will_dyson@pobox.com>) for help.
step 2. Configuration & make kernel
The linux kernel has many compile-time options. Most of them are beyond the
scope of this document. I suggest the Kernel-HOWTO document as a good general
-reference on this topic. http://www.linuxdocs.org/HOWTOs/Kernel-HOWTO-4.html
+reference on this topic. http://www.linuxdocs.org/HOWTOs/Kernel-HOWTO-4.html
-However, to use the BeFS module, you must enable it at configure time.
+However, to use the BeFS module, you must enable it at configure time::
cd /foo/bar/linux
make menuconfig (or xconfig)
@@ -82,35 +88,40 @@
See the kernel howto <http://www.linux.com/howto/Kernel-HOWTO.html> for
instructions on this critical step.
-USING BFS
+Using BFS
=========
To use the BeOS filesystem, use filesystem type 'befs'.
-ex)
+ex::
+
mount -t befs /dev/fd0 /beos
-MOUNT OPTIONS
+Mount Options
=============
+
+============= ===========================================================
uid=nnn All files in the partition will be owned by user id nnn.
gid=nnn All files in the partition will be in group nnn.
iocharset=xxx Use xxx as the name of the NLS translation table.
debug The driver will output debugging information to the syslog.
+============= ===========================================================
-HOW TO GET LASTEST VERSION
+How to Get Lastest Version
==========================
The latest version is currently available at:
<http://befs-driver.sourceforge.net/>
-ANY KNOWN BUGS?
-===========
+Any Known Bugs?
+===============
As of Jan 20, 2002:
-
+
None
-SPECIAL THANKS
+Special Thanks
==============
Dominic Giampalo ... Writing "Practical file system design with Be filesystem"
+
Hiroyuki Yamada ... Testing LinuxPPC.
diff --git a/Documentation/filesystems/bfs.txt b/Documentation/filesystems/bfs.rst
similarity index 71%
rename from Documentation/filesystems/bfs.txt
rename to Documentation/filesystems/bfs.rst
index 843ce91..ce14b90 100644
--- a/Documentation/filesystems/bfs.txt
+++ b/Documentation/filesystems/bfs.rst
@@ -1,4 +1,7 @@
-BFS FILESYSTEM FOR LINUX
+.. SPDX-License-Identifier: GPL-2.0
+
+========================
+BFS Filesystem for Linux
========================
The BFS filesystem is used by SCO UnixWare OS for the /stand slice, which
@@ -9,22 +12,22 @@
know the partition number and the kernel must support UnixWare disk slices
(CONFIG_UNIXWARE_DISKLABEL config option). However BFS support does not
depend on having UnixWare disklabel support because one can also mount
-BFS filesystem via loopback:
+BFS filesystem via loopback::
-# losetup /dev/loop0 stand.img
-# mount -t bfs /dev/loop0 /mnt/stand
+ # losetup /dev/loop0 stand.img
+ # mount -t bfs /dev/loop0 /mnt/stand
-where stand.img is a file containing the image of BFS filesystem.
+where stand.img is a file containing the image of BFS filesystem.
When you have finished using it and umounted you need to also deallocate
-/dev/loop0 device by:
+/dev/loop0 device by::
-# losetup -d /dev/loop0
+ # losetup -d /dev/loop0
-You can simplify mounting by just typing:
+You can simplify mounting by just typing::
-# mount -t bfs -o loop stand.img /mnt/stand
+ # mount -t bfs -o loop stand.img /mnt/stand
-this will allocate the first available loopback device (and load loop.o
+this will allocate the first available loopback device (and load loop.o
kernel module if necessary) automatically. If the loopback driver is not
loaded automatically, make sure that you have compiled the module and
that modprobe is functioning. Beware that umount will not deallocate
@@ -33,21 +36,21 @@
losetup(8). Read losetup(8) manpage for more info.
To create the BFS image under UnixWare you need to find out first which
-slice contains it. The command prtvtoc(1M) is your friend:
+slice contains it. The command prtvtoc(1M) is your friend::
-# prtvtoc /dev/rdsk/c0b0t0d0s0
+ # prtvtoc /dev/rdsk/c0b0t0d0s0
(assuming your root disk is on target=0, lun=0, bus=0, controller=0). Then you
look for the slice with tag "STAND", which is usually slice 10. With this
-information you can use dd(1) to create the BFS image:
+information you can use dd(1) to create the BFS image::
-# umount /stand
-# dd if=/dev/rdsk/c0b0t0d0sa of=stand.img bs=512
+ # umount /stand
+ # dd if=/dev/rdsk/c0b0t0d0sa of=stand.img bs=512
Just in case, you can verify that you have done the right thing by checking
-the magic number:
+the magic number::
-# od -Ad -tx4 stand.img | more
+ # od -Ad -tx4 stand.img | more
The first 4 bytes should be 0x1badface.
diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.rst
similarity index 95%
rename from Documentation/filesystems/btrfs.txt
rename to Documentation/filesystems/btrfs.rst
index f9dad22..d0904f6 100644
--- a/Documentation/filesystems/btrfs.txt
+++ b/Documentation/filesystems/btrfs.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====
BTRFS
=====
diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.rst
similarity index 90%
rename from Documentation/filesystems/ceph.txt
rename to Documentation/filesystems/ceph.rst
index b19b6a0..b46a721 100644
--- a/Documentation/filesystems/ceph.txt
+++ b/Documentation/filesystems/ceph.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
Ceph Distributed File System
============================
@@ -15,6 +18,7 @@
* Easy deployment: most FS components are userspace daemons
Also,
+
* Flexible snapshots (on any directory)
* Recursive accounting (nested files, directories, bytes)
@@ -63,7 +67,7 @@
Finally, Ceph also allows quotas to be set on any directory in the system.
The quota can restrict the number of bytes or the number of files stored
beneath that point in the directory hierarchy. Quotas can be set using
-extended attributes 'ceph.quota.max_files' and 'ceph.quota.max_bytes', eg:
+extended attributes 'ceph.quota.max_files' and 'ceph.quota.max_bytes', eg::
setfattr -n ceph.quota.max_bytes -v 100000000 /some/dir
getfattr -n ceph.quota.max_bytes /some/dir
@@ -76,7 +80,7 @@
Mount Syntax
============
-The basic mount syntax is:
+The basic mount syntax is::
# mount -t ceph monip[:port][,monip2[:port]...]:/[subdir] mnt
@@ -84,7 +88,7 @@
full list when it connects. (However, if the monitor you specify
happens to be down, the mount won't succeed.) The port can be left
off if the monitor is using the default. So if the monitor is at
-1.2.3.4,
+1.2.3.4::
# mount -t ceph 1.2.3.4:/ /mnt/ceph
@@ -163,14 +167,14 @@
available modes are "no" and "clean". The default is "no".
* no: never attempt to reconnect when client detects that it has been
- blacklisted. Operations will generally fail after being blacklisted.
+ blacklisted. Operations will generally fail after being blacklisted.
* clean: client reconnects to the ceph cluster automatically when it
- detects that it has been blacklisted. During reconnect, client drops
- dirty data/metadata, invalidates page caches and writable file handles.
- After reconnect, file locks become stale because the MDS loses track
- of them. If an inode contains any stale file locks, read/write on the
- inode is not allowed until applications release all stale file locks.
+ detects that it has been blacklisted. During reconnect, client drops
+ dirty data/metadata, invalidates page caches and writable file handles.
+ After reconnect, file locks become stale because the MDS loses track
+ of them. If an inode contains any stale file locks, read/write on the
+ inode is not allowed until applications release all stale file locks.
More Information
================
@@ -179,8 +183,8 @@
https://ceph.com/
The Linux kernel client source tree is available at
- https://github.com/ceph/ceph-client.git
- git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
+ - https://github.com/ceph/ceph-client.git
+ - git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
and the source for the full system is at
https://github.com/ceph/ceph.git
diff --git a/Documentation/filesystems/cifs/cifsroot.txt b/Documentation/filesystems/cifs/cifsroot.txt
index 0fa1a2c..947b7ec 100644
--- a/Documentation/filesystems/cifs/cifsroot.txt
+++ b/Documentation/filesystems/cifs/cifsroot.txt
@@ -13,7 +13,7 @@
In order to mount, the network stack will also need to be set up by
using 'ip=' config option. For more details, see
-Documentation/filesystems/nfs/nfsroot.txt.
+Documentation/admin-guide/nfs/nfsroot.rst.
A CIFS root mount currently requires the use of SMB1+UNIX Extensions
which is only supported by the Samba server. SMB1 is the older
diff --git a/Documentation/filesystems/cramfs.txt b/Documentation/filesystems/cramfs.rst
similarity index 88%
rename from Documentation/filesystems/cramfs.txt
rename to Documentation/filesystems/cramfs.rst
index 8e19a53..afbdbde 100644
--- a/Documentation/filesystems/cramfs.txt
+++ b/Documentation/filesystems/cramfs.rst
@@ -1,12 +1,15 @@
+.. SPDX-License-Identifier: GPL-2.0
- Cramfs - cram a filesystem onto a small ROM
+===========================================
+Cramfs - cram a filesystem onto a small ROM
+===========================================
-cramfs is designed to be simple and small, and to compress things well.
+cramfs is designed to be simple and small, and to compress things well.
It uses the zlib routines to compress a file one page at a time, and
allows random page access. The meta-data is not compressed, but is
expressed in a very terse representation to make it use much less
-diskspace than traditional filesystems.
+diskspace than traditional filesystems.
You can't write to a cramfs filesystem (making it compressible and
compact also makes it _very_ hard to update on-the-fly), so you have to
@@ -28,9 +31,9 @@
Hard links are supported, but hard linked files
will still have a link count of 1 in the cramfs image.
-Cramfs directories have no `.' or `..' entries. Directories (like
+Cramfs directories have no ``.`` or ``..`` entries. Directories (like
every other file on cramfs) always have a link count of 1. (There's
-no need to use -noleaf in `find', btw.)
+no need to use -noleaf in ``find``, btw.)
No timestamps are stored in a cramfs, so these default to the epoch
(1970 GMT). Recently-accessed files may have updated timestamps, but
@@ -70,9 +73,9 @@
(Flash device in physical memory map). MTD partitions based on such devices
are fine too. Then that device should be specified with the "mtd:" prefix
as the mount device argument. For example, to mount the MTD device named
-"fs_partition" on the /mnt directory:
+"fs_partition" on the /mnt directory::
-$ mount -t cramfs mtd:fs_partition /mnt
+ $ mount -t cramfs mtd:fs_partition /mnt
To boot a kernel with this as root filesystem, suffice to specify
something like "root=mtd:fs_partition" on the kernel command line.
@@ -90,6 +93,7 @@
For /usr/share/magic
--------------------
+===== ======================= =======================
0 ulelong 0x28cd3d45 Linux cramfs offset 0
>4 ulelong x size %d
>8 ulelong x flags 0x%x
@@ -110,6 +114,7 @@
>552 ulelong x fsid.blocks %d
>556 ulelong x fsid.files %d
>560 string >\0 name "%.16s"
+===== ======================= =======================
Hacker Notes
diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.rst
similarity index 90%
rename from Documentation/filesystems/debugfs.txt
rename to Documentation/filesystems/debugfs.rst
index dc497b9..80f332b 100644
--- a/Documentation/filesystems/debugfs.txt
+++ b/Documentation/filesystems/debugfs.rst
@@ -1,4 +1,11 @@
-Copyright 2009 Jonathan Corbet <corbet@lwn.net>
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+=======
+DebugFS
+=======
+
+Copyright |copy| 2009 Jonathan Corbet <corbet@lwn.net>
Debugfs exists as a simple way for kernel developers to make information
available to user space. Unlike /proc, which is only meant for information
@@ -6,11 +13,11 @@
debugfs has no rules at all. Developers can put any information they want
there. The debugfs filesystem is also intended to not serve as a stable
ABI to user space; in theory, there are no stability constraints placed on
-files exported there. The real world is not always so simple, though [1];
+files exported there. The real world is not always so simple, though [1]_;
even debugfs interfaces are best designed with the idea that they will need
to be maintained forever.
-Debugfs is typically mounted with a command like:
+Debugfs is typically mounted with a command like::
mount -t debugfs none /sys/kernel/debug
@@ -23,7 +30,7 @@
Code using debugfs should include <linux/debugfs.h>. Then, the first order
of business will be to create at least one directory to hold a set of
-debugfs files:
+debugfs files::
struct dentry *debugfs_create_dir(const char *name, struct dentry *parent);
@@ -36,7 +43,7 @@
indication that the kernel has been built without debugfs support and none
of the functions described below will work.
-The most general way to create a file within a debugfs directory is with:
+The most general way to create a file within a debugfs directory is with::
struct dentry *debugfs_create_file(const char *name, umode_t mode,
struct dentry *parent, void *data,
@@ -53,7 +60,7 @@
missing.
Create a file with an initial size, the following function can be used
-instead:
+instead::
struct dentry *debugfs_create_file_size(const char *name, umode_t mode,
struct dentry *parent, void *data,
@@ -66,7 +73,7 @@
In a number of cases, the creation of a set of file operations is not
actually necessary; the debugfs code provides a number of helper functions
for simple situations. Files containing a single integer value can be
-created with any of:
+created with any of::
void debugfs_create_u8(const char *name, umode_t mode,
struct dentry *parent, u8 *value);
@@ -80,7 +87,7 @@
These files support both reading and writing the given value; if a specific
file should not be written to, simply set the mode bits accordingly. The
values in these files are in decimal; if hexadecimal is more appropriate,
-the following functions can be used instead:
+the following functions can be used instead::
void debugfs_create_x8(const char *name, umode_t mode,
struct dentry *parent, u8 *value);
@@ -94,7 +101,7 @@
These functions are useful as long as the developer knows the size of the
value to be exported. Some types can have different widths on different
architectures, though, complicating the situation somewhat. There are
-functions meant to help out in such special cases:
+functions meant to help out in such special cases::
void debugfs_create_size_t(const char *name, umode_t mode,
struct dentry *parent, size_t *value);
@@ -103,7 +110,7 @@
a variable of type size_t.
Similarly, there are helpers for variables of type unsigned long, in decimal
-and hexadecimal:
+and hexadecimal::
struct dentry *debugfs_create_ulong(const char *name, umode_t mode,
struct dentry *parent,
@@ -111,7 +118,7 @@
void debugfs_create_xul(const char *name, umode_t mode,
struct dentry *parent, unsigned long *value);
-Boolean values can be placed in debugfs with:
+Boolean values can be placed in debugfs with::
struct dentry *debugfs_create_bool(const char *name, umode_t mode,
struct dentry *parent, bool *value);
@@ -120,7 +127,7 @@
N, followed by a newline. If written to, it will accept either upper- or
lower-case values, or 1 or 0. Any other input will be silently ignored.
-Also, atomic_t values can be placed in debugfs with:
+Also, atomic_t values can be placed in debugfs with::
void debugfs_create_atomic_t(const char *name, umode_t mode,
struct dentry *parent, atomic_t *value)
@@ -129,7 +136,7 @@
will set atomic_t values.
Another option is exporting a block of arbitrary binary data, with
-this structure and function:
+this structure and function::
struct debugfs_blob_wrapper {
void *data;
@@ -151,7 +158,7 @@
often during development, even if little such code reaches mainline.
Debugfs offers two functions: one to make a registers-only file, and
another to insert a register block in the middle of another sequential
-file.
+file::
struct debugfs_reg32 {
char *name;
@@ -164,9 +171,9 @@
void __iomem *base;
};
- struct dentry *debugfs_create_regset32(const char *name, umode_t mode,
- struct dentry *parent,
- struct debugfs_regset32 *regset);
+ debugfs_create_regset32(const char *name, umode_t mode,
+ struct dentry *parent,
+ struct debugfs_regset32 *regset);
void debugfs_print_regs32(struct seq_file *s, struct debugfs_reg32 *regs,
int nregs, void __iomem *base, char *prefix);
@@ -175,7 +182,7 @@
using __stringify, and a number of register names (macros) are actually
byte offsets over a base for the register block.
-If you want to dump an u32 array in debugfs, you can create file with:
+If you want to dump an u32 array in debugfs, you can create file with::
void debugfs_create_u32_array(const char *name, umode_t mode,
struct dentry *parent,
@@ -185,7 +192,7 @@
the number of elements in the array. Note: Once array is created its
size can not be changed.
-There is a helper function to create device related seq_file:
+There is a helper function to create device related seq_file::
struct dentry *debugfs_create_devm_seqfile(struct device *dev,
const char *name,
@@ -197,14 +204,14 @@
the "read_fn" is a function pointer which to be called to print the
seq_file content.
-There are a couple of other directory-oriented helper functions:
+There are a couple of other directory-oriented helper functions::
- struct dentry *debugfs_rename(struct dentry *old_dir,
+ struct dentry *debugfs_rename(struct dentry *old_dir,
struct dentry *old_dentry,
- struct dentry *new_dir,
+ struct dentry *new_dir,
const char *new_name);
- struct dentry *debugfs_create_symlink(const char *name,
+ struct dentry *debugfs_create_symlink(const char *name,
struct dentry *parent,
const char *target);
@@ -219,7 +226,7 @@
will be a lot of stale pointers and no end of highly antisocial behavior.
So all debugfs users - at least those which can be built as modules - must
be prepared to remove all files and directories they create there. A file
-can be removed with:
+can be removed with::
void debugfs_remove(struct dentry *dentry);
@@ -229,7 +236,7 @@
Once upon a time, debugfs users were required to remember the dentry
pointer for every debugfs file they created so that all files could be
cleaned up. We live in more civilized times now, though, and debugfs users
-can call:
+can call::
void debugfs_remove_recursive(struct dentry *dentry);
@@ -237,5 +244,4 @@
top-level directory, the entire hierarchy below that directory will be
removed.
-Notes:
- [1] http://lwn.net/Articles/309298/
+.. [1] http://lwn.net/Articles/309298/
diff --git a/Documentation/filesystems/dlmfs.txt b/Documentation/filesystems/dlmfs.rst
similarity index 86%
rename from Documentation/filesystems/dlmfs.txt
rename to Documentation/filesystems/dlmfs.rst
index fcf4d50..68daaa7 100644
--- a/Documentation/filesystems/dlmfs.txt
+++ b/Documentation/filesystems/dlmfs.rst
@@ -1,20 +1,25 @@
-dlmfs
-==================
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+=====
+DLMFS
+=====
+
A minimal DLM userspace interface implemented via a virtual file
system.
dlmfs is built with OCFS2 as it requires most of its infrastructure.
-Project web page: http://ocfs2.wiki.kernel.org
-Tools web page: https://github.com/markfasheh/ocfs2-tools
-OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
+:Project web page: http://ocfs2.wiki.kernel.org
+:Tools web page: https://github.com/markfasheh/ocfs2-tools
+:OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
All code copyright 2005 Oracle except when otherwise noted.
-CREDITS
+Credits
=======
-Some code taken from ramfs which is Copyright (C) 2000 Linus Torvalds
+Some code taken from ramfs which is Copyright |copy| 2000 Linus Torvalds
and Transmeta Corp.
Mark Fasheh <mark.fasheh@oracle.com>
@@ -96,14 +101,19 @@
open(2) with O_CREAT to ensure the resource inode is created - dlmfs does
not automatically create inodes for existing lock resources.
+============ ===========================
Open Flag Lock Request Type
---------- -----------------
+============ ===========================
O_RDONLY Shared Read
O_RDWR Exclusive
+============ ===========================
+
+============ ===========================
Open Flag Resulting Locking Behavior
---------- --------------------------
+============ ===========================
O_NONBLOCK Trylock operation
+============ ===========================
You must provide exactly one of O_RDONLY or O_RDWR.
diff --git a/Documentation/filesystems/ecryptfs.txt b/Documentation/filesystems/ecryptfs.rst
similarity index 62%
rename from Documentation/filesystems/ecryptfs.txt
rename to Documentation/filesystems/ecryptfs.rst
index 01d8a08..1f2edef 100644
--- a/Documentation/filesystems/ecryptfs.txt
+++ b/Documentation/filesystems/ecryptfs.rst
@@ -1,14 +1,18 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================================
eCryptfs: A stacked cryptographic filesystem for Linux
+======================================================
eCryptfs is free software. Please see the file COPYING for details.
For documentation, please see the files in the doc/ subdirectory. For
building and installation instructions please see the INSTALL file.
-Maintainer: Phillip Hellewell
-Lead developer: Michael A. Halcrow <mhalcrow@us.ibm.com>
-Developers: Michael C. Thompson
- Kent Yoder
-Web Site: http://ecryptfs.sf.net
+:Maintainer: Phillip Hellewell
+:Lead developer: Michael A. Halcrow <mhalcrow@us.ibm.com>
+:Developers: Michael C. Thompson
+ Kent Yoder
+:Web Site: http://ecryptfs.sf.net
This software is currently undergoing development. Make sure to
maintain a backup copy of any data you write into eCryptfs.
@@ -19,34 +23,36 @@
http://sourceforge.net/projects/ecryptfs/
Userspace requirements include:
- - David Howells' userspace keyring headers and libraries (version
- 1.0 or higher), obtainable from
- http://people.redhat.com/~dhowells/keyutils/
- - Libgcrypt
+
+- David Howells' userspace keyring headers and libraries (version
+ 1.0 or higher), obtainable from
+ http://people.redhat.com/~dhowells/keyutils/
+- Libgcrypt
-NOTES
+.. note::
-In the beta/experimental releases of eCryptfs, when you upgrade
-eCryptfs, you should copy the files to an unencrypted location and
-then copy the files back into the new eCryptfs mount to migrate the
-files.
+ In the beta/experimental releases of eCryptfs, when you upgrade
+ eCryptfs, you should copy the files to an unencrypted location and
+ then copy the files back into the new eCryptfs mount to migrate the
+ files.
-MOUNT-WIDE PASSPHRASE
+Mount-wide Passphrase
+=====================
Create a new directory into which eCryptfs will write its encrypted
files (i.e., /root/crypt). Then, create the mount point directory
-(i.e., /mnt/crypt). Now it's time to mount eCryptfs:
+(i.e., /mnt/crypt). Now it's time to mount eCryptfs::
-mount -t ecryptfs /root/crypt /mnt/crypt
+ mount -t ecryptfs /root/crypt /mnt/crypt
You should be prompted for a passphrase and a salt (the salt may be
blank).
-Try writing a new file:
+Try writing a new file::
-echo "Hello, World" > /mnt/crypt/hello.txt
+ echo "Hello, World" > /mnt/crypt/hello.txt
The operation will complete. Notice that there is a new file in
/root/crypt that is at least 12288 bytes in size (depending on your
@@ -59,10 +65,13 @@
Then umount /mnt/crypt and mount again per the instructions given
above.
-cat /mnt/crypt/hello.txt
+::
+
+ cat /mnt/crypt/hello.txt
-NOTES
+Notes
+=====
eCryptfs version 0.1 should only be mounted on (1) empty directories
or (2) directories containing files only created by eCryptfs. If you
diff --git a/Documentation/filesystems/efivarfs.txt b/Documentation/filesystems/efivarfs.rst
similarity index 85%
rename from Documentation/filesystems/efivarfs.txt
rename to Documentation/filesystems/efivarfs.rst
index 686a64b..90ac656 100644
--- a/Documentation/filesystems/efivarfs.txt
+++ b/Documentation/filesystems/efivarfs.rst
@@ -1,5 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+=======================================
efivarfs - a (U)EFI variable filesystem
+=======================================
The efivarfs filesystem was created to address the shortcomings of
using entries in sysfs to maintain EFI variables. The old sysfs EFI
@@ -11,7 +14,7 @@
Variables can be created, deleted and modified with the efivarfs
filesystem.
-efivarfs is typically mounted like this,
+efivarfs is typically mounted like this::
mount -t efivarfs none /sys/firmware/efi/efivars
diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
new file mode 100644
index 0000000..bf14517
--- /dev/null
+++ b/Documentation/filesystems/erofs.rst
@@ -0,0 +1,240 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================
+Enhanced Read-Only File System - EROFS
+======================================
+
+Overview
+========
+
+EROFS file-system stands for Enhanced Read-Only File System. Different
+from other read-only file systems, it aims to be designed for flexibility,
+scalability, but be kept simple and high performance.
+
+It is designed as a better filesystem solution for the following scenarios:
+
+ - read-only storage media or
+
+ - part of a fully trusted read-only solution, which means it needs to be
+ immutable and bit-for-bit identical to the official golden image for
+ their releases due to security and other considerations and
+
+ - hope to save some extra storage space with guaranteed end-to-end performance
+ by using reduced metadata and transparent file compression, especially
+ for those embedded devices with limited memory (ex, smartphone);
+
+Here is the main features of EROFS:
+
+ - Little endian on-disk design;
+
+ - Currently 4KB block size (nobh) and therefore maximum 16TB address space;
+
+ - Metadata & data could be mixed by design;
+
+ - 2 inode versions for different requirements:
+
+ ===================== ============ =====================================
+ compact (v1) extended (v2)
+ ===================== ============ =====================================
+ Inode metadata size 32 bytes 64 bytes
+ Max file size 4 GB 16 EB (also limited by max. vol size)
+ Max uids/gids 65536 4294967296
+ File change time no yes (64 + 32-bit timestamp)
+ Max hardlinks 65536 4294967296
+ Metadata reserved 4 bytes 14 bytes
+ ===================== ============ =====================================
+
+ - Support extended attributes (xattrs) as an option;
+
+ - Support xattr inline and tail-end data inline for all files;
+
+ - Support POSIX.1e ACLs by using xattrs;
+
+ - Support transparent file compression as an option:
+ LZ4 algorithm with 4 KB fixed-sized output compression for high performance.
+
+The following git tree provides the file system user-space tools under
+development (ex, formatting tool mkfs.erofs):
+
+- git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git
+
+Bugs and patches are welcome, please kindly help us and send to the following
+linux-erofs mailing list:
+
+- linux-erofs mailing list <linux-erofs@lists.ozlabs.org>
+
+Mount options
+=============
+
+=================== =========================================================
+(no)user_xattr Setup Extended User Attributes. Note: xattr is enabled
+ by default if CONFIG_EROFS_FS_XATTR is selected.
+(no)acl Setup POSIX Access Control List. Note: acl is enabled
+ by default if CONFIG_EROFS_FS_POSIX_ACL is selected.
+cache_strategy=%s Select a strategy for cached decompression from now on:
+
+ ========== =============================================
+ disabled In-place I/O decompression only;
+ readahead Cache the last incomplete compressed physical
+ cluster for further reading. It still does
+ in-place I/O decompression for the rest
+ compressed physical clusters;
+ readaround Cache the both ends of incomplete compressed
+ physical clusters for further reading.
+ It still does in-place I/O decompression
+ for the rest compressed physical clusters.
+ ========== =============================================
+=================== =========================================================
+
+On-disk details
+===============
+
+Summary
+-------
+Different from other read-only file systems, an EROFS volume is designed
+to be as simple as possible::
+
+ |-> aligned with the block size
+ ____________________________________________________________
+ | |SB| | ... | Metadata | ... | Data | Metadata | ... | Data |
+ |_|__|_|_____|__________|_____|______|__________|_____|______|
+ 0 +1K
+
+All data areas should be aligned with the block size, but metadata areas
+may not. All metadatas can be now observed in two different spaces (views):
+
+ 1. Inode metadata space
+
+ Each valid inode should be aligned with an inode slot, which is a fixed
+ value (32 bytes) and designed to be kept in line with compact inode size.
+
+ Each inode can be directly found with the following formula:
+ inode offset = meta_blkaddr * block_size + 32 * nid
+
+ ::
+
+ |-> aligned with 8B
+ |-> followed closely
+ + meta_blkaddr blocks |-> another slot
+ _____________________________________________________________________
+ | ... | inode | xattrs | extents | data inline | ... | inode ...
+ |________|_______|(optional)|(optional)|__(optional)_|_____|__________
+ |-> aligned with the inode slot size
+ . .
+ . .
+ . .
+ . .
+ . .
+ . .
+ .____________________________________________________|-> aligned with 4B
+ | xattr_ibody_header | shared xattrs | inline xattrs |
+ |____________________|_______________|_______________|
+ |-> 12 bytes <-|->x * 4 bytes<-| .
+ . . .
+ . . .
+ . . .
+ ._______________________________.______________________.
+ | id | id | id | id | ... | id | ent | ... | ent| ... |
+ |____|____|____|____|______|____|_____|_____|____|_____|
+ |-> aligned with 4B
+ |-> aligned with 4B
+
+ Inode could be 32 or 64 bytes, which can be distinguished from a common
+ field which all inode versions have -- i_format::
+
+ __________________ __________________
+ | i_format | | i_format |
+ |__________________| |__________________|
+ | ... | | ... |
+ | | | |
+ |__________________| 32 bytes | |
+ | |
+ |__________________| 64 bytes
+
+ Xattrs, extents, data inline are followed by the corresponding inode with
+ proper alignment, and they could be optional for different data mappings.
+ _currently_ total 4 valid data mappings are supported:
+
+ == ====================================================================
+ 0 flat file data without data inline (no extent);
+ 1 fixed-sized output data compression (with non-compacted indexes);
+ 2 flat file data with tail packing data inline (no extent);
+ 3 fixed-sized output data compression (with compacted indexes, v5.3+).
+ == ====================================================================
+
+ The size of the optional xattrs is indicated by i_xattr_count in inode
+ header. Large xattrs or xattrs shared by many different files can be
+ stored in shared xattrs metadata rather than inlined right after inode.
+
+ 2. Shared xattrs metadata space
+
+ Shared xattrs space is similar to the above inode space, started with
+ a specific block indicated by xattr_blkaddr, organized one by one with
+ proper align.
+
+ Each share xattr can also be directly found by the following formula:
+ xattr offset = xattr_blkaddr * block_size + 4 * xattr_id
+
+ ::
+
+ |-> aligned by 4 bytes
+ + xattr_blkaddr blocks |-> aligned with 4 bytes
+ _________________________________________________________________________
+ | ... | xattr_entry | xattr data | ... | xattr_entry | xattr data ...
+ |________|_____________|_____________|_____|______________|_______________
+
+Directories
+-----------
+All directories are now organized in a compact on-disk format. Note that
+each directory block is divided into index and name areas in order to support
+random file lookup, and all directory entries are _strictly_ recorded in
+alphabetical order in order to support improved prefix binary search
+algorithm (could refer to the related source code).
+
+::
+
+ ___________________________
+ / |
+ / ______________|________________
+ / / | nameoff1 | nameoffN-1
+ ____________.______________._______________v________________v__________
+ | dirent | dirent | ... | dirent | filename | filename | ... | filename |
+ |___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____|
+ \ ^
+ \ | * could have
+ \ | trailing '\0'
+ \________________________| nameoff0
+
+ Directory block
+
+Note that apart from the offset of the first filename, nameoff0 also indicates
+the total number of directory entries in this block since it is no need to
+introduce another on-disk field at all.
+
+Compression
+-----------
+Currently, EROFS supports 4KB fixed-sized output transparent file compression,
+as illustrated below::
+
+ |---- Variant-Length Extent ----|-------- VLE --------|----- VLE -----
+ clusterofs clusterofs clusterofs
+ | | | logical data
+ _________v_______________________________v_____________________v_______________
+ ... | . | | . | | . | ...
+ ____|____.________|_____________|________.____|_____________|__.__________|____
+ |-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|
+ size size size size size
+ . . . .
+ . . . .
+ . . . .
+ _______._____________._____________._____________._____________________
+ ... | | | | ... physical data
+ _______|_____________|_____________|_____________|_____________________
+ |-> cluster <-|-> cluster <-|-> cluster <-|
+ size size size
+
+Currently each on-disk physical cluster can contain 4KB (un)compressed data
+at most. For each logical cluster, there is a corresponding on-disk index to
+describe its cluster type, physical cluster address, etc.
+
+See "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details.
diff --git a/Documentation/filesystems/erofs.txt b/Documentation/filesystems/erofs.txt
deleted file mode 100644
index db6d39c..0000000
--- a/Documentation/filesystems/erofs.txt
+++ /dev/null
@@ -1,211 +0,0 @@
-Overview
-========
-
-EROFS file-system stands for Enhanced Read-Only File System. Different
-from other read-only file systems, it aims to be designed for flexibility,
-scalability, but be kept simple and high performance.
-
-It is designed as a better filesystem solution for the following scenarios:
- - read-only storage media or
-
- - part of a fully trusted read-only solution, which means it needs to be
- immutable and bit-for-bit identical to the official golden image for
- their releases due to security and other considerations and
-
- - hope to save some extra storage space with guaranteed end-to-end performance
- by using reduced metadata and transparent file compression, especially
- for those embedded devices with limited memory (ex, smartphone);
-
-Here is the main features of EROFS:
- - Little endian on-disk design;
-
- - Currently 4KB block size (nobh) and therefore maximum 16TB address space;
-
- - Metadata & data could be mixed by design;
-
- - 2 inode versions for different requirements:
- compact (v1) extended (v2)
- Inode metadata size: 32 bytes 64 bytes
- Max file size: 4 GB 16 EB (also limited by max. vol size)
- Max uids/gids: 65536 4294967296
- File change time: no yes (64 + 32-bit timestamp)
- Max hardlinks: 65536 4294967296
- Metadata reserved: 4 bytes 14 bytes
-
- - Support extended attributes (xattrs) as an option;
-
- - Support xattr inline and tail-end data inline for all files;
-
- - Support POSIX.1e ACLs by using xattrs;
-
- - Support transparent file compression as an option:
- LZ4 algorithm with 4 KB fixed-sized output compression for high performance.
-
-The following git tree provides the file system user-space tools under
-development (ex, formatting tool mkfs.erofs):
->> git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git
-
-Bugs and patches are welcome, please kindly help us and send to the following
-linux-erofs mailing list:
->> linux-erofs mailing list <linux-erofs@lists.ozlabs.org>
-
-Mount options
-=============
-
-(no)user_xattr Setup Extended User Attributes. Note: xattr is enabled
- by default if CONFIG_EROFS_FS_XATTR is selected.
-(no)acl Setup POSIX Access Control List. Note: acl is enabled
- by default if CONFIG_EROFS_FS_POSIX_ACL is selected.
-cache_strategy=%s Select a strategy for cached decompression from now on:
- disabled: In-place I/O decompression only;
- readahead: Cache the last incomplete compressed physical
- cluster for further reading. It still does
- in-place I/O decompression for the rest
- compressed physical clusters;
- readaround: Cache the both ends of incomplete compressed
- physical clusters for further reading.
- It still does in-place I/O decompression
- for the rest compressed physical clusters.
-
-On-disk details
-===============
-
-Summary
--------
-Different from other read-only file systems, an EROFS volume is designed
-to be as simple as possible:
-
- |-> aligned with the block size
- ____________________________________________________________
- | |SB| | ... | Metadata | ... | Data | Metadata | ... | Data |
- |_|__|_|_____|__________|_____|______|__________|_____|______|
- 0 +1K
-
-All data areas should be aligned with the block size, but metadata areas
-may not. All metadatas can be now observed in two different spaces (views):
- 1. Inode metadata space
- Each valid inode should be aligned with an inode slot, which is a fixed
- value (32 bytes) and designed to be kept in line with compact inode size.
-
- Each inode can be directly found with the following formula:
- inode offset = meta_blkaddr * block_size + 32 * nid
-
- |-> aligned with 8B
- |-> followed closely
- + meta_blkaddr blocks |-> another slot
- _____________________________________________________________________
- | ... | inode | xattrs | extents | data inline | ... | inode ...
- |________|_______|(optional)|(optional)|__(optional)_|_____|__________
- |-> aligned with the inode slot size
- . .
- . .
- . .
- . .
- . .
- . .
- .____________________________________________________|-> aligned with 4B
- | xattr_ibody_header | shared xattrs | inline xattrs |
- |____________________|_______________|_______________|
- |-> 12 bytes <-|->x * 4 bytes<-| .
- . . .
- . . .
- . . .
- ._______________________________.______________________.
- | id | id | id | id | ... | id | ent | ... | ent| ... |
- |____|____|____|____|______|____|_____|_____|____|_____|
- |-> aligned with 4B
- |-> aligned with 4B
-
- Inode could be 32 or 64 bytes, which can be distinguished from a common
- field which all inode versions have -- i_format:
-
- __________________ __________________
- | i_format | | i_format |
- |__________________| |__________________|
- | ... | | ... |
- | | | |
- |__________________| 32 bytes | |
- | |
- |__________________| 64 bytes
-
- Xattrs, extents, data inline are followed by the corresponding inode with
- proper alignment, and they could be optional for different data mappings.
- _currently_ total 4 valid data mappings are supported:
-
- 0 flat file data without data inline (no extent);
- 1 fixed-sized output data compression (with non-compacted indexes);
- 2 flat file data with tail packing data inline (no extent);
- 3 fixed-sized output data compression (with compacted indexes, v5.3+).
-
- The size of the optional xattrs is indicated by i_xattr_count in inode
- header. Large xattrs or xattrs shared by many different files can be
- stored in shared xattrs metadata rather than inlined right after inode.
-
- 2. Shared xattrs metadata space
- Shared xattrs space is similar to the above inode space, started with
- a specific block indicated by xattr_blkaddr, organized one by one with
- proper align.
-
- Each share xattr can also be directly found by the following formula:
- xattr offset = xattr_blkaddr * block_size + 4 * xattr_id
-
- |-> aligned by 4 bytes
- + xattr_blkaddr blocks |-> aligned with 4 bytes
- _________________________________________________________________________
- | ... | xattr_entry | xattr data | ... | xattr_entry | xattr data ...
- |________|_____________|_____________|_____|______________|_______________
-
-Directories
------------
-All directories are now organized in a compact on-disk format. Note that
-each directory block is divided into index and name areas in order to support
-random file lookup, and all directory entries are _strictly_ recorded in
-alphabetical order in order to support improved prefix binary search
-algorithm (could refer to the related source code).
-
- ___________________________
- / |
- / ______________|________________
- / / | nameoff1 | nameoffN-1
- ____________.______________._______________v________________v__________
-| dirent | dirent | ... | dirent | filename | filename | ... | filename |
-|___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____|
- \ ^
- \ | * could have
- \ | trailing '\0'
- \________________________| nameoff0
-
- Directory block
-
-Note that apart from the offset of the first filename, nameoff0 also indicates
-the total number of directory entries in this block since it is no need to
-introduce another on-disk field at all.
-
-Compression
------------
-Currently, EROFS supports 4KB fixed-sized output transparent file compression,
-as illustrated below:
-
- |---- Variant-Length Extent ----|-------- VLE --------|----- VLE -----
- clusterofs clusterofs clusterofs
- | | | logical data
-_________v_______________________________v_____________________v_______________
-... | . | | . | | . | ...
-____|____.________|_____________|________.____|_____________|__.__________|____
- |-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|
- size size size size size
- . . . .
- . . . .
- . . . .
- _______._____________._____________._____________._____________________
- ... | | | | ... physical data
- _______|_____________|_____________|_____________|_____________________
- |-> cluster <-|-> cluster <-|-> cluster <-|
- size size size
-
-Currently each on-disk physical cluster can contain 4KB (un)compressed data
-at most. For each logical cluster, there is a corresponding on-disk index to
-describe its cluster type, physical cluster address, etc.
-
-See "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details.
-
diff --git a/Documentation/filesystems/ext2.txt b/Documentation/filesystems/ext2.rst
similarity index 91%
rename from Documentation/filesystems/ext2.txt
rename to Documentation/filesystems/ext2.rst
index 94c2cf02..d83dbbb 100644
--- a/Documentation/filesystems/ext2.txt
+++ b/Documentation/filesystems/ext2.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
The Second Extended Filesystem
==============================
@@ -14,8 +16,9 @@
Most defaults are determined by the filesystem superblock, and can be
set using tune2fs(8). Kernel-determined defaults are indicated by (*).
-bsddf (*) Makes `df' act like BSD.
-minixdf Makes `df' act like Minix.
+==================== === ================================================
+bsddf (*) Makes ``df`` act like BSD.
+minixdf Makes ``df`` act like Minix.
check=none, nocheck (*) Don't do extra checking of bitmaps on mount
(check=normal and check=strict options removed)
@@ -62,6 +65,7 @@
grpquota Enable group disk quota support
(requires CONFIG_QUOTA).
+==================== === ================================================
noquota option ls silently ignored by ext2.
@@ -294,9 +298,9 @@
If you're exceptionally paranoid, there are 3 ways of making metadata
writes synchronous on ext2:
-per-file if you have the program source: use the O_SYNC flag to open()
-per-file if you don't have the source: use "chattr +S" on the file
-per-filesystem: add the "sync" option to mount (or in /etc/fstab)
+- per-file if you have the program source: use the O_SYNC flag to open()
+- per-file if you don't have the source: use "chattr +S" on the file
+- per-filesystem: add the "sync" option to mount (or in /etc/fstab)
the first and last are not ext2 specific but do force the metadata to
be written synchronously. See also Journaling below.
@@ -316,10 +320,12 @@
format and using a compatibility flag to signal the format change (at
the expense of some compatibility).
-Filesystem block size: 1kB 2kB 4kB 8kB
-
-File size limit: 16GB 256GB 2048GB 2048GB
-Filesystem size limit: 2047GB 8192GB 16384GB 32768GB
+===================== ======= ======= ======= ========
+Filesystem block size 1kB 2kB 4kB 8kB
+===================== ======= ======= ======= ========
+File size limit 16GB 256GB 2048GB 2048GB
+Filesystem size limit 2047GB 8192GB 16384GB 32768GB
+===================== ======= ======= ======= ========
There is a 2.4 kernel limit of 2048GB for a single block device, so no
filesystem larger than that can be created at this time. There is also
@@ -370,19 +376,24 @@
References
==========
+======================= ===============================================
The kernel source file:/usr/src/linux/fs/ext2/
e2fsprogs (e2fsck) http://e2fsprogs.sourceforge.net/
Design & Implementation http://e2fsprogs.sourceforge.net/ext2intro.html
Journaling (ext3) ftp://ftp.uk.linux.org/pub/linux/sct/fs/jfs/
Filesystem Resizing http://ext2resize.sourceforge.net/
-Compression (*) http://e2compr.sourceforge.net/
+Compression [1]_ http://e2compr.sourceforge.net/
+======================= ===============================================
Implementations for:
-Windows 95/98/NT/2000 http://www.chrysocome.net/explore2fs
-Windows 95 (*) http://www.yipton.net/content.html#FSDEXT2
-DOS client (*) ftp://metalab.unc.edu/pub/Linux/system/filesystems/ext2/
-OS/2 (+) ftp://metalab.unc.edu/pub/Linux/system/filesystems/ext2/
-RISC OS client http://www.esw-heim.tu-clausthal.de/~marco/smorbrod/IscaFS/
-(*) no longer actively developed/supported (as of Apr 2001)
-(+) no longer actively developed/supported (as of Mar 2009)
+======================= ===========================================================
+Windows 95/98/NT/2000 http://www.chrysocome.net/explore2fs
+Windows 95 [1]_ http://www.yipton.net/content.html#FSDEXT2
+DOS client [1]_ ftp://metalab.unc.edu/pub/Linux/system/filesystems/ext2/
+OS/2 [2]_ ftp://metalab.unc.edu/pub/Linux/system/filesystems/ext2/
+RISC OS client http://www.esw-heim.tu-clausthal.de/~marco/smorbrod/IscaFS/
+======================= ===========================================================
+
+.. [1] no longer actively developed/supported (as of Apr 2001)
+.. [2] no longer actively developed/supported (as of Mar 2009)
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.rst
similarity index 88%
rename from Documentation/filesystems/ext3.txt
rename to Documentation/filesystems/ext3.rst
index 58758fb..c06cec3 100644
--- a/Documentation/filesystems/ext3.txt
+++ b/Documentation/filesystems/ext3.rst
@@ -1,4 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+===============
Ext3 Filesystem
===============
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.rst
similarity index 84%
rename from Documentation/filesystems/f2fs.txt
rename to Documentation/filesystems/f2fs.rst
index 4eb3e2d..d681203 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.rst
@@ -1,6 +1,8 @@
-================================================================================
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================================
WHAT IS Flash-Friendly File System (F2FS)?
-================================================================================
+==========================================
NAND flash memory-based storage devices, such as SSD, eMMC, and SD cards, have
been equipped on a variety systems ranging from mobile to server systems. Since
@@ -20,14 +22,15 @@
The following git tree provides the file system formatting tool (mkfs.f2fs),
a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs).
->> git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git
+
+- git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git
For reporting bugs and sending patches, please use the following mailing list:
->> linux-f2fs-devel@lists.sourceforge.net
-================================================================================
-BACKGROUND AND DESIGN ISSUES
-================================================================================
+- linux-f2fs-devel@lists.sourceforge.net
+
+Background and Design issues
+============================
Log-structured File System (LFS)
--------------------------------
@@ -61,6 +64,7 @@
as a cleaning process.
The process consists of three operations as follows.
+
1. A victim segment is selected through referencing segment usage table.
2. It loads parent index structures of all the data in the victim identified by
segment summary blocks.
@@ -71,9 +75,8 @@
is to hide the latencies to users. And also definitely, it should reduce the
amount of valid data to be moved, and move them quickly as well.
-================================================================================
-KEY FEATURES
-================================================================================
+Key Features
+============
Flash Awareness
---------------
@@ -94,10 +97,11 @@
- Support multi-head logs for static/dynamic hot and cold data separation
- Introduce adaptive logging for efficient block allocation
-================================================================================
-MOUNT OPTIONS
-================================================================================
+Mount Options
+=============
+
+====================== ============================================================
background_gc=%s Turn on/off cleaning operations, namely garbage
collection, triggered in background when I/O subsystem is
idle. If background_gc=on, it will turn on the garbage
@@ -167,7 +171,10 @@
fault_type=%d Support configuring fault injection type, should be
enabled with fault_injection option, fault type value
is shown below, it supports single or combined type.
+
+ =================== ===========
Type_Name Type_Value
+ =================== ===========
FAULT_KMALLOC 0x000000001
FAULT_KVMALLOC 0x000000002
FAULT_PAGE_ALLOC 0x000000004
@@ -183,6 +190,7 @@
FAULT_CHECKPOINT 0x000001000
FAULT_DISCARD 0x000002000
FAULT_WRITE_IO 0x000004000
+ =================== ===========
mode=%s Control block allocation mode which supports "adaptive"
and "lfs". In "lfs" mode, there should be no random
writes towards main area.
@@ -219,7 +227,7 @@
non-atomic files likewise "nobarrier" mount option.
test_dummy_encryption Enable dummy encryption, which provides a fake fscrypt
context. The fake fscrypt context is used by xfstests.
-checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "enable"
+checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "enable"
to reenable checkpointing. Is enabled by default. While
disabled, any unmounting or unexpected shutdowns will cause
the filesystem contents to appear as they did when the
@@ -246,22 +254,22 @@
on compression extension list and enable compression on
these file by default rather than to enable it via ioctl.
For other files, we can still enable compression via ioctl.
+====================== ============================================================
-================================================================================
-DEBUGFS ENTRIES
-================================================================================
+Debugfs Entries
+===============
/sys/kernel/debug/f2fs/ contains information about all the partitions mounted as
f2fs. Each file shows the whole f2fs information.
/sys/kernel/debug/f2fs/status includes:
+
- major file system information managed by f2fs currently
- average SIT information about whole segments
- current memory footprint consumed by f2fs.
-================================================================================
-SYSFS ENTRIES
-================================================================================
+Sysfs Entries
+=============
Information about mounted f2fs file systems can be found in
/sys/fs/f2fs. Each mounted filesystem will have a directory in
@@ -271,22 +279,24 @@
Files in /sys/fs/f2fs/<devname>
(see also Documentation/ABI/testing/sysfs-fs-f2fs)
-================================================================================
-USAGE
-================================================================================
+Usage
+=====
1. Download userland tools and compile them.
2. Skip, if f2fs was compiled statically inside kernel.
- Otherwise, insert the f2fs.ko module.
- # insmod f2fs.ko
+ Otherwise, insert the f2fs.ko module::
-3. Create a directory trying to mount
- # mkdir /mnt/f2fs
+ # insmod f2fs.ko
-4. Format the block device, and then mount as f2fs
- # mkfs.f2fs -l label /dev/block_device
- # mount -t f2fs /dev/block_device /mnt/f2fs
+3. Create a directory trying to mount::
+
+ # mkdir /mnt/f2fs
+
+4. Format the block device, and then mount as f2fs::
+
+ # mkfs.f2fs -l label /dev/block_device
+ # mount -t f2fs /dev/block_device /mnt/f2fs
mkfs.f2fs
---------
@@ -294,18 +304,26 @@
which builds a basic on-disk layout.
The options consist of:
--l [label] : Give a volume label, up to 512 unicode name.
--a [0 or 1] : Split start location of each area for heap-based allocation.
- 1 is set by default, which performs this.
--o [int] : Set overprovision ratio in percent over volume size.
- 5 is set by default.
--s [int] : Set the number of segments per section.
- 1 is set by default.
--z [int] : Set the number of sections per zone.
- 1 is set by default.
--e [str] : Set basic extension list. e.g. "mp3,gif,mov"
--t [0 or 1] : Disable discard command or not.
- 1 is set by default, which conducts discard.
+
+=============== ===========================================================
+``-l [label]`` Give a volume label, up to 512 unicode name.
+``-a [0 or 1]`` Split start location of each area for heap-based allocation.
+
+ 1 is set by default, which performs this.
+``-o [int]`` Set overprovision ratio in percent over volume size.
+
+ 5 is set by default.
+``-s [int]`` Set the number of segments per section.
+
+ 1 is set by default.
+``-z [int]`` Set the number of sections per zone.
+
+ 1 is set by default.
+``-e [str]`` Set basic extension list. e.g. "mp3,gif,mov"
+``-t [0 or 1]`` Disable discard command or not.
+
+ 1 is set by default, which conducts discard.
+=============== ===========================================================
fsck.f2fs
---------
@@ -314,7 +332,8 @@
are cross-referenced correctly or not.
Note that, initial version of the tool does not fix any inconsistency.
-The options consist of:
+The options consist of::
+
-d debug level [default:0]
dump.f2fs
@@ -327,20 +346,21 @@
able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and
./dump_sit respectively.
-The options consist of:
+The options consist of::
+
-d debug level [default:0]
-i inode no (hex)
-s [SIT dump segno from #1~#2 (decimal), for all 0~-1]
-a [SSA dump segno from #1~#2 (decimal), for all 0~-1]
-Examples:
-# dump.f2fs -i [ino] /dev/sdx
-# dump.f2fs -s 0~-1 /dev/sdx (SIT dump)
-# dump.f2fs -a 0~-1 /dev/sdx (SSA dump)
+Examples::
-================================================================================
-DESIGN
-================================================================================
+ # dump.f2fs -i [ino] /dev/sdx
+ # dump.f2fs -s 0~-1 /dev/sdx (SIT dump)
+ # dump.f2fs -a 0~-1 /dev/sdx (SSA dump)
+
+Design
+======
On-disk Layout
--------------
@@ -351,7 +371,7 @@
segment size identically, but users can easily modify the sizes by mkfs.
F2FS splits the entire volume into six areas, and all the areas except superblock
-consists of multiple segments as described below.
+consists of multiple segments as described below::
align with the zone size <-|
|-> align with the segment size
@@ -373,28 +393,28 @@
|__zone__|
- Superblock (SB)
- : It is located at the beginning of the partition, and there exist two copies
+ It is located at the beginning of the partition, and there exist two copies
to avoid file system crash. It contains basic partition information and some
default parameters of f2fs.
- Checkpoint (CP)
- : It contains file system information, bitmaps for valid NAT/SIT sets, orphan
+ It contains file system information, bitmaps for valid NAT/SIT sets, orphan
inode lists, and summary entries of current active segments.
- Segment Information Table (SIT)
- : It contains segment information such as valid block count and bitmap for the
+ It contains segment information such as valid block count and bitmap for the
validity of all the blocks.
- Node Address Table (NAT)
- : It is composed of a block address table for all the node blocks stored in
+ It is composed of a block address table for all the node blocks stored in
Main area.
- Segment Summary Area (SSA)
- : It contains summary entries which contains the owner information of all the
+ It contains summary entries which contains the owner information of all the
data and node blocks stored in Main area.
- Main Area
- : It contains file and directory data including their indices.
+ It contains file and directory data including their indices.
In order to avoid misalignment between file system and flash-based storage, F2FS
aligns the start block address of CP with the segment size. Also, it aligns the
@@ -414,7 +434,7 @@
mechanism. In addition to CP, NAT and SIT also adopt the shadow copy mechanism.
For file system consistency, each CP points to which NAT and SIT copies are
-valid, as shown as below.
+valid, as shown as below::
+--------+----------+---------+
| CP | SIT | NAT |
@@ -438,7 +458,7 @@
indices, two direct node pointers, two indirect node pointers, and one double
indirect node pointer as described below. One direct node block contains 1018
data blocks, and one indirect node block contains also 1018 node blocks. Thus,
-one inode block (i.e., a file) covers:
+one inode block (i.e., a file) covers::
4KB * (923 + 2 * 1018 + 2 * 1018 * 1018 + 1018 * 1018 * 1018) := 3.94TB.
@@ -473,6 +493,8 @@
used to represent whether each dentry is valid or not. A dentry block occupies
4KB with the following composition.
+::
+
Dentry Block(4 K) = bitmap (27 bytes) + reserved (3 bytes) +
dentries(11 * 214 bytes) + file name (8 * 214 bytes)
@@ -498,23 +520,25 @@
a hash table with dedicated number of hash buckets as shown below. Note that
"A(2B)" means a bucket includes 2 data blocks.
-----------------------
-A : bucket
-B : block
-N : MAX_DIR_HASH_DEPTH
-----------------------
+::
-level #0 | A(2B)
- |
-level #1 | A(2B) - A(2B)
- |
-level #2 | A(2B) - A(2B) - A(2B) - A(2B)
- . | . . . .
-level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B)
- . | . . . .
-level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B)
+ ----------------------
+ A : bucket
+ B : block
+ N : MAX_DIR_HASH_DEPTH
+ ----------------------
-The number of blocks and buckets are determined by,
+ level #0 | A(2B)
+ |
+ level #1 | A(2B) - A(2B)
+ |
+ level #2 | A(2B) - A(2B) - A(2B) - A(2B)
+ . | . . . .
+ level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B)
+ . | . . . .
+ level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B)
+
+The number of blocks and buckets are determined by::
,- 2, if n < MAX_DIR_HASH_DEPTH / 2,
# of blocks in level #n = |
@@ -532,7 +556,7 @@
scans the next hash table in level #1. In this way, F2FS scans hash tables in
each levels incrementally from 1 to N. In each levels F2FS needs to scan only
one bucket determined by the following equation, which shows O(log(# of files))
-complexity.
+complexity::
bucket number to scan in level #n = (hash value) % (# of buckets in level #n)
@@ -540,7 +564,8 @@
file name. F2FS searches the empty slots in the hash tables of whole levels from
1 to N in the same way as the lookup operation.
-The following figure shows an example of two cases holding children.
+The following figure shows an example of two cases holding children::
+
--------------> Dir <--------------
| |
child child
@@ -611,14 +636,15 @@
2) whint_mode=user-based. F2FS tries to pass down hints given by
users.
+===================== ======================== ===================
User F2FS Block
----- ---- -----
+===================== ======================== ===================
META WRITE_LIFE_NOT_SET
HOT_NODE "
WARM_NODE "
COLD_NODE "
-*ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
-*extension list " "
+ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
+extension list " "
-- buffered io
WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
@@ -635,11 +661,13 @@
WRITE_LIFE_NONE " WRITE_LIFE_NONE
WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
WRITE_LIFE_LONG " WRITE_LIFE_LONG
+===================== ======================== ===================
3) whint_mode=fs-based. F2FS passes down hints with its policy.
+===================== ======================== ===================
User F2FS Block
----- ---- -----
+===================== ======================== ===================
META WRITE_LIFE_MEDIUM;
HOT_NODE WRITE_LIFE_NOT_SET
WARM_NODE "
@@ -662,6 +690,7 @@
WRITE_LIFE_NONE " WRITE_LIFE_NONE
WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
WRITE_LIFE_LONG " WRITE_LIFE_LONG
+===================== ======================== ===================
Fallocate(2) Policy
-------------------
@@ -681,6 +710,7 @@
However, once F2FS receives ioctl(fd, F2FS_IOC_SET_PIN_FILE) in prior to
fallocate(fd, DEFAULT_MODE), it allocates on-disk blocks addressess having
zero or random data, which is useful to the below scenario where:
+
1. create(fd)
2. ioctl(fd, F2FS_IOC_SET_PIN_FILE)
3. fallocate(fd, 0, 0, size)
@@ -692,39 +722,41 @@
--------------------------
- New term named cluster is defined as basic unit of compression, file can
-be divided into multiple clusters logically. One cluster includes 4 << n
-(n >= 0) logical pages, compression size is also cluster size, each of
-cluster can be compressed or not.
+ be divided into multiple clusters logically. One cluster includes 4 << n
+ (n >= 0) logical pages, compression size is also cluster size, each of
+ cluster can be compressed or not.
- In cluster metadata layout, one special block address is used to indicate
-cluster is compressed one or normal one, for compressed cluster, following
-metadata maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs
-stores data including compress header and compressed data.
+ cluster is compressed one or normal one, for compressed cluster, following
+ metadata maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs
+ stores data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
-support compression on write-once file, data can be compressed only when
-all logical blocks in file are valid and cluster compress ratio is lower
-than specified threshold.
+ support compression on write-once file, data can be compressed only when
+ all logical blocks in file are valid and cluster compress ratio is lower
+ than specified threshold.
- To enable compression on regular inode, there are three ways:
-* chattr +c file
-* chattr +c dir; touch dir/file
-* mount w/ -o compress_extension=ext; touch file.ext
-Compress metadata layout:
- [Dnode Structure]
- +-----------------------------------------------+
- | cluster 1 | cluster 2 | ......... | cluster N |
- +-----------------------------------------------+
- . . . .
- . . . .
- . Compressed Cluster . . Normal Cluster .
-+----------+---------+---------+---------+ +---------+---------+---------+---------+
-|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
-+----------+---------+---------+---------+ +---------+---------+---------+---------+
- . .
- . .
- . .
- +-------------+-------------+----------+----------------------------+
- | data length | data chksum | reserved | compressed data |
- +-------------+-------------+----------+----------------------------+
+ * chattr +c file
+ * chattr +c dir; touch dir/file
+ * mount w/ -o compress_extension=ext; touch file.ext
+
+Compress metadata layout::
+
+ [Dnode Structure]
+ +-----------------------------------------------+
+ | cluster 1 | cluster 2 | ......... | cluster N |
+ +-----------------------------------------------+
+ . . . .
+ . . . .
+ . Compressed Cluster . . Normal Cluster .
+ +----------+---------+---------+---------+ +---------+---------+---------+---------+
+ |compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+ +----------+---------+---------+---------+ +---------+---------+---------+---------+
+ . .
+ . .
+ . .
+ +-------------+-------------+----------+----------------------------+
+ | data length | data chksum | reserved | compressed data |
+ +-------------+-------------+----------+----------------------------+
diff --git a/Documentation/filesystems/fuse.rst b/Documentation/filesystems/fuse.rst
index 8e45506..cd717f9 100644
--- a/Documentation/filesystems/fuse.rst
+++ b/Documentation/filesystems/fuse.rst
@@ -1,7 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
-==============
+
+====
FUSE
-==============
+====
Definitions
===========
diff --git a/Documentation/filesystems/gfs2-uevents.txt b/Documentation/filesystems/gfs2-uevents.rst
similarity index 92%
rename from Documentation/filesystems/gfs2-uevents.txt
rename to Documentation/filesystems/gfs2-uevents.rst
index 19a19eb..f162a2c 100644
--- a/Documentation/filesystems/gfs2-uevents.txt
+++ b/Documentation/filesystems/gfs2-uevents.rst
@@ -1,14 +1,18 @@
- uevents and GFS2
- ==================
+.. SPDX-License-Identifier: GPL-2.0
+
+================
+uevents and GFS2
+================
During the lifetime of a GFS2 mount, a number of uevents are generated.
This document explains what the events are and what they are used
for (by gfs_controld in gfs2-utils).
A list of GFS2 uevents
------------------------
+======================
1. ADD
+------
The ADD event occurs at mount time. It will always be the first
uevent generated by the newly created filesystem. If the mount
@@ -21,6 +25,7 @@
of the filesystem respectively.
2. ONLINE
+---------
The ONLINE uevent is generated after a successful mount or remount. It
has the same environment variables as the ADD uevent. The ONLINE
@@ -29,6 +34,7 @@
be generated by older kernels.
3. CHANGE
+---------
The CHANGE uevent is used in two places. One is when reporting the
successful mount of the filesystem by the first node (FIRSTMOUNT=Done).
@@ -52,6 +58,7 @@
uevent for a successful mount or remount.
4. OFFLINE
+----------
The OFFLINE uevent is only generated due to filesystem errors and is used
as part of the "withdraw" mechanism. Currently this doesn't give any
@@ -59,6 +66,7 @@
be fixed.
5. REMOVE
+---------
The REMOVE uevent is generated at the end of an unsuccessful mount
or at the end of a umount of the filesystem. All REMOVE uevents will
@@ -68,9 +76,10 @@
Information common to all GFS2 uevents (uevent environment variables)
-----------------------------------------------------------------------
+=====================================================================
1. LOCKTABLE=
+--------------
The LOCKTABLE is a string, as supplied on the mount command
line (locktable=) or via fstab. It is used as a filesystem label
@@ -78,6 +87,7 @@
able to join the cluster.
2. LOCKPROTO=
+-------------
The LOCKPROTO is a string, and its value depends on what is set
on the mount command line, or via fstab. It will be either
@@ -85,12 +95,14 @@
may be supported.
3. JOURNALID=
+-------------
If a journal is in use by the filesystem (journals are not
assigned for spectator mounts) then this will give the
numeric journal id in all GFS2 uevents.
4. UUID=
+--------
With recent versions of gfs2-utils, mkfs.gfs2 writes a UUID
into the filesystem superblock. If it exists, this will
diff --git a/Documentation/filesystems/gfs2.txt b/Documentation/filesystems/gfs2.rst
similarity index 76%
rename from Documentation/filesystems/gfs2.txt
rename to Documentation/filesystems/gfs2.rst
index cc4f230..8d1ab58 100644
--- a/Documentation/filesystems/gfs2.txt
+++ b/Documentation/filesystems/gfs2.rst
@@ -1,5 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
Global File System
-------------------
+==================
https://fedorahosted.org/cluster/wiki/HomePage
@@ -14,16 +17,18 @@
GFS uses interchangeable inter-node locking mechanisms, the currently
supported mechanisms are:
- lock_nolock -- allows gfs to be used as a local file system
+ lock_nolock
+ - allows gfs to be used as a local file system
- lock_dlm -- uses a distributed lock manager (dlm) for inter-node locking
- The dlm is found at linux/fs/dlm/
+ lock_dlm
+ - uses a distributed lock manager (dlm) for inter-node locking.
+ The dlm is found at linux/fs/dlm/
Lock_dlm depends on user space cluster management systems found
at the URL above.
To use gfs as a local file system, no external clustering systems are
-needed, simply:
+needed, simply::
$ mkfs -t gfs2 -p lock_nolock -j 1 /dev/block_device
$ mount -t gfs2 /dev/block_device /dir
@@ -37,9 +42,12 @@
is pretty close.
The following man pages can be found at the URL above:
+
+ ============ =============================================
fsck.gfs2 to repair a filesystem
gfs2_grow to expand a filesystem online
gfs2_jadd to add journals to a filesystem online
tunegfs2 to manipulate, examine and tune a filesystem
- gfs2_convert to convert a gfs filesystem to gfs2 in-place
+ gfs2_convert to convert a gfs filesystem to gfs2 in-place
mkfs.gfs2 to make a filesystem
+ ============ =============================================
diff --git a/Documentation/filesystems/hfs.txt b/Documentation/filesystems/hfs.rst
similarity index 79%
rename from Documentation/filesystems/hfs.txt
rename to Documentation/filesystems/hfs.rst
index d096df6..ab17a00 100644
--- a/Documentation/filesystems/hfs.txt
+++ b/Documentation/filesystems/hfs.rst
@@ -1,11 +1,16 @@
-Note: This filesystem doesn't have a maintainer.
+.. SPDX-License-Identifier: GPL-2.0
+==================================
Macintosh HFS Filesystem for Linux
==================================
-HFS stands for ``Hierarchical File System'' and is the filesystem used
+
+.. Note:: This filesystem doesn't have a maintainer.
+
+
+HFS stands for ``Hierarchical File System`` and is the filesystem used
by the Mac Plus and all later Macintosh models. Earlier Macintosh
-models used MFS (``Macintosh File System''), which is not supported,
+models used MFS (``Macintosh File System``), which is not supported,
MacOS 8.1 and newer support a filesystem called HFS+ that's similar to
HFS but is extended in various areas. Use the hfsplus filesystem driver
to access such filesystems from Linux.
@@ -49,25 +54,25 @@
HFS is not a UNIX filesystem, thus it does not have the usual features you'd
expect:
- o You can't modify the set-uid, set-gid, sticky or executable bits or the uid
+ * You can't modify the set-uid, set-gid, sticky or executable bits or the uid
and gid of files.
- o You can't create hard- or symlinks, device files, sockets or FIFOs.
+ * You can't create hard- or symlinks, device files, sockets or FIFOs.
HFS does on the other have the concepts of multiple forks per file. These
non-standard forks are represented as hidden additional files in the normal
filesystems namespace which is kind of a cludge and makes the semantics for
the a little strange:
- o You can't create, delete or rename resource forks of files or the
+ * You can't create, delete or rename resource forks of files or the
Finder's metadata.
- o They are however created (with default values), deleted and renamed
+ * They are however created (with default values), deleted and renamed
along with the corresponding data fork or directory.
- o Copying files to a different filesystem will loose those attributes
+ * Copying files to a different filesystem will loose those attributes
that are essential for MacOS to work.
Creating HFS filesystems
-===================================
+========================
The hfsutils package from Robert Leslie contains a program called
hformat that can be used to create HFS filesystem. See
diff --git a/Documentation/filesystems/hfsplus.txt b/Documentation/filesystems/hfsplus.rst
similarity index 95%
rename from Documentation/filesystems/hfsplus.txt
rename to Documentation/filesystems/hfsplus.rst
index 59f7569..f02f4f5 100644
--- a/Documentation/filesystems/hfsplus.txt
+++ b/Documentation/filesystems/hfsplus.rst
@@ -1,4 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+======================================
Macintosh HFSPlus Filesystem for Linux
======================================
diff --git a/Documentation/filesystems/hpfs.txt b/Documentation/filesystems/hpfs.rst
similarity index 66%
rename from Documentation/filesystems/hpfs.txt
rename to Documentation/filesystems/hpfs.rst
index 74630bd..0db1522 100644
--- a/Documentation/filesystems/hpfs.txt
+++ b/Documentation/filesystems/hpfs.rst
@@ -1,13 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
Read/Write HPFS 2.09
+====================
+
1998-2004, Mikulas Patocka
-email: mikulas@artax.karlin.mff.cuni.cz
-homepage: http://artax.karlin.mff.cuni.cz/~mikulas/vyplody/hpfs/index-e.cgi
+:email: mikulas@artax.karlin.mff.cuni.cz
+:homepage: http://artax.karlin.mff.cuni.cz/~mikulas/vyplody/hpfs/index-e.cgi
-CREDITS:
+Credits
+=======
Chris Smith, 1993, original read-only HPFS, some code and hpfs structures file
is taken from it
+
Jacques Gelinas, MSDos mmap, Inspired by fs/nfs/mmap.c (Jon Tombs 15 Aug 1993)
+
Werner Almesberger, 1992, 1993, MSDos option parser & CR/LF conversion
Mount options
@@ -50,6 +58,7 @@
File names
+==========
As in OS/2, filenames are case insensitive. However, shell thinks that names
are case sensitive, so for example when you create a file FOO, you can use
@@ -64,6 +73,7 @@
Extended attributes
+===================
On HPFS partitions, OS/2 can associate to each file a special information called
extended attributes. Extended attributes are pairs of (key,value) where key is
@@ -88,6 +98,7 @@
Symlinks
+========
You can do symlinks on HPFS partition, symlinks are achieved by setting extended
attribute named "SYMLINK" with symlink value. Like on ext2, you can chown and
@@ -101,6 +112,7 @@
Codepages
+=========
HPFS can contain several uppercasing tables for several codepages and each
file has a pointer to codepage its name is in. However OS/2 was created in
@@ -128,6 +140,7 @@
Known bugs
+==========
HPFS386 on OS/2 server is not supported. HPFS386 installed on normal OS/2 client
should work. If you have OS/2 server, use only read-only mode. I don't know how
@@ -152,7 +165,8 @@
to delete other files that are leaf (probability that the file is non-leaf is
about 1/50) or to truncate file first to make some space.
You encounter this problem only if you have many directories so that
-preallocated directory band is full i.e.
+preallocated directory band is full i.e.::
+
number_of_directories / size_of_filesystem_in_mb > 4.
You can't delete open directories.
@@ -174,6 +188,7 @@
What does "unbalanced tree" message mean?
+=========================================
Old versions of this driver created sometimes unbalanced dnode trees. OS/2
chkdsk doesn't scream if the tree is unbalanced (and sometimes creates
@@ -187,6 +202,7 @@
Bugs in OS/2
+============
When you have two (or more) lost directories pointing each to other, chkdsk
locks up when repairing filesystem.
@@ -199,98 +215,139 @@
marks them as short (and writes "minor fs error corrected"). This bug is not in
HPFS386.
-Codepage bugs described above.
+Codepage bugs described above
+=============================
If you don't install fixpacks, there are many, many more...
History
+=======
-0.90 First public release
-0.91 Fixed bug that caused shooting to memory when write_inode was called on
- open inode (rarely happened)
-0.92 Fixed a little memory leak in freeing directory inodes
-0.93 Fixed bug that locked up the machine when there were too many filenames
- with first 15 characters same
- Fixed write_file to zero file when writing behind file end
-0.94 Fixed a little memory leak when trying to delete busy file or directory
-0.95 Fixed a bug that i_hpfs_parent_dir was not updated when moving files
-1.90 First version for 2.1.1xx kernels
-1.91 Fixed a bug that chk_sectors failed when sectors were at the end of disk
- Fixed a race-condition when write_inode is called while deleting file
- Fixed a bug that could possibly happen (with very low probability) when
- using 0xff in filenames
- Rewritten locking to avoid race-conditions
- Mount option 'eas' now works
- Fsync no longer returns error
- Files beginning with '.' are marked hidden
- Remount support added
- Alloc is not so slow when filesystem becomes full
- Atimes are no more updated because it slows down operation
- Code cleanup (removed all commented debug prints)
-1.92 Corrected a bug when sync was called just before closing file
-1.93 Modified, so that it works with kernels >= 2.1.131, I don't know if it
- works with previous versions
- Fixed a possible problem with disks > 64G (but I don't have one, so I can't
- test it)
- Fixed a file overflow at 2G
- Added new option 'timeshift'
- Changed behaviour on HPFS386: It is now possible to operate on HPFS386 in
- read-only mode
- Fixed a bug that slowed down alloc and prevented allocating 100% space
- (this bug was not destructive)
-1.94 Added workaround for one bug in Linux
- Fixed one buffer leak
- Fixed some incompatibilities with large extended attributes (but it's still
- not 100% ok, I have no info on it and OS/2 doesn't want to create them)
- Rewritten allocation
- Fixed a bug with i_blocks (du sometimes didn't display correct values)
- Directories have no longer archive attribute set (some programs don't like
- it)
- Fixed a bug that it set badly one flag in large anode tree (it was not
- destructive)
-1.95 Fixed one buffer leak, that could happen on corrupted filesystem
- Fixed one bug in allocation in 1.94
-1.96 Added workaround for one bug in OS/2 (HPFS locked up, HPFS386 reported
- error sometimes when opening directories in PMSHELL)
- Fixed a possible bitmap race
- Fixed possible problem on large disks
- You can now delete open files
- Fixed a nondestructive race in rename
-1.97 Support for HPFS v3 (on large partitions)
- Fixed a bug that it didn't allow creation of files > 128M (it should be 2G)
+====== =========================================================================
+0.90 First public release
+0.91 Fixed bug that caused shooting to memory when write_inode was called on
+ open inode (rarely happened)
+0.92 Fixed a little memory leak in freeing directory inodes
+0.93 Fixed bug that locked up the machine when there were too many filenames
+ with first 15 characters same
+ Fixed write_file to zero file when writing behind file end
+0.94 Fixed a little memory leak when trying to delete busy file or directory
+0.95 Fixed a bug that i_hpfs_parent_dir was not updated when moving files
+1.90 First version for 2.1.1xx kernels
+1.91 Fixed a bug that chk_sectors failed when sectors were at the end of disk
+ Fixed a race-condition when write_inode is called while deleting file
+ Fixed a bug that could possibly happen (with very low probability) when
+ using 0xff in filenames.
+
+ Rewritten locking to avoid race-conditions
+
+ Mount option 'eas' now works
+
+ Fsync no longer returns error
+
+ Files beginning with '.' are marked hidden
+
+ Remount support added
+
+ Alloc is not so slow when filesystem becomes full
+
+ Atimes are no more updated because it slows down operation
+
+ Code cleanup (removed all commented debug prints)
+1.92 Corrected a bug when sync was called just before closing file
+1.93 Modified, so that it works with kernels >= 2.1.131, I don't know if it
+ works with previous versions
+
+ Fixed a possible problem with disks > 64G (but I don't have one, so I can't
+ test it)
+
+ Fixed a file overflow at 2G
+
+ Added new option 'timeshift'
+
+ Changed behaviour on HPFS386: It is now possible to operate on HPFS386 in
+ read-only mode
+
+ Fixed a bug that slowed down alloc and prevented allocating 100% space
+ (this bug was not destructive)
+1.94 Added workaround for one bug in Linux
+
+ Fixed one buffer leak
+
+ Fixed some incompatibilities with large extended attributes (but it's still
+ not 100% ok, I have no info on it and OS/2 doesn't want to create them)
+
+ Rewritten allocation
+
+ Fixed a bug with i_blocks (du sometimes didn't display correct values)
+
+ Directories have no longer archive attribute set (some programs don't like
+ it)
+
+ Fixed a bug that it set badly one flag in large anode tree (it was not
+ destructive)
+1.95 Fixed one buffer leak, that could happen on corrupted filesystem
+
+ Fixed one bug in allocation in 1.94
+1.96 Added workaround for one bug in OS/2 (HPFS locked up, HPFS386 reported
+ error sometimes when opening directories in PMSHELL)
+
+ Fixed a possible bitmap race
+
+ Fixed possible problem on large disks
+
+ You can now delete open files
+
+ Fixed a nondestructive race in rename
+1.97 Support for HPFS v3 (on large partitions)
+
+ ZFixed a bug that it didn't allow creation of files > 128M
+ (it should be 2G)
1.97.1 Changed names of global symbols
+
Fixed a bug when chmoding or chowning root directory
-1.98 Fixed a deadlock when using old_readdir
- Better directory handling; workaround for "unbalanced tree" bug in OS/2
-1.99 Corrected a possible problem when there's not enough space while deleting
- file
- Now it tries to truncate the file if there's not enough space when deleting
- Removed a lot of redundant code
-2.00 Fixed a bug in rename (it was there since 1.96)
- Better anti-fragmentation strategy
-2.01 Fixed problem with directory listing over NFS
- Directory lseek now checks for proper parameters
- Fixed race-condition in buffer code - it is in all filesystems in Linux;
- when reading device (cat /dev/hda) while creating files on it, files
- could be damaged
-2.02 Workaround for bug in breada in Linux. breada could cause accesses beyond
- end of partition
-2.03 Char, block devices and pipes are correctly created
- Fixed non-crashing race in unlink (Alexander Viro)
- Now it works with Japanese version of OS/2
-2.04 Fixed error when ftruncate used to extend file
-2.05 Fixed crash when got mount parameters without =
- Fixed crash when allocation of anode failed due to full disk
- Fixed some crashes when block io or inode allocation failed
-2.06 Fixed some crash on corrupted disk structures
- Better allocation strategy
- Reschedule points added so that it doesn't lock CPU long time
- It should work in read-only mode on Warp Server
-2.07 More fixes for Warp Server. Now it really works
-2.08 Creating new files is not so slow on large disks
- An attempt to sync deleted file does not generate filesystem error
-2.09 Fixed error on extremely fragmented files
+1.98 Fixed a deadlock when using old_readdir
+ Better directory handling; workaround for "unbalanced tree" bug in OS/2
+1.99 Corrected a possible problem when there's not enough space while deleting
+ file
+ Now it tries to truncate the file if there's not enough space when
+ deleting
- vim: set textwidth=80:
+ Removed a lot of redundant code
+2.00 Fixed a bug in rename (it was there since 1.96)
+ Better anti-fragmentation strategy
+2.01 Fixed problem with directory listing over NFS
+
+ Directory lseek now checks for proper parameters
+
+ Fixed race-condition in buffer code - it is in all filesystems in Linux;
+ when reading device (cat /dev/hda) while creating files on it, files
+ could be damaged
+2.02 Workaround for bug in breada in Linux. breada could cause accesses beyond
+ end of partition
+2.03 Char, block devices and pipes are correctly created
+
+ Fixed non-crashing race in unlink (Alexander Viro)
+
+ Now it works with Japanese version of OS/2
+2.04 Fixed error when ftruncate used to extend file
+2.05 Fixed crash when got mount parameters without =
+
+ Fixed crash when allocation of anode failed due to full disk
+
+ Fixed some crashes when block io or inode allocation failed
+2.06 Fixed some crash on corrupted disk structures
+
+ Better allocation strategy
+
+ Reschedule points added so that it doesn't lock CPU long time
+
+ It should work in read-only mode on Warp Server
+2.07 More fixes for Warp Server. Now it really works
+2.08 Creating new files is not so slow on large disks
+
+ An attempt to sync deleted file does not generate filesystem error
+2.09 Fixed error on extremely fragmented files
+====== =========================================================================
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index 386eaad..e7b46da 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -1,3 +1,5 @@
+.. _filesystems_index:
+
===============================
Filesystems in the Linux kernel
===============================
@@ -46,8 +48,53 @@
.. toctree::
:maxdepth: 2
+ 9p
+ adfs
+ affs
+ afs
autofs
+ autofs-mount-control
+ befs
+ bfs
+ btrfs
+ ceph
+ cramfs
+ debugfs
+ dlmfs
+ ecryptfs
+ efivarfs
+ erofs
+ ext2
+ ext3
+ f2fs
+ gfs2
+ gfs2-uevents
+ hfs
+ hfsplus
+ hpfs
fuse
+ inotify
+ isofs
+ nilfs2
+ nfs/index
+ ntfs
+ ocfs2
+ ocfs2-online-filecheck
+ omfs
+ orangefs
overlayfs
+ proc
+ qnx6
+ ramfs-rootfs-initramfs
+ relay
+ romfs
+ squashfs
+ sysfs
+ sysv-fs
+ tmpfs
+ ubifs
+ ubifs-authentication.rst
+ udf
virtiofs
vfat
+ zonefs
diff --git a/Documentation/filesystems/inotify.txt b/Documentation/filesystems/inotify.rst
similarity index 82%
rename from Documentation/filesystems/inotify.txt
rename to Documentation/filesystems/inotify.rst
index 51f61db..7f7ef8a 100644
--- a/Documentation/filesystems/inotify.txt
+++ b/Documentation/filesystems/inotify.rst
@@ -1,27 +1,36 @@
- inotify
- a powerful yet simple file change notification system
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================================================
+Inotify - A Powerful yet Simple File Change Notification System
+===============================================================
Document started 15 Mar 2005 by Robert Love <rml@novell.com>
+
Document updated 4 Jan 2015 by Zhang Zhen <zhenzhang.zhang@huawei.com>
- --Deleted obsoleted interface, just refer to manpages for user interface.
+
+ - Deleted obsoleted interface, just refer to manpages for user interface.
(i) Rationale
-Q: What is the design decision behind not tying the watch to the open fd of
+Q:
+ What is the design decision behind not tying the watch to the open fd of
the watched object?
-A: Watches are associated with an open inotify device, not an open file.
+A:
+ Watches are associated with an open inotify device, not an open file.
This solves the primary problem with dnotify: keeping the file open pins
the file and thus, worse, pins the mount. Dnotify is therefore infeasible
for use on a desktop system with removable media as the media cannot be
unmounted. Watching a file should not require that it be open.
-Q: What is the design decision behind using an-fd-per-instance as opposed to
+Q:
+ What is the design decision behind using an-fd-per-instance as opposed to
an fd-per-watch?
-A: An fd-per-watch quickly consumes more file descriptors than are allowed,
+A:
+ An fd-per-watch quickly consumes more file descriptors than are allowed,
more fd's than are feasible to manage, and more fd's than are optimally
select()-able. Yes, root can bump the per-process fd limit and yes, users
can use epoll, but requiring both is a silly and extraneous requirement.
@@ -29,8 +38,8 @@
spaces is thus sensible. The current design is what user-space developers
want: Users initialize inotify, once, and add n watches, requiring but one
fd and no twiddling with fd limits. Initializing an inotify instance two
- thousand times is silly. If we can implement user-space's preferences
- cleanly--and we can, the idr layer makes stuff like this trivial--then we
+ thousand times is silly. If we can implement user-space's preferences
+ cleanly--and we can, the idr layer makes stuff like this trivial--then we
should.
There are other good arguments. With a single fd, there is a single
@@ -65,9 +74,11 @@
need not be a one-fd-per-process mapping; it is one-fd-per-queue and a
process can easily want more than one queue.
-Q: Why the system call approach?
+Q:
+ Why the system call approach?
-A: The poor user-space interface is the second biggest problem with dnotify.
+A:
+ The poor user-space interface is the second biggest problem with dnotify.
Signals are a terrible, terrible interface for file notification. Or for
anything, for that matter. The ideal solution, from all perspectives, is a
file descriptor-based one that allows basic file I/O and poll/select.
diff --git a/Documentation/filesystems/isofs.rst b/Documentation/filesystems/isofs.rst
new file mode 100644
index 0000000..08fd469
--- /dev/null
+++ b/Documentation/filesystems/isofs.rst
@@ -0,0 +1,64 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
+ISO9660 Filesystem
+==================
+
+Mount options that are the same as for msdos and vfat partitions.
+
+ ========= ========================================================
+ gid=nnn All files in the partition will be in group nnn.
+ uid=nnn All files in the partition will be owned by user id nnn.
+ umask=nnn The permission mask (see umask(1)) for the partition.
+ ========= ========================================================
+
+Mount options that are the same as vfat partitions. These are only useful
+when using discs encoded using Microsoft's Joliet extensions.
+
+ ============== =============================================================
+ iocharset=name Character set to use for converting from Unicode to
+ ASCII. Joliet filenames are stored in Unicode format, but
+ Unix for the most part doesn't know how to deal with Unicode.
+ There is also an option of doing UTF-8 translations with the
+ utf8 option.
+ utf8 Encode Unicode names in UTF-8 format. Default is no.
+ ============== =============================================================
+
+Mount options unique to the isofs filesystem.
+
+ ================= ============================================================
+ block=512 Set the block size for the disk to 512 bytes
+ block=1024 Set the block size for the disk to 1024 bytes
+ block=2048 Set the block size for the disk to 2048 bytes
+ check=relaxed Matches filenames with different cases
+ check=strict Matches only filenames with the exact same case
+ cruft Try to handle badly formatted CDs.
+ map=off Do not map non-Rock Ridge filenames to lower case
+ map=normal Map non-Rock Ridge filenames to lower case
+ map=acorn As map=normal but also apply Acorn extensions if present
+ mode=xxx Sets the permissions on files to xxx unless Rock Ridge
+ extensions set the permissions otherwise
+ dmode=xxx Sets the permissions on directories to xxx unless Rock Ridge
+ extensions set the permissions otherwise
+ overriderockperm Set permissions on files and directories according to
+ 'mode' and 'dmode' even though Rock Ridge extensions are
+ present.
+ nojoliet Ignore Joliet extensions if they are present.
+ norock Ignore Rock Ridge extensions if they are present.
+ hide Completely strip hidden files from the file system.
+ showassoc Show files marked with the 'associated' bit
+ unhide Deprecated; showing hidden files is now default;
+ If given, it is a synonym for 'showassoc' which will
+ recreate previous unhide behavior
+ session=x Select number of session on multisession CD
+ sbsector=xxx Session begins from sector xxx
+ ================= ============================================================
+
+Recommended documents about ISO 9660 standard are located at:
+
+- http://www.y-adagio.com/
+- ftp://ftp.ecma.ch/ecma-st/Ecma-119.pdf
+
+Quoting from the PDF "This 2nd Edition of Standard ECMA-119 is technically
+identical with ISO 9660.", so it is a valid and gratis substitute of the
+official ISO specification.
diff --git a/Documentation/filesystems/isofs.txt b/Documentation/filesystems/isofs.txt
deleted file mode 100644
index ba0a933..0000000
--- a/Documentation/filesystems/isofs.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-Mount options that are the same as for msdos and vfat partitions.
-
- gid=nnn All files in the partition will be in group nnn.
- uid=nnn All files in the partition will be owned by user id nnn.
- umask=nnn The permission mask (see umask(1)) for the partition.
-
-Mount options that are the same as vfat partitions. These are only useful
-when using discs encoded using Microsoft's Joliet extensions.
- iocharset=name Character set to use for converting from Unicode to
- ASCII. Joliet filenames are stored in Unicode format, but
- Unix for the most part doesn't know how to deal with Unicode.
- There is also an option of doing UTF-8 translations with the
- utf8 option.
- utf8 Encode Unicode names in UTF-8 format. Default is no.
-
-Mount options unique to the isofs filesystem.
- block=512 Set the block size for the disk to 512 bytes
- block=1024 Set the block size for the disk to 1024 bytes
- block=2048 Set the block size for the disk to 2048 bytes
- check=relaxed Matches filenames with different cases
- check=strict Matches only filenames with the exact same case
- cruft Try to handle badly formatted CDs.
- map=off Do not map non-Rock Ridge filenames to lower case
- map=normal Map non-Rock Ridge filenames to lower case
- map=acorn As map=normal but also apply Acorn extensions if present
- mode=xxx Sets the permissions on files to xxx unless Rock Ridge
- extensions set the permissions otherwise
- dmode=xxx Sets the permissions on directories to xxx unless Rock Ridge
- extensions set the permissions otherwise
- overriderockperm Set permissions on files and directories according to
- 'mode' and 'dmode' even though Rock Ridge extensions are
- present.
- nojoliet Ignore Joliet extensions if they are present.
- norock Ignore Rock Ridge extensions if they are present.
- hide Completely strip hidden files from the file system.
- showassoc Show files marked with the 'associated' bit
- unhide Deprecated; showing hidden files is now default;
- If given, it is a synonym for 'showassoc' which will
- recreate previous unhide behavior
- session=x Select number of session on multisession CD
- sbsector=xxx Session begins from sector xxx
-
-Recommended documents about ISO 9660 standard are located at:
-http://www.y-adagio.com/
-ftp://ftp.ecma.ch/ecma-st/Ecma-119.pdf
-Quoting from the PDF "This 2nd Edition of Standard ECMA-119 is technically
-identical with ISO 9660.", so it is a valid and gratis substitute of the
-official ISO specification.
diff --git a/Documentation/filesystems/nfs/index.rst b/Documentation/filesystems/nfs/index.rst
new file mode 100644
index 0000000..6580562
--- /dev/null
+++ b/Documentation/filesystems/nfs/index.rst
@@ -0,0 +1,13 @@
+===============================
+NFS
+===============================
+
+
+.. toctree::
+ :maxdepth: 1
+
+ pnfs
+ rpc-cache
+ rpc-server-gss
+ nfs41-server
+ knfsd-stats
diff --git a/Documentation/filesystems/nfs/knfsd-stats.txt b/Documentation/filesystems/nfs/knfsd-stats.rst
similarity index 94%
rename from Documentation/filesystems/nfs/knfsd-stats.txt
rename to Documentation/filesystems/nfs/knfsd-stats.rst
index 1a5d821..80bcf13 100644
--- a/Documentation/filesystems/nfs/knfsd-stats.txt
+++ b/Documentation/filesystems/nfs/knfsd-stats.rst
@@ -1,7 +1,9 @@
-
+============================
Kernel NFS Server Statistics
============================
+:Authors: Greg Banks <gnb@sgi.com> - 26 Mar 2009
+
This document describes the format and semantics of the statistics
which the kernel NFS server makes available to userspace. These
statistics are available in several text form pseudo files, each of
@@ -18,7 +20,7 @@
separated by whitespace.
/proc/fs/nfsd/pool_stats
-------------------------
+========================
This file is available in kernels from 2.6.30 onwards, if the
/proc/fs/nfsd filesystem is mounted (it almost always should be).
@@ -109,15 +111,12 @@
(sockets-enqueued counts this case), or the packet can be temporarily
deferred because the transport is currently being used by an nfsd
thread. This last case is not very interesting and is not explicitly
-counted, but can be inferred from the other counters thus:
+counted, but can be inferred from the other counters thus::
-packets-deferred = packets-arrived - ( sockets-enqueued + threads-woken )
+ packets-deferred = packets-arrived - ( sockets-enqueued + threads-woken )
More
-----
+====
+
Descriptions of the other statistics file should go here.
-
-
-Greg Banks <gnb@sgi.com>
-26 Mar 2009
diff --git a/Documentation/filesystems/nfs/nfs41-server.rst b/Documentation/filesystems/nfs/nfs41-server.rst
new file mode 100644
index 0000000..16b5f02
--- /dev/null
+++ b/Documentation/filesystems/nfs/nfs41-server.rst
@@ -0,0 +1,256 @@
+=============================
+NFSv4.1 Server Implementation
+=============================
+
+Server support for minorversion 1 can be controlled using the
+/proc/fs/nfsd/versions control file. The string output returned
+by reading this file will contain either "+4.1" or "-4.1"
+correspondingly.
+
+Currently, server support for minorversion 1 is enabled by default.
+It can be disabled at run time by writing the string "-4.1" to
+the /proc/fs/nfsd/versions control file. Note that to write this
+control file, the nfsd service must be taken down. You can use rpc.nfsd
+for this; see rpc.nfsd(8).
+
+(Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and
+"-4", respectively. Therefore, code meant to work on both new and old
+kernels must turn 4.1 on or off *before* turning support for version 4
+on or off; rpc.nfsd does this correctly.)
+
+The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
+on RFC 5661.
+
+From the many new features in NFSv4.1 the current implementation
+focuses on the mandatory-to-implement NFSv4.1 Sessions, providing
+"exactly once" semantics and better control and throttling of the
+resources allocated for each client.
+
+The table below, taken from the NFSv4.1 document, lists
+the operations that are mandatory to implement (REQ), optional
+(OPT), and NFSv4.0 operations that are required not to implement (MNI)
+in minor version 1. The first column indicates the operations that
+are not supported yet by the linux server implementation.
+
+The OPTIONAL features identified and their abbreviations are as follows:
+
+- **pNFS** Parallel NFS
+- **FDELG** File Delegations
+- **DDELG** Directory Delegations
+
+The following abbreviations indicate the linux server implementation status.
+
+- **I** Implemented NFSv4.1 operations.
+- **NS** Not Supported.
+- **NS\*** Unimplemented optional feature.
+
+Operations
+==========
+
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| Implementation status | Operation | REQ,REC, OPT or NMI | Feature (REQ, REC or OPT) | Definition |
++=======================+======================+=====================+===========================+================+
+| | ACCESS | REQ | | Section 18.1 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | BACKCHANNEL_CTL | REQ | | Section 18.33 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | BIND_CONN_TO_SESSION | REQ | | Section 18.34 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | CLOSE | REQ | | Section 18.2 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | COMMIT | REQ | | Section 18.3 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | CREATE | REQ | | Section 18.4 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | CREATE_SESSION | REQ | | Section 18.36 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| NS* | DELEGPURGE | OPT | FDELG (REQ) | Section 18.5 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | DELEGRETURN | OPT | FDELG, | Section 18.6 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | | | DDELG, pNFS | |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | | | (REQ) | |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | DESTROY_CLIENTID | REQ | | Section 18.50 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | DESTROY_SESSION | REQ | | Section 18.37 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | EXCHANGE_ID | REQ | | Section 18.35 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | FREE_STATEID | REQ | | Section 18.38 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | GETATTR | REQ | | Section 18.7 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | GETDEVICEINFO | OPT | pNFS (REQ) | Section 18.40 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| NS* | GETDEVICELIST | OPT | pNFS (OPT) | Section 18.41 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | GETFH | REQ | | Section 18.8 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| NS* | GET_DIR_DELEGATION | OPT | DDELG (REQ) | Section 18.39 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | LAYOUTCOMMIT | OPT | pNFS (REQ) | Section 18.42 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | LAYOUTGET | OPT | pNFS (REQ) | Section 18.43 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | LAYOUTRETURN | OPT | pNFS (REQ) | Section 18.44 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | LINK | OPT | | Section 18.9 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | LOCK | REQ | | Section 18.10 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | LOCKT | REQ | | Section 18.11 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | LOCKU | REQ | | Section 18.12 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | LOOKUP | REQ | | Section 18.13 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | LOOKUPP | REQ | | Section 18.14 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | NVERIFY | REQ | | Section 18.15 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | OPEN | REQ | | Section 18.16 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| NS* | OPENATTR | OPT | | Section 18.17 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | OPEN_CONFIRM | MNI | | N/A |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | OPEN_DOWNGRADE | REQ | | Section 18.18 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | PUTFH | REQ | | Section 18.19 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | PUTPUBFH | REQ | | Section 18.20 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | PUTROOTFH | REQ | | Section 18.21 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | READ | REQ | | Section 18.22 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | READDIR | REQ | | Section 18.23 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | READLINK | OPT | | Section 18.24 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | RECLAIM_COMPLETE | REQ | | Section 18.51 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | RELEASE_LOCKOWNER | MNI | | N/A |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | REMOVE | REQ | | Section 18.25 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | RENAME | REQ | | Section 18.26 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | RENEW | MNI | | N/A |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | RESTOREFH | REQ | | Section 18.27 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | SAVEFH | REQ | | Section 18.28 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | SECINFO | REQ | | Section 18.29 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | SECINFO_NO_NAME | REC | pNFS files | Section 18.45, |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | | | layout (REQ) | Section 13.12 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | SEQUENCE | REQ | | Section 18.46 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | SETATTR | REQ | | Section 18.30 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | SETCLIENTID | MNI | | N/A |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | SETCLIENTID_CONFIRM | MNI | | N/A |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| NS | SET_SSV | REQ | | Section 18.47 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | TEST_STATEID | REQ | | Section 18.48 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | VERIFY | REQ | | Section 18.31 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| NS* | WANT_DELEGATION | OPT | FDELG (OPT) | Section 18.49 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | WRITE | REQ | | Section 18.32 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+
+
+Callback Operations
+===================
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| Implementation status | Operation | REQ,REC, OPT or NMI | Feature (REQ, REC or OPT) | Definition |
++=======================+=========================+=====================+===========================+===============+
+| | CB_GETATTR | OPT | FDELG (REQ) | Section 20.1 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| I | CB_LAYOUTRECALL | OPT | pNFS (REQ) | Section 20.3 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| NS* | CB_NOTIFY | OPT | DDELG (REQ) | Section 20.4 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| NS* | CB_NOTIFY_DEVICEID | OPT | pNFS (OPT) | Section 20.12 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| NS* | CB_NOTIFY_LOCK | OPT | | Section 20.11 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| NS* | CB_PUSH_DELEG | OPT | FDELG (OPT) | Section 20.5 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | CB_RECALL | OPT | FDELG, | Section 20.2 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | | | DDELG, pNFS | |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | | | (REQ) | |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| NS* | CB_RECALL_ANY | OPT | FDELG, | Section 20.6 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | | | DDELG, pNFS | |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | | | (REQ) | |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| NS | CB_RECALL_SLOT | REQ | | Section 20.8 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| NS* | CB_RECALLABLE_OBJ_AVAIL | OPT | DDELG, pNFS | Section 20.7 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | | | (REQ) | |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| I | CB_SEQUENCE | OPT | FDELG, | Section 20.9 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | | | DDELG, pNFS | |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | | | (REQ) | |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| NS* | CB_WANTS_CANCELLED | OPT | FDELG, | Section 20.10 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | | | DDELG, pNFS | |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | | | (REQ) | |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+
+
+Implementation notes:
+=====================
+
+SSV:
+ The spec claims this is mandatory, but we don't actually know of any
+ implementations, so we're ignoring it for now. The server returns
+ NFS4ERR_ENCR_ALG_UNSUPP on EXCHANGE_ID, which should be future-proof.
+
+GSS on the backchannel:
+ Again, theoretically required but not widely implemented (in
+ particular, the current Linux client doesn't request it). We return
+ NFS4ERR_ENCR_ALG_UNSUPP on CREATE_SESSION.
+
+DELEGPURGE:
+ mandatory only for servers that support CLAIM_DELEGATE_PREV and/or
+ CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that
+ persist across client reboots). Thus we need not implement this for
+ now.
+
+EXCHANGE_ID:
+ implementation ids are ignored
+
+CREATE_SESSION:
+ backchannel attributes are ignored
+
+SEQUENCE:
+ no support for dynamic slot table renegotiation (optional)
+
+Nonstandard compound limitations:
+ No support for a sessions fore channel RPC compound that requires both a
+ ca_maxrequestsize request and a ca_maxresponsesize reply, so we may
+ fail to live up to the promise we made in CREATE_SESSION fore channel
+ negotiation.
+
+See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues.
diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt
deleted file mode 100644
index 682a59f..0000000
--- a/Documentation/filesystems/nfs/nfs41-server.txt
+++ /dev/null
@@ -1,173 +0,0 @@
-NFSv4.1 Server Implementation
-
-Server support for minorversion 1 can be controlled using the
-/proc/fs/nfsd/versions control file. The string output returned
-by reading this file will contain either "+4.1" or "-4.1"
-correspondingly.
-
-Currently, server support for minorversion 1 is enabled by default.
-It can be disabled at run time by writing the string "-4.1" to
-the /proc/fs/nfsd/versions control file. Note that to write this
-control file, the nfsd service must be taken down. You can use rpc.nfsd
-for this; see rpc.nfsd(8).
-
-(Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and
-"-4", respectively. Therefore, code meant to work on both new and old
-kernels must turn 4.1 on or off *before* turning support for version 4
-on or off; rpc.nfsd does this correctly.)
-
-The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
-on RFC 5661.
-
-From the many new features in NFSv4.1 the current implementation
-focuses on the mandatory-to-implement NFSv4.1 Sessions, providing
-"exactly once" semantics and better control and throttling of the
-resources allocated for each client.
-
-The table below, taken from the NFSv4.1 document, lists
-the operations that are mandatory to implement (REQ), optional
-(OPT), and NFSv4.0 operations that are required not to implement (MNI)
-in minor version 1. The first column indicates the operations that
-are not supported yet by the linux server implementation.
-
-The OPTIONAL features identified and their abbreviations are as follows:
- pNFS Parallel NFS
- FDELG File Delegations
- DDELG Directory Delegations
-
-The following abbreviations indicate the linux server implementation status.
- I Implemented NFSv4.1 operations.
- NS Not Supported.
- NS* Unimplemented optional feature.
-
-Operations
-
- +----------------------+------------+--------------+----------------+
- | Operation | REQ, REC, | Feature | Definition |
- | | OPT, or | (REQ, REC, | |
- | | MNI | or OPT) | |
- +----------------------+------------+--------------+----------------+
- | ACCESS | REQ | | Section 18.1 |
-I | BACKCHANNEL_CTL | REQ | | Section 18.33 |
-I | BIND_CONN_TO_SESSION | REQ | | Section 18.34 |
- | CLOSE | REQ | | Section 18.2 |
- | COMMIT | REQ | | Section 18.3 |
- | CREATE | REQ | | Section 18.4 |
-I | CREATE_SESSION | REQ | | Section 18.36 |
-NS*| DELEGPURGE | OPT | FDELG (REQ) | Section 18.5 |
- | DELEGRETURN | OPT | FDELG, | Section 18.6 |
- | | | DDELG, pNFS | |
- | | | (REQ) | |
-I | DESTROY_CLIENTID | REQ | | Section 18.50 |
-I | DESTROY_SESSION | REQ | | Section 18.37 |
-I | EXCHANGE_ID | REQ | | Section 18.35 |
-I | FREE_STATEID | REQ | | Section 18.38 |
- | GETATTR | REQ | | Section 18.7 |
-I | GETDEVICEINFO | OPT | pNFS (REQ) | Section 18.40 |
-NS*| GETDEVICELIST | OPT | pNFS (OPT) | Section 18.41 |
- | GETFH | REQ | | Section 18.8 |
-NS*| GET_DIR_DELEGATION | OPT | DDELG (REQ) | Section 18.39 |
-I | LAYOUTCOMMIT | OPT | pNFS (REQ) | Section 18.42 |
-I | LAYOUTGET | OPT | pNFS (REQ) | Section 18.43 |
-I | LAYOUTRETURN | OPT | pNFS (REQ) | Section 18.44 |
- | LINK | OPT | | Section 18.9 |
- | LOCK | REQ | | Section 18.10 |
- | LOCKT | REQ | | Section 18.11 |
- | LOCKU | REQ | | Section 18.12 |
- | LOOKUP | REQ | | Section 18.13 |
- | LOOKUPP | REQ | | Section 18.14 |
- | NVERIFY | REQ | | Section 18.15 |
- | OPEN | REQ | | Section 18.16 |
-NS*| OPENATTR | OPT | | Section 18.17 |
- | OPEN_CONFIRM | MNI | | N/A |
- | OPEN_DOWNGRADE | REQ | | Section 18.18 |
- | PUTFH | REQ | | Section 18.19 |
- | PUTPUBFH | REQ | | Section 18.20 |
- | PUTROOTFH | REQ | | Section 18.21 |
- | READ | REQ | | Section 18.22 |
- | READDIR | REQ | | Section 18.23 |
- | READLINK | OPT | | Section 18.24 |
- | RECLAIM_COMPLETE | REQ | | Section 18.51 |
- | RELEASE_LOCKOWNER | MNI | | N/A |
- | REMOVE | REQ | | Section 18.25 |
- | RENAME | REQ | | Section 18.26 |
- | RENEW | MNI | | N/A |
- | RESTOREFH | REQ | | Section 18.27 |
- | SAVEFH | REQ | | Section 18.28 |
- | SECINFO | REQ | | Section 18.29 |
-I | SECINFO_NO_NAME | REC | pNFS files | Section 18.45, |
- | | | layout (REQ) | Section 13.12 |
-I | SEQUENCE | REQ | | Section 18.46 |
- | SETATTR | REQ | | Section 18.30 |
- | SETCLIENTID | MNI | | N/A |
- | SETCLIENTID_CONFIRM | MNI | | N/A |
-NS | SET_SSV | REQ | | Section 18.47 |
-I | TEST_STATEID | REQ | | Section 18.48 |
- | VERIFY | REQ | | Section 18.31 |
-NS*| WANT_DELEGATION | OPT | FDELG (OPT) | Section 18.49 |
- | WRITE | REQ | | Section 18.32 |
-
-Callback Operations
-
- +-------------------------+-----------+-------------+---------------+
- | Operation | REQ, REC, | Feature | Definition |
- | | OPT, or | (REQ, REC, | |
- | | MNI | or OPT) | |
- +-------------------------+-----------+-------------+---------------+
- | CB_GETATTR | OPT | FDELG (REQ) | Section 20.1 |
-I | CB_LAYOUTRECALL | OPT | pNFS (REQ) | Section 20.3 |
-NS*| CB_NOTIFY | OPT | DDELG (REQ) | Section 20.4 |
-NS*| CB_NOTIFY_DEVICEID | OPT | pNFS (OPT) | Section 20.12 |
-NS*| CB_NOTIFY_LOCK | OPT | | Section 20.11 |
-NS*| CB_PUSH_DELEG | OPT | FDELG (OPT) | Section 20.5 |
- | CB_RECALL | OPT | FDELG, | Section 20.2 |
- | | | DDELG, pNFS | |
- | | | (REQ) | |
-NS*| CB_RECALL_ANY | OPT | FDELG, | Section 20.6 |
- | | | DDELG, pNFS | |
- | | | (REQ) | |
-NS | CB_RECALL_SLOT | REQ | | Section 20.8 |
-NS*| CB_RECALLABLE_OBJ_AVAIL | OPT | DDELG, pNFS | Section 20.7 |
- | | | (REQ) | |
-I | CB_SEQUENCE | OPT | FDELG, | Section 20.9 |
- | | | DDELG, pNFS | |
- | | | (REQ) | |
-NS*| CB_WANTS_CANCELLED | OPT | FDELG, | Section 20.10 |
- | | | DDELG, pNFS | |
- | | | (REQ) | |
- +-------------------------+-----------+-------------+---------------+
-
-Implementation notes:
-
-SSV:
-* The spec claims this is mandatory, but we don't actually know of any
- implementations, so we're ignoring it for now. The server returns
- NFS4ERR_ENCR_ALG_UNSUPP on EXCHANGE_ID, which should be future-proof.
-
-GSS on the backchannel:
-* Again, theoretically required but not widely implemented (in
- particular, the current Linux client doesn't request it). We return
- NFS4ERR_ENCR_ALG_UNSUPP on CREATE_SESSION.
-
-DELEGPURGE:
-* mandatory only for servers that support CLAIM_DELEGATE_PREV and/or
- CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that
- persist across client reboots). Thus we need not implement this for
- now.
-
-EXCHANGE_ID:
-* implementation ids are ignored
-
-CREATE_SESSION:
-* backchannel attributes are ignored
-
-SEQUENCE:
-* no support for dynamic slot table renegotiation (optional)
-
-Nonstandard compound limitations:
-* No support for a sessions fore channel RPC compound that requires both a
- ca_maxrequestsize request and a ca_maxresponsesize reply, so we may
- fail to live up to the promise we made in CREATE_SESSION fore channel
- negotiation.
-
-See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues.
diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.rst
similarity index 87%
rename from Documentation/filesystems/nfs/pnfs.txt
rename to Documentation/filesystems/nfs/pnfs.rst
index 80dc0bd..7c470ec 100644
--- a/Documentation/filesystems/nfs/pnfs.txt
+++ b/Documentation/filesystems/nfs/pnfs.rst
@@ -1,15 +1,17 @@
-Reference counting in pnfs:
+==========================
+Reference counting in pnfs
==========================
The are several inter-related caches. We have layouts which can
reference multiple devices, each of which can reference multiple data servers.
Each data server can be referenced by multiple devices. Each device
-can be referenced by multiple layouts. To keep all of this straight,
+can be referenced by multiple layouts. To keep all of this straight,
we need to reference count.
struct pnfs_layout_hdr
-----------------------
+======================
+
The on-the-wire command LAYOUTGET corresponds to struct
pnfs_layout_segment, usually referred to by the variable name lseg.
Each nfs_inode may hold a pointer to a cache of these layout
@@ -25,7 +27,8 @@
keeps it in the list.
deviceid_cache
---------------
+==============
+
lsegs reference device ids, which are resolved per nfs_client and
layout driver type. The device ids are held in a RCU cache (struct
nfs4_deviceid_cache). The cache itself is referenced across each
@@ -38,24 +41,26 @@
deviceid's per filesystem, and multiple filesystems per nfs_client.
The hash code is copied from the nfsd code base. A discussion of
-hashing and variations of this algorithm can be found at:
-http://groups.google.com/group/comp.lang.c/browse_thread/thread/9522965e2b8d3809
+hashing and variations of this algorithm can be found `here.
+<http://groups.google.com/group/comp.lang.c/browse_thread/thread/9522965e2b8d3809>`_
data server cache
------------------
+=================
+
file driver devices refer to data servers, which are kept in a module
level cache. Its reference is held over the lifetime of the deviceid
pointing to it.
lseg
-----
+====
+
lseg maintains an extra reference corresponding to the NFS_LSEG_VALID
bit which holds it in the pnfs_layout_hdr's list. When the final lseg
is removed from the pnfs_layout_hdr's list, the NFS_LAYOUT_DESTROYED
bit is set, preventing any new lsegs from being added.
layout drivers
---------------
+==============
PNFS utilizes what is called layout drivers. The STD defines 4 basic
layout types: "files", "objects", "blocks", and "flexfiles". For each
@@ -68,6 +73,6 @@
Flexfiles-layout-driver code is in: fs/nfs/flexfilelayout/.. directory
blocks-layout setup
--------------------
+===================
TODO: Document the setup needs of the blocks layout driver
diff --git a/Documentation/filesystems/nfs/rpc-cache.txt b/Documentation/filesystems/nfs/rpc-cache.rst
similarity index 66%
rename from Documentation/filesystems/nfs/rpc-cache.txt
rename to Documentation/filesystems/nfs/rpc-cache.rst
index c4dac82..bb164ee 100644
--- a/Documentation/filesystems/nfs/rpc-cache.txt
+++ b/Documentation/filesystems/nfs/rpc-cache.rst
@@ -1,9 +1,14 @@
- This document gives a brief introduction to the caching
+=========
+RPC Cache
+=========
+
+This document gives a brief introduction to the caching
mechanisms in the sunrpc layer that is used, in particular,
for NFS authentication.
-CACHES
+Caches
======
+
The caching replaces the old exports table and allows for
a wide variety of values to be caches.
@@ -12,6 +17,7 @@
of common code for managing these caches.
Examples of caches that are likely to be needed are:
+
- mapping from IP address to client name
- mapping from client name and filesystem to export options
- mapping from UID to list of GIDs, to work around NFS's limitation
@@ -21,6 +27,7 @@
- mapping from network identify to public key for crypto authentication.
The common code handles such things as:
+
- general cache lookup with correct locking
- supporting 'NEGATIVE' as well as positive entries
- allowing an EXPIRED time on cache items, and removing
@@ -35,60 +42,66 @@
Creating a Cache
----------------
-1/ A cache needs a datum to store. This is in the form of a
- structure definition that must contain a
- struct cache_head
+- A cache needs a datum to store. This is in the form of a
+ structure definition that must contain a struct cache_head
as an element, usually the first.
It will also contain a key and some content.
Each cache element is reference counted and contains
expiry and update times for use in cache management.
-2/ A cache needs a "cache_detail" structure that
+- A cache needs a "cache_detail" structure that
describes the cache. This stores the hash table, some
parameters for cache management, and some operations detailing how
to work with particular cache items.
- The operations requires are:
- struct cache_head *alloc(void)
- This simply allocates appropriate memory and returns
- a pointer to the cache_detail embedded within the
- structure
- void cache_put(struct kref *)
- This is called when the last reference to an item is
- dropped. The pointer passed is to the 'ref' field
- in the cache_head. cache_put should release any
- references create by 'cache_init' and, if CACHE_VALID
- is set, any references created by cache_update.
- It should then release the memory allocated by
- 'alloc'.
- int match(struct cache_head *orig, struct cache_head *new)
- test if the keys in the two structures match. Return
- 1 if they do, 0 if they don't.
- void init(struct cache_head *orig, struct cache_head *new)
- Set the 'key' fields in 'new' from 'orig'. This may
- include taking references to shared objects.
- void update(struct cache_head *orig, struct cache_head *new)
- Set the 'content' fileds in 'new' from 'orig'.
- int cache_show(struct seq_file *m, struct cache_detail *cd,
- struct cache_head *h)
- Optional. Used to provide a /proc file that lists the
- contents of a cache. This should show one item,
- usually on just one line.
- int cache_request(struct cache_detail *cd, struct cache_head *h,
- char **bpp, int *blen)
- Format a request to be send to user-space for an item
- to be instantiated. *bpp is a buffer of size *blen.
- bpp should be moved forward over the encoded message,
- and *blen should be reduced to show how much free
- space remains. Return 0 on success or <0 if not
- enough room or other problem.
- int cache_parse(struct cache_detail *cd, char *buf, int len)
- A message from user space has arrived to fill out a
- cache entry. It is in 'buf' of length 'len'.
- cache_parse should parse this, find the item in the
- cache with sunrpc_cache_lookup_rcu, and update the item
- with sunrpc_cache_update.
+
+ The operations are:
+
+ struct cache_head \*alloc(void)
+ This simply allocates appropriate memory and returns
+ a pointer to the cache_detail embedded within the
+ structure
+
+ void cache_put(struct kref \*)
+ This is called when the last reference to an item is
+ dropped. The pointer passed is to the 'ref' field
+ in the cache_head. cache_put should release any
+ references create by 'cache_init' and, if CACHE_VALID
+ is set, any references created by cache_update.
+ It should then release the memory allocated by
+ 'alloc'.
+
+ int match(struct cache_head \*orig, struct cache_head \*new)
+ test if the keys in the two structures match. Return
+ 1 if they do, 0 if they don't.
+
+ void init(struct cache_head \*orig, struct cache_head \*new)
+ Set the 'key' fields in 'new' from 'orig'. This may
+ include taking references to shared objects.
+
+ void update(struct cache_head \*orig, struct cache_head \*new)
+ Set the 'content' fileds in 'new' from 'orig'.
+
+ int cache_show(struct seq_file \*m, struct cache_detail \*cd, struct cache_head \*h)
+ Optional. Used to provide a /proc file that lists the
+ contents of a cache. This should show one item,
+ usually on just one line.
+
+ int cache_request(struct cache_detail \*cd, struct cache_head \*h, char \*\*bpp, int \*blen)
+ Format a request to be send to user-space for an item
+ to be instantiated. \*bpp is a buffer of size \*blen.
+ bpp should be moved forward over the encoded message,
+ and \*blen should be reduced to show how much free
+ space remains. Return 0 on success or <0 if not
+ enough room or other problem.
+
+ int cache_parse(struct cache_detail \*cd, char \*buf, int len)
+ A message from user space has arrived to fill out a
+ cache entry. It is in 'buf' of length 'len'.
+ cache_parse should parse this, find the item in the
+ cache with sunrpc_cache_lookup_rcu, and update the item
+ with sunrpc_cache_update.
-3/ A cache needs to be registered using cache_register(). This
+- A cache needs to be registered using cache_register(). This
includes it on a list of caches that will be regularly
cleaned to discard old data.
@@ -107,7 +120,7 @@
call is needed but not possible, -EAGAIN if an upcall is pending,
or 0 if the data is valid;
-cache_check can be passed a "struct cache_req *". This structure is
+cache_check can be passed a "struct cache_req\*". This structure is
typically embedded in the actual request and can be used to create a
deferred copy of the request (struct cache_deferred_req). This is
done when the found cache item is not uptodate, but the is reason to
@@ -139,9 +152,11 @@
passed as a whole to the cache for parsing and interpretation.
Each cache can treat the write requests differently, but it is
expected that a message written will contain:
+
- a key
- an expiry time
- a content.
+
with the intention that an item in the cache with the give key
should be create or updated to have the given content, and the
expiry time should be set on that item.
@@ -156,7 +171,8 @@
select or poll for read will block waiting for another request to be
added.
-Thus a user-space helper is likely to:
+Thus a user-space helper is likely to::
+
open the channel.
select for readable
read a request
@@ -175,12 +191,13 @@
takes a cache item and encodes a request into the buffer
provided.
-Note: If a cache has no active readers on the channel, and has had not
-active readers for more than 60 seconds, further requests will not be
-added to the channel but instead all lookups that do not find a valid
-entry will fail. This is partly for backward compatibility: The
-previous nfs exports table was deemed to be authoritative and a
-failed lookup meant a definite 'no'.
+.. note::
+ If a cache has no active readers on the channel, and has had not
+ active readers for more than 60 seconds, further requests will not be
+ added to the channel but instead all lookups that do not find a valid
+ entry will fail. This is partly for backward compatibility: The
+ previous nfs exports table was deemed to be authoritative and a
+ failed lookup meant a definite 'no'.
request/response format
-----------------------
@@ -193,10 +210,11 @@
Fields within the record should be separated by spaces, normally one.
If spaces, newlines, or nul characters are needed in a field they
much be quoted. two mechanisms are available:
-1/ If a field begins '\x' then it must contain an even number of
+
+- If a field begins '\x' then it must contain an even number of
hex digits, and pairs of these digits provide the bytes in the
field.
-2/ otherwise a \ in the field must be followed by 3 octal digits
+- otherwise a \ in the field must be followed by 3 octal digits
which give the code for a byte. Other characters are treated
as them selves. At the very least, space, newline, nul, and
'\' must be quoted in this way.
diff --git a/Documentation/filesystems/nfs/rpc-server-gss.txt b/Documentation/filesystems/nfs/rpc-server-gss.rst
similarity index 92%
rename from Documentation/filesystems/nfs/rpc-server-gss.txt
rename to Documentation/filesystems/nfs/rpc-server-gss.rst
index 310bbba..8127545 100644
--- a/Documentation/filesystems/nfs/rpc-server-gss.txt
+++ b/Documentation/filesystems/nfs/rpc-server-gss.rst
@@ -1,4 +1,4 @@
-
+=========================================
rpcsec_gss support for kernel RPC servers
=========================================
@@ -9,14 +9,17 @@
purposes of authentication.)
RPCGSS is specified in a few IETF documents:
+
- RFC2203 v1: http://tools.ietf.org/rfc/rfc2203.txt
- RFC5403 v2: http://tools.ietf.org/rfc/rfc5403.txt
+
and there is a 3rd version being proposed:
+
- http://tools.ietf.org/id/draft-williams-rpcsecgssv3.txt
(At draft n. 02 at the time of writing)
Background
-----------
+==========
The RPCGSS Authentication method describes a way to perform GSSAPI
Authentication for NFS. Although GSSAPI is itself completely mechanism
@@ -29,6 +32,7 @@
GSSAPI is a complex library, and implementing it completely in kernel is
unwarranted. However GSSAPI operations are fundementally separable in 2
parts:
+
- initial context establishment
- integrity/privacy protection (signing and encrypting of individual
packets)
@@ -41,7 +45,7 @@
need upcalls to request userspace to perform context establishment.
NFS Server Legacy Upcall Mechanism
-----------------------------------
+==================================
The classic upcall mechanism uses a custom text based upcall mechanism
to talk to a custom daemon called rpc.svcgssd that is provide by the
@@ -62,21 +66,20 @@
back to the kernel (4KiB).
NFS Server New RPC Upcall Mechanism
------------------------------------
+===================================
The newer upcall mechanism uses RPC over a unix socket to a daemon
called gss-proxy, implemented by a userspace program called Gssproxy.
-The gss_proxy RPC protocol is currently documented here:
-
- https://fedorahosted.org/gss-proxy/wiki/ProtocolDocumentation
+The gss_proxy RPC protocol is currently documented `here
+<https://fedorahosted.org/gss-proxy/wiki/ProtocolDocumentation>`_.
This upcall mechanism uses the kernel rpc client and connects to the gssproxy
userspace program over a regular unix socket. The gssproxy protocol does not
suffer from the size limitations of the legacy protocol.
Negotiating Upcall Mechanisms
------------------------------
+=============================
To provide backward compatibility, the kernel defaults to using the
legacy mechanism. To switch to the new mechanism, gss-proxy must bind
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.rst
similarity index 89%
rename from Documentation/filesystems/nilfs2.txt
rename to Documentation/filesystems/nilfs2.rst
index f2f3f85..6c49f04 100644
--- a/Documentation/filesystems/nilfs2.txt
+++ b/Documentation/filesystems/nilfs2.rst
@@ -1,5 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======
NILFS2
-------
+======
NILFS2 is a log-structured file system (LFS) supporting continuous
snapshotting. In addition to versioning capability of the entire file
@@ -25,9 +28,9 @@
cleaner or garbage collector) are required. Details on the tools are
described in the man pages included in the package.
-Project web page: https://nilfs.sourceforge.io/
-Download page: https://nilfs.sourceforge.io/en/download.html
-List info: http://vger.kernel.org/vger-lists.html#linux-nilfs
+:Project web page: https://nilfs.sourceforge.io/
+:Download page: https://nilfs.sourceforge.io/en/download.html
+:List info: http://vger.kernel.org/vger-lists.html#linux-nilfs
Caveats
=======
@@ -47,6 +50,7 @@
NILFS2 supports the following mount options:
(*) == default
+======================= =======================================================
barrier(*) This enables/disables the use of write barriers. This
nobarrier requires an IO stack which can support barriers, and
if nilfs gets an error on a barrier write, it will
@@ -79,6 +83,7 @@
nodiscard(*) The discard/TRIM commands are sent to the underlying
block device when blocks are freed. This is useful
for SSD devices and sparse/thinly-provisioned LUNs.
+======================= =======================================================
Ioctls
======
@@ -87,9 +92,11 @@
through the system call interfaces. The list of all NILFS2 specific ioctls are
shown in the table below.
-Table of NILFS2 specific ioctls
-..............................................................................
+Table of NILFS2 specific ioctls:
+
+ ============================== ===============================================
Ioctl Description
+ ============================== ===============================================
NILFS_IOCTL_CHANGE_CPMODE Change mode of given checkpoint between
checkpoint and snapshot state. This ioctl is
used in chcp and mkcp utilities.
@@ -142,11 +149,12 @@
NILFS_IOCTL_SET_ALLOC_RANGE Define lower limit of segments in bytes and
upper limit of segments in bytes. This ioctl
is used by nilfs_resize utility.
+ ============================== ===============================================
NILFS2 usage
============
-To use nilfs2 as a local file system, simply:
+To use nilfs2 as a local file system, simply::
# mkfs -t nilfs2 /dev/block_device
# mount -t nilfs2 /dev/block_device /dir
@@ -157,18 +165,20 @@
Checkpoints and snapshots are managed by the following commands.
Their manpages are included in the nilfs-utils package above.
+ ==== ===========================================================
lscp list checkpoints or snapshots.
mkcp make a checkpoint or a snapshot.
chcp change an existing checkpoint to a snapshot or vice versa.
rmcp invalidate specified checkpoint(s).
+ ==== ===========================================================
-To mount a snapshot,
+To mount a snapshot::
# mount -t nilfs2 -r -o cp=<cno> /dev/block_device /snap_dir
where <cno> is the checkpoint number of the snapshot.
-To unmount the NILFS2 mount point or snapshot, simply:
+To unmount the NILFS2 mount point or snapshot, simply::
# umount /dir
@@ -181,7 +191,7 @@
A nilfs2 volume is equally divided into a number of segments except
for the super block (SB) and segment #0. A segment is the container
of logs. Each log is composed of summary information blocks, payload
-blocks, and an optional super root block (SR):
+blocks, and an optional super root block (SR)::
______________________________________________________
| |SB| | Segment | Segment | Segment | ... | Segment | |
@@ -200,7 +210,7 @@
|_blocks__|_________________|__|
The payload blocks are organized per file, and each file consists of
-data blocks and B-tree node blocks:
+data blocks and B-tree node blocks::
|<--- File-A --->|<--- File-B --->|
_______________________________________________________________
@@ -213,7 +223,7 @@
The organization of the blocks is recorded in the summary information
blocks, which contains a header structure (nilfs_segment_summary), per
-file structures (nilfs_finfo), and per block structures (nilfs_binfo):
+file structures (nilfs_finfo), and per block structures (nilfs_binfo)::
_________________________________________________________________________
| Summary | finfo | binfo | ... | binfo | finfo | binfo | ... | binfo |...
@@ -223,7 +233,7 @@
The logs include regular files, directory files, symbolic link files
and several meta data files. The mata data files are the files used
to maintain file system meta data. The current version of NILFS2 uses
-the following meta data files:
+the following meta data files::
1) Inode file (ifile) -- Stores on-disk inodes
2) Checkpoint file (cpfile) -- Stores checkpoints
@@ -232,7 +242,7 @@
(DAT) block numbers. This file serves to
make on-disk blocks relocatable.
-The following figure shows a typical organization of the logs:
+The following figure shows a typical organization of the logs::
_________________________________________________________________________
| Summary | regular file | file | ... | ifile | cpfile | sufile | DAT |SR|
@@ -250,7 +260,7 @@
of regular files, directories, symlinks and other special files, are
included in the ifile. The inode of ifile itself is included in the
corresponding checkpoint entry in the cpfile. Thus, the hierarchy
-among NILFS2 files can be depicted as follows:
+among NILFS2 files can be depicted as follows::
Super block (SB)
|
diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.rst
similarity index 85%
rename from Documentation/filesystems/ntfs.txt
rename to Documentation/filesystems/ntfs.rst
index 553f10d..5bb093a 100644
--- a/Documentation/filesystems/ntfs.txt
+++ b/Documentation/filesystems/ntfs.rst
@@ -1,19 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================
The Linux NTFS filesystem driver
================================
-Table of contents
-=================
+.. Table of contents
-- Overview
-- Web site
-- Features
-- Supported mount options
-- Known bugs and (mis-)features
-- Using NTFS volume and stripe sets
- - The Device-Mapper driver
- - The Software RAID / MD driver
- - Limitations when using the MD driver
+ - Overview
+ - Web site
+ - Features
+ - Supported mount options
+ - Known bugs and (mis-)features
+ - Using NTFS volume and stripe sets
+ - The Device-Mapper driver
+ - The Software RAID / MD driver
+ - Limitations when using the MD driver
Overview
@@ -66,8 +68,10 @@
partition by creating a large file while in Windows and then loopback
mounting the file while in Linux and creating a Linux filesystem on it that
is used to install Linux on it.
-- A comparison of the two drivers using:
+- A comparison of the two drivers using::
+
time find . -type f -exec md5sum "{}" \;
+
run three times in sequence with each driver (after a reboot) on a 1.4GiB
NTFS partition, showed the new driver to be 20% faster in total time elapsed
(from 9:43 minutes on average down to 7:53). The time spent in user space
@@ -104,6 +108,7 @@
mount command (man 8 mount, also see man 5 fstab), the NTFS driver supports the
following mount options:
+======================= =======================================================
iocharset=name Deprecated option. Still supported but please use
nls=name in the future. See description for nls=name.
@@ -175,16 +180,22 @@
errors=opt What to do when critical filesystem errors are found.
Following values can be used for "opt":
- continue: DEFAULT, try to clean-up as much as
+
+ ======== =========================================
+ continue DEFAULT, try to clean-up as much as
possible, e.g. marking a corrupt inode as
bad so it is no longer accessed, and then
continue.
- recover: At present only supported is recovery of
+ recover At present only supported is recovery of
the boot sector from the backup copy.
If read-only mount, the recovery is done
in memory only and not written to disk.
- Note that the options are additive, i.e. specifying:
+ ======== =========================================
+
+ Note that the options are additive, i.e. specifying::
+
errors=continue,errors=recover
+
means the driver will attempt to recover and if that
fails it will clean-up as much as possible and
continue.
@@ -202,12 +213,18 @@
In general use the default. If you have a lot of small
files then use a higher value. The values have the
following meaning:
+
+ ===== =================================
Value MFT zone size (% of volume size)
+ ===== =================================
1 12.5%
2 25%
3 37.5%
4 50%
+ ===== =================================
+
Note this option is irrelevant for read-only mounts.
+======================= =======================================================
Known bugs and (mis-)features
@@ -252,18 +269,18 @@
components and their sizes in sectors, i.e. multiples of 512-byte blocks.
For NT4 fault tolerant volumes you can obtain the sizes using fdisk. So for
-example if one of your partitions is /dev/hda2 you would do:
+example if one of your partitions is /dev/hda2 you would do::
-$ fdisk -ul /dev/hda
+ $ fdisk -ul /dev/hda
-Disk /dev/hda: 81.9 GB, 81964302336 bytes
-255 heads, 63 sectors/track, 9964 cylinders, total 160086528 sectors
-Units = sectors of 1 * 512 = 512 bytes
+ Disk /dev/hda: 81.9 GB, 81964302336 bytes
+ 255 heads, 63 sectors/track, 9964 cylinders, total 160086528 sectors
+ Units = sectors of 1 * 512 = 512 bytes
- Device Boot Start End Blocks Id System
- /dev/hda1 * 63 4209029 2104483+ 83 Linux
- /dev/hda2 4209030 37768814 16779892+ 86 NTFS
- /dev/hda3 37768815 46170809 4200997+ 83 Linux
+ Device Boot Start End Blocks Id System
+ /dev/hda1 * 63 4209029 2104483+ 83 Linux
+ /dev/hda2 4209030 37768814 16779892+ 86 NTFS
+ /dev/hda3 37768815 46170809 4200997+ 83 Linux
And you would know that /dev/hda2 has a size of 37768814 - 4209030 + 1 =
33559785 sectors.
@@ -271,15 +288,17 @@
For Win2k and later dynamic disks, you can for example use the ldminfo utility
which is part of the Linux LDM tools (the latest version at the time of
writing is linux-ldm-0.0.8.tar.bz2). You can download it from:
+
http://www.linux-ntfs.org/
+
Simply extract the downloaded archive (tar xvjf linux-ldm-0.0.8.tar.bz2), go
into it (cd linux-ldm-0.0.8) and change to the test directory (cd test). You
will find the precompiled (i386) ldminfo utility there. NOTE: You will not be
able to compile this yourself easily so use the binary version!
-Then you would use ldminfo in dump mode to obtain the necessary information:
+Then you would use ldminfo in dump mode to obtain the necessary information::
-$ ./ldminfo --dump /dev/hda
+ $ ./ldminfo --dump /dev/hda
This would dump the LDM database found on /dev/hda which describes all of your
dynamic disks and all the volumes on them. At the bottom you will see the
@@ -305,42 +324,36 @@
Assuming you know all your devices and their sizes things are easy.
For a linear raid the table would look like this (note all values are in
-512-byte sectors):
+512-byte sectors)::
---- cut here ---
-# Offset into Size of this Raid type Device Start sector
-# volume device of device
-0 1028161 linear /dev/hda1 0
-1028161 3903762 linear /dev/hdb2 0
-4931923 2103211 linear /dev/hdc1 0
---- cut here ---
+ # Offset into Size of this Raid type Device Start sector
+ # volume device of device
+ 0 1028161 linear /dev/hda1 0
+ 1028161 3903762 linear /dev/hdb2 0
+ 4931923 2103211 linear /dev/hdc1 0
For a striped volume, i.e. raid level 0, you will need to know the chunk size
you used when creating the volume. Windows uses 64kiB as the default, so it
will probably be this unless you changes the defaults when creating the array.
For a raid level 0 the table would look like this (note all values are in
-512-byte sectors):
+512-byte sectors)::
---- cut here ---
-# Offset Size Raid Number Chunk 1st Start 2nd Start
-# into of the type of size Device in Device in
-# volume volume stripes device device
-0 2056320 striped 2 128 /dev/hda1 0 /dev/hdb1 0
---- cut here ---
+ # Offset Size Raid Number Chunk 1st Start 2nd Start
+ # into of the type of size Device in Device in
+ # volume volume stripes device device
+ 0 2056320 striped 2 128 /dev/hda1 0 /dev/hdb1 0
If there are more than two devices, just add each of them to the end of the
line.
Finally, for a mirrored volume, i.e. raid level 1, the table would look like
-this (note all values are in 512-byte sectors):
+this (note all values are in 512-byte sectors)::
---- cut here ---
-# Ofs Size Raid Log Number Region Should Number Source Start Target Start
-# in of the type type of log size sync? of Device in Device in
-# vol volume params mirrors Device Device
-0 2056320 mirror core 2 16 nosync 2 /dev/hda1 0 /dev/hdb1 0
---- cut here ---
+ # Ofs Size Raid Log Number Region Should Number Source Start Target Start
+ # in of the type type of log size sync? of Device in Device in
+ # vol volume params mirrors Device Device
+ 0 2056320 mirror core 2 16 nosync 2 /dev/hda1 0 /dev/hdb1 0
If you are mirroring to multiple devices you can specify further targets at the
end of the line.
@@ -353,17 +366,17 @@
them.
Once you have your table, save it in a file somewhere (e.g. /etc/ntfsvolume1),
-and hand it over to dmsetup to work with, like so:
+and hand it over to dmsetup to work with, like so::
-$ dmsetup create myvolume1 /etc/ntfsvolume1
+ $ dmsetup create myvolume1 /etc/ntfsvolume1
You can obviously replace "myvolume1" with whatever name you like.
If it all worked, you will now have the device /dev/device-mapper/myvolume1
which you can then just use as an argument to the mount command as usual to
-mount the ntfs volume. For example:
+mount the ntfs volume. For example::
-$ mount -t ntfs -o ro /dev/device-mapper/myvolume1 /mnt/myvol1
+ $ mount -t ntfs -o ro /dev/device-mapper/myvolume1 /mnt/myvol1
(You need to create the directory /mnt/myvol1 first and of course you can use
anything you like instead of /mnt/myvol1 as long as it is an existing
@@ -395,18 +408,18 @@
"chunk-size 64k" option for each raid-disk, too.
For example, if you have a stripe set consisting of two partitions /dev/hda5
-and /dev/hdb1 your /etc/raidtab would look like this:
+and /dev/hdb1 your /etc/raidtab would look like this::
-raiddev /dev/md0
- raid-level 0
- nr-raid-disks 2
- nr-spare-disks 0
- persistent-superblock 0
- chunk-size 64k
- device /dev/hda5
- raid-disk 0
- device /dev/hdb1
- raid-disk 1
+ raiddev /dev/md0
+ raid-level 0
+ nr-raid-disks 2
+ nr-spare-disks 0
+ persistent-superblock 0
+ chunk-size 64k
+ device /dev/hda5
+ raid-disk 0
+ device /dev/hdb1
+ raid-disk 1
For linear raid, just change the raid-level above to "raid-level linear", for
mirrors, change it to "raid-level 1", and for stripe sets with parity, change
@@ -427,7 +440,9 @@
raid0run /dev/md0 to start a particular md device, in this case /dev/md0.
Then just use the mount command as usual to mount the ntfs volume using for
-example: mount -t ntfs -o ro /dev/md0 /mnt/myntfsvolume
+example::
+
+ mount -t ntfs -o ro /dev/md0 /mnt/myntfsvolume
It is advisable to do the mount read-only to see if the md volume has been
setup correctly to avoid the possibility of causing damage to the data on the
diff --git a/Documentation/filesystems/ocfs2-online-filecheck.txt b/Documentation/filesystems/ocfs2-online-filecheck.rst
similarity index 77%
rename from Documentation/filesystems/ocfs2-online-filecheck.txt
rename to Documentation/filesystems/ocfs2-online-filecheck.rst
index 139fab1..2257bb5 100644
--- a/Documentation/filesystems/ocfs2-online-filecheck.txt
+++ b/Documentation/filesystems/ocfs2-online-filecheck.rst
@@ -1,5 +1,8 @@
- OCFS2 online file check
- -----------------------
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+OCFS2 file system - online file check
+=====================================
This document will describe OCFS2 online file check feature.
@@ -40,7 +43,7 @@
by the inode number which caused the error. This inode number would be the
input to check/fix the file.
-There is a sysfs directory for each OCFS2 file system mounting:
+There is a sysfs directory for each OCFS2 file system mounting::
/sys/fs/ocfs2/<devname>/filecheck
@@ -50,34 +53,36 @@
fixed. Currently, three operations are supported, which includes checking
inode, fixing inode and setting the size of result record history.
-1. If you want to know what error exactly happened to <inode> before fixing, do
+1. If you want to know what error exactly happened to <inode> before fixing, do::
- # echo "<inode>" > /sys/fs/ocfs2/<devname>/filecheck/check
- # cat /sys/fs/ocfs2/<devname>/filecheck/check
+ # echo "<inode>" > /sys/fs/ocfs2/<devname>/filecheck/check
+ # cat /sys/fs/ocfs2/<devname>/filecheck/check
-The output is like this:
- INO DONE ERROR
-39502 1 GENERATION
+The output is like this::
-<INO> lists the inode numbers.
-<DONE> indicates whether the operation has been finished.
-<ERROR> says what kind of errors was found. For the detailed error numbers,
-please refer to the file linux/fs/ocfs2/filecheck.h.
+ INO DONE ERROR
+ 39502 1 GENERATION
-2. If you determine to fix this inode, do
+ <INO> lists the inode numbers.
+ <DONE> indicates whether the operation has been finished.
+ <ERROR> says what kind of errors was found. For the detailed error numbers,
+ please refer to the file linux/fs/ocfs2/filecheck.h.
- # echo "<inode>" > /sys/fs/ocfs2/<devname>/filecheck/fix
- # cat /sys/fs/ocfs2/<devname>/filecheck/fix
+2. If you determine to fix this inode, do::
-The output is like this:
- INO DONE ERROR
-39502 1 SUCCESS
+ # echo "<inode>" > /sys/fs/ocfs2/<devname>/filecheck/fix
+ # cat /sys/fs/ocfs2/<devname>/filecheck/fix
+
+The output is like this:::
+
+ INO DONE ERROR
+ 39502 1 SUCCESS
This time, the <ERROR> column indicates whether this fix is successful or not.
3. The record cache is used to store the history of check/fix results. It's
default size is 10, and can be adjust between the range of 10 ~ 100. You can
-adjust the size like this:
+adjust the size like this::
# echo "<size>" > /sys/fs/ocfs2/<devname>/filecheck/set
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.rst
similarity index 87%
rename from Documentation/filesystems/ocfs2.txt
rename to Documentation/filesystems/ocfs2.rst
index 4c49e54..412386b 100644
--- a/Documentation/filesystems/ocfs2.txt
+++ b/Documentation/filesystems/ocfs2.rst
@@ -1,5 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================
OCFS2 filesystem
-==================
+================
+
OCFS2 is a general purpose extent based shared disk cluster file
system with many similarities to ext3. It supports 64 bit inode
numbers, and has automatically extending metadata groups which may
@@ -14,22 +18,26 @@
All code copyright 2005 Oracle except when otherwise noted.
-CREDITS:
+Credits
+=======
+
Lots of code taken from ext3 and other projects.
Authors in alphabetical order:
-Joel Becker <joel.becker@oracle.com>
-Zach Brown <zach.brown@oracle.com>
-Mark Fasheh <mfasheh@suse.com>
-Kurt Hackel <kurt.hackel@oracle.com>
-Tao Ma <tao.ma@oracle.com>
-Sunil Mushran <sunil.mushran@oracle.com>
-Manish Singh <manish.singh@oracle.com>
-Tiger Yang <tiger.yang@oracle.com>
+
+- Joel Becker <joel.becker@oracle.com>
+- Zach Brown <zach.brown@oracle.com>
+- Mark Fasheh <mfasheh@suse.com>
+- Kurt Hackel <kurt.hackel@oracle.com>
+- Tao Ma <tao.ma@oracle.com>
+- Sunil Mushran <sunil.mushran@oracle.com>
+- Manish Singh <manish.singh@oracle.com>
+- Tiger Yang <tiger.yang@oracle.com>
Caveats
=======
Features which OCFS2 does not support yet:
+
- Directory change notification (F_NOTIFY)
- Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease)
@@ -37,8 +45,10 @@
=============
OCFS2 supports the following mount options:
+
(*) == default
+======================= ========================================================
barrier=1 This enables/disables barriers. barrier=0 disables it,
barrier=1 enables it.
errors=remount-ro(*) Remount the filesystem read-only on an error.
@@ -104,3 +114,4 @@
for descriptor blocks. If enabled older kernels cannot
mount the device. This will enable 'journal_checksum'
internally.
+======================= ========================================================
diff --git a/Documentation/filesystems/omfs.rst b/Documentation/filesystems/omfs.rst
new file mode 100644
index 0000000..4c8bb30
--- /dev/null
+++ b/Documentation/filesystems/omfs.rst
@@ -0,0 +1,112 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================
+Optimized MPEG Filesystem (OMFS)
+================================
+
+Overview
+========
+
+OMFS is a filesystem created by SonicBlue for use in the ReplayTV DVR
+and Rio Karma MP3 player. The filesystem is extent-based, utilizing
+block sizes from 2k to 8k, with hash-based directories. This
+filesystem driver may be used to read and write disks from these
+devices.
+
+Note, it is not recommended that this FS be used in place of a general
+filesystem for your own streaming media device. Native Linux filesystems
+will likely perform better.
+
+More information is available at:
+
+ http://linux-karma.sf.net/
+
+Various utilities, including mkomfs and omfsck, are included with
+omfsprogs, available at:
+
+ http://bobcopeland.com/karma/
+
+Instructions are included in its README.
+
+Options
+=======
+
+OMFS supports the following mount-time options:
+
+ ============ ========================================
+ uid=n make all files owned by specified user
+ gid=n make all files owned by specified group
+ umask=xxx set permission umask to xxx
+ fmask=xxx set umask to xxx for files
+ dmask=xxx set umask to xxx for directories
+ ============ ========================================
+
+Disk format
+===========
+
+OMFS discriminates between "sysblocks" and normal data blocks. The sysblock
+group consists of super block information, file metadata, directory structures,
+and extents. Each sysblock has a header containing CRCs of the entire
+sysblock, and may be mirrored in successive blocks on the disk. A sysblock may
+have a smaller size than a data block, but since they are both addressed by the
+same 64-bit block number, any remaining space in the smaller sysblock is
+unused.
+
+Sysblock header information::
+
+ struct omfs_header {
+ __be64 h_self; /* FS block where this is located */
+ __be32 h_body_size; /* size of useful data after header */
+ __be16 h_crc; /* crc-ccitt of body_size bytes */
+ char h_fill1[2];
+ u8 h_version; /* version, always 1 */
+ char h_type; /* OMFS_INODE_X */
+ u8 h_magic; /* OMFS_IMAGIC */
+ u8 h_check_xor; /* XOR of header bytes before this */
+ __be32 h_fill2;
+ };
+
+Files and directories are both represented by omfs_inode::
+
+ struct omfs_inode {
+ struct omfs_header i_head; /* header */
+ __be64 i_parent; /* parent containing this inode */
+ __be64 i_sibling; /* next inode in hash bucket */
+ __be64 i_ctime; /* ctime, in milliseconds */
+ char i_fill1[35];
+ char i_type; /* OMFS_[DIR,FILE] */
+ __be32 i_fill2;
+ char i_fill3[64];
+ char i_name[OMFS_NAMELEN]; /* filename */
+ __be64 i_size; /* size of file, in bytes */
+ };
+
+Directories in OMFS are implemented as a large hash table. Filenames are
+hashed then prepended into the bucket list beginning at OMFS_DIR_START.
+Lookup requires hashing the filename, then seeking across i_sibling pointers
+until a match is found on i_name. Empty buckets are represented by block
+pointers with all-1s (~0).
+
+A file is an omfs_inode structure followed by an extent table beginning at
+OMFS_EXTENT_START::
+
+ struct omfs_extent_entry {
+ __be64 e_cluster; /* start location of a set of blocks */
+ __be64 e_blocks; /* number of blocks after e_cluster */
+ };
+
+ struct omfs_extent {
+ __be64 e_next; /* next extent table location */
+ __be32 e_extent_count; /* total # extents in this table */
+ __be32 e_fill;
+ struct omfs_extent_entry e_entry; /* start of extent entries */
+ };
+
+Each extent holds the block offset followed by number of blocks allocated to
+the extent. The final extent in each table is a terminator with e_cluster
+being ~0 and e_blocks being ones'-complement of the total number of blocks
+in the table.
+
+If this table overflows, a continuation inode is written and pointed to by
+e_next. These have a header but lack the rest of the inode structure.
+
diff --git a/Documentation/filesystems/omfs.txt b/Documentation/filesystems/omfs.txt
deleted file mode 100644
index 1d0d41f..0000000
--- a/Documentation/filesystems/omfs.txt
+++ /dev/null
@@ -1,106 +0,0 @@
-Optimized MPEG Filesystem (OMFS)
-
-Overview
-========
-
-OMFS is a filesystem created by SonicBlue for use in the ReplayTV DVR
-and Rio Karma MP3 player. The filesystem is extent-based, utilizing
-block sizes from 2k to 8k, with hash-based directories. This
-filesystem driver may be used to read and write disks from these
-devices.
-
-Note, it is not recommended that this FS be used in place of a general
-filesystem for your own streaming media device. Native Linux filesystems
-will likely perform better.
-
-More information is available at:
-
- http://linux-karma.sf.net/
-
-Various utilities, including mkomfs and omfsck, are included with
-omfsprogs, available at:
-
- http://bobcopeland.com/karma/
-
-Instructions are included in its README.
-
-Options
-=======
-
-OMFS supports the following mount-time options:
-
- uid=n - make all files owned by specified user
- gid=n - make all files owned by specified group
- umask=xxx - set permission umask to xxx
- fmask=xxx - set umask to xxx for files
- dmask=xxx - set umask to xxx for directories
-
-Disk format
-===========
-
-OMFS discriminates between "sysblocks" and normal data blocks. The sysblock
-group consists of super block information, file metadata, directory structures,
-and extents. Each sysblock has a header containing CRCs of the entire
-sysblock, and may be mirrored in successive blocks on the disk. A sysblock may
-have a smaller size than a data block, but since they are both addressed by the
-same 64-bit block number, any remaining space in the smaller sysblock is
-unused.
-
-Sysblock header information:
-
-struct omfs_header {
- __be64 h_self; /* FS block where this is located */
- __be32 h_body_size; /* size of useful data after header */
- __be16 h_crc; /* crc-ccitt of body_size bytes */
- char h_fill1[2];
- u8 h_version; /* version, always 1 */
- char h_type; /* OMFS_INODE_X */
- u8 h_magic; /* OMFS_IMAGIC */
- u8 h_check_xor; /* XOR of header bytes before this */
- __be32 h_fill2;
-};
-
-Files and directories are both represented by omfs_inode:
-
-struct omfs_inode {
- struct omfs_header i_head; /* header */
- __be64 i_parent; /* parent containing this inode */
- __be64 i_sibling; /* next inode in hash bucket */
- __be64 i_ctime; /* ctime, in milliseconds */
- char i_fill1[35];
- char i_type; /* OMFS_[DIR,FILE] */
- __be32 i_fill2;
- char i_fill3[64];
- char i_name[OMFS_NAMELEN]; /* filename */
- __be64 i_size; /* size of file, in bytes */
-};
-
-Directories in OMFS are implemented as a large hash table. Filenames are
-hashed then prepended into the bucket list beginning at OMFS_DIR_START.
-Lookup requires hashing the filename, then seeking across i_sibling pointers
-until a match is found on i_name. Empty buckets are represented by block
-pointers with all-1s (~0).
-
-A file is an omfs_inode structure followed by an extent table beginning at
-OMFS_EXTENT_START:
-
-struct omfs_extent_entry {
- __be64 e_cluster; /* start location of a set of blocks */
- __be64 e_blocks; /* number of blocks after e_cluster */
-};
-
-struct omfs_extent {
- __be64 e_next; /* next extent table location */
- __be32 e_extent_count; /* total # extents in this table */
- __be32 e_fill;
- struct omfs_extent_entry e_entry; /* start of extent entries */
-};
-
-Each extent holds the block offset followed by number of blocks allocated to
-the extent. The final extent in each table is a terminator with e_cluster
-being ~0 and e_blocks being ones'-complement of the total number of blocks
-in the table.
-
-If this table overflows, a continuation inode is written and pointed to by
-e_next. These have a header but lack the rest of the inode structure.
-
diff --git a/Documentation/filesystems/orangefs.txt b/Documentation/filesystems/orangefs.rst
similarity index 83%
rename from Documentation/filesystems/orangefs.txt
rename to Documentation/filesystems/orangefs.rst
index f4ba949..7d6d4ca 100644
--- a/Documentation/filesystems/orangefs.txt
+++ b/Documentation/filesystems/orangefs.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========
ORANGEFS
========
@@ -21,25 +24,25 @@
* Stateless
-MAILING LIST ARCHIVES
+Mailing List Archives
=====================
http://lists.orangefs.org/pipermail/devel_lists.orangefs.org/
-MAILING LIST SUBMISSIONS
+Mailing List Submissions
========================
devel@lists.orangefs.org
-DOCUMENTATION
+Documentation
=============
http://www.orangefs.org/documentation/
-USERSPACE FILESYSTEM SOURCE
+Userspace Filesystem Source
===========================
http://www.orangefs.org/download
@@ -48,16 +51,16 @@
upstream version of the kernel client.
-RUNNING ORANGEFS ON A SINGLE SERVER
+Running ORANGEFS On a Single Server
===================================
OrangeFS is usually run in large installations with multiple servers and
clients, but a complete filesystem can be run on a single machine for
development and testing.
-On Fedora, install orangefs and orangefs-server.
+On Fedora, install orangefs and orangefs-server::
-dnf -y install orangefs orangefs-server
+ dnf -y install orangefs orangefs-server
There is an example server configuration file in
/etc/orangefs/orangefs.conf. Change localhost to your hostname if
@@ -70,29 +73,29 @@
controls clients which use libpvfs2. This does not control the
pvfs2-client-core.
-Create the filesystem.
+Create the filesystem::
-pvfs2-server -f /etc/orangefs/orangefs.conf
+ pvfs2-server -f /etc/orangefs/orangefs.conf
-Start the server.
+Start the server::
-systemctl start orangefs-server
+ systemctl start orangefs-server
-Test the server.
+Test the server::
-pvfs2-ping -m /pvfsmnt
+ pvfs2-ping -m /pvfsmnt
Start the client. The module must be compiled in or loaded before this
-point.
+point::
-systemctl start orangefs-client
+ systemctl start orangefs-client
-Mount the filesystem.
+Mount the filesystem::
-mount -t pvfs2 tcp://localhost:3334/orangefs /pvfsmnt
+ mount -t pvfs2 tcp://localhost:3334/orangefs /pvfsmnt
-BUILDING ORANGEFS ON A SINGLE SERVER
+Building ORANGEFS on a Single Server
====================================
Where OrangeFS cannot be installed from distribution packages, it may be
@@ -102,49 +105,51 @@
in /usr/local. As of version 2.9.6, OrangeFS uses Berkeley DB by
default, we will probably be changing the default to LMDB soon.
-./configure --prefix=/opt/ofs --with-db-backend=lmdb
+::
-make
+ ./configure --prefix=/opt/ofs --with-db-backend=lmdb
-make install
+ make
-Create an orangefs config file.
+ make install
-/opt/ofs/bin/pvfs2-genconfig /etc/pvfs2.conf
+Create an orangefs config file::
-Create an /etc/pvfs2tab file.
+ /opt/ofs/bin/pvfs2-genconfig /etc/pvfs2.conf
-echo tcp://localhost:3334/orangefs /pvfsmnt pvfs2 defaults,noauto 0 0 > \
- /etc/pvfs2tab
+Create an /etc/pvfs2tab file::
-Create the mount point you specified in the tab file if needed.
+ echo tcp://localhost:3334/orangefs /pvfsmnt pvfs2 defaults,noauto 0 0 > \
+ /etc/pvfs2tab
-mkdir /pvfsmnt
+Create the mount point you specified in the tab file if needed::
-Bootstrap the server.
+ mkdir /pvfsmnt
-/opt/ofs/sbin/pvfs2-server -f /etc/pvfs2.conf
+Bootstrap the server::
-Start the server.
+ /opt/ofs/sbin/pvfs2-server -f /etc/pvfs2.conf
-/opt/osf/sbin/pvfs2-server /etc/pvfs2.conf
+Start the server::
+
+ /opt/osf/sbin/pvfs2-server /etc/pvfs2.conf
Now the server should be running. Pvfs2-ls is a simple
-test to verify that the server is running.
+test to verify that the server is running::
-/opt/ofs/bin/pvfs2-ls /pvfsmnt
+ /opt/ofs/bin/pvfs2-ls /pvfsmnt
If stuff seems to be working, load the kernel module and
-turn on the client core.
+turn on the client core::
-/opt/ofs/sbin/pvfs2-client -p /opt/osf/sbin/pvfs2-client-core
+ /opt/ofs/sbin/pvfs2-client -p /opt/osf/sbin/pvfs2-client-core
-Mount your filesystem.
+Mount your filesystem::
-mount -t pvfs2 tcp://localhost:3334/orangefs /pvfsmnt
+ mount -t pvfs2 tcp://localhost:3334/orangefs /pvfsmnt
-RUNNING XFSTESTS
+Running xfstests
================
It is useful to use a scratch filesystem with xfstests. This can be
@@ -159,21 +164,23 @@
This change should be made before creating the filesystem.
-pvfs2-server -f /etc/orangefs/orangefs.conf
+::
-To run xfstests, create /etc/xfsqa.config.
+ pvfs2-server -f /etc/orangefs/orangefs.conf
-TEST_DIR=/orangefs
-TEST_DEV=tcp://localhost:3334/orangefs
-SCRATCH_MNT=/scratch
-SCRATCH_DEV=tcp://localhost:3334/scratch
+To run xfstests, create /etc/xfsqa.config::
-Then xfstests can be run
+ TEST_DIR=/orangefs
+ TEST_DEV=tcp://localhost:3334/orangefs
+ SCRATCH_MNT=/scratch
+ SCRATCH_DEV=tcp://localhost:3334/scratch
-./check -pvfs2
+Then xfstests can be run::
+
+ ./check -pvfs2
-OPTIONS
+Options
=======
The following mount options are accepted:
@@ -193,32 +200,32 @@
Distributed locking is being worked on for the future.
-DEBUGGING
+Debugging
=========
If you want the debug (GOSSIP) statements in a particular
-source file (inode.c for example) go to syslog:
+source file (inode.c for example) go to syslog::
echo inode > /sys/kernel/debug/orangefs/kernel-debug
-No debugging (the default):
+No debugging (the default)::
echo none > /sys/kernel/debug/orangefs/kernel-debug
-Debugging from several source files:
+Debugging from several source files::
echo inode,dir > /sys/kernel/debug/orangefs/kernel-debug
-All debugging:
+All debugging::
echo all > /sys/kernel/debug/orangefs/kernel-debug
-Get a list of all debugging keywords:
+Get a list of all debugging keywords::
cat /sys/kernel/debug/orangefs/debug-help
-PROTOCOL BETWEEN KERNEL MODULE AND USERSPACE
+Protocol between Kernel Module and Userspace
============================================
Orangefs is a user space filesystem and an associated kernel module.
@@ -234,7 +241,8 @@
can read from and write to. Userspace can also manipulate the
kernel module through the pseudo device with ioctl.
-THE BUFMAP:
+The Bufmap
+----------
At startup userspace allocates two page-size-aligned (posix_memalign)
mlocked memory buffers, one is used for IO and one is used for readdir
@@ -250,7 +258,8 @@
to initialize the kernel module's "bufmap" (struct orangefs_bufmap), which
then contains:
- * refcnt - a reference counter
+ * refcnt
+ - a reference counter
* desc_size - PVFS2_BUFMAP_DEFAULT_DESC_SIZE (4194304) - the IO buffer's
partition size, which represents the filesystem's block size and
is used for s_blocksize in super blocks.
@@ -259,17 +268,19 @@
* desc_shift - log2(desc_size), used for s_blocksize_bits in super blocks.
* total_size - the total size of the IO buffer.
* page_count - the number of 4096 byte pages in the IO buffer.
- * page_array - a pointer to page_count * (sizeof(struct page*)) bytes
+ * page_array - a pointer to ``page_count * (sizeof(struct page*))`` bytes
of kcalloced memory. This memory is used as an array of pointers
to each of the pages in the IO buffer through a call to get_user_pages.
- * desc_array - a pointer to desc_count * (sizeof(struct orangefs_bufmap_desc))
+ * desc_array - a pointer to ``desc_count * (sizeof(struct orangefs_bufmap_desc))``
bytes of kcalloced memory. This memory is further intialized:
user_desc is the kernel's copy of the IO buffer's ORANGEFS_dev_map_desc
structure. user_desc->ptr points to the IO buffer.
- pages_per_desc = bufmap->desc_size / PAGE_SIZE
- offset = 0
+ ::
+
+ pages_per_desc = bufmap->desc_size / PAGE_SIZE
+ offset = 0
bufmap->desc_array[0].page_array = &bufmap->page_array[offset]
bufmap->desc_array[0].array_count = pages_per_desc = 1024
@@ -293,7 +304,8 @@
* readdir_index_lock - a spinlock to protect readdir_index_array during
update.
-OPERATIONS:
+Operations
+----------
The kernel module builds an "op" (struct orangefs_kernel_op_s) when it
needs to communicate with userspace. Part of the op contains the "upcall"
@@ -308,13 +320,19 @@
Ops are stateful:
- * unknown - op was just initialized
- * waiting - op is on request_list (upward bound)
- * inprogr - op is in progress (waiting for downcall)
- * serviced - op has matching downcall; ok
- * purged - op has to start a timer since client-core
+ * unknown
+ - op was just initialized
+ * waiting
+ - op is on request_list (upward bound)
+ * inprogr
+ - op is in progress (waiting for downcall)
+ * serviced
+ - op has matching downcall; ok
+ * purged
+ - op has to start a timer since client-core
exited uncleanly before servicing op
- * given up - submitter has given up waiting for it
+ * given up
+ - submitter has given up waiting for it
When some arbitrary userspace program needs to perform a
filesystem operation on Orangefs (readdir, I/O, create, whatever)
@@ -389,10 +407,15 @@
response type.
The several members outside of the union are:
- - int32_t type - type of operation.
- - int32_t status - return code for the operation.
- - int64_t trailer_size - 0 unless readdir operation.
- - char *trailer_buf - initialized to NULL, used during readdir operations.
+
+ ``int32_t type``
+ - type of operation.
+ ``int32_t status``
+ - return code for the operation.
+ ``int64_t trailer_size``
+ - 0 unless readdir operation.
+ ``char *trailer_buf``
+ - initialized to NULL, used during readdir operations.
The appropriate member inside the union is filled out for any
particular response.
@@ -449,18 +472,20 @@
made by the kernel side.
A buffer_list containing:
+
- a pointer to the prepared response to the request from the
kernel (struct pvfs2_downcall_t).
- and also, in the case of a readdir request, a pointer to a
buffer containing descriptors for the objects in the target
directory.
+
... is sent to the function (PINT_dev_write_list) which performs
the writev.
PINT_dev_write_list has a local iovec array: struct iovec io_array[10];
The first four elements of io_array are initialized like this for all
-responses:
+responses::
io_array[0].iov_base = address of local variable "proto_ver" (int32_t)
io_array[0].iov_len = sizeof(int32_t)
@@ -475,7 +500,7 @@
of global variable vfs_request (vfs_request_t)
io_array[3].iov_len = sizeof(pvfs2_downcall_t)
-Readdir responses initialize the fifth element io_array like this:
+Readdir responses initialize the fifth element io_array like this::
io_array[4].iov_base = contents of member trailer_buf (char *)
from out_downcall member of global variable
@@ -517,13 +542,13 @@
hence the motivation to use the dentry when possible.
The timeout values d_time and getattr_time are jiffy based, and the
-code is designed to avoid the jiffy-wrap problem:
+code is designed to avoid the jiffy-wrap problem::
-"In general, if the clock may have wrapped around more than once, there
-is no way to tell how much time has elapsed. However, if the times t1
-and t2 are known to be fairly close, we can reliably compute the
-difference in a way that takes into account the possibility that the
-clock may have wrapped between times."
+ "In general, if the clock may have wrapped around more than once, there
+ is no way to tell how much time has elapsed. However, if the times t1
+ and t2 are known to be fairly close, we can reliably compute the
+ difference in a way that takes into account the possibility that the
+ clock may have wrapped between times."
- from course notes by instructor Andy Wang
+from course notes by instructor Andy Wang
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index f185060..26c0939 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -850,3 +850,11 @@
d_alloc_pseudo() is internal-only; uses outside of alloc_file_pseudo() are
very suspect (and won't work in modules). Such uses are very likely to
be misspelled d_alloc_anon().
+
+---
+
+**mandatory**
+
+[should've been added in 2016] stale comment in finish_open() nonwithstanding,
+failure exits in ->atomic_open() instances should *NOT* fput() the file,
+no matter what. Everything is handled by the caller.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.rst
similarity index 64%
rename from Documentation/filesystems/proc.txt
rename to Documentation/filesystems/proc.rst
index 99ca040..38b6069 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.rst
@@ -1,19 +1,20 @@
-------------------------------------------------------------------------------
- T H E /proc F I L E S Y S T E M
-------------------------------------------------------------------------------
-/proc/sys Terrehon Bowden <terrehon@pacbell.net> October 7 1999
- Bodo Bauer <bb@ricochet.net>
+.. SPDX-License-Identifier: GPL-2.0
-2.4.x update Jorge Nerin <comandante@zaralinux.com> November 14 2000
-move /proc/sys Shen Feng <shen@cn.fujitsu.com> April 1 2009
-------------------------------------------------------------------------------
-Version 1.3 Kernel version 2.2.12
- Kernel version 2.4.0-test11-pre4
-------------------------------------------------------------------------------
-fixes/update part 1.1 Stefani Seibold <stefani@seibold.net> June 9 2009
+====================
+The /proc Filesystem
+====================
-Table of Contents
------------------
+===================== ======================================= ================
+/proc/sys Terrehon Bowden <terrehon@pacbell.net>, October 7 1999
+ Bodo Bauer <bb@ricochet.net>
+2.4.x update Jorge Nerin <comandante@zaralinux.com> November 14 2000
+move /proc/sys Shen Feng <shen@cn.fujitsu.com> April 1 2009
+fixes/update part 1.1 Stefani Seibold <stefani@seibold.net> June 9 2009
+===================== ======================================= ================
+
+
+
+.. Table of Contents
0 Preface
0.1 Introduction/Credits
@@ -50,9 +51,8 @@
4 Configuring procfs
4.1 Mount options
-------------------------------------------------------------------------------
Preface
-------------------------------------------------------------------------------
+=======
0.1 Introduction/Credits
------------------------
@@ -95,20 +95,18 @@
complaining about how you screwed up your system because of incorrect
documentation, we won't feel responsible...
-------------------------------------------------------------------------------
-CHAPTER 1: COLLECTING SYSTEM INFORMATION
-------------------------------------------------------------------------------
+Chapter 1: Collecting System Information
+========================================
-------------------------------------------------------------------------------
In This Chapter
-------------------------------------------------------------------------------
+---------------
* Investigating the properties of the pseudo file system /proc and its
ability to provide information on the running Linux system
* Examining /proc's structure
* Uncovering various information about the kernel and the processes running
on the system
-------------------------------------------------------------------------------
+------------------------------------------------------------------------------
The proc file system acts as an interface to internal data structures in the
kernel. It can be used to obtain information about the system and to change
@@ -134,9 +132,11 @@
also assigned the process ID <pid>. Instead, operations on these FDs
usually fail with ESRCH.
-Table 1-1: Process specific entries in /proc
-..............................................................................
+.. table:: Table 1-1: Process specific entries in /proc
+
+ ============= ===============================================================
File Content
+ ============= ===============================================================
clear_refs Clears page referenced bits shown in smaps output
cmdline Command line arguments
cpu Current and last cpu in which it was executed (2.4)(smp)
@@ -160,10 +160,10 @@
can be derived from smaps, but is faster and more convenient
numa_maps An extension based on maps, showing the memory locality and
binding policy as well as mem usage (in pages) of each mapping.
-..............................................................................
+ ============= ===============================================================
For example, to get the status information of a process, all you have to do is
-read the file /proc/PID/status:
+read the file /proc/PID/status::
>cat /proc/self/status
Name: cat
@@ -222,14 +222,17 @@
explained in Table 1-4.
(for SMP CONFIG users)
+
For making accounting scalable, RSS related information are handled in an
asynchronous manner and the value may not be very precise. To see a precise
snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table.
It's slow but very precise.
-Table 1-2: Contents of the status files (as of 4.19)
-..............................................................................
+.. table:: Table 1-2: Contents of the status files (as of 4.19)
+
+ ========================== ===================================================
Field Content
+ ========================== ===================================================
Name filename of the executable
Umask file mode creation mask
State state (R is running, S is sleeping, D is sleeping
@@ -254,7 +257,8 @@
VmPin pinned memory size
VmHWM peak resident set size ("high water mark")
VmRSS size of memory portions. It contains the three
- following parts (VmRSS = RssAnon + RssFile + RssShmem)
+ following parts
+ (VmRSS = RssAnon + RssFile + RssShmem)
RssAnon size of resident anonymous memory
RssFile size of resident file mappings
RssShmem size of resident shmem memory (includes SysV shm,
@@ -292,27 +296,32 @@
Mems_allowed_list Same as previous, but in "list format"
voluntary_ctxt_switches number of voluntary context switches
nonvoluntary_ctxt_switches number of non voluntary context switches
-..............................................................................
+ ========================== ===================================================
-Table 1-3: Contents of the statm files (as of 2.6.8-rc3)
-..............................................................................
+
+.. table:: Table 1-3: Contents of the statm files (as of 2.6.8-rc3)
+
+ ======== =============================== ==============================
Field Content
+ ======== =============================== ==============================
size total program size (pages) (same as VmSize in status)
resident size of memory portions (pages) (same as VmRSS in status)
shared number of pages that are shared (i.e. backed by a file, same
as RssFile+RssShmem in status)
trs number of pages that are 'code' (not including libs; broken,
- includes data segment)
+ includes data segment)
lrs number of pages of library (always 0 on 2.6)
drs number of pages of data/stack (including libs; broken,
- includes library text)
+ includes library text)
dt number of dirty pages (always 0 on 2.6)
-..............................................................................
+ ======== =============================== ==============================
-Table 1-4: Contents of the stat files (as of 2.6.30-rc7)
-..............................................................................
- Field Content
+.. table:: Table 1-4: Contents of the stat files (as of 2.6.30-rc7)
+
+ ============= ===============================================================
+ Field Content
+ ============= ===============================================================
pid process id
tcomm filename of the executable
state state (R is running, S is sleeping, D is sleeping in an
@@ -348,7 +357,8 @@
blocked bitmap of blocked signals
sigign bitmap of ignored signals
sigcatch bitmap of caught signals
- 0 (place holder, used to be the wchan address, use /proc/PID/wchan instead)
+ 0 (place holder, used to be the wchan address,
+ use /proc/PID/wchan instead)
0 (place holder)
0 (place holder)
exit_signal signal to send to parent thread on exit
@@ -365,39 +375,40 @@
arg_end address below which program command line is placed
env_start address above which program environment is placed
env_end address below which program environment is placed
- exit_code the thread's exit_code in the form reported by the waitpid system call
-..............................................................................
+ exit_code the thread's exit_code in the form reported by the waitpid
+ system call
+ ============= ===============================================================
The /proc/PID/maps file contains the currently mapped memory regions and
their access permissions.
-The format is:
+The format is::
-address perms offset dev inode pathname
+ address perms offset dev inode pathname
-08048000-08049000 r-xp 00000000 03:00 8312 /opt/test
-08049000-0804a000 rw-p 00001000 03:00 8312 /opt/test
-0804a000-0806b000 rw-p 00000000 00:00 0 [heap]
-a7cb1000-a7cb2000 ---p 00000000 00:00 0
-a7cb2000-a7eb2000 rw-p 00000000 00:00 0
-a7eb2000-a7eb3000 ---p 00000000 00:00 0
-a7eb3000-a7ed5000 rw-p 00000000 00:00 0
-a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6
-a8008000-a800a000 r--p 00133000 03:00 4222 /lib/libc.so.6
-a800a000-a800b000 rw-p 00135000 03:00 4222 /lib/libc.so.6
-a800b000-a800e000 rw-p 00000000 00:00 0
-a800e000-a8022000 r-xp 00000000 03:00 14462 /lib/libpthread.so.0
-a8022000-a8023000 r--p 00013000 03:00 14462 /lib/libpthread.so.0
-a8023000-a8024000 rw-p 00014000 03:00 14462 /lib/libpthread.so.0
-a8024000-a8027000 rw-p 00000000 00:00 0
-a8027000-a8043000 r-xp 00000000 03:00 8317 /lib/ld-linux.so.2
-a8043000-a8044000 r--p 0001b000 03:00 8317 /lib/ld-linux.so.2
-a8044000-a8045000 rw-p 0001c000 03:00 8317 /lib/ld-linux.so.2
-aff35000-aff4a000 rw-p 00000000 00:00 0 [stack]
-ffffe000-fffff000 r-xp 00000000 00:00 0 [vdso]
+ 08048000-08049000 r-xp 00000000 03:00 8312 /opt/test
+ 08049000-0804a000 rw-p 00001000 03:00 8312 /opt/test
+ 0804a000-0806b000 rw-p 00000000 00:00 0 [heap]
+ a7cb1000-a7cb2000 ---p 00000000 00:00 0
+ a7cb2000-a7eb2000 rw-p 00000000 00:00 0
+ a7eb2000-a7eb3000 ---p 00000000 00:00 0
+ a7eb3000-a7ed5000 rw-p 00000000 00:00 0
+ a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6
+ a8008000-a800a000 r--p 00133000 03:00 4222 /lib/libc.so.6
+ a800a000-a800b000 rw-p 00135000 03:00 4222 /lib/libc.so.6
+ a800b000-a800e000 rw-p 00000000 00:00 0
+ a800e000-a8022000 r-xp 00000000 03:00 14462 /lib/libpthread.so.0
+ a8022000-a8023000 r--p 00013000 03:00 14462 /lib/libpthread.so.0
+ a8023000-a8024000 rw-p 00014000 03:00 14462 /lib/libpthread.so.0
+ a8024000-a8027000 rw-p 00000000 00:00 0
+ a8027000-a8043000 r-xp 00000000 03:00 8317 /lib/ld-linux.so.2
+ a8043000-a8044000 r--p 0001b000 03:00 8317 /lib/ld-linux.so.2
+ a8044000-a8045000 rw-p 0001c000 03:00 8317 /lib/ld-linux.so.2
+ aff35000-aff4a000 rw-p 00000000 00:00 0 [stack]
+ ffffe000-fffff000 r-xp 00000000 00:00 0 [vdso]
where "address" is the address space in the process that it occupies, "perms"
-is a set of permissions:
+is a set of permissions::
r = read
w = write
@@ -411,42 +422,44 @@
The "pathname" shows the name associated file for this mapping. If the mapping
is not associated with a file:
- [heap] = the heap of the program
- [stack] = the stack of the main process
- [vdso] = the "virtual dynamic shared object",
+ ======= ====================================
+ [heap] the heap of the program
+ [stack] the stack of the main process
+ [vdso] the "virtual dynamic shared object",
the kernel system call handler
+ ======= ====================================
or if empty, the mapping is anonymous.
The /proc/PID/smaps is an extension based on maps, showing the memory
consumption for each of the process's mappings. For each mapping (aka Virtual
-Memory Area, or VMA) there is a series of lines such as the following:
+Memory Area, or VMA) there is a series of lines such as the following::
-08048000-080bc000 r-xp 00000000 03:02 13130 /bin/bash
+ 08048000-080bc000 r-xp 00000000 03:02 13130 /bin/bash
-Size: 1084 kB
-KernelPageSize: 4 kB
-MMUPageSize: 4 kB
-Rss: 892 kB
-Pss: 374 kB
-Shared_Clean: 892 kB
-Shared_Dirty: 0 kB
-Private_Clean: 0 kB
-Private_Dirty: 0 kB
-Referenced: 892 kB
-Anonymous: 0 kB
-LazyFree: 0 kB
-AnonHugePages: 0 kB
-ShmemPmdMapped: 0 kB
-Shared_Hugetlb: 0 kB
-Private_Hugetlb: 0 kB
-Swap: 0 kB
-SwapPss: 0 kB
-KernelPageSize: 4 kB
-MMUPageSize: 4 kB
-Locked: 0 kB
-THPeligible: 0
-VmFlags: rd ex mr mw me dw
+ Size: 1084 kB
+ KernelPageSize: 4 kB
+ MMUPageSize: 4 kB
+ Rss: 892 kB
+ Pss: 374 kB
+ Shared_Clean: 892 kB
+ Shared_Dirty: 0 kB
+ Private_Clean: 0 kB
+ Private_Dirty: 0 kB
+ Referenced: 892 kB
+ Anonymous: 0 kB
+ LazyFree: 0 kB
+ AnonHugePages: 0 kB
+ ShmemPmdMapped: 0 kB
+ Shared_Hugetlb: 0 kB
+ Private_Hugetlb: 0 kB
+ Swap: 0 kB
+ SwapPss: 0 kB
+ KernelPageSize: 4 kB
+ MMUPageSize: 4 kB
+ Locked: 0 kB
+ THPeligible: 0
+ VmFlags: rd ex mr mw me dw
The first of these lines shows the same information as is displayed for the
mapping in /proc/PID/maps. Following lines show the size of the mapping
@@ -461,26 +474,35 @@
in memory, where each page is divided by the number of processes sharing it.
So if a process has 1000 pages all to itself, and 1000 shared with one other
process, its PSS will be 1500.
+
Note that even a page which is part of a MAP_SHARED mapping, but has only
a single pte mapped, i.e. is currently used by only one process, is accounted
as private and not as shared.
+
"Referenced" indicates the amount of memory currently marked as referenced or
accessed.
+
"Anonymous" shows the amount of memory that does not belong to any file. Even
a mapping associated with a file may contain anonymous pages: when MAP_PRIVATE
and a page is modified, the file page is replaced by a private anonymous copy.
+
"LazyFree" shows the amount of memory which is marked by madvise(MADV_FREE).
The memory isn't freed immediately with madvise(). It's freed in memory
pressure if the memory is clean. Please note that the printed value might
be lower than the real value due to optimizations used in the current
implementation. If this is not desirable please file a bug report.
+
"AnonHugePages" shows the ammount of memory backed by transparent hugepage.
+
"ShmemPmdMapped" shows the ammount of shared (shmem/tmpfs) memory backed by
huge pages.
+
"Shared_Hugetlb" and "Private_Hugetlb" show the ammounts of memory backed by
hugetlbfs page which is *not* counted in "RSS" or "PSS" field for historical
reasons. And these are not included in {Shared,Private}_{Clean,Dirty} field.
+
"Swap" shows how much would-be-anonymous memory is also used, but out on swap.
+
For shmem mappings, "Swap" includes also the size of the mapped (and not
replaced by copy-on-write) part of the underlying shmem object out on swap.
"SwapPss" shows proportional swap share of this mapping. Unlike "Swap", this
@@ -489,36 +511,39 @@
"THPeligible" indicates whether the mapping is eligible for allocating THP
pages - 1 if true, 0 otherwise. It just shows the current status.
-"VmFlags" field deserves a separate description. This member represents the kernel
-flags associated with the particular virtual memory area in two letter encoded
-manner. The codes are the following:
- rd - readable
- wr - writeable
- ex - executable
- sh - shared
- mr - may read
- mw - may write
- me - may execute
- ms - may share
- gd - stack segment growns down
- pf - pure PFN range
- dw - disabled write to the mapped file
- lo - pages are locked in memory
- io - memory mapped I/O area
- sr - sequential read advise provided
- rr - random read advise provided
- dc - do not copy area on fork
- de - do not expand area on remapping
- ac - area is accountable
- nr - swap space is not reserved for the area
- ht - area uses huge tlb pages
- ar - architecture specific flag
- dd - do not include area into core dump
- sd - soft-dirty flag
- mm - mixed map area
- hg - huge page advise flag
- nh - no-huge page advise flag
- mg - mergable advise flag
+"VmFlags" field deserves a separate description. This member represents the
+kernel flags associated with the particular virtual memory area in two letter
+encoded manner. The codes are the following:
+
+ == =======================================
+ rd readable
+ wr writeable
+ ex executable
+ sh shared
+ mr may read
+ mw may write
+ me may execute
+ ms may share
+ gd stack segment growns down
+ pf pure PFN range
+ dw disabled write to the mapped file
+ lo pages are locked in memory
+ io memory mapped I/O area
+ sr sequential read advise provided
+ rr random read advise provided
+ dc do not copy area on fork
+ de do not expand area on remapping
+ ac area is accountable
+ nr swap space is not reserved for the area
+ ht area uses huge tlb pages
+ ar architecture specific flag
+ dd do not include area into core dump
+ sd soft dirty flag
+ mm mixed map area
+ hg huge page advise flag
+ nh no huge page advise flag
+ mg mergable advise flag
+ == =======================================
Note that there is no guarantee that every flag and associated mnemonic will
be present in all further kernel releases. Things get changed, the flags may
@@ -531,6 +556,7 @@
Note: reading /proc/PID/maps or /proc/PID/smaps is inherently racy (consistent
output can be achieved only in the single read call).
+
This typically manifests when doing partial reads of these files while the
memory map is being modified. Despite the races, we do provide the following
guarantees:
@@ -544,9 +570,9 @@
but their values are the sums of the corresponding values for all mappings of
the process. Additionally, it contains these fields:
-Pss_Anon
-Pss_File
-Pss_Shmem
+- Pss_Anon
+- Pss_File
+- Pss_Shmem
They represent the proportional shares of anonymous, file, and shmem pages, as
described for smaps above. These fields are omitted in smaps since each
@@ -558,20 +584,25 @@
bits on both physical and virtual pages associated with a process, and the
soft-dirty bit on pte (see Documentation/admin-guide/mm/soft-dirty.rst
for details).
-To clear the bits for all the pages associated with the process
+To clear the bits for all the pages associated with the process::
+
> echo 1 > /proc/PID/clear_refs
-To clear the bits for the anonymous pages associated with the process
+To clear the bits for the anonymous pages associated with the process::
+
> echo 2 > /proc/PID/clear_refs
-To clear the bits for the file mapped pages associated with the process
+To clear the bits for the file mapped pages associated with the process::
+
> echo 3 > /proc/PID/clear_refs
-To clear the soft-dirty bit
+To clear the soft-dirty bit::
+
> echo 4 > /proc/PID/clear_refs
To reset the peak resident set size ("high water mark") to the process's
-current value:
+current value::
+
> echo 5 > /proc/PID/clear_refs
Any other value written to /proc/PID/clear_refs will have no effect.
@@ -584,30 +615,33 @@
The /proc/pid/numa_maps is an extension based on maps, showing the memory
locality and binding policy, as well as the memory usage (in pages) of
each mapping. The output follows a general format where mapping details get
-summarized separated by blank spaces, one mapping per each file line:
+summarized separated by blank spaces, one mapping per each file line::
-address policy mapping details
+ address policy mapping details
-00400000 default file=/usr/local/bin/app mapped=1 active=0 N3=1 kernelpagesize_kB=4
-00600000 default file=/usr/local/bin/app anon=1 dirty=1 N3=1 kernelpagesize_kB=4
-3206000000 default file=/lib64/ld-2.12.so mapped=26 mapmax=6 N0=24 N3=2 kernelpagesize_kB=4
-320621f000 default file=/lib64/ld-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4
-3206220000 default file=/lib64/ld-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4
-3206221000 default anon=1 dirty=1 N3=1 kernelpagesize_kB=4
-3206800000 default file=/lib64/libc-2.12.so mapped=59 mapmax=21 active=55 N0=41 N3=18 kernelpagesize_kB=4
-320698b000 default file=/lib64/libc-2.12.so
-3206b8a000 default file=/lib64/libc-2.12.so anon=2 dirty=2 N3=2 kernelpagesize_kB=4
-3206b8e000 default file=/lib64/libc-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4
-3206b8f000 default anon=3 dirty=3 active=1 N3=3 kernelpagesize_kB=4
-7f4dc10a2000 default anon=3 dirty=3 N3=3 kernelpagesize_kB=4
-7f4dc10b4000 default anon=2 dirty=2 active=1 N3=2 kernelpagesize_kB=4
-7f4dc1200000 default file=/anon_hugepage\040(deleted) huge anon=1 dirty=1 N3=1 kernelpagesize_kB=2048
-7fff335f0000 default stack anon=3 dirty=3 N3=3 kernelpagesize_kB=4
-7fff3369d000 default mapped=1 mapmax=35 active=0 N3=1 kernelpagesize_kB=4
+ 00400000 default file=/usr/local/bin/app mapped=1 active=0 N3=1 kernelpagesize_kB=4
+ 00600000 default file=/usr/local/bin/app anon=1 dirty=1 N3=1 kernelpagesize_kB=4
+ 3206000000 default file=/lib64/ld-2.12.so mapped=26 mapmax=6 N0=24 N3=2 kernelpagesize_kB=4
+ 320621f000 default file=/lib64/ld-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4
+ 3206220000 default file=/lib64/ld-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4
+ 3206221000 default anon=1 dirty=1 N3=1 kernelpagesize_kB=4
+ 3206800000 default file=/lib64/libc-2.12.so mapped=59 mapmax=21 active=55 N0=41 N3=18 kernelpagesize_kB=4
+ 320698b000 default file=/lib64/libc-2.12.so
+ 3206b8a000 default file=/lib64/libc-2.12.so anon=2 dirty=2 N3=2 kernelpagesize_kB=4
+ 3206b8e000 default file=/lib64/libc-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4
+ 3206b8f000 default anon=3 dirty=3 active=1 N3=3 kernelpagesize_kB=4
+ 7f4dc10a2000 default anon=3 dirty=3 N3=3 kernelpagesize_kB=4
+ 7f4dc10b4000 default anon=2 dirty=2 active=1 N3=2 kernelpagesize_kB=4
+ 7f4dc1200000 default file=/anon_hugepage\040(deleted) huge anon=1 dirty=1 N3=1 kernelpagesize_kB=2048
+ 7fff335f0000 default stack anon=3 dirty=3 N3=3 kernelpagesize_kB=4
+ 7fff3369d000 default mapped=1 mapmax=35 active=0 N3=1 kernelpagesize_kB=4
Where:
+
"address" is the starting address for the mapping;
+
"policy" reports the NUMA memory policy set for the mapping (see Documentation/admin-guide/mm/numa_memory_policy.rst);
+
"mapping details" summarizes mapping data such as mapping type, page usage counters,
node locality page counters (N0 == node0, N1 == node1, ...) and the kernel page
size, in KB, that is backing the mapping up.
@@ -621,81 +655,83 @@
system. It depends on the kernel configuration and the loaded modules, which
files are there, and which are missing.
-Table 1-5: Kernel info in /proc
-..............................................................................
- File Content
- apm Advanced power management info
- buddyinfo Kernel memory allocator information (see text) (2.5)
- bus Directory containing bus specific information
- cmdline Kernel command line
- cpuinfo Info about the CPU
- devices Available devices (block and character)
- dma Used DMS channels
- filesystems Supported filesystems
- driver Various drivers grouped here, currently rtc (2.4)
- execdomains Execdomains, related to security (2.4)
- fb Frame Buffer devices (2.4)
- fs File system parameters, currently nfs/exports (2.4)
- ide Directory containing info about the IDE subsystem
- interrupts Interrupt usage
- iomem Memory map (2.4)
- ioports I/O port usage
- irq Masks for irq to cpu affinity (2.4)(smp?)
- isapnp ISA PnP (Plug&Play) Info (2.4)
- kcore Kernel core image (can be ELF or A.OUT(deprecated in 2.4))
- kmsg Kernel messages
- ksyms Kernel symbol table
- loadavg Load average of last 1, 5 & 15 minutes
- locks Kernel locks
- meminfo Memory info
- misc Miscellaneous
- modules List of loaded modules
- mounts Mounted filesystems
- net Networking info (see text)
+.. table:: Table 1-5: Kernel info in /proc
+
+ ============ ===============================================================
+ File Content
+ ============ ===============================================================
+ apm Advanced power management info
+ buddyinfo Kernel memory allocator information (see text) (2.5)
+ bus Directory containing bus specific information
+ cmdline Kernel command line
+ cpuinfo Info about the CPU
+ devices Available devices (block and character)
+ dma Used DMS channels
+ filesystems Supported filesystems
+ driver Various drivers grouped here, currently rtc (2.4)
+ execdomains Execdomains, related to security (2.4)
+ fb Frame Buffer devices (2.4)
+ fs File system parameters, currently nfs/exports (2.4)
+ ide Directory containing info about the IDE subsystem
+ interrupts Interrupt usage
+ iomem Memory map (2.4)
+ ioports I/O port usage
+ irq Masks for irq to cpu affinity (2.4)(smp?)
+ isapnp ISA PnP (Plug&Play) Info (2.4)
+ kcore Kernel core image (can be ELF or A.OUT(deprecated in 2.4))
+ kmsg Kernel messages
+ ksyms Kernel symbol table
+ loadavg Load average of last 1, 5 & 15 minutes
+ locks Kernel locks
+ meminfo Memory info
+ misc Miscellaneous
+ modules List of loaded modules
+ mounts Mounted filesystems
+ net Networking info (see text)
pagetypeinfo Additional page allocator information (see text) (2.5)
- partitions Table of partitions known to the system
- pci Deprecated info of PCI bus (new way -> /proc/bus/pci/,
- decoupled by lspci (2.4)
- rtc Real time clock
- scsi SCSI info (see text)
- slabinfo Slab pool info
- softirqs softirq usage
- stat Overall statistics
- swaps Swap space utilization
- sys See chapter 2
- sysvipc Info of SysVIPC Resources (msg, sem, shm) (2.4)
- tty Info of tty drivers
- uptime Wall clock since boot, combined idle time of all cpus
- version Kernel version
- video bttv info of video resources (2.4)
- vmallocinfo Show vmalloced areas
-..............................................................................
+ partitions Table of partitions known to the system
+ pci Deprecated info of PCI bus (new way -> /proc/bus/pci/,
+ decoupled by lspci (2.4)
+ rtc Real time clock
+ scsi SCSI info (see text)
+ slabinfo Slab pool info
+ softirqs softirq usage
+ stat Overall statistics
+ swaps Swap space utilization
+ sys See chapter 2
+ sysvipc Info of SysVIPC Resources (msg, sem, shm) (2.4)
+ tty Info of tty drivers
+ uptime Wall clock since boot, combined idle time of all cpus
+ version Kernel version
+ video bttv info of video resources (2.4)
+ vmallocinfo Show vmalloced areas
+ ============ ===============================================================
You can, for example, check which interrupts are currently in use and what
-they are used for by looking in the file /proc/interrupts:
+they are used for by looking in the file /proc/interrupts::
- > cat /proc/interrupts
- CPU0
- 0: 8728810 XT-PIC timer
- 1: 895 XT-PIC keyboard
- 2: 0 XT-PIC cascade
- 3: 531695 XT-PIC aha152x
- 4: 2014133 XT-PIC serial
- 5: 44401 XT-PIC pcnet_cs
- 8: 2 XT-PIC rtc
- 11: 8 XT-PIC i82365
- 12: 182918 XT-PIC PS/2 Mouse
- 13: 1 XT-PIC fpu
- 14: 1232265 XT-PIC ide0
- 15: 7 XT-PIC ide1
- NMI: 0
+ > cat /proc/interrupts
+ CPU0
+ 0: 8728810 XT-PIC timer
+ 1: 895 XT-PIC keyboard
+ 2: 0 XT-PIC cascade
+ 3: 531695 XT-PIC aha152x
+ 4: 2014133 XT-PIC serial
+ 5: 44401 XT-PIC pcnet_cs
+ 8: 2 XT-PIC rtc
+ 11: 8 XT-PIC i82365
+ 12: 182918 XT-PIC PS/2 Mouse
+ 13: 1 XT-PIC fpu
+ 14: 1232265 XT-PIC ide0
+ 15: 7 XT-PIC ide1
+ NMI: 0
In 2.4.* a couple of lines where added to this file LOC & ERR (this time is the
-output of a SMP machine):
+output of a SMP machine)::
- > cat /proc/interrupts
+ > cat /proc/interrupts
- CPU0 CPU1
+ CPU0 CPU1
0: 1243498 1214548 IO-APIC-edge timer
1: 8949 8958 IO-APIC-edge keyboard
2: 0 0 XT-PIC cascade
@@ -708,8 +744,8 @@
15: 2183 2415 IO-APIC-edge ide1
17: 30564 30414 IO-APIC-level eth0
18: 177 164 IO-APIC-level bttv
- NMI: 2457961 2457959
- LOC: 2457882 2457881
+ NMI: 2457961 2457959
+ LOC: 2457882 2457881
ERR: 2155
NMI is incremented in this case because every timer interrupt generates a NMI
@@ -726,21 +762,25 @@
/proc/interrupts to display every IRQ vector in use by the system, not
just those considered 'most important'. The new vectors are:
- THR -- interrupt raised when a machine check threshold counter
+THR
+ interrupt raised when a machine check threshold counter
(typically counting ECC corrected errors of memory or cache) exceeds
a configurable threshold. Only available on some systems.
- TRM -- a thermal event interrupt occurs when a temperature threshold
+TRM
+ a thermal event interrupt occurs when a temperature threshold
has been exceeded for the CPU. This interrupt may also be generated
when the temperature drops back to normal.
- SPU -- a spurious interrupt is some interrupt that was raised then lowered
+SPU
+ a spurious interrupt is some interrupt that was raised then lowered
by some IO device before it could be fully processed by the APIC. Hence
the APIC sees the interrupt but does not know what device it came from.
For this case the APIC will generate the interrupt with a IRQ vector
of 0xff. This might also be generated by chipset bugs.
- RES, CAL, TLB -- rescheduling, call and TLB flush interrupts are
+RES, CAL, TLB]
+ rescheduling, call and TLB flush interrupts are
sent from one CPU to another per the needs of the OS. Typically,
their statistics are used by kernel developers and interested users to
determine the occurrence of interrupts of the given type.
@@ -756,7 +796,8 @@
irq subdir is one subdir for each IRQ, and two files; default_smp_affinity and
prof_cpu_mask.
-For example
+For example::
+
> ls /proc/irq/
0 10 12 14 16 18 2 4 6 8 prof_cpu_mask
1 11 13 15 17 19 3 5 7 9 default_smp_affinity
@@ -764,20 +805,20 @@
smp_affinity
smp_affinity is a bitmask, in which you can specify which CPUs can handle the
-IRQ, you can set it by doing:
+IRQ, you can set it by doing::
> echo 1 > /proc/irq/10/smp_affinity
This means that only the first CPU will handle the IRQ, but you can also echo
5 which means that only the first and third CPU can handle the IRQ.
-The contents of each smp_affinity file is the same by default:
+The contents of each smp_affinity file is the same by default::
> cat /proc/irq/0/smp_affinity
ffffffff
There is an alternate interface, smp_affinity_list which allows specifying
-a cpu range instead of a bitmask:
+a cpu range instead of a bitmask::
> cat /proc/irq/0/smp_affinity_list
1024-1031
@@ -810,46 +851,46 @@
Commonly used objects have their own slab pool (such as network buffers,
directory cache, and so on).
-..............................................................................
+::
-> cat /proc/buddyinfo
+ > cat /proc/buddyinfo
-Node 0, zone DMA 0 4 5 4 4 3 ...
-Node 0, zone Normal 1 0 0 1 101 8 ...
-Node 0, zone HighMem 2 0 0 1 1 0 ...
+ Node 0, zone DMA 0 4 5 4 4 3 ...
+ Node 0, zone Normal 1 0 0 1 101 8 ...
+ Node 0, zone HighMem 2 0 0 1 1 0 ...
External fragmentation is a problem under some workloads, and buddyinfo is a
-useful tool for helping diagnose these problems. Buddyinfo will give you a
+useful tool for helping diagnose these problems. Buddyinfo will give you a
clue as to how big an area you can safely allocate, or why a previous
allocation failed.
-Each column represents the number of pages of a certain order which are
-available. In this case, there are 0 chunks of 2^0*PAGE_SIZE available in
-ZONE_DMA, 4 chunks of 2^1*PAGE_SIZE in ZONE_DMA, 101 chunks of 2^4*PAGE_SIZE
-available in ZONE_NORMAL, etc...
+Each column represents the number of pages of a certain order which are
+available. In this case, there are 0 chunks of 2^0*PAGE_SIZE available in
+ZONE_DMA, 4 chunks of 2^1*PAGE_SIZE in ZONE_DMA, 101 chunks of 2^4*PAGE_SIZE
+available in ZONE_NORMAL, etc...
More information relevant to external fragmentation can be found in
-pagetypeinfo.
+pagetypeinfo::
-> cat /proc/pagetypeinfo
-Page block order: 9
-Pages per block: 512
+ > cat /proc/pagetypeinfo
+ Page block order: 9
+ Pages per block: 512
-Free pages count per migrate type at order 0 1 2 3 4 5 6 7 8 9 10
-Node 0, zone DMA, type Unmovable 0 0 0 1 1 1 1 1 1 1 0
-Node 0, zone DMA, type Reclaimable 0 0 0 0 0 0 0 0 0 0 0
-Node 0, zone DMA, type Movable 1 1 2 1 2 1 1 0 1 0 2
-Node 0, zone DMA, type Reserve 0 0 0 0 0 0 0 0 0 1 0
-Node 0, zone DMA, type Isolate 0 0 0 0 0 0 0 0 0 0 0
-Node 0, zone DMA32, type Unmovable 103 54 77 1 1 1 11 8 7 1 9
-Node 0, zone DMA32, type Reclaimable 0 0 2 1 0 0 0 0 1 0 0
-Node 0, zone DMA32, type Movable 169 152 113 91 77 54 39 13 6 1 452
-Node 0, zone DMA32, type Reserve 1 2 2 2 2 0 1 1 1 1 0
-Node 0, zone DMA32, type Isolate 0 0 0 0 0 0 0 0 0 0 0
+ Free pages count per migrate type at order 0 1 2 3 4 5 6 7 8 9 10
+ Node 0, zone DMA, type Unmovable 0 0 0 1 1 1 1 1 1 1 0
+ Node 0, zone DMA, type Reclaimable 0 0 0 0 0 0 0 0 0 0 0
+ Node 0, zone DMA, type Movable 1 1 2 1 2 1 1 0 1 0 2
+ Node 0, zone DMA, type Reserve 0 0 0 0 0 0 0 0 0 1 0
+ Node 0, zone DMA, type Isolate 0 0 0 0 0 0 0 0 0 0 0
+ Node 0, zone DMA32, type Unmovable 103 54 77 1 1 1 11 8 7 1 9
+ Node 0, zone DMA32, type Reclaimable 0 0 2 1 0 0 0 0 1 0 0
+ Node 0, zone DMA32, type Movable 169 152 113 91 77 54 39 13 6 1 452
+ Node 0, zone DMA32, type Reserve 1 2 2 2 2 0 1 1 1 1 0
+ Node 0, zone DMA32, type Isolate 0 0 0 0 0 0 0 0 0 0 0
-Number of blocks type Unmovable Reclaimable Movable Reserve Isolate
-Node 0, zone DMA 2 0 5 1 0
-Node 0, zone DMA32 41 6 967 2 0
+ Number of blocks type Unmovable Reclaimable Movable Reserve Isolate
+ Node 0, zone DMA 2 0 5 1 0
+ Node 0, zone DMA32 41 6 967 2 0
Fragmentation avoidance in the kernel works by grouping pages of different
migrate types into the same contiguous regions of memory called page blocks.
@@ -870,59 +911,63 @@
also be allocatable although a lot of filesystem metadata may have to be
reclaimed to achieve this.
-..............................................................................
-meminfo:
+meminfo
+~~~~~~~
Provides information about distribution and utilization of memory. This
varies by architecture and compile options. The following is from a
16GB PIII, which has highmem enabled. You may not have all of these fields.
-> cat /proc/meminfo
+::
-MemTotal: 16344972 kB
-MemFree: 13634064 kB
-MemAvailable: 14836172 kB
-Buffers: 3656 kB
-Cached: 1195708 kB
-SwapCached: 0 kB
-Active: 891636 kB
-Inactive: 1077224 kB
-HighTotal: 15597528 kB
-HighFree: 13629632 kB
-LowTotal: 747444 kB
-LowFree: 4432 kB
-SwapTotal: 0 kB
-SwapFree: 0 kB
-Dirty: 968 kB
-Writeback: 0 kB
-AnonPages: 861800 kB
-Mapped: 280372 kB
-Shmem: 644 kB
-KReclaimable: 168048 kB
-Slab: 284364 kB
-SReclaimable: 159856 kB
-SUnreclaim: 124508 kB
-PageTables: 24448 kB
-NFS_Unstable: 0 kB
-Bounce: 0 kB
-WritebackTmp: 0 kB
-CommitLimit: 7669796 kB
-Committed_AS: 100056 kB
-VmallocTotal: 112216 kB
-VmallocUsed: 428 kB
-VmallocChunk: 111088 kB
-Percpu: 62080 kB
-HardwareCorrupted: 0 kB
-AnonHugePages: 49152 kB
-ShmemHugePages: 0 kB
-ShmemPmdMapped: 0 kB
+ > cat /proc/meminfo
+ MemTotal: 16344972 kB
+ MemFree: 13634064 kB
+ MemAvailable: 14836172 kB
+ Buffers: 3656 kB
+ Cached: 1195708 kB
+ SwapCached: 0 kB
+ Active: 891636 kB
+ Inactive: 1077224 kB
+ HighTotal: 15597528 kB
+ HighFree: 13629632 kB
+ LowTotal: 747444 kB
+ LowFree: 4432 kB
+ SwapTotal: 0 kB
+ SwapFree: 0 kB
+ Dirty: 968 kB
+ Writeback: 0 kB
+ AnonPages: 861800 kB
+ Mapped: 280372 kB
+ Shmem: 644 kB
+ KReclaimable: 168048 kB
+ Slab: 284364 kB
+ SReclaimable: 159856 kB
+ SUnreclaim: 124508 kB
+ PageTables: 24448 kB
+ NFS_Unstable: 0 kB
+ Bounce: 0 kB
+ WritebackTmp: 0 kB
+ CommitLimit: 7669796 kB
+ Committed_AS: 100056 kB
+ VmallocTotal: 112216 kB
+ VmallocUsed: 428 kB
+ VmallocChunk: 111088 kB
+ Percpu: 62080 kB
+ HardwareCorrupted: 0 kB
+ AnonHugePages: 49152 kB
+ ShmemHugePages: 0 kB
+ ShmemPmdMapped: 0 kB
- MemTotal: Total usable ram (i.e. physical ram minus a few reserved
+MemTotal
+ Total usable ram (i.e. physical ram minus a few reserved
bits and the kernel binary code)
- MemFree: The sum of LowFree+HighFree
-MemAvailable: An estimate of how much memory is available for starting new
+MemFree
+ The sum of LowFree+HighFree
+MemAvailable
+ An estimate of how much memory is available for starting new
applications, without swapping. Calculated from MemFree,
SReclaimable, the size of the file LRU lists, and the low
watermarks in each zone.
@@ -930,69 +975,99 @@
page cache to function well, and that not all reclaimable
slab will be reclaimable, due to items being in use. The
impact of those factors will vary from system to system.
- Buffers: Relatively temporary storage for raw disk blocks
+Buffers
+ Relatively temporary storage for raw disk blocks
shouldn't get tremendously large (20MB or so)
- Cached: in-memory cache for files read from the disk (the
+Cached
+ in-memory cache for files read from the disk (the
pagecache). Doesn't include SwapCached
- SwapCached: Memory that once was swapped out, is swapped back in but
+SwapCached
+ Memory that once was swapped out, is swapped back in but
still also is in the swapfile (if memory is needed it
doesn't need to be swapped out AGAIN because it is already
in the swapfile. This saves I/O)
- Active: Memory that has been used more recently and usually not
+Active
+ Memory that has been used more recently and usually not
reclaimed unless absolutely necessary.
- Inactive: Memory which has been less recently used. It is more
+Inactive
+ Memory which has been less recently used. It is more
eligible to be reclaimed for other purposes
- HighTotal:
- HighFree: Highmem is all memory above ~860MB of physical memory
+HighTotal, HighFree
+ Highmem is all memory above ~860MB of physical memory
Highmem areas are for use by userspace programs, or
for the pagecache. The kernel must use tricks to access
this memory, making it slower to access than lowmem.
- LowTotal:
- LowFree: Lowmem is memory which can be used for everything that
+LowTotal, LowFree
+ Lowmem is memory which can be used for everything that
highmem can be used for, but it is also available for the
kernel's use for its own data structures. Among many
other things, it is where everything from the Slab is
allocated. Bad things happen when you're out of lowmem.
- SwapTotal: total amount of swap space available
- SwapFree: Memory which has been evicted from RAM, and is temporarily
+SwapTotal
+ total amount of swap space available
+SwapFree
+ Memory which has been evicted from RAM, and is temporarily
on the disk
- Dirty: Memory which is waiting to get written back to the disk
- Writeback: Memory which is actively being written back to the disk
- AnonPages: Non-file backed pages mapped into userspace page tables
-HardwareCorrupted: The amount of RAM/memory in KB, the kernel identifies as
+Dirty
+ Memory which is waiting to get written back to the disk
+Writeback
+ Memory which is actively being written back to the disk
+AnonPages
+ Non-file backed pages mapped into userspace page tables
+HardwareCorrupted
+ The amount of RAM/memory in KB, the kernel identifies as
corrupted.
-AnonHugePages: Non-file backed huge pages mapped into userspace page tables
- Mapped: files which have been mmaped, such as libraries
- Shmem: Total memory used by shared memory (shmem) and tmpfs
-ShmemHugePages: Memory used by shared memory (shmem) and tmpfs allocated
+AnonHugePages
+ Non-file backed huge pages mapped into userspace page tables
+Mapped
+ files which have been mmaped, such as libraries
+Shmem
+ Total memory used by shared memory (shmem) and tmpfs
+ShmemHugePages
+ Memory used by shared memory (shmem) and tmpfs allocated
with huge pages
-ShmemPmdMapped: Shared memory mapped into userspace with huge pages
-KReclaimable: Kernel allocations that the kernel will attempt to reclaim
+ShmemPmdMapped
+ Shared memory mapped into userspace with huge pages
+KReclaimable
+ Kernel allocations that the kernel will attempt to reclaim
under memory pressure. Includes SReclaimable (below), and other
direct allocations with a shrinker.
- Slab: in-kernel data structures cache
-SReclaimable: Part of Slab, that might be reclaimed, such as caches
- SUnreclaim: Part of Slab, that cannot be reclaimed on memory pressure
- PageTables: amount of memory dedicated to the lowest level of page
+Slab
+ in-kernel data structures cache
+SReclaimable
+ Part of Slab, that might be reclaimed, such as caches
+SUnreclaim
+ Part of Slab, that cannot be reclaimed on memory pressure
+PageTables
+ amount of memory dedicated to the lowest level of page
tables.
-NFS_Unstable: NFS pages sent to the server, but not yet committed to stable
+NFS_Unstable
+ NFS pages sent to the server, but not yet committed to stable
storage
- Bounce: Memory used for block device "bounce buffers"
-WritebackTmp: Memory used by FUSE for temporary writeback buffers
- CommitLimit: Based on the overcommit ratio ('vm.overcommit_ratio'),
+Bounce
+ Memory used for block device "bounce buffers"
+WritebackTmp
+ Memory used by FUSE for temporary writeback buffers
+CommitLimit
+ Based on the overcommit ratio ('vm.overcommit_ratio'),
this is the total amount of memory currently available to
be allocated on the system. This limit is only adhered to
if strict overcommit accounting is enabled (mode 2 in
'vm.overcommit_memory').
- The CommitLimit is calculated with the following formula:
- CommitLimit = ([total RAM pages] - [total huge TLB pages]) *
- overcommit_ratio / 100 + [total swap pages]
+
+ The CommitLimit is calculated with the following formula::
+
+ CommitLimit = ([total RAM pages] - [total huge TLB pages]) *
+ overcommit_ratio / 100 + [total swap pages]
+
For example, on a system with 1G of physical RAM and 7G
of swap with a `vm.overcommit_ratio` of 30 it would
yield a CommitLimit of 7.3G.
+
For more details, see the memory overcommit documentation
in vm/overcommit-accounting.
-Committed_AS: The amount of memory presently allocated on the system.
+Committed_AS
+ The amount of memory presently allocated on the system.
The committed memory is a sum of all of the memory which
has been allocated by processes, even if it has not been
"used" by them as of yet. A process which malloc()'s 1G
@@ -1005,21 +1080,25 @@
This is useful if one needs to guarantee that processes will
not fail due to lack of memory once that memory has been
successfully allocated.
-VmallocTotal: total size of vmalloc memory area
- VmallocUsed: amount of vmalloc area which is used
-VmallocChunk: largest contiguous block of vmalloc area which is free
- Percpu: Memory allocated to the percpu allocator used to back percpu
+VmallocTotal
+ total size of vmalloc memory area
+VmallocUsed
+ amount of vmalloc area which is used
+VmallocChunk
+ largest contiguous block of vmalloc area which is free
+Percpu
+ Memory allocated to the percpu allocator used to back percpu
allocations. This stat excludes the cost of metadata.
-..............................................................................
-
-vmallocinfo:
+vmallocinfo
+~~~~~~~~~~~
Provides information about vmalloced/vmaped areas. One line per area,
containing the virtual address range of the area, size in bytes,
caller information of the creator, and optional information depending
on the kind of area :
+ ========== ===================================================
pages=nr number of pages
phys=addr if a physical address was specified
ioremap I/O mapping (ioremap() and friends)
@@ -1029,49 +1108,54 @@
vpages buffer for pages pointers was vmalloced (huge area)
N<node>=nr (Only on NUMA kernels)
Number of pages allocated on memory node <node>
+ ========== ===================================================
-> cat /proc/vmallocinfo
-0xffffc20000000000-0xffffc20000201000 2101248 alloc_large_system_hash+0x204 ...
- /0x2c0 pages=512 vmalloc N0=128 N1=128 N2=128 N3=128
-0xffffc20000201000-0xffffc20000302000 1052672 alloc_large_system_hash+0x204 ...
- /0x2c0 pages=256 vmalloc N0=64 N1=64 N2=64 N3=64
-0xffffc20000302000-0xffffc20000304000 8192 acpi_tb_verify_table+0x21/0x4f...
- phys=7fee8000 ioremap
-0xffffc20000304000-0xffffc20000307000 12288 acpi_tb_verify_table+0x21/0x4f...
- phys=7fee7000 ioremap
-0xffffc2000031d000-0xffffc2000031f000 8192 init_vdso_vars+0x112/0x210
-0xffffc2000031f000-0xffffc2000032b000 49152 cramfs_uncompress_init+0x2e ...
- /0x80 pages=11 vmalloc N0=3 N1=3 N2=2 N3=3
-0xffffc2000033a000-0xffffc2000033d000 12288 sys_swapon+0x640/0xac0 ...
- pages=2 vmalloc N1=2
-0xffffc20000347000-0xffffc2000034c000 20480 xt_alloc_table_info+0xfe ...
- /0x130 [x_tables] pages=4 vmalloc N0=4
-0xffffffffa0000000-0xffffffffa000f000 61440 sys_init_module+0xc27/0x1d00 ...
- pages=14 vmalloc N2=14
-0xffffffffa000f000-0xffffffffa0014000 20480 sys_init_module+0xc27/0x1d00 ...
- pages=4 vmalloc N1=4
-0xffffffffa0014000-0xffffffffa0017000 12288 sys_init_module+0xc27/0x1d00 ...
- pages=2 vmalloc N1=2
-0xffffffffa0017000-0xffffffffa0022000 45056 sys_init_module+0xc27/0x1d00 ...
- pages=10 vmalloc N0=10
+::
-..............................................................................
+ > cat /proc/vmallocinfo
+ 0xffffc20000000000-0xffffc20000201000 2101248 alloc_large_system_hash+0x204 ...
+ /0x2c0 pages=512 vmalloc N0=128 N1=128 N2=128 N3=128
+ 0xffffc20000201000-0xffffc20000302000 1052672 alloc_large_system_hash+0x204 ...
+ /0x2c0 pages=256 vmalloc N0=64 N1=64 N2=64 N3=64
+ 0xffffc20000302000-0xffffc20000304000 8192 acpi_tb_verify_table+0x21/0x4f...
+ phys=7fee8000 ioremap
+ 0xffffc20000304000-0xffffc20000307000 12288 acpi_tb_verify_table+0x21/0x4f...
+ phys=7fee7000 ioremap
+ 0xffffc2000031d000-0xffffc2000031f000 8192 init_vdso_vars+0x112/0x210
+ 0xffffc2000031f000-0xffffc2000032b000 49152 cramfs_uncompress_init+0x2e ...
+ /0x80 pages=11 vmalloc N0=3 N1=3 N2=2 N3=3
+ 0xffffc2000033a000-0xffffc2000033d000 12288 sys_swapon+0x640/0xac0 ...
+ pages=2 vmalloc N1=2
+ 0xffffc20000347000-0xffffc2000034c000 20480 xt_alloc_table_info+0xfe ...
+ /0x130 [x_tables] pages=4 vmalloc N0=4
+ 0xffffffffa0000000-0xffffffffa000f000 61440 sys_init_module+0xc27/0x1d00 ...
+ pages=14 vmalloc N2=14
+ 0xffffffffa000f000-0xffffffffa0014000 20480 sys_init_module+0xc27/0x1d00 ...
+ pages=4 vmalloc N1=4
+ 0xffffffffa0014000-0xffffffffa0017000 12288 sys_init_module+0xc27/0x1d00 ...
+ pages=2 vmalloc N1=2
+ 0xffffffffa0017000-0xffffffffa0022000 45056 sys_init_module+0xc27/0x1d00 ...
+ pages=10 vmalloc N0=10
-softirqs:
+
+softirqs
+~~~~~~~~
Provides counts of softirq handlers serviced since boot time, for each cpu.
-> cat /proc/softirqs
- CPU0 CPU1 CPU2 CPU3
- HI: 0 0 0 0
- TIMER: 27166 27120 27097 27034
- NET_TX: 0 0 0 17
- NET_RX: 42 0 0 39
- BLOCK: 0 0 107 1121
- TASKLET: 0 0 0 290
- SCHED: 27035 26983 26971 26746
- HRTIMER: 0 0 0 0
- RCU: 1678 1769 2178 2250
+::
+
+ > cat /proc/softirqs
+ CPU0 CPU1 CPU2 CPU3
+ HI: 0 0 0 0
+ TIMER: 27166 27120 27097 27034
+ NET_TX: 0 0 0 17
+ NET_RX: 42 0 0 39
+ BLOCK: 0 0 107 1121
+ TASKLET: 0 0 0 290
+ SCHED: 27035 26983 26971 26746
+ HRTIMER: 0 0 0 0
+ RCU: 1678 1769 2178 2250
1.3 IDE devices in /proc/ide
@@ -1083,7 +1167,7 @@
in the controller specific subtree.
The file drivers contains general information about the drivers used for the
-IDE devices:
+IDE devices::
> cat /proc/ide/drivers
ide-cdrom version 4.53
@@ -1094,57 +1178,61 @@
directories contains the files shown in table 1-6.
-Table 1-6: IDE controller info in /proc/ide/ide?
-..............................................................................
- File Content
- channel IDE channel (0 or 1)
- config Configuration (only for PCI/IDE bridge)
- mate Mate name
- model Type/Chipset of IDE controller
-..............................................................................
+.. table:: Table 1-6: IDE controller info in /proc/ide/ide?
+
+ ======= =======================================
+ File Content
+ ======= =======================================
+ channel IDE channel (0 or 1)
+ config Configuration (only for PCI/IDE bridge)
+ mate Mate name
+ model Type/Chipset of IDE controller
+ ======= =======================================
Each device connected to a controller has a separate subdirectory in the
controllers directory. The files listed in table 1-7 are contained in these
directories.
-Table 1-7: IDE device information
-..............................................................................
- File Content
- cache The cache
- capacity Capacity of the medium (in 512Byte blocks)
- driver driver and version
- geometry physical and logical geometry
- identify device identify block
- media media type
- model device identifier
- settings device setup
- smart_thresholds IDE disk management thresholds
- smart_values IDE disk management values
-..............................................................................
+.. table:: Table 1-7: IDE device information
-The most interesting file is settings. This file contains a nice overview of
-the drive parameters:
+ ================ ==========================================
+ File Content
+ ================ ==========================================
+ cache The cache
+ capacity Capacity of the medium (in 512Byte blocks)
+ driver driver and version
+ geometry physical and logical geometry
+ identify device identify block
+ media media type
+ model device identifier
+ settings device setup
+ smart_thresholds IDE disk management thresholds
+ smart_values IDE disk management values
+ ================ ==========================================
- # cat /proc/ide/ide0/hda/settings
- name value min max mode
- ---- ----- --- --- ----
- bios_cyl 526 0 65535 rw
- bios_head 255 0 255 rw
- bios_sect 63 0 63 rw
- breada_readahead 4 0 127 rw
- bswap 0 0 1 r
- file_readahead 72 0 2097151 rw
- io_32bit 0 0 3 rw
- keepsettings 0 0 1 rw
- max_kb_per_request 122 1 127 rw
- multcount 0 0 8 rw
- nice1 1 0 1 rw
- nowerr 0 0 1 rw
- pio_mode write-only 0 255 w
- slow 0 0 1 rw
- unmaskirq 0 0 1 rw
- using_dma 0 0 1 rw
+The most interesting file is ``settings``. This file contains a nice
+overview of the drive parameters::
+
+ # cat /proc/ide/ide0/hda/settings
+ name value min max mode
+ ---- ----- --- --- ----
+ bios_cyl 526 0 65535 rw
+ bios_head 255 0 255 rw
+ bios_sect 63 0 63 rw
+ breada_readahead 4 0 127 rw
+ bswap 0 0 1 r
+ file_readahead 72 0 2097151 rw
+ io_32bit 0 0 3 rw
+ keepsettings 0 0 1 rw
+ max_kb_per_request 122 1 127 rw
+ multcount 0 0 8 rw
+ nice1 1 0 1 rw
+ nowerr 0 0 1 rw
+ pio_mode write-only 0 255 w
+ slow 0 0 1 rw
+ unmaskirq 0 0 1 rw
+ using_dma 0 0 1 rw
1.4 Networking info in /proc/net
@@ -1155,67 +1243,70 @@
support this. Table 1-9 lists the files and their meaning.
-Table 1-8: IPv6 info in /proc/net
-..............................................................................
- File Content
- udp6 UDP sockets (IPv6)
- tcp6 TCP sockets (IPv6)
- raw6 Raw device statistics (IPv6)
- igmp6 IP multicast addresses, which this host joined (IPv6)
- if_inet6 List of IPv6 interface addresses
- ipv6_route Kernel routing table for IPv6
- rt6_stats Global IPv6 routing tables statistics
- sockstat6 Socket statistics (IPv6)
- snmp6 Snmp data (IPv6)
-..............................................................................
+.. table:: Table 1-8: IPv6 info in /proc/net
+ ========== =====================================================
+ File Content
+ ========== =====================================================
+ udp6 UDP sockets (IPv6)
+ tcp6 TCP sockets (IPv6)
+ raw6 Raw device statistics (IPv6)
+ igmp6 IP multicast addresses, which this host joined (IPv6)
+ if_inet6 List of IPv6 interface addresses
+ ipv6_route Kernel routing table for IPv6
+ rt6_stats Global IPv6 routing tables statistics
+ sockstat6 Socket statistics (IPv6)
+ snmp6 Snmp data (IPv6)
+ ========== =====================================================
-Table 1-9: Network info in /proc/net
-..............................................................................
- File Content
- arp Kernel ARP table
- dev network devices with statistics
+.. table:: Table 1-9: Network info in /proc/net
+
+ ============= ================================================================
+ File Content
+ ============= ================================================================
+ arp Kernel ARP table
+ dev network devices with statistics
dev_mcast the Layer2 multicast groups a device is listening too
(interface index, label, number of references, number of bound
- addresses).
- dev_stat network device status
- ip_fwchains Firewall chain linkage
- ip_fwnames Firewall chain names
- ip_masq Directory containing the masquerading tables
- ip_masquerade Major masquerading table
- netstat Network statistics
- raw raw device statistics
- route Kernel routing table
- rpc Directory containing rpc info
- rt_cache Routing cache
- snmp SNMP data
- sockstat Socket statistics
- tcp TCP sockets
- udp UDP sockets
- unix UNIX domain sockets
- wireless Wireless interface data (Wavelan etc)
- igmp IP multicast addresses, which this host joined
- psched Global packet scheduler parameters.
- netlink List of PF_NETLINK sockets
- ip_mr_vifs List of multicast virtual interfaces
- ip_mr_cache List of multicast routing cache
-..............................................................................
+ addresses).
+ dev_stat network device status
+ ip_fwchains Firewall chain linkage
+ ip_fwnames Firewall chain names
+ ip_masq Directory containing the masquerading tables
+ ip_masquerade Major masquerading table
+ netstat Network statistics
+ raw raw device statistics
+ route Kernel routing table
+ rpc Directory containing rpc info
+ rt_cache Routing cache
+ snmp SNMP data
+ sockstat Socket statistics
+ tcp TCP sockets
+ udp UDP sockets
+ unix UNIX domain sockets
+ wireless Wireless interface data (Wavelan etc)
+ igmp IP multicast addresses, which this host joined
+ psched Global packet scheduler parameters.
+ netlink List of PF_NETLINK sockets
+ ip_mr_vifs List of multicast virtual interfaces
+ ip_mr_cache List of multicast routing cache
+ ============= ================================================================
You can use this information to see which network devices are available in
-your system and how much traffic was routed over those devices:
+your system and how much traffic was routed over those devices::
- > cat /proc/net/dev
- Inter-|Receive |[...
- face |bytes packets errs drop fifo frame compressed multicast|[...
- lo: 908188 5596 0 0 0 0 0 0 [...
- ppp0:15475140 20721 410 0 0 410 0 0 [...
- eth0: 614530 7085 0 0 0 0 0 1 [...
-
- ...] Transmit
- ...] bytes packets errs drop fifo colls carrier compressed
- ...] 908188 5596 0 0 0 0 0 0
- ...] 1375103 17405 0 0 0 0 0 0
- ...] 1703981 5535 0 0 0 3 0 0
+ > cat /proc/net/dev
+ Inter-|Receive |[...
+ face |bytes packets errs drop fifo frame compressed multicast|[...
+ lo: 908188 5596 0 0 0 0 0 0 [...
+ ppp0:15475140 20721 410 0 0 410 0 0 [...
+ eth0: 614530 7085 0 0 0 0 0 1 [...
+
+ ...] Transmit
+ ...] bytes packets errs drop fifo colls carrier compressed
+ ...] 908188 5596 0 0 0 0 0 0
+ ...] 1375103 17405 0 0 0 0 0 0
+ ...] 1703981 5535 0 0 0 3 0 0
In addition, each Channel Bond interface has its own directory. For
example, the bond0 device will have a directory called /proc/net/bond0/.
@@ -1228,62 +1319,62 @@
If you have a SCSI host adapter in your system, you'll find a subdirectory
named after the driver for this adapter in /proc/scsi. You'll also see a list
-of all recognized SCSI devices in /proc/scsi:
+of all recognized SCSI devices in /proc/scsi::
- >cat /proc/scsi/scsi
- Attached devices:
- Host: scsi0 Channel: 00 Id: 00 Lun: 00
- Vendor: IBM Model: DGHS09U Rev: 03E0
- Type: Direct-Access ANSI SCSI revision: 03
- Host: scsi0 Channel: 00 Id: 06 Lun: 00
- Vendor: PIONEER Model: CD-ROM DR-U06S Rev: 1.04
- Type: CD-ROM ANSI SCSI revision: 02
+ >cat /proc/scsi/scsi
+ Attached devices:
+ Host: scsi0 Channel: 00 Id: 00 Lun: 00
+ Vendor: IBM Model: DGHS09U Rev: 03E0
+ Type: Direct-Access ANSI SCSI revision: 03
+ Host: scsi0 Channel: 00 Id: 06 Lun: 00
+ Vendor: PIONEER Model: CD-ROM DR-U06S Rev: 1.04
+ Type: CD-ROM ANSI SCSI revision: 02
The directory named after the driver has one file for each adapter found in
the system. These files contain information about the controller, including
the used IRQ and the IO address range. The amount of information shown is
dependent on the adapter you use. The example shows the output for an Adaptec
-AHA-2940 SCSI adapter:
+AHA-2940 SCSI adapter::
- > cat /proc/scsi/aic7xxx/0
-
- Adaptec AIC7xxx driver version: 5.1.19/3.2.4
- Compile Options:
- TCQ Enabled By Default : Disabled
- AIC7XXX_PROC_STATS : Disabled
- AIC7XXX_RESET_DELAY : 5
- Adapter Configuration:
- SCSI Adapter: Adaptec AHA-294X Ultra SCSI host adapter
- Ultra Wide Controller
- PCI MMAPed I/O Base: 0xeb001000
- Adapter SEEPROM Config: SEEPROM found and used.
- Adaptec SCSI BIOS: Enabled
- IRQ: 10
- SCBs: Active 0, Max Active 2,
- Allocated 15, HW 16, Page 255
- Interrupts: 160328
- BIOS Control Word: 0x18b6
- Adapter Control Word: 0x005b
- Extended Translation: Enabled
- Disconnect Enable Flags: 0xffff
- Ultra Enable Flags: 0x0001
- Tag Queue Enable Flags: 0x0000
- Ordered Queue Tag Flags: 0x0000
- Default Tag Queue Depth: 8
- Tagged Queue By Device array for aic7xxx host instance 0:
- {255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}
- Actual queue depth per device for aic7xxx host instance 0:
- {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}
- Statistics:
- (scsi0:0:0:0)
- Device using Wide/Sync transfers at 40.0 MByte/sec, offset 8
- Transinfo settings: current(12/8/1/0), goal(12/8/1/0), user(12/15/1/0)
- Total transfers 160151 (74577 reads and 85574 writes)
- (scsi0:0:6:0)
- Device using Narrow/Sync transfers at 5.0 MByte/sec, offset 15
- Transinfo settings: current(50/15/0/0), goal(50/15/0/0), user(50/15/0/0)
- Total transfers 0 (0 reads and 0 writes)
+ > cat /proc/scsi/aic7xxx/0
+
+ Adaptec AIC7xxx driver version: 5.1.19/3.2.4
+ Compile Options:
+ TCQ Enabled By Default : Disabled
+ AIC7XXX_PROC_STATS : Disabled
+ AIC7XXX_RESET_DELAY : 5
+ Adapter Configuration:
+ SCSI Adapter: Adaptec AHA-294X Ultra SCSI host adapter
+ Ultra Wide Controller
+ PCI MMAPed I/O Base: 0xeb001000
+ Adapter SEEPROM Config: SEEPROM found and used.
+ Adaptec SCSI BIOS: Enabled
+ IRQ: 10
+ SCBs: Active 0, Max Active 2,
+ Allocated 15, HW 16, Page 255
+ Interrupts: 160328
+ BIOS Control Word: 0x18b6
+ Adapter Control Word: 0x005b
+ Extended Translation: Enabled
+ Disconnect Enable Flags: 0xffff
+ Ultra Enable Flags: 0x0001
+ Tag Queue Enable Flags: 0x0000
+ Ordered Queue Tag Flags: 0x0000
+ Default Tag Queue Depth: 8
+ Tagged Queue By Device array for aic7xxx host instance 0:
+ {255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}
+ Actual queue depth per device for aic7xxx host instance 0:
+ {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}
+ Statistics:
+ (scsi0:0:0:0)
+ Device using Wide/Sync transfers at 40.0 MByte/sec, offset 8
+ Transinfo settings: current(12/8/1/0), goal(12/8/1/0), user(12/15/1/0)
+ Total transfers 160151 (74577 reads and 85574 writes)
+ (scsi0:0:6:0)
+ Device using Narrow/Sync transfers at 5.0 MByte/sec, offset 15
+ Transinfo settings: current(50/15/0/0), goal(50/15/0/0), user(50/15/0/0)
+ Total transfers 0 (0 reads and 0 writes)
1.6 Parallel port info in /proc/parport
@@ -1296,18 +1387,20 @@
These directories contain the four files shown in Table 1-10.
-Table 1-10: Files in /proc/parport
-..............................................................................
- File Content
- autoprobe Any IEEE-1284 device ID information that has been acquired.
+.. table:: Table 1-10: Files in /proc/parport
+
+ ========= ====================================================================
+ File Content
+ ========= ====================================================================
+ autoprobe Any IEEE-1284 device ID information that has been acquired.
devices list of the device drivers using that port. A + will appear by the
name of the device currently using the port (it might not appear
- against any).
- hardware Parallel port's base address, IRQ line and DMA channel.
+ against any).
+ hardware Parallel port's base address, IRQ line and DMA channel.
irq IRQ that parport is using for that port. This is in a separate
file to allow you to alter it by writing a new value in (IRQ
- number or none).
-..............................................................................
+ number or none).
+ ========= ====================================================================
1.7 TTY info in /proc/tty
-------------------------
@@ -1317,29 +1410,31 @@
this directory, as shown in Table 1-11.
-Table 1-11: Files in /proc/tty
-..............................................................................
- File Content
- drivers list of drivers and their usage
- ldiscs registered line disciplines
- driver/serial usage statistic and status of single tty lines
-..............................................................................
+.. table:: Table 1-11: Files in /proc/tty
+
+ ============= ==============================================
+ File Content
+ ============= ==============================================
+ drivers list of drivers and their usage
+ ldiscs registered line disciplines
+ driver/serial usage statistic and status of single tty lines
+ ============= ==============================================
To see which tty's are currently in use, you can simply look into the file
-/proc/tty/drivers:
+/proc/tty/drivers::
- > cat /proc/tty/drivers
- pty_slave /dev/pts 136 0-255 pty:slave
- pty_master /dev/ptm 128 0-255 pty:master
- pty_slave /dev/ttyp 3 0-255 pty:slave
- pty_master /dev/pty 2 0-255 pty:master
- serial /dev/cua 5 64-67 serial:callout
- serial /dev/ttyS 4 64-67 serial
- /dev/tty0 /dev/tty0 4 0 system:vtmaster
- /dev/ptmx /dev/ptmx 5 2 system
- /dev/console /dev/console 5 1 system:console
- /dev/tty /dev/tty 5 0 system:/dev/tty
- unknown /dev/tty 4 1-63 console
+ > cat /proc/tty/drivers
+ pty_slave /dev/pts 136 0-255 pty:slave
+ pty_master /dev/ptm 128 0-255 pty:master
+ pty_slave /dev/ttyp 3 0-255 pty:slave
+ pty_master /dev/pty 2 0-255 pty:master
+ serial /dev/cua 5 64-67 serial:callout
+ serial /dev/ttyS 4 64-67 serial
+ /dev/tty0 /dev/tty0 4 0 system:vtmaster
+ /dev/ptmx /dev/ptmx 5 2 system
+ /dev/console /dev/console 5 1 system:console
+ /dev/tty /dev/tty 5 0 system:/dev/tty
+ unknown /dev/tty 4 1-63 console
1.8 Miscellaneous kernel statistics in /proc/stat
@@ -1347,7 +1442,7 @@
Various pieces of information about kernel activity are available in the
/proc/stat file. All of the numbers reported in this file are aggregates
-since the system first booted. For a quick look, simply cat the file:
+since the system first booted. For a quick look, simply cat the file::
> cat /proc/stat
cpu 2255 34 2290 22625563 6290 127 456 0 0 0
@@ -1372,6 +1467,7 @@
- idle: twiddling thumbs
- iowait: In a word, iowait stands for waiting for I/O to complete. But there
are several problems:
+
1. Cpu will not wait for I/O to complete, iowait is the time that a task is
waiting for I/O to complete. When cpu goes into idle state for
outstanding task io, another task will be scheduled on this CPU.
@@ -1379,6 +1475,7 @@
on any CPU, so the iowait of each CPU is difficult to calculate.
3. The value of iowait field in /proc/stat will decrease in certain
conditions.
+
So, the iowait is not reliable by reading from /proc/stat.
- irq: servicing interrupts
- softirq: servicing softirqs
@@ -1422,18 +1519,19 @@
/proc/fs/ext4/dm-0). The files in each per-device directory are shown
in Table 1-12, below.
-Table 1-12: Files in /proc/fs/ext4/<devname>
-..............................................................................
- File Content
+.. table:: Table 1-12: Files in /proc/fs/ext4/<devname>
+
+ ============== ==========================================================
+ File Content
mb_groups details of multiblock allocator buddy cache of free blocks
-..............................................................................
+ ============== ==========================================================
2.0 /proc/consoles
------------------
Shows registered system console lines.
To see which character device lines are currently used for the system console
-/dev/console, you may simply look into the file /proc/consoles:
+/dev/console, you may simply look into the file /proc/consoles::
> cat /proc/consoles
tty0 -WU (ECp) 4:7
@@ -1441,41 +1539,45 @@
The columns are:
- device name of the device
- operations R = can do read operations
- W = can do write operations
- U = can do unblank
- flags E = it is enabled
- C = it is preferred console
- B = it is primary boot console
- p = it is used for printk buffer
- b = it is not a TTY but a Braille device
- a = it is safe to use when cpu is offline
- major:minor major and minor number of the device separated by a colon
++--------------------+-------------------------------------------------------+
+| device | name of the device |
++====================+=======================================================+
+| operations | * R = can do read operations |
+| | * W = can do write operations |
+| | * U = can do unblank |
++--------------------+-------------------------------------------------------+
+| flags | * E = it is enabled |
+| | * C = it is preferred console |
+| | * B = it is primary boot console |
+| | * p = it is used for printk buffer |
+| | * b = it is not a TTY but a Braille device |
+| | * a = it is safe to use when cpu is offline |
++--------------------+-------------------------------------------------------+
+| major:minor | major and minor number of the device separated by a |
+| | colon |
++--------------------+-------------------------------------------------------+
-------------------------------------------------------------------------------
Summary
-------------------------------------------------------------------------------
+-------
+
The /proc file system serves information about the running system. It not only
allows access to process data but also allows you to request the kernel status
by reading files in the hierarchy.
The directory structure of /proc reflects the types of information and makes
it easy, if not obvious, where to look for specific data.
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
-CHAPTER 2: MODIFYING SYSTEM PARAMETERS
-------------------------------------------------------------------------------
+Chapter 2: Modifying System Parameters
+======================================
-------------------------------------------------------------------------------
In This Chapter
-------------------------------------------------------------------------------
+---------------
+
* Modifying kernel parameters by writing into files found in /proc/sys
* Exploring the files which modify certain parameters
* Review of the /proc/sys file tree
-------------------------------------------------------------------------------
+------------------------------------------------------------------------------
A very interesting part of /proc is the directory /proc/sys. This is not only
a source of information, it also allows you to change parameters within the
@@ -1503,19 +1605,18 @@
Please see: Documentation/admin-guide/sysctl/ directory for descriptions of these
entries.
-------------------------------------------------------------------------------
Summary
-------------------------------------------------------------------------------
+-------
+
Certain aspects of kernel behavior can be modified at runtime, without the
need to recompile the kernel, or even to reboot the system. The files in the
/proc/sys tree can not only be read, but also modified. You can use the echo
command to write value into these files, thereby changing the default settings
of the kernel.
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
-CHAPTER 3: PER-PROCESS PARAMETERS
-------------------------------------------------------------------------------
+
+Chapter 3: Per-process Parameters
+=================================
3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj- Adjust the oom-killer score
--------------------------------------------------------------------------------
@@ -1588,26 +1689,28 @@
This file contains IO statistics for each running process
Example
--------
+~~~~~~~
-test:/tmp # dd if=/dev/zero of=/tmp/test.dat &
-[1] 3828
+::
-test:/tmp # cat /proc/3828/io
-rchar: 323934931
-wchar: 323929600
-syscr: 632687
-syscw: 632675
-read_bytes: 0
-write_bytes: 323932160
-cancelled_write_bytes: 0
+ test:/tmp # dd if=/dev/zero of=/tmp/test.dat &
+ [1] 3828
+
+ test:/tmp # cat /proc/3828/io
+ rchar: 323934931
+ wchar: 323929600
+ syscr: 632687
+ syscw: 632675
+ read_bytes: 0
+ write_bytes: 323932160
+ cancelled_write_bytes: 0
Description
------------
+~~~~~~~~~~~
rchar
------
+^^^^^
I/O counter: chars read
The number of bytes which this task has caused to be read from storage. This
@@ -1618,7 +1721,7 @@
wchar
------
+^^^^^
I/O counter: chars written
The number of bytes which this task has caused, or shall cause to be written
@@ -1626,7 +1729,7 @@
syscr
------
+^^^^^
I/O counter: read syscalls
Attempt to count the number of read I/O operations, i.e. syscalls like read()
@@ -1634,7 +1737,7 @@
syscw
------
+^^^^^
I/O counter: write syscalls
Attempt to count the number of write I/O operations, i.e. syscalls like
@@ -1642,7 +1745,7 @@
read_bytes
-----------
+^^^^^^^^^^
I/O counter: bytes read
Attempt to count the number of bytes which this process really did cause to
@@ -1652,7 +1755,7 @@
write_bytes
------------
+^^^^^^^^^^^
I/O counter: bytes written
Attempt to count the number of bytes which this process caused to be sent to
@@ -1660,7 +1763,7 @@
cancelled_write_bytes
----------------------
+^^^^^^^^^^^^^^^^^^^^^
The big inaccuracy here is truncate. If a process writes 1MB to a file and
then deletes the file, it will in fact perform no writeout. But it will have
@@ -1673,12 +1776,11 @@
that.
-Note
-----
+.. Note::
-At its current implementation state, this is a bit racy on 32-bit machines: if
-process A reads process B's /proc/pid/io while process B is updating one of
-those 64-bit counters, process A could see an intermediate result.
+ At its current implementation state, this is a bit racy on 32-bit machines:
+ if process A reads process B's /proc/pid/io while process B is updating one
+ of those 64-bit counters, process A could see an intermediate result.
More information about this can be found within the taskstats documentation in
@@ -1698,12 +1800,13 @@
corresponding memory type are dumped, otherwise they are not dumped.
The following 9 memory types are supported:
+
- (bit 0) anonymous private memory
- (bit 1) anonymous shared memory
- (bit 2) file-backed private memory
- (bit 3) file-backed shared memory
- (bit 4) ELF header pages in file-backed private memory areas (it is
- effective only if the bit 2 is cleared)
+ effective only if the bit 2 is cleared)
- (bit 5) hugetlb private memory
- (bit 6) hugetlb shared memory
- (bit 7) DAX private memory
@@ -1719,13 +1822,13 @@
segments, ELF header pages and hugetlb private memory are dumped.
If you don't want to dump all shared memory segments attached to pid 1234,
-write 0x31 to the process's proc file.
+write 0x31 to the process's proc file::
$ echo 0x31 > /proc/1234/coredump_filter
When a new process is created, the process inherits the bitmask status from its
parent. It is useful to set up coredump_filter before the program runs.
-For example:
+For example::
$ echo 0x7 > /proc/self/coredump_filter
$ ./some_program
@@ -1733,35 +1836,37 @@
3.5 /proc/<pid>/mountinfo - Information about mounts
--------------------------------------------------------
-This file contains lines of the form:
+This file contains lines of the form::
-36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
-(1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
+ 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
+ (1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
-(1) mount ID: unique identifier of the mount (may be reused after umount)
-(2) parent ID: ID of parent (or of self for the top of the mount tree)
-(3) major:minor: value of st_dev for files on filesystem
-(4) root: root of the mount within the filesystem
-(5) mount point: mount point relative to the process's root
-(6) mount options: per mount options
-(7) optional fields: zero or more fields of the form "tag[:value]"
-(8) separator: marks the end of the optional fields
-(9) filesystem type: name of filesystem of the form "type[.subtype]"
-(10) mount source: filesystem specific information or "none"
-(11) super options: per super block options
+ (1) mount ID: unique identifier of the mount (may be reused after umount)
+ (2) parent ID: ID of parent (or of self for the top of the mount tree)
+ (3) major:minor: value of st_dev for files on filesystem
+ (4) root: root of the mount within the filesystem
+ (5) mount point: mount point relative to the process's root
+ (6) mount options: per mount options
+ (7) optional fields: zero or more fields of the form "tag[:value]"
+ (8) separator: marks the end of the optional fields
+ (9) filesystem type: name of filesystem of the form "type[.subtype]"
+ (10) mount source: filesystem specific information or "none"
+ (11) super options: per super block options
Parsers should ignore all unrecognised optional fields. Currently the
possible optional fields are:
-shared:X mount is shared in peer group X
-master:X mount is slave to peer group X
-propagate_from:X mount is slave and receives propagation from peer group X (*)
-unbindable mount is unbindable
+================ ==============================================================
+shared:X mount is shared in peer group X
+master:X mount is slave to peer group X
+propagate_from:X mount is slave and receives propagation from peer group X [#]_
+unbindable mount is unbindable
+================ ==============================================================
-(*) X is the closest dominant peer group under the process's root. If
-X is the immediate master of the mount, or if there's no dominant peer
-group under the same root, then only the "master:X" field is present
-and not the "propagate_from:X" field.
+.. [#] X is the closest dominant peer group under the process's root. If
+ X is the immediate master of the mount, or if there's no dominant peer
+ group under the same root, then only the "master:X" field is present
+ and not the "propagate_from:X" field.
For more information on mount propagation see:
@@ -1804,77 +1909,86 @@
the file system containing the opened file [see 3.5 /proc/<pid>/mountinfo
for details].
-A typical output is
+A typical output is::
pos: 0
flags: 0100002
mnt_id: 19
-All locks associated with a file descriptor are shown in its fdinfo too.
+All locks associated with a file descriptor are shown in its fdinfo too::
-lock: 1: FLOCK ADVISORY WRITE 359 00:13:11691 0 EOF
+ lock: 1: FLOCK ADVISORY WRITE 359 00:13:11691 0 EOF
The files such as eventfd, fsnotify, signalfd, epoll among the regular pos/flags
pair provide additional information particular to the objects they represent.
- Eventfd files
- ~~~~~~~~~~~~~
+Eventfd files
+~~~~~~~~~~~~~
+
+::
+
pos: 0
flags: 04002
mnt_id: 9
eventfd-count: 5a
- where 'eventfd-count' is hex value of a counter.
+where 'eventfd-count' is hex value of a counter.
- Signalfd files
- ~~~~~~~~~~~~~~
+Signalfd files
+~~~~~~~~~~~~~~
+
+::
+
pos: 0
flags: 04002
mnt_id: 9
sigmask: 0000000000000200
- where 'sigmask' is hex value of the signal mask associated
- with a file.
+where 'sigmask' is hex value of the signal mask associated
+with a file.
- Epoll files
- ~~~~~~~~~~~
+Epoll files
+~~~~~~~~~~~
+
+::
+
pos: 0
flags: 02
mnt_id: 9
tfd: 5 events: 1d data: ffffffffffffffff pos:0 ino:61af sdev:7
- where 'tfd' is a target file descriptor number in decimal form,
- 'events' is events mask being watched and the 'data' is data
- associated with a target [see epoll(7) for more details].
+where 'tfd' is a target file descriptor number in decimal form,
+'events' is events mask being watched and the 'data' is data
+associated with a target [see epoll(7) for more details].
- The 'pos' is current offset of the target file in decimal form
- [see lseek(2)], 'ino' and 'sdev' are inode and device numbers
- where target file resides, all in hex format.
+The 'pos' is current offset of the target file in decimal form
+[see lseek(2)], 'ino' and 'sdev' are inode and device numbers
+where target file resides, all in hex format.
- Fsnotify files
- ~~~~~~~~~~~~~~
- For inotify files the format is the following
+Fsnotify files
+~~~~~~~~~~~~~~
+For inotify files the format is the following::
pos: 0
flags: 02000000
inotify wd:3 ino:9e7e sdev:800013 mask:800afce ignored_mask:0 fhandle-bytes:8 fhandle-type:1 f_handle:7e9e0000640d1b6d
- where 'wd' is a watch descriptor in decimal form, ie a target file
- descriptor number, 'ino' and 'sdev' are inode and device where the
- target file resides and the 'mask' is the mask of events, all in hex
- form [see inotify(7) for more details].
+where 'wd' is a watch descriptor in decimal form, ie a target file
+descriptor number, 'ino' and 'sdev' are inode and device where the
+target file resides and the 'mask' is the mask of events, all in hex
+form [see inotify(7) for more details].
- If the kernel was built with exportfs support, the path to the target
- file is encoded as a file handle. The file handle is provided by three
- fields 'fhandle-bytes', 'fhandle-type' and 'f_handle', all in hex
- format.
+If the kernel was built with exportfs support, the path to the target
+file is encoded as a file handle. The file handle is provided by three
+fields 'fhandle-bytes', 'fhandle-type' and 'f_handle', all in hex
+format.
- If the kernel is built without exportfs support the file handle won't be
- printed out.
+If the kernel is built without exportfs support the file handle won't be
+printed out.
- If there is no inotify mark attached yet the 'inotify' line will be omitted.
+If there is no inotify mark attached yet the 'inotify' line will be omitted.
- For fanotify files the format is
+For fanotify files the format is::
pos: 0
flags: 02
@@ -1883,20 +1997,22 @@
fanotify mnt_id:12 mflags:40 mask:38 ignored_mask:40000003
fanotify ino:4f969 sdev:800013 mflags:0 mask:3b ignored_mask:40000000 fhandle-bytes:8 fhandle-type:1 f_handle:69f90400c275b5b4
- where fanotify 'flags' and 'event-flags' are values used in fanotify_init
- call, 'mnt_id' is the mount point identifier, 'mflags' is the value of
- flags associated with mark which are tracked separately from events
- mask. 'ino', 'sdev' are target inode and device, 'mask' is the events
- mask and 'ignored_mask' is the mask of events which are to be ignored.
- All in hex format. Incorporation of 'mflags', 'mask' and 'ignored_mask'
- does provide information about flags and mask used in fanotify_mark
- call [see fsnotify manpage for details].
+where fanotify 'flags' and 'event-flags' are values used in fanotify_init
+call, 'mnt_id' is the mount point identifier, 'mflags' is the value of
+flags associated with mark which are tracked separately from events
+mask. 'ino', 'sdev' are target inode and device, 'mask' is the events
+mask and 'ignored_mask' is the mask of events which are to be ignored.
+All in hex format. Incorporation of 'mflags', 'mask' and 'ignored_mask'
+does provide information about flags and mask used in fanotify_mark
+call [see fsnotify manpage for details].
- While the first three lines are mandatory and always printed, the rest is
- optional and may be omitted if no marks created yet.
+While the first three lines are mandatory and always printed, the rest is
+optional and may be omitted if no marks created yet.
- Timerfd files
- ~~~~~~~~~~~~~
+Timerfd files
+~~~~~~~~~~~~~
+
+::
pos: 0
flags: 02
@@ -1907,18 +2023,18 @@
it_value: (0, 49406829)
it_interval: (1, 0)
- where 'clockid' is the clock type and 'ticks' is the number of the timer expirations
- that have occurred [see timerfd_create(2) for details]. 'settime flags' are
- flags in octal form been used to setup the timer [see timerfd_settime(2) for
- details]. 'it_value' is remaining time until the timer exiration.
- 'it_interval' is the interval for the timer. Note the timer might be set up
- with TIMER_ABSTIME option which will be shown in 'settime flags', but 'it_value'
- still exhibits timer's remaining time.
+where 'clockid' is the clock type and 'ticks' is the number of the timer expirations
+that have occurred [see timerfd_create(2) for details]. 'settime flags' are
+flags in octal form been used to setup the timer [see timerfd_settime(2) for
+details]. 'it_value' is remaining time until the timer exiration.
+'it_interval' is the interval for the timer. Note the timer might be set up
+with TIMER_ABSTIME option which will be shown in 'settime flags', but 'it_value'
+still exhibits timer's remaining time.
3.9 /proc/<pid>/map_files - Information about memory mapped files
---------------------------------------------------------------------
This directory contains symbolic links which represent memory mapped files
-the process is maintaining. Example output:
+the process is maintaining. Example output::
| lr-------- 1 root root 64 Jan 27 11:24 333c600000-333c620000 -> /usr/lib64/ld-2.18.so
| lr-------- 1 root root 64 Jan 27 11:24 333c81f000-333c820000 -> /usr/lib64/ld-2.18.so
@@ -1976,17 +2092,22 @@
architecture specific status of the task.
Example
--------
+~~~~~~~
+
+::
+
$ cat /proc/6753/arch_status
AVX512_elapsed_ms: 8
Description
------------
+~~~~~~~~~~~
x86 specific entries:
----------------------
- AVX512_elapsed_ms:
- ------------------
+~~~~~~~~~~~~~~~~~~~~~
+
+AVX512_elapsed_ms:
+^^^^^^^^^^^^^^^^^^
+
If AVX512 is supported on the machine, this entry shows the milliseconds
elapsed since the last time AVX512 usage was recorded. The recording
happens on a best effort basis when a task is scheduled out. This means
@@ -2010,17 +2131,18 @@
the task is unlikely an AVX512 user, but depends on the workload and the
scheduling scenario, it also could be a false negative mentioned above.
-------------------------------------------------------------------------------
Configuring procfs
-------------------------------------------------------------------------------
+------------------
4.1 Mount options
---------------------
The following mount options are supported:
+ ========= ========================================================
hidepid= Set /proc/<pid>/ access mode.
gid= Set the group authorized to learn processes information.
+ ========= ========================================================
hidepid=0 means classic mode - everybody may access all /proc/<pid>/ directories
(default).
diff --git a/Documentation/filesystems/qnx6.txt b/Documentation/filesystems/qnx6.rst
similarity index 98%
rename from Documentation/filesystems/qnx6.txt
rename to Documentation/filesystems/qnx6.rst
index 48ea68f..b713083 100644
--- a/Documentation/filesystems/qnx6.txt
+++ b/Documentation/filesystems/qnx6.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================
The QNX6 Filesystem
===================
@@ -14,10 +17,12 @@
qnx6fs shares many properties with traditional Unix filesystems. It has the
concepts of blocks, inodes and directories.
+
On QNX it is possible to create little endian and big endian qnx6 filesystems.
This feature makes it possible to create and use a different endianness fs
for the target (QNX is used on quite a range of embedded systems) platform
running on a different endianness.
+
The Linux driver handles endianness transparently. (LE and BE)
Blocks
@@ -26,6 +31,7 @@
The space in the device or file is split up into blocks. These are a fixed
size of 512, 1024, 2048 or 4096, which is decided when the filesystem is
created.
+
Blockpointers are 32bit, so the maximum space that can be addressed is
2^32 * 4096 bytes or 16TB
@@ -50,6 +56,7 @@
data and the addressing levels in that specific tree.
If the level value is 0, up to 16 direct blocks can be addressed by each
node.
+
Level 1 adds an additional indirect addressing level where each indirect
addressing block holds up to blocksize / 4 bytes pointers to data blocks.
Level 2 adds an additional indirect addressing block level (so, already up
@@ -57,11 +64,13 @@
Unused block pointers are always set to ~0 - regardless of root node,
indirect addressing blocks or inodes.
+
Data leaves are always on the lowest level. So no data is stored on upper
tree levels.
The first Superblock is located at 0x2000. (0x2000 is the bootblock size)
The Audi MMI 3G first superblock directly starts at byte 0.
+
Second superblock position can either be calculated from the superblock
information (total number of filesystem blocks) or by taking the highest
device address, zeroing the last 3 bytes and then subtracting 0x1000 from
@@ -84,6 +93,7 @@
There are also pointers to the first 16 blocks, if the object data can be
addressed with 16 direct blocks.
+
For more than 16 blocks an indirect addressing in form of another tree is
used. (scheme is the same as the one used for the superblock root nodes)
@@ -96,13 +106,18 @@
A directory is a filesystem object and has an inode just like a file.
It is a specially formatted file containing records which associate each
name with an inode number.
+
'.' inode number points to the directory inode
+
'..' inode number points to the parent directory inode
+
Eeach filename record additionally got a filename length field.
One special case are long filenames or subdirectory names.
+
These got set a filename length field of 0xff in the corresponding directory
record plus the longfile inode number also stored in that record.
+
With that longfilename inode number, the longfilename tree can be walked
starting with the superblock longfilename root node pointers.
@@ -111,6 +126,7 @@
Symbolic links are also filesystem objects with inodes. They got a specific
bit in the inode mode field identifying them as symbolic link.
+
The directory entry file inode pointer points to the target file inode.
Hard links got an inode, a directory entry, but a specific mode bit set,
@@ -126,9 +142,11 @@
Long filenames are stored in a separate addressing tree. The staring point
is the longfilename root node in the active superblock.
+
Each data block (tree leaves) holds one long filename. That filename is
limited to 510 bytes. The first two starting bytes are used as length field
for the actual filename.
+
If that structure shall fit for all allowed blocksizes, it is clear why there
is a limit of 510 bytes for the actual filename stored.
@@ -138,6 +156,7 @@
The qnx6fs filesystem allocation bitmap is stored in a tree under bitmap
root node in the superblock and each bit in the bitmap represents one
filesystem block.
+
The first block is block 0, which starts 0x1000 after superblock start.
So for a normal qnx6fs 0x3000 (bootblock + superblock) is the physical
address at which block 0 is located.
@@ -149,11 +168,14 @@
------------------
The bitmap itself is divided into three parts.
+
First the system area, that is split into two halves.
+
Then userspace.
The requirement for a static, fixed preallocated system area comes from how
qnx6fs deals with writes.
+
Each superblock got it's own half of the system area. So superblock #1
always uses blocks from the lower half while superblock #2 just writes to
blocks represented by the upper half bitmap system area bits.
diff --git a/Documentation/filesystems/ramfs-rootfs-initramfs.txt b/Documentation/filesystems/ramfs-rootfs-initramfs.rst
similarity index 90%
rename from Documentation/filesystems/ramfs-rootfs-initramfs.txt
rename to Documentation/filesystems/ramfs-rootfs-initramfs.rst
index 97d42cc..6c576e2 100644
--- a/Documentation/filesystems/ramfs-rootfs-initramfs.txt
+++ b/Documentation/filesystems/ramfs-rootfs-initramfs.rst
@@ -1,5 +1,11 @@
-ramfs, rootfs and initramfs
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+Ramfs, rootfs and initramfs
+===========================
+
October 17, 2005
+
Rob Landley <rob@landley.net>
=============================
@@ -99,14 +105,14 @@
All this differs from the old initrd in several ways:
- The old initrd was always a separate file, while the initramfs archive is
- linked into the linux kernel image. (The directory linux-*/usr is devoted
- to generating this archive during the build.)
+ linked into the linux kernel image. (The directory ``linux-*/usr`` is
+ devoted to generating this archive during the build.)
- The old initrd file was a gzipped filesystem image (in some file format,
such as ext2, that needed a driver built into the kernel), while the new
initramfs archive is a gzipped cpio archive (like tar only simpler,
- see cpio(1) and Documentation/driver-api/early-userspace/buffer-format.rst). The
- kernel's cpio extraction code is not only extremely small, it's also
+ see cpio(1) and Documentation/driver-api/early-userspace/buffer-format.rst).
+ The kernel's cpio extraction code is not only extremely small, it's also
__init text and data that can be discarded during the boot process.
- The program run by the old initrd (which was called /initrd, not /init) did
@@ -139,7 +145,7 @@
initramfs archive, which will automatically be incorporated into the
resulting binary. This option can point to an existing gzipped cpio
archive, a directory containing files to be archived, or a text file
-specification such as the following example:
+specification such as the following example::
dir /dev 755 0 0
nod /dev/console 644 0 0 c 5 1
@@ -175,12 +181,12 @@
(instead of a config file or directory).
The following command line can extract a cpio image (either by the above script
-or by the kernel build) back into its component files:
+or by the kernel build) back into its component files::
cpio -i -d -H newc -F initramfs_data.cpio --no-absolute-filenames
The following shell script can create a prebuilt cpio archive you can
-use in place of the above config file:
+use in place of the above config file::
#!/bin/sh
@@ -202,14 +208,17 @@
exit 1
fi
-Note: The cpio man page contains some bad advice that will break your initramfs
-archive if you follow it. It says "A typical way to generate the list
-of filenames is with the find command; you should give find the -depth option
-to minimize problems with permissions on directories that are unwritable or not
-searchable." Don't do this when creating initramfs.cpio.gz images, it won't
-work. The Linux kernel cpio extractor won't create files in a directory that
-doesn't exist, so the directory entries must go before the files that go in
-those directories. The above script gets them in the right order.
+.. Note::
+
+ The cpio man page contains some bad advice that will break your initramfs
+ archive if you follow it. It says "A typical way to generate the list
+ of filenames is with the find command; you should give find the -depth
+ option to minimize problems with permissions on directories that are
+ unwritable or not searchable." Don't do this when creating
+ initramfs.cpio.gz images, it won't work. The Linux kernel cpio extractor
+ won't create files in a directory that doesn't exist, so the directory
+ entries must go before the files that go in those directories.
+ The above script gets them in the right order.
External initramfs images:
--------------------------
@@ -236,9 +245,10 @@
If you don't already understand what shared libraries, devices, and paths
you need to get a minimal root filesystem up and running, here are some
references:
-http://www.tldp.org/HOWTO/Bootdisk-HOWTO/
-http://www.tldp.org/HOWTO/From-PowerUp-To-Bash-Prompt-HOWTO.html
-http://www.linuxfromscratch.org/lfs/view/stable/
+
+- http://www.tldp.org/HOWTO/Bootdisk-HOWTO/
+- http://www.tldp.org/HOWTO/From-PowerUp-To-Bash-Prompt-HOWTO.html
+- http://www.linuxfromscratch.org/lfs/view/stable/
The "klibc" package (http://www.kernel.org/pub/linux/libs/klibc) is
designed to be a tiny C library to statically link early userspace
@@ -255,7 +265,7 @@
A good first step is to get initramfs to run a statically linked "hello world"
program as init, and test it under an emulator like qemu (www.qemu.org) or
-User Mode Linux, like so:
+User Mode Linux, like so::
cat > hello.c << EOF
#include <stdio.h>
@@ -326,8 +336,8 @@
explained his reasoning:
- http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1550.html
- http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1638.html
+ - http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1550.html
+ - http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1638.html
and, most importantly, designed and implemented the initramfs code.
diff --git a/Documentation/filesystems/relay.txt b/Documentation/filesystems/relay.rst
similarity index 90%
rename from Documentation/filesystems/relay.txt
rename to Documentation/filesystems/relay.rst
index cd709a9..04ad083 100644
--- a/Documentation/filesystems/relay.txt
+++ b/Documentation/filesystems/relay.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================================
relay interface (formerly relayfs)
==================================
@@ -108,6 +111,7 @@
access to relay channel buffer data. Here are the file operations
that are available and some comments regarding their behavior:
+=========== ============================================================
open() enables user to open an _existing_ channel buffer.
mmap() results in channel buffer being mapped into the caller's
@@ -136,13 +140,16 @@
close() decrements the channel buffer's refcount. When the refcount
reaches 0, i.e. when no process or kernel client has the
buffer open, the channel buffer is freed.
+=========== ============================================================
In order for a user application to make use of relay files, the
-host filesystem must be mounted. For example,
+host filesystem must be mounted. For example::
mount -t debugfs debugfs /sys/kernel/debug
-NOTE: the host filesystem doesn't need to be mounted for kernel
+.. Note::
+
+ the host filesystem doesn't need to be mounted for kernel
clients to create or use channels - it only needs to be
mounted when user space applications need access to the buffer
data.
@@ -154,7 +161,7 @@
Here's a summary of the API the relay interface provides to in-kernel clients:
TBD(curr. line MT:/API/)
- channel management functions:
+ channel management functions::
relay_open(base_filename, parent, subbuf_size, n_subbufs,
callbacks, private_data)
@@ -162,17 +169,17 @@
relay_flush(chan)
relay_reset(chan)
- channel management typically called on instigation of userspace:
+ channel management typically called on instigation of userspace::
relay_subbufs_consumed(chan, cpu, subbufs_consumed)
- write functions:
+ write functions::
relay_write(chan, data, length)
__relay_write(chan, data, length)
relay_reserve(chan, length)
- callbacks:
+ callbacks::
subbuf_start(buf, subbuf, prev_subbuf, prev_padding)
buf_mapped(buf, filp)
@@ -180,7 +187,7 @@
create_buf_file(filename, parent, mode, buf, is_global)
remove_buf_file(dentry)
- helper functions:
+ helper functions::
relay_buf_full(buf)
subbuf_start_reserve(buf, length)
@@ -215,41 +222,41 @@
relay_close().
Here are some typical definitions for these callbacks, in this case
-using debugfs:
+using debugfs::
-/*
- * create_buf_file() callback. Creates relay file in debugfs.
- */
-static struct dentry *create_buf_file_handler(const char *filename,
- struct dentry *parent,
- umode_t mode,
- struct rchan_buf *buf,
- int *is_global)
-{
- return debugfs_create_file(filename, mode, parent, buf,
- &relay_file_operations);
-}
+ /*
+ * create_buf_file() callback. Creates relay file in debugfs.
+ */
+ static struct dentry *create_buf_file_handler(const char *filename,
+ struct dentry *parent,
+ umode_t mode,
+ struct rchan_buf *buf,
+ int *is_global)
+ {
+ return debugfs_create_file(filename, mode, parent, buf,
+ &relay_file_operations);
+ }
-/*
- * remove_buf_file() callback. Removes relay file from debugfs.
- */
-static int remove_buf_file_handler(struct dentry *dentry)
-{
- debugfs_remove(dentry);
+ /*
+ * remove_buf_file() callback. Removes relay file from debugfs.
+ */
+ static int remove_buf_file_handler(struct dentry *dentry)
+ {
+ debugfs_remove(dentry);
- return 0;
-}
+ return 0;
+ }
-/*
- * relay interface callbacks
- */
-static struct rchan_callbacks relay_callbacks =
-{
- .create_buf_file = create_buf_file_handler,
- .remove_buf_file = remove_buf_file_handler,
-};
+ /*
+ * relay interface callbacks
+ */
+ static struct rchan_callbacks relay_callbacks =
+ {
+ .create_buf_file = create_buf_file_handler,
+ .remove_buf_file = remove_buf_file_handler,
+ };
-And an example relay_open() invocation using them:
+And an example relay_open() invocation using them::
chan = relay_open("cpu", NULL, SUBBUF_SIZE, N_SUBBUFS, &relay_callbacks, NULL);
@@ -339,23 +346,23 @@
To implement 'no-overwrite' mode, the userspace client would provide
an implementation of the subbuf_start() callback something like the
-following:
+following::
-static int subbuf_start(struct rchan_buf *buf,
- void *subbuf,
- void *prev_subbuf,
- unsigned int prev_padding)
-{
- if (prev_subbuf)
- *((unsigned *)prev_subbuf) = prev_padding;
+ static int subbuf_start(struct rchan_buf *buf,
+ void *subbuf,
+ void *prev_subbuf,
+ unsigned int prev_padding)
+ {
+ if (prev_subbuf)
+ *((unsigned *)prev_subbuf) = prev_padding;
- if (relay_buf_full(buf))
- return 0;
+ if (relay_buf_full(buf))
+ return 0;
- subbuf_start_reserve(buf, sizeof(unsigned int));
+ subbuf_start_reserve(buf, sizeof(unsigned int));
- return 1;
-}
+ return 1;
+ }
If the current buffer is full, i.e. all sub-buffers remain unconsumed,
the callback returns 0 to indicate that the buffer switch should not
@@ -370,20 +377,20 @@
buffer switch can continue.
The implementation of the subbuf_start() callback for 'overwrite' mode
-would be very similar:
+would be very similar::
-static int subbuf_start(struct rchan_buf *buf,
- void *subbuf,
- void *prev_subbuf,
- size_t prev_padding)
-{
- if (prev_subbuf)
- *((unsigned *)prev_subbuf) = prev_padding;
+ static int subbuf_start(struct rchan_buf *buf,
+ void *subbuf,
+ void *prev_subbuf,
+ size_t prev_padding)
+ {
+ if (prev_subbuf)
+ *((unsigned *)prev_subbuf) = prev_padding;
- subbuf_start_reserve(buf, sizeof(unsigned int));
+ subbuf_start_reserve(buf, sizeof(unsigned int));
- return 1;
-}
+ return 1;
+ }
In this case, the relay_buf_full() check is meaningless and the
callback always returns 1, causing the buffer switch to occur
diff --git a/Documentation/filesystems/romfs.txt b/Documentation/filesystems/romfs.rst
similarity index 86%
rename from Documentation/filesystems/romfs.txt
rename to Documentation/filesystems/romfs.rst
index e2b07cc..465b11e 100644
--- a/Documentation/filesystems/romfs.txt
+++ b/Documentation/filesystems/romfs.rst
@@ -1,4 +1,8 @@
-ROMFS - ROM FILE SYSTEM
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+ROMFS - ROM File System
+=======================
This is a quite dumb, read only filesystem, mainly for initial RAM
disks of installation disks. It has grown up by the need of having
@@ -51,9 +55,9 @@
bytes. This is quite rare however, since most file names are longer
than 3 bytes, and shorter than 15 bytes.
-The layout of the filesystem is the following:
+The layout of the filesystem is the following::
-offset content
+ offset content
+---+---+---+---+
0 | - | r | o | m | \
@@ -84,9 +88,9 @@
reliable, it does not require any tables, and it is very simple.
The following bytes are now part of the file system; each file header
-must begin on a 16 byte boundary.
+must begin on a 16 byte boundary::
-offset content
+ offset content
+---+---+---+---+
0 | next filehdr|X| The offset of the next file header
@@ -114,7 +118,9 @@
intended use. The mapping of the 8 possible values to file types is
the following:
+== =============== ============================================
mapping spec.info means
+== =============== ============================================
0 hard link link destination [file header]
1 directory first file's header
2 regular file unused, must be zero [MBZ]
@@ -123,6 +129,7 @@
5 char device - " -
6 socket unused, MBZ
7 fifo unused, MBZ
+== =============== ============================================
Note that hard links are specifically marked in this filesystem, but
they will behave as you can expect (i.e. share the inode number).
@@ -158,24 +165,24 @@
Pending issues:
- Permissions and owner information are pretty essential features of a
-Un*x like system, but romfs does not provide the full possibilities.
-I have never found this limiting, but others might.
+ Un*x like system, but romfs does not provide the full possibilities.
+ I have never found this limiting, but others might.
- The file system is read only, so it can be very small, but in case
-one would want to write _anything_ to a file system, he still needs
-a writable file system, thus negating the size advantages. Possible
-solutions: implement write access as a compile-time option, or a new,
-similarly small writable filesystem for RAM disks.
+ one would want to write _anything_ to a file system, he still needs
+ a writable file system, thus negating the size advantages. Possible
+ solutions: implement write access as a compile-time option, or a new,
+ similarly small writable filesystem for RAM disks.
- Since the files are only required to have alignment on a 16 byte
-boundary, it is currently possibly suboptimal to read or execute files
-from the filesystem. It might be resolved by reordering file data to
-have most of it (i.e. except the start and the end) laying at "natural"
-boundaries, thus it would be possible to directly map a big portion of
-the file contents to the mm subsystem.
+ boundary, it is currently possibly suboptimal to read or execute files
+ from the filesystem. It might be resolved by reordering file data to
+ have most of it (i.e. except the start and the end) laying at "natural"
+ boundaries, thus it would be possible to directly map a big portion of
+ the file contents to the mm subsystem.
- Compression might be an useful feature, but memory is quite a
-limiting factor in my eyes.
+ limiting factor in my eyes.
- Where it is used?
@@ -183,4 +190,5 @@
Have fun,
+
Janos Farkas <chexum@shadow.banki.hu>
diff --git a/Documentation/filesystems/squashfs.txt b/Documentation/filesystems/squashfs.rst
similarity index 90%
rename from Documentation/filesystems/squashfs.txt
rename to Documentation/filesystems/squashfs.rst
index e5274f8..df42106 100644
--- a/Documentation/filesystems/squashfs.txt
+++ b/Documentation/filesystems/squashfs.rst
@@ -1,7 +1,11 @@
-SQUASHFS 4.0 FILESYSTEM
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+Squashfs 4.0 Filesystem
=======================
Squashfs is a compressed read-only filesystem for Linux.
+
It uses zlib, lz4, lzo, or xz compression to compress files, inodes and
directories. Inodes in the system are very small and all blocks are packed to
minimise data overhead. Block sizes greater than 4K are supported up to a
@@ -15,31 +19,33 @@
Mailing list: squashfs-devel@lists.sourceforge.net
Web site: www.squashfs.org
-1. FILESYSTEM FEATURES
+1. Filesystem Features
----------------------
Squashfs filesystem features versus Cramfs:
+============================== ========= ==========
Squashfs Cramfs
-
-Max filesystem size: 2^64 256 MiB
-Max file size: ~ 2 TiB 16 MiB
-Max files: unlimited unlimited
-Max directories: unlimited unlimited
-Max entries per directory: unlimited unlimited
-Max block size: 1 MiB 4 KiB
-Metadata compression: yes no
-Directory indexes: yes no
-Sparse file support: yes no
-Tail-end packing (fragments): yes no
-Exportable (NFS etc.): yes no
-Hard link support: yes no
-"." and ".." in readdir: yes no
-Real inode numbers: yes no
-32-bit uids/gids: yes no
-File creation time: yes no
-Xattr support: yes no
-ACL support: no no
+============================== ========= ==========
+Max filesystem size 2^64 256 MiB
+Max file size ~ 2 TiB 16 MiB
+Max files unlimited unlimited
+Max directories unlimited unlimited
+Max entries per directory unlimited unlimited
+Max block size 1 MiB 4 KiB
+Metadata compression yes no
+Directory indexes yes no
+Sparse file support yes no
+Tail-end packing (fragments) yes no
+Exportable (NFS etc.) yes no
+Hard link support yes no
+"." and ".." in readdir yes no
+Real inode numbers yes no
+32-bit uids/gids yes no
+File creation time yes no
+Xattr support yes no
+ACL support no no
+============================== ========= ==========
Squashfs compresses data, inodes and directories. In addition, inode and
directory data are highly compacted, and packed on byte boundaries. Each
@@ -47,7 +53,7 @@
file type, i.e. regular file, directory, symbolic link, and block/char device
inodes have different sizes).
-2. USING SQUASHFS
+2. Using Squashfs
-----------------
As squashfs is a read-only filesystem, the mksquashfs program must be used to
@@ -58,11 +64,11 @@
The squashfs-tools development tree is now located on kernel.org
git://git.kernel.org/pub/scm/fs/squashfs/squashfs-tools.git
-3. SQUASHFS FILESYSTEM DESIGN
+3. Squashfs Filesystem Design
-----------------------------
A squashfs filesystem consists of a maximum of nine parts, packed together on a
-byte alignment:
+byte alignment::
---------------
| superblock |
@@ -229,15 +235,15 @@
is stored. This xattr id is mapped into the location of the xattr
list using a second xattr id lookup table.
-4. TODOS AND OUTSTANDING ISSUES
+4. TODOs and Outstanding Issues
-------------------------------
-4.1 Todo list
+4.1 TODO list
-------------
Implement ACL support.
-4.2 Squashfs internal cache
+4.2 Squashfs Internal Cache
---------------------------
Blocks in Squashfs are compressed. To avoid repeatedly decompressing
diff --git a/Documentation/filesystems/sysfs.rst b/Documentation/filesystems/sysfs.rst
new file mode 100644
index 0000000..290891c
--- /dev/null
+++ b/Documentation/filesystems/sysfs.rst
@@ -0,0 +1,418 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================================
+sysfs - _The_ filesystem for exporting kernel objects
+=====================================================
+
+Patrick Mochel <mochel@osdl.org>
+
+Mike Murphy <mamurph@cs.clemson.edu>
+
+:Revised: 16 August 2011
+:Original: 10 January 2003
+
+
+What it is:
+~~~~~~~~~~~
+
+sysfs is a ram-based filesystem initially based on ramfs. It provides
+a means to export kernel data structures, their attributes, and the
+linkages between them to userspace.
+
+sysfs is tied inherently to the kobject infrastructure. Please read
+Documentation/kobject.txt for more information concerning the kobject
+interface.
+
+
+Using sysfs
+~~~~~~~~~~~
+
+sysfs is always compiled in if CONFIG_SYSFS is defined. You can access
+it by doing::
+
+ mount -t sysfs sysfs /sys
+
+
+Directory Creation
+~~~~~~~~~~~~~~~~~~
+
+For every kobject that is registered with the system, a directory is
+created for it in sysfs. That directory is created as a subdirectory
+of the kobject's parent, expressing internal object hierarchies to
+userspace. Top-level directories in sysfs represent the common
+ancestors of object hierarchies; i.e. the subsystems the objects
+belong to.
+
+Sysfs internally stores a pointer to the kobject that implements a
+directory in the kernfs_node object associated with the directory. In
+the past this kobject pointer has been used by sysfs to do reference
+counting directly on the kobject whenever the file is opened or closed.
+With the current sysfs implementation the kobject reference count is
+only modified directly by the function sysfs_schedule_callback().
+
+
+Attributes
+~~~~~~~~~~
+
+Attributes can be exported for kobjects in the form of regular files in
+the filesystem. Sysfs forwards file I/O operations to methods defined
+for the attributes, providing a means to read and write kernel
+attributes.
+
+Attributes should be ASCII text files, preferably with only one value
+per file. It is noted that it may not be efficient to contain only one
+value per file, so it is socially acceptable to express an array of
+values of the same type.
+
+Mixing types, expressing multiple lines of data, and doing fancy
+formatting of data is heavily frowned upon. Doing these things may get
+you publicly humiliated and your code rewritten without notice.
+
+
+An attribute definition is simply::
+
+ struct attribute {
+ char * name;
+ struct module *owner;
+ umode_t mode;
+ };
+
+
+ int sysfs_create_file(struct kobject * kobj, const struct attribute * attr);
+ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr);
+
+
+A bare attribute contains no means to read or write the value of the
+attribute. Subsystems are encouraged to define their own attribute
+structure and wrapper functions for adding and removing attributes for
+a specific object type.
+
+For example, the driver model defines struct device_attribute like::
+
+ struct device_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count);
+ };
+
+ int device_create_file(struct device *, const struct device_attribute *);
+ void device_remove_file(struct device *, const struct device_attribute *);
+
+It also defines this helper for defining device attributes::
+
+ #define DEVICE_ATTR(_name, _mode, _show, _store) \
+ struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store)
+
+For example, declaring::
+
+ static DEVICE_ATTR(foo, S_IWUSR | S_IRUGO, show_foo, store_foo);
+
+is equivalent to doing::
+
+ static struct device_attribute dev_attr_foo = {
+ .attr = {
+ .name = "foo",
+ .mode = S_IWUSR | S_IRUGO,
+ },
+ .show = show_foo,
+ .store = store_foo,
+ };
+
+Note as stated in include/linux/kernel.h "OTHER_WRITABLE? Generally
+considered a bad idea." so trying to set a sysfs file writable for
+everyone will fail reverting to RO mode for "Others".
+
+For the common cases sysfs.h provides convenience macros to make
+defining attributes easier as well as making code more concise and
+readable. The above case could be shortened to:
+
+static struct device_attribute dev_attr_foo = __ATTR_RW(foo);
+
+the list of helpers available to define your wrapper function is:
+
+__ATTR_RO(name):
+ assumes default name_show and mode 0444
+__ATTR_WO(name):
+ assumes a name_store only and is restricted to mode
+ 0200 that is root write access only.
+__ATTR_RO_MODE(name, mode):
+ fore more restrictive RO access currently
+ only use case is the EFI System Resource Table
+ (see drivers/firmware/efi/esrt.c)
+__ATTR_RW(name):
+ assumes default name_show, name_store and setting
+ mode to 0644.
+__ATTR_NULL:
+ which sets the name to NULL and is used as end of list
+ indicator (see: kernel/workqueue.c)
+
+Subsystem-Specific Callbacks
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When a subsystem defines a new attribute type, it must implement a
+set of sysfs operations for forwarding read and write calls to the
+show and store methods of the attribute owners::
+
+ struct sysfs_ops {
+ ssize_t (*show)(struct kobject *, struct attribute *, char *);
+ ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t);
+ };
+
+[ Subsystems should have already defined a struct kobj_type as a
+descriptor for this type, which is where the sysfs_ops pointer is
+stored. See the kobject documentation for more information. ]
+
+When a file is read or written, sysfs calls the appropriate method
+for the type. The method then translates the generic struct kobject
+and struct attribute pointers to the appropriate pointer types, and
+calls the associated methods.
+
+
+To illustrate::
+
+ #define to_dev(obj) container_of(obj, struct device, kobj)
+ #define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr)
+
+ static ssize_t dev_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+ {
+ struct device_attribute *dev_attr = to_dev_attr(attr);
+ struct device *dev = to_dev(kobj);
+ ssize_t ret = -EIO;
+
+ if (dev_attr->show)
+ ret = dev_attr->show(dev, dev_attr, buf);
+ if (ret >= (ssize_t)PAGE_SIZE) {
+ printk("dev_attr_show: %pS returned bad count\n",
+ dev_attr->show);
+ }
+ return ret;
+ }
+
+
+
+Reading/Writing Attribute Data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To read or write attributes, show() or store() methods must be
+specified when declaring the attribute. The method types should be as
+simple as those defined for device attributes::
+
+ ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf);
+ ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count);
+
+IOW, they should take only an object, an attribute, and a buffer as parameters.
+
+
+sysfs allocates a buffer of size (PAGE_SIZE) and passes it to the
+method. Sysfs will call the method exactly once for each read or
+write. This forces the following behavior on the method
+implementations:
+
+- On read(2), the show() method should fill the entire buffer.
+ Recall that an attribute should only be exporting one value, or an
+ array of similar values, so this shouldn't be that expensive.
+
+ This allows userspace to do partial reads and forward seeks
+ arbitrarily over the entire file at will. If userspace seeks back to
+ zero or does a pread(2) with an offset of '0' the show() method will
+ be called again, rearmed, to fill the buffer.
+
+- On write(2), sysfs expects the entire buffer to be passed during the
+ first write. Sysfs then passes the entire buffer to the store() method.
+ A terminating null is added after the data on stores. This makes
+ functions like sysfs_streq() safe to use.
+
+ When writing sysfs files, userspace processes should first read the
+ entire file, modify the values it wishes to change, then write the
+ entire buffer back.
+
+ Attribute method implementations should operate on an identical
+ buffer when reading and writing values.
+
+Other notes:
+
+- Writing causes the show() method to be rearmed regardless of current
+ file position.
+
+- The buffer will always be PAGE_SIZE bytes in length. On i386, this
+ is 4096.
+
+- show() methods should return the number of bytes printed into the
+ buffer. This is the return value of scnprintf().
+
+- show() must not use snprintf() when formatting the value to be
+ returned to user space. If you can guarantee that an overflow
+ will never happen you can use sprintf() otherwise you must use
+ scnprintf().
+
+- store() should return the number of bytes used from the buffer. If the
+ entire buffer has been used, just return the count argument.
+
+- show() or store() can always return errors. If a bad value comes
+ through, be sure to return an error.
+
+- The object passed to the methods will be pinned in memory via sysfs
+ referencing counting its embedded object. However, the physical
+ entity (e.g. device) the object represents may not be present. Be
+ sure to have a way to check this, if necessary.
+
+
+A very simple (and naive) implementation of a device attribute is::
+
+ static ssize_t show_name(struct device *dev, struct device_attribute *attr,
+ char *buf)
+ {
+ return scnprintf(buf, PAGE_SIZE, "%s\n", dev->name);
+ }
+
+ static ssize_t store_name(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+ {
+ snprintf(dev->name, sizeof(dev->name), "%.*s",
+ (int)min(count, sizeof(dev->name) - 1), buf);
+ return count;
+ }
+
+ static DEVICE_ATTR(name, S_IRUGO, show_name, store_name);
+
+
+(Note that the real implementation doesn't allow userspace to set the
+name for a device.)
+
+
+Top Level Directory Layout
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The sysfs directory arrangement exposes the relationship of kernel
+data structures.
+
+The top level sysfs directory looks like::
+
+ block/
+ bus/
+ class/
+ dev/
+ devices/
+ firmware/
+ net/
+ fs/
+
+devices/ contains a filesystem representation of the device tree. It maps
+directly to the internal kernel device tree, which is a hierarchy of
+struct device.
+
+bus/ contains flat directory layout of the various bus types in the
+kernel. Each bus's directory contains two subdirectories::
+
+ devices/
+ drivers/
+
+devices/ contains symlinks for each device discovered in the system
+that point to the device's directory under root/.
+
+drivers/ contains a directory for each device driver that is loaded
+for devices on that particular bus (this assumes that drivers do not
+span multiple bus types).
+
+fs/ contains a directory for some filesystems. Currently each
+filesystem wanting to export attributes must create its own hierarchy
+below fs/ (see ./fuse.txt for an example).
+
+dev/ contains two directories char/ and block/. Inside these two
+directories there are symlinks named <major>:<minor>. These symlinks
+point to the sysfs directory for the given device. /sys/dev provides a
+quick way to lookup the sysfs interface for a device from the result of
+a stat(2) operation.
+
+More information can driver-model specific features can be found in
+Documentation/driver-api/driver-model/.
+
+
+TODO: Finish this section.
+
+
+Current Interfaces
+~~~~~~~~~~~~~~~~~~
+
+The following interface layers currently exist in sysfs:
+
+
+devices (include/linux/device.h)
+--------------------------------
+Structure::
+
+ struct device_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count);
+ };
+
+Declaring::
+
+ DEVICE_ATTR(_name, _mode, _show, _store);
+
+Creation/Removal::
+
+ int device_create_file(struct device *dev, const struct device_attribute * attr);
+ void device_remove_file(struct device *dev, const struct device_attribute * attr);
+
+
+bus drivers (include/linux/device.h)
+------------------------------------
+Structure::
+
+ struct bus_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct bus_type *, char * buf);
+ ssize_t (*store)(struct bus_type *, const char * buf, size_t count);
+ };
+
+Declaring::
+
+ static BUS_ATTR_RW(name);
+ static BUS_ATTR_RO(name);
+ static BUS_ATTR_WO(name);
+
+Creation/Removal::
+
+ int bus_create_file(struct bus_type *, struct bus_attribute *);
+ void bus_remove_file(struct bus_type *, struct bus_attribute *);
+
+
+device drivers (include/linux/device.h)
+---------------------------------------
+
+Structure::
+
+ struct driver_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct device_driver *, char * buf);
+ ssize_t (*store)(struct device_driver *, const char * buf,
+ size_t count);
+ };
+
+Declaring::
+
+ DRIVER_ATTR_RO(_name)
+ DRIVER_ATTR_RW(_name)
+
+Creation/Removal::
+
+ int driver_create_file(struct device_driver *, const struct driver_attribute *);
+ void driver_remove_file(struct device_driver *, const struct driver_attribute *);
+
+
+Documentation
+~~~~~~~~~~~~~
+
+The sysfs directory structure and the attributes in each directory define an
+ABI between the kernel and user space. As for any ABI, it is important that
+this ABI is stable and properly documented. All new sysfs attributes must be
+documented in Documentation/ABI. See also Documentation/ABI/README for more
+information.
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt
deleted file mode 100644
index ddf15b1..0000000
--- a/Documentation/filesystems/sysfs.txt
+++ /dev/null
@@ -1,408 +0,0 @@
-
-sysfs - _The_ filesystem for exporting kernel objects.
-
-Patrick Mochel <mochel@osdl.org>
-Mike Murphy <mamurph@cs.clemson.edu>
-
-Revised: 16 August 2011
-Original: 10 January 2003
-
-
-What it is:
-~~~~~~~~~~~
-
-sysfs is a ram-based filesystem initially based on ramfs. It provides
-a means to export kernel data structures, their attributes, and the
-linkages between them to userspace.
-
-sysfs is tied inherently to the kobject infrastructure. Please read
-Documentation/kobject.txt for more information concerning the kobject
-interface.
-
-
-Using sysfs
-~~~~~~~~~~~
-
-sysfs is always compiled in if CONFIG_SYSFS is defined. You can access
-it by doing:
-
- mount -t sysfs sysfs /sys
-
-
-Directory Creation
-~~~~~~~~~~~~~~~~~~
-
-For every kobject that is registered with the system, a directory is
-created for it in sysfs. That directory is created as a subdirectory
-of the kobject's parent, expressing internal object hierarchies to
-userspace. Top-level directories in sysfs represent the common
-ancestors of object hierarchies; i.e. the subsystems the objects
-belong to.
-
-Sysfs internally stores a pointer to the kobject that implements a
-directory in the kernfs_node object associated with the directory. In
-the past this kobject pointer has been used by sysfs to do reference
-counting directly on the kobject whenever the file is opened or closed.
-With the current sysfs implementation the kobject reference count is
-only modified directly by the function sysfs_schedule_callback().
-
-
-Attributes
-~~~~~~~~~~
-
-Attributes can be exported for kobjects in the form of regular files in
-the filesystem. Sysfs forwards file I/O operations to methods defined
-for the attributes, providing a means to read and write kernel
-attributes.
-
-Attributes should be ASCII text files, preferably with only one value
-per file. It is noted that it may not be efficient to contain only one
-value per file, so it is socially acceptable to express an array of
-values of the same type.
-
-Mixing types, expressing multiple lines of data, and doing fancy
-formatting of data is heavily frowned upon. Doing these things may get
-you publicly humiliated and your code rewritten without notice.
-
-
-An attribute definition is simply:
-
-struct attribute {
- char * name;
- struct module *owner;
- umode_t mode;
-};
-
-
-int sysfs_create_file(struct kobject * kobj, const struct attribute * attr);
-void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr);
-
-
-A bare attribute contains no means to read or write the value of the
-attribute. Subsystems are encouraged to define their own attribute
-structure and wrapper functions for adding and removing attributes for
-a specific object type.
-
-For example, the driver model defines struct device_attribute like:
-
-struct device_attribute {
- struct attribute attr;
- ssize_t (*show)(struct device *dev, struct device_attribute *attr,
- char *buf);
- ssize_t (*store)(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count);
-};
-
-int device_create_file(struct device *, const struct device_attribute *);
-void device_remove_file(struct device *, const struct device_attribute *);
-
-It also defines this helper for defining device attributes:
-
-#define DEVICE_ATTR(_name, _mode, _show, _store) \
-struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store)
-
-For example, declaring
-
-static DEVICE_ATTR(foo, S_IWUSR | S_IRUGO, show_foo, store_foo);
-
-is equivalent to doing:
-
-static struct device_attribute dev_attr_foo = {
- .attr = {
- .name = "foo",
- .mode = S_IWUSR | S_IRUGO,
- },
- .show = show_foo,
- .store = store_foo,
-};
-
-Note as stated in include/linux/kernel.h "OTHER_WRITABLE? Generally
-considered a bad idea." so trying to set a sysfs file writable for
-everyone will fail reverting to RO mode for "Others".
-
-For the common cases sysfs.h provides convenience macros to make
-defining attributes easier as well as making code more concise and
-readable. The above case could be shortened to:
-
-static struct device_attribute dev_attr_foo = __ATTR_RW(foo);
-
-the list of helpers available to define your wrapper function is:
-__ATTR_RO(name): assumes default name_show and mode 0444
-__ATTR_WO(name): assumes a name_store only and is restricted to mode
- 0200 that is root write access only.
-__ATTR_RO_MODE(name, mode): fore more restrictive RO access currently
- only use case is the EFI System Resource Table
- (see drivers/firmware/efi/esrt.c)
-__ATTR_RW(name): assumes default name_show, name_store and setting
- mode to 0644.
-__ATTR_NULL: which sets the name to NULL and is used as end of list
- indicator (see: kernel/workqueue.c)
-
-Subsystem-Specific Callbacks
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-When a subsystem defines a new attribute type, it must implement a
-set of sysfs operations for forwarding read and write calls to the
-show and store methods of the attribute owners.
-
-struct sysfs_ops {
- ssize_t (*show)(struct kobject *, struct attribute *, char *);
- ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t);
-};
-
-[ Subsystems should have already defined a struct kobj_type as a
-descriptor for this type, which is where the sysfs_ops pointer is
-stored. See the kobject documentation for more information. ]
-
-When a file is read or written, sysfs calls the appropriate method
-for the type. The method then translates the generic struct kobject
-and struct attribute pointers to the appropriate pointer types, and
-calls the associated methods.
-
-
-To illustrate:
-
-#define to_dev(obj) container_of(obj, struct device, kobj)
-#define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr)
-
-static ssize_t dev_attr_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct device_attribute *dev_attr = to_dev_attr(attr);
- struct device *dev = to_dev(kobj);
- ssize_t ret = -EIO;
-
- if (dev_attr->show)
- ret = dev_attr->show(dev, dev_attr, buf);
- if (ret >= (ssize_t)PAGE_SIZE) {
- printk("dev_attr_show: %pS returned bad count\n",
- dev_attr->show);
- }
- return ret;
-}
-
-
-
-Reading/Writing Attribute Data
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-To read or write attributes, show() or store() methods must be
-specified when declaring the attribute. The method types should be as
-simple as those defined for device attributes:
-
-ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf);
-ssize_t (*store)(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count);
-
-IOW, they should take only an object, an attribute, and a buffer as parameters.
-
-
-sysfs allocates a buffer of size (PAGE_SIZE) and passes it to the
-method. Sysfs will call the method exactly once for each read or
-write. This forces the following behavior on the method
-implementations:
-
-- On read(2), the show() method should fill the entire buffer.
- Recall that an attribute should only be exporting one value, or an
- array of similar values, so this shouldn't be that expensive.
-
- This allows userspace to do partial reads and forward seeks
- arbitrarily over the entire file at will. If userspace seeks back to
- zero or does a pread(2) with an offset of '0' the show() method will
- be called again, rearmed, to fill the buffer.
-
-- On write(2), sysfs expects the entire buffer to be passed during the
- first write. Sysfs then passes the entire buffer to the store() method.
- A terminating null is added after the data on stores. This makes
- functions like sysfs_streq() safe to use.
-
- When writing sysfs files, userspace processes should first read the
- entire file, modify the values it wishes to change, then write the
- entire buffer back.
-
- Attribute method implementations should operate on an identical
- buffer when reading and writing values.
-
-Other notes:
-
-- Writing causes the show() method to be rearmed regardless of current
- file position.
-
-- The buffer will always be PAGE_SIZE bytes in length. On i386, this
- is 4096.
-
-- show() methods should return the number of bytes printed into the
- buffer. This is the return value of scnprintf().
-
-- show() must not use snprintf() when formatting the value to be
- returned to user space. If you can guarantee that an overflow
- will never happen you can use sprintf() otherwise you must use
- scnprintf().
-
-- store() should return the number of bytes used from the buffer. If the
- entire buffer has been used, just return the count argument.
-
-- show() or store() can always return errors. If a bad value comes
- through, be sure to return an error.
-
-- The object passed to the methods will be pinned in memory via sysfs
- referencing counting its embedded object. However, the physical
- entity (e.g. device) the object represents may not be present. Be
- sure to have a way to check this, if necessary.
-
-
-A very simple (and naive) implementation of a device attribute is:
-
-static ssize_t show_name(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- return scnprintf(buf, PAGE_SIZE, "%s\n", dev->name);
-}
-
-static ssize_t store_name(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- snprintf(dev->name, sizeof(dev->name), "%.*s",
- (int)min(count, sizeof(dev->name) - 1), buf);
- return count;
-}
-
-static DEVICE_ATTR(name, S_IRUGO, show_name, store_name);
-
-
-(Note that the real implementation doesn't allow userspace to set the
-name for a device.)
-
-
-Top Level Directory Layout
-~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The sysfs directory arrangement exposes the relationship of kernel
-data structures.
-
-The top level sysfs directory looks like:
-
-block/
-bus/
-class/
-dev/
-devices/
-firmware/
-net/
-fs/
-
-devices/ contains a filesystem representation of the device tree. It maps
-directly to the internal kernel device tree, which is a hierarchy of
-struct device.
-
-bus/ contains flat directory layout of the various bus types in the
-kernel. Each bus's directory contains two subdirectories:
-
- devices/
- drivers/
-
-devices/ contains symlinks for each device discovered in the system
-that point to the device's directory under root/.
-
-drivers/ contains a directory for each device driver that is loaded
-for devices on that particular bus (this assumes that drivers do not
-span multiple bus types).
-
-fs/ contains a directory for some filesystems. Currently each
-filesystem wanting to export attributes must create its own hierarchy
-below fs/ (see ./fuse.txt for an example).
-
-dev/ contains two directories char/ and block/. Inside these two
-directories there are symlinks named <major>:<minor>. These symlinks
-point to the sysfs directory for the given device. /sys/dev provides a
-quick way to lookup the sysfs interface for a device from the result of
-a stat(2) operation.
-
-More information can driver-model specific features can be found in
-Documentation/driver-api/driver-model/.
-
-
-TODO: Finish this section.
-
-
-Current Interfaces
-~~~~~~~~~~~~~~~~~~
-
-The following interface layers currently exist in sysfs:
-
-
-- devices (include/linux/device.h)
-----------------------------------
-Structure:
-
-struct device_attribute {
- struct attribute attr;
- ssize_t (*show)(struct device *dev, struct device_attribute *attr,
- char *buf);
- ssize_t (*store)(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count);
-};
-
-Declaring:
-
-DEVICE_ATTR(_name, _mode, _show, _store);
-
-Creation/Removal:
-
-int device_create_file(struct device *dev, const struct device_attribute * attr);
-void device_remove_file(struct device *dev, const struct device_attribute * attr);
-
-
-- bus drivers (include/linux/device.h)
---------------------------------------
-Structure:
-
-struct bus_attribute {
- struct attribute attr;
- ssize_t (*show)(struct bus_type *, char * buf);
- ssize_t (*store)(struct bus_type *, const char * buf, size_t count);
-};
-
-Declaring:
-
-static BUS_ATTR_RW(name);
-static BUS_ATTR_RO(name);
-static BUS_ATTR_WO(name);
-
-Creation/Removal:
-
-int bus_create_file(struct bus_type *, struct bus_attribute *);
-void bus_remove_file(struct bus_type *, struct bus_attribute *);
-
-
-- device drivers (include/linux/device.h)
------------------------------------------
-
-Structure:
-
-struct driver_attribute {
- struct attribute attr;
- ssize_t (*show)(struct device_driver *, char * buf);
- ssize_t (*store)(struct device_driver *, const char * buf,
- size_t count);
-};
-
-Declaring:
-
-DRIVER_ATTR_RO(_name)
-DRIVER_ATTR_RW(_name)
-
-Creation/Removal:
-
-int driver_create_file(struct device_driver *, const struct driver_attribute *);
-void driver_remove_file(struct device_driver *, const struct driver_attribute *);
-
-
-Documentation
-~~~~~~~~~~~~~
-
-The sysfs directory structure and the attributes in each directory define an
-ABI between the kernel and user space. As for any ABI, it is important that
-this ABI is stable and properly documented. All new sysfs attributes must be
-documented in Documentation/ABI. See also Documentation/ABI/README for more
-information.
diff --git a/Documentation/filesystems/sysv-fs.txt b/Documentation/filesystems/sysv-fs.rst
similarity index 73%
rename from Documentation/filesystems/sysv-fs.txt
rename to Documentation/filesystems/sysv-fs.rst
index 253b50d..89e4091 100644
--- a/Documentation/filesystems/sysv-fs.txt
+++ b/Documentation/filesystems/sysv-fs.rst
@@ -1,25 +1,40 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
+SystemV Filesystem
+==================
+
It implements all of
- Xenix FS,
- SystemV/386 FS,
- Coherent FS.
To install:
+
* Answer the 'System V and Coherent filesystem support' question with 'y'
when configuring the kernel.
-* To mount a disk or a partition, use
+* To mount a disk or a partition, use::
+
mount [-r] -t sysv device mountpoint
- The file system type names
+
+ The file system type names::
+
-t sysv
-t xenix
-t coherent
+
may be used interchangeably, but the last two will eventually disappear.
Bugs in the present implementation:
+
- Coherent FS:
+
- The "free list interleave" n:m is currently ignored.
- Only file systems with no filesystem name and no pack name are recognized.
- (See Coherent "man mkfs" for a description of these features.)
+ (See Coherent "man mkfs" for a description of these features.)
+
- SystemV Release 2 FS:
+
The superblock is only searched in the blocks 9, 15, 18, which
corresponds to the beginning of track 1 on floppy disks. No support
for this FS on hard disk yet.
@@ -28,12 +43,14 @@
These filesystems are rather similar. Here is a comparison with Minix FS:
* Linux fdisk reports on partitions
+
- Minix FS 0x81 Linux/Minix
- Xenix FS ??
- SystemV FS ??
- Coherent FS 0x08 AIX bootable
* Size of a block or zone (data allocation unit on disk)
+
- Minix FS 1024
- Xenix FS 1024 (also 512 ??)
- SystemV FS 1024 (also 512 and 2048)
@@ -45,37 +62,51 @@
all the block numbers (including the super block) are offset by one track.
* Byte ordering of "short" (16 bit entities) on disk:
+
- Minix FS little endian 0 1
- Xenix FS little endian 0 1
- SystemV FS little endian 0 1
- Coherent FS little endian 0 1
+
Of course, this affects only the file system, not the data of files on it!
* Byte ordering of "long" (32 bit entities) on disk:
+
- Minix FS little endian 0 1 2 3
- Xenix FS little endian 0 1 2 3
- SystemV FS little endian 0 1 2 3
- Coherent FS PDP-11 2 3 0 1
+
Of course, this affects only the file system, not the data of files on it!
* Inode on disk: "short", 0 means non-existent, the root dir ino is:
- - Minix FS 1
- - Xenix FS, SystemV FS, Coherent FS 2
+
+ ================================= ==
+ Minix FS 1
+ Xenix FS, SystemV FS, Coherent FS 2
+ ================================= ==
* Maximum number of hard links to a file:
- - Minix FS 250
- - Xenix FS ??
- - SystemV FS ??
- - Coherent FS >=10000
+
+ =========== =========
+ Minix FS 250
+ Xenix FS ??
+ SystemV FS ??
+ Coherent FS >=10000
+ =========== =========
* Free inode management:
- - Minix FS a bitmap
+
+ - Minix FS
+ a bitmap
- Xenix FS, SystemV FS, Coherent FS
There is a cache of a certain number of free inodes in the super-block.
When it is exhausted, new free inodes are found using a linear search.
* Free block management:
- - Minix FS a bitmap
+
+ - Minix FS
+ a bitmap
- Xenix FS, SystemV FS, Coherent FS
Free blocks are organized in a "free list". Maybe a misleading term,
since it is not true that every free block contains a pointer to
@@ -86,13 +117,18 @@
0 on Xenix FS and SystemV FS, with a block zeroed out on Coherent FS.
* Super-block location:
- - Minix FS block 1 = bytes 1024..2047
- - Xenix FS block 1 = bytes 1024..2047
- - SystemV FS bytes 512..1023
- - Coherent FS block 1 = bytes 512..1023
+
+ =========== ==========================
+ Minix FS block 1 = bytes 1024..2047
+ Xenix FS block 1 = bytes 1024..2047
+ SystemV FS bytes 512..1023
+ Coherent FS block 1 = bytes 512..1023
+ =========== ==========================
* Super-block layout:
- - Minix FS
+
+ - Minix FS::
+
unsigned short s_ninodes;
unsigned short s_nzones;
unsigned short s_imap_blocks;
@@ -101,7 +137,9 @@
unsigned short s_log_zone_size;
unsigned long s_max_size;
unsigned short s_magic;
- - Xenix FS, SystemV FS, Coherent FS
+
+ - Xenix FS, SystemV FS, Coherent FS::
+
unsigned short s_firstdatazone;
unsigned long s_nzones;
unsigned short s_fzone_count;
@@ -120,23 +158,33 @@
unsigned short s_interleave_m,s_interleave_n; -- Coherent FS only
char s_fname[6];
char s_fpack[6];
+
then they differ considerably:
- Xenix FS
+
+ Xenix FS::
+
char s_clean;
char s_fill[371];
long s_magic;
long s_type;
- SystemV FS
+
+ SystemV FS::
+
long s_fill[12 or 14];
long s_state;
long s_magic;
long s_type;
- Coherent FS
+
+ Coherent FS::
+
unsigned long s_unique;
+
Note that Coherent FS has no magic.
* Inode layout:
- - Minix FS
+
+ - Minix FS::
+
unsigned short i_mode;
unsigned short i_uid;
unsigned long i_size;
@@ -144,7 +192,9 @@
unsigned char i_gid;
unsigned char i_nlinks;
unsigned short i_zone[7+1+1];
- - Xenix FS, SystemV FS, Coherent FS
+
+ - Xenix FS, SystemV FS, Coherent FS::
+
unsigned short i_mode;
unsigned short i_nlink;
unsigned short i_uid;
@@ -155,38 +205,55 @@
unsigned long i_mtime;
unsigned long i_ctime;
-* Regular file data blocks are organized as
- - Minix FS
- 7 direct blocks
- 1 indirect block (pointers to blocks)
- 1 double-indirect block (pointer to pointers to blocks)
- - Xenix FS, SystemV FS, Coherent FS
- 10 direct blocks
- 1 indirect block (pointers to blocks)
- 1 double-indirect block (pointer to pointers to blocks)
- 1 triple-indirect block (pointer to pointers to pointers to blocks)
-* Inode size, inodes per block
- - Minix FS 32 32
- - Xenix FS 64 16
- - SystemV FS 64 16
- - Coherent FS 64 8
+* Regular file data blocks are organized as
+
+ - Minix FS:
+
+ - 7 direct blocks
+ - 1 indirect block (pointers to blocks)
+ - 1 double-indirect block (pointer to pointers to blocks)
+
+ - Xenix FS, SystemV FS, Coherent FS:
+
+ - 10 direct blocks
+ - 1 indirect block (pointers to blocks)
+ - 1 double-indirect block (pointer to pointers to blocks)
+ - 1 triple-indirect block (pointer to pointers to pointers to blocks)
+
+
+ =========== ========== ================
+ Inode size inodes per block
+ =========== ========== ================
+ Minix FS 32 32
+ Xenix FS 64 16
+ SystemV FS 64 16
+ Coherent FS 64 8
+ =========== ========== ================
* Directory entry on disk
- - Minix FS
+
+ - Minix FS::
+
unsigned short inode;
char name[14/30];
- - Xenix FS, SystemV FS, Coherent FS
+
+ - Xenix FS, SystemV FS, Coherent FS::
+
unsigned short inode;
char name[14];
-* Dir entry size, dir entries per block
- - Minix FS 16/32 64/32
- - Xenix FS 16 64
- - SystemV FS 16 64
- - Coherent FS 16 32
+ =========== ============== =====================
+ Dir entry size dir entries per block
+ =========== ============== =====================
+ Minix FS 16/32 64/32
+ Xenix FS 16 64
+ SystemV FS 16 64
+ Coherent FS 16 32
+ =========== ============== =====================
* How to implement symbolic links such that the host fsck doesn't scream:
+
- Minix FS normal
- Xenix FS kludge: as regular files with chmod 1000
- SystemV FS ??
diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.rst
similarity index 85%
rename from Documentation/filesystems/tmpfs.txt
rename to Documentation/filesystems/tmpfs.rst
index 5ecbc03..4e95929 100644
--- a/Documentation/filesystems/tmpfs.txt
+++ b/Documentation/filesystems/tmpfs.rst
@@ -1,3 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====
+Tmpfs
+=====
+
Tmpfs is a file system which keeps all files in virtual memory.
@@ -14,7 +20,7 @@
you gain swapping and limit checking. Another similar thing is the RAM
disk (/dev/ram*), which simulates a fixed size hard disk in physical
RAM, where you have to create an ordinary filesystem on top. Ramdisks
-cannot swap and you do not have the possibility to resize them.
+cannot swap and you do not have the possibility to resize them.
Since tmpfs lives completely in the page cache and on swap, all tmpfs
pages will be shown as "Shmem" in /proc/meminfo and "Shared" in
@@ -26,7 +32,7 @@
1) There is always a kernel internal mount which you will not see at
all. This is used for shared anonymous mappings and SYSV shared
- memory.
+ memory.
This mount does not depend on CONFIG_TMPFS. If CONFIG_TMPFS is not
set, the user visible part of tmpfs is not build. But the internal
@@ -34,7 +40,7 @@
2) glibc 2.2 and above expects tmpfs to be mounted at /dev/shm for
POSIX shared memory (shm_open, shm_unlink). Adding the following
- line to /etc/fstab should take care of this:
+ line to /etc/fstab should take care of this::
tmpfs /dev/shm tmpfs defaults 0 0
@@ -56,15 +62,17 @@
tmpfs has three mount options for sizing:
-size: The limit of allocated bytes for this tmpfs instance. The
+========= ============================================================
+size The limit of allocated bytes for this tmpfs instance. The
default is half of your physical RAM without swap. If you
oversize your tmpfs instances the machine will deadlock
since the OOM handler will not be able to free that memory.
-nr_blocks: The same as size, but in blocks of PAGE_SIZE.
-nr_inodes: The maximum number of inodes for this instance. The default
+nr_blocks The same as size, but in blocks of PAGE_SIZE.
+nr_inodes The maximum number of inodes for this instance. The default
is half of the number of your physical RAM pages, or (on a
machine with highmem) the number of lowmem RAM pages,
whichever is the lower.
+========= ============================================================
These parameters accept a suffix k, m or g for kilo, mega and giga and
can be changed on remount. The size parameter also accepts a suffix %
@@ -82,6 +90,7 @@
all files in that instance (if CONFIG_NUMA is enabled) - which can be
adjusted on the fly via 'mount -o remount ...'
+======================== ==============================================
mpol=default use the process allocation policy
(see set_mempolicy(2))
mpol=prefer:Node prefers to allocate memory from the given Node
@@ -89,6 +98,7 @@
mpol=interleave prefers to allocate from each node in turn
mpol=interleave:NodeList allocates from each node of NodeList in turn
mpol=local prefers to allocate memory from the local node
+======================== ==============================================
NodeList format is a comma-separated list of decimal numbers and ranges,
a range being two hyphen-separated decimal numbers, the smallest and
@@ -98,9 +108,9 @@
use at file creation time. When a task allocates a file in the file
system, the mount option memory policy will be applied with a NodeList,
if any, modified by the calling task's cpuset constraints
-[See Documentation/admin-guide/cgroup-v1/cpusets.rst] and any optional flags, listed
-below. If the resulting NodeLists is the empty set, the effective memory
-policy for the file will revert to "default" policy.
+[See Documentation/admin-guide/cgroup-v1/cpusets.rst] and any optional flags,
+listed below. If the resulting NodeLists is the empty set, the effective
+memory policy for the file will revert to "default" policy.
NUMA memory allocation policies have optional flags that can be used in
conjunction with their modes. These optional flags can be specified
@@ -109,6 +119,8 @@
all available memory allocation policy mode flags and their effect on
memory policy.
+::
+
=static is equivalent to MPOL_F_STATIC_NODES
=relative is equivalent to MPOL_F_RELATIVE_NODES
@@ -128,9 +140,11 @@
To specify the initial root directory you can use the following mount
options:
-mode: The permissions as an octal number
-uid: The user id
-gid: The group id
+==== ==================================
+mode The permissions as an octal number
+uid The user id
+gid The group id
+==== ==================================
These options do not have any effect on remount. You can change these
parameters with chmod(1), chown(1) and chgrp(1) on a mounted filesystem.
@@ -141,9 +155,9 @@
RAM/SWAP in 10240 inodes and it is only accessible by root.
-Author:
+:Author:
Christoph Rohland <cr@sap.com>, 1.12.01
-Updated:
+:Updated:
Hugh Dickins, 4 June 2007
-Updated:
+:Updated:
KOSAKI Motohiro, 16 Mar 2010
diff --git a/Documentation/filesystems/ubifs-authentication.rst b/Documentation/filesystems/ubifs-authentication.rst
index 6a9584f..16efd72 100644
--- a/Documentation/filesystems/ubifs-authentication.rst
+++ b/Documentation/filesystems/ubifs-authentication.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
:orphan:
.. UBIFS Authentication
@@ -92,11 +94,11 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Basic on-flash UBIFS entities are called *nodes*. UBIFS knows different types
-of nodes. Eg. data nodes (`struct ubifs_data_node`) which store chunks of file
-contents or inode nodes (`struct ubifs_ino_node`) which represent VFS inodes.
-Almost all types of nodes share a common header (`ubifs_ch`) containing basic
+of nodes. Eg. data nodes (``struct ubifs_data_node``) which store chunks of file
+contents or inode nodes (``struct ubifs_ino_node``) which represent VFS inodes.
+Almost all types of nodes share a common header (``ubifs_ch``) containing basic
information like node type, node length, a sequence number, etc. (see
-`fs/ubifs/ubifs-media.h`in kernel source). Exceptions are entries of the LPT
+``fs/ubifs/ubifs-media.h`` in kernel source). Exceptions are entries of the LPT
and some less important node types like padding nodes which are used to pad
unusable content at the end of LEBs.
diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.rst
similarity index 91%
rename from Documentation/filesystems/ubifs.txt
rename to Documentation/filesystems/ubifs.rst
index acc8044..e6ee997 100644
--- a/Documentation/filesystems/ubifs.txt
+++ b/Documentation/filesystems/ubifs.rst
@@ -1,5 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+UBI File System
+===============
+
Introduction
-=============
+============
UBIFS file-system stands for UBI File System. UBI stands for "Unsorted
Block Images". UBIFS is a flash file system, which means it is designed
@@ -79,6 +85,7 @@
(*) == default.
+==================== =======================================================
bulk_read read more in one go to take advantage of flash
media that read faster sequentially
no_bulk_read (*) do not bulk-read
@@ -98,6 +105,7 @@
auth_hash_name= The hash algorithm used for authentication. Used for
both hashing and for creating HMACs. Typical values
include "sha256" or "sha512"
+==================== =======================================================
Quick usage instructions
@@ -107,12 +115,14 @@
where "X" is UBI device number, "Y" is UBI volume number, and "NAME" is
UBI volume name.
-Mount volume 0 on UBI device 0 to /mnt/ubifs:
-$ mount -t ubifs ubi0_0 /mnt/ubifs
+Mount volume 0 on UBI device 0 to /mnt/ubifs::
+
+ $ mount -t ubifs ubi0_0 /mnt/ubifs
Mount "rootfs" volume of UBI device 0 to /mnt/ubifs ("rootfs" is volume
-name):
-$ mount -t ubifs ubi0:rootfs /mnt/ubifs
+name)::
+
+ $ mount -t ubifs ubi0:rootfs /mnt/ubifs
The following is an example of the kernel boot arguments to attach mtd0
to UBI and mount volume "rootfs":
@@ -122,5 +132,6 @@
==========
UBIFS documentation and FAQ/HOWTO at the MTD web site:
-http://www.linux-mtd.infradead.org/doc/ubifs.html
-http://www.linux-mtd.infradead.org/faq/ubifs.html
+
+- http://www.linux-mtd.infradead.org/doc/ubifs.html
+- http://www.linux-mtd.infradead.org/faq/ubifs.html
diff --git a/Documentation/filesystems/udf.txt b/Documentation/filesystems/udf.rst
similarity index 83%
rename from Documentation/filesystems/udf.txt
rename to Documentation/filesystems/udf.rst
index e2f2faf..d9badbf 100644
--- a/Documentation/filesystems/udf.txt
+++ b/Documentation/filesystems/udf.rst
@@ -1,6 +1,8 @@
-*
-* Documentation/filesystems/udf.txt
-*
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+UDF file system
+===============
If you encounter problems with reading UDF discs using this driver,
please report them according to MAINTAINERS file.
@@ -18,8 +20,10 @@
by drive firmware.
-------------------------------------------------------------------------------
+
The following mount options are supported:
+ =========== ======================================
gid= Set the default group.
umask= Set the default umask.
mode= Set the default file permissions.
@@ -34,6 +38,7 @@
longad Use long ad's (default)
nostrict Unset strict conformance
iocharset= Set the NLS character set
+ =========== ======================================
The uid= and gid= options need a bit more explaining. They will accept a
decimal numeric value and all inodes on that mount will then appear as
@@ -47,13 +52,17 @@
The remaining are for debugging and disaster recovery:
- novrs Skip volume sequence recognition
+ ===== ================================
+ novrs Skip volume sequence recognition
+ ===== ================================
The following expect a offset from 0.
+ ========== =================================================
session= Set the CDROM session (default= last session)
anchor= Override standard anchor location. (default= 256)
lastblock= Set the last block of the filesystem/
+ ========== =================================================
-------------------------------------------------------------------------------
@@ -62,5 +71,5 @@
https://github.com/pali/udftools
Documentation on UDF and ECMA 167 is available FREE from:
- http://www.osta.org/
- http://www.ecma-international.org/
+ - http://www.osta.org/
+ - http://www.ecma-international.org/
diff --git a/Documentation/filesystems/virtiofs.rst b/Documentation/filesystems/virtiofs.rst
index 4f338e3..e06e495 100644
--- a/Documentation/filesystems/virtiofs.rst
+++ b/Documentation/filesystems/virtiofs.rst
@@ -1,5 +1,7 @@
.. SPDX-License-Identifier: GPL-2.0
+.. _virtiofs_index:
+
===================================================
virtiofs: virtio-fs host<->guest shared file system
===================================================
diff --git a/Documentation/filesystems/zonefs.txt b/Documentation/filesystems/zonefs.rst
similarity index 83%
rename from Documentation/filesystems/zonefs.txt
rename to Documentation/filesystems/zonefs.rst
index 935bf22..71d845c 100644
--- a/Documentation/filesystems/zonefs.txt
+++ b/Documentation/filesystems/zonefs.rst
@@ -1,4 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================================
ZoneFS - Zone filesystem for Zoned block devices
+================================================
Introduction
============
@@ -29,6 +33,7 @@
Zoned storage devices belong to a class of storage devices with an address
space that is divided into zones. A zone is a group of consecutive LBAs and all
zones are contiguous (there are no LBA gaps). Zones may have different types.
+
* Conventional zones: there are no access constraints to LBAs belonging to
conventional zones. Any read or write access can be executed, similarly to a
regular block device.
@@ -134,7 +139,7 @@
end, that is, write operations can only be append writes. Zonefs makes no
attempt at accepting random writes and will fail any write request that has a
start offset not corresponding to the end of the file, or to the end of the last
-write issued and still in-flight (for asynchrnous I/O operations).
+write issued and still in-flight (for asynchronous I/O operations).
Since dirty page writeback by the page cache does not guarantee a sequential
write pattern, zonefs prevents buffered writes and writeable shared mappings
@@ -142,7 +147,7 @@
zonefs relies on the sequential delivery of write I/O requests to the device
implemented by the block layer elevator. An elevator implementing the sequential
write feature for zoned block device (ELEVATOR_F_ZBD_SEQ_WRITE elevator feature)
-must be used. This type of elevator (e.g. mq-deadline) is the set by default
+must be used. This type of elevator (e.g. mq-deadline) is set by default
for zoned block devices on device initialization.
There are no restrictions on the type of I/O used for read operations in
@@ -158,6 +163,7 @@
--------------
Several optional features of zonefs can be enabled at format time.
+
* Conventional zone aggregation: ranges of contiguous conventional zones can be
aggregated into a single larger file instead of the default one file per zone.
* File ownership: The owner UID and GID of zone files is by default 0 (root)
@@ -196,7 +202,7 @@
may still happen in the case of a partial failure of a very large direct I/O
operation split into multiple BIOs/requests or asynchronous I/O operations.
If one of the write request within the set of sequential write requests
- issued to the device fails, all write requests after queued after it will
+ issued to the device fails, all write requests queued after it will
become unaligned and fail.
* Delayed write errors: similarly to regular block devices, if the device side
@@ -207,7 +213,7 @@
causing all data to be dropped after the sector that caused the error.
All I/O errors detected by zonefs are notified to the user with an error code
-return for the system call that trigered or detected the error. The recovery
+return for the system call that triggered or detected the error. The recovery
actions taken by zonefs in response to I/O errors depend on the I/O type (read
vs write) and on the reason for the error (bad sector, unaligned writes or zone
condition change).
@@ -222,7 +228,7 @@
* A zone condition change to read-only or offline also always triggers zonefs
I/O error recovery.
-Zonefs minimal I/O error recovery may change a file size and a file access
+Zonefs minimal I/O error recovery may change a file size and file access
permissions.
* File size changes:
@@ -237,7 +243,7 @@
A file size may also be reduced to reflect a delayed write error detected on
fsync(): in this case, the amount of data effectively written in the zone may
be less than originally indicated by the file inode size. After such I/O
- error, zonefs always fixes a file inode size to reflect the amount of data
+ error, zonefs always fixes the file inode size to reflect the amount of data
persistently stored in the file zone.
* Access permission changes:
@@ -249,7 +255,7 @@
Further action taken by zonefs I/O error recovery can be controlled by the user
with the "errors=xxx" mount option. The table below summarizes the result of
zonefs I/O error processing depending on the mount option and on the zone
-conditions.
+conditions::
+--------------+-----------+-----------------------------------------+
| | | Post error state |
@@ -258,11 +264,11 @@
| option | condition | size read write read write |
+--------------+-----------+-----------------------------------------+
| | good | fixed yes no yes yes |
- | remount-ro | read-only | fixed yes no yes no |
+ | remount-ro | read-only | as is yes no yes no |
| (default) | offline | 0 no no no no |
+--------------+-----------+-----------------------------------------+
| | good | fixed yes no yes yes |
- | zone-ro | read-only | fixed yes no yes no |
+ | zone-ro | read-only | as is yes no yes no |
| | offline | 0 no no no no |
+--------------+-----------+-----------------------------------------+
| | good | 0 no no yes yes |
@@ -270,22 +276,23 @@
| | offline | 0 no no no no |
+--------------+-----------+-----------------------------------------+
| | good | fixed yes yes yes yes |
- | repair | read-only | fixed yes no yes no |
+ | repair | read-only | as is yes no yes no |
| | offline | 0 no no no no |
+--------------+-----------+-----------------------------------------+
Further notes:
+
* The "errors=remount-ro" mount option is the default behavior of zonefs I/O
error processing if no errors mount option is specified.
* With the "errors=remount-ro" mount option, the change of the file access
permissions to read-only applies to all files. The file system is remounted
read-only.
* Access permission and file size changes due to the device transitioning zones
- to the offline condition are permanent. Remounting or reformating the device
+ to the offline condition are permanent. Remounting or reformatting the device
with mkfs.zonefs (mkzonefs) will not change back offline zone files to a good
state.
* File access permission changes to read-only due to the device transitioning
- zones to the read-only condition are permanent. Remounting or reformating
+ zones to the read-only condition are permanent. Remounting or reformatting
the device will not re-enable file write access.
* File access permission changes implied by the remount-ro, zone-ro and
zone-offline mount options are temporary for zones in a good condition.
@@ -301,14 +308,23 @@
zonefs define the "errors=<behavior>" mount option to allow the user to specify
zonefs behavior in response to I/O errors, inode size inconsistencies or zone
-condition chages. The defined behaviors are as follow:
+condition changes. The defined behaviors are as follow:
+
* remount-ro (default)
* zone-ro
* zone-offline
* repair
-The I/O error actions defined for each behavior is detailed in the previous
-section.
+The run-time I/O error actions defined for each behavior are detailed in the
+previous section. Mount time I/O errors will cause the mount operation to fail.
+The handling of read-only zones also differs between mount-time and run-time.
+If a read-only zone is found at mount time, the zone is always treated in the
+same manner as offline zones, that is, all accesses are disabled and the zone
+file size set to 0. This is necessary as the write pointer of read-only zones
+is defined as invalib by the ZBC and ZAC standards, making it impossible to
+discover the amount of data that has been written to the zone. In the case of a
+read-only zone discovered at run-time, as indicated in the previous section.
+the size of the zone file is left unchanged from its last updated value.
Zonefs User Space Tools
=======================
@@ -325,78 +341,78 @@
--------
The following formats a 15TB host-managed SMR HDD with 256 MB zones
-with the conventional zones aggregation feature enabled.
+with the conventional zones aggregation feature enabled::
-# mkzonefs -o aggr_cnv /dev/sdX
-# mount -t zonefs /dev/sdX /mnt
-# ls -l /mnt/
-total 0
-dr-xr-xr-x 2 root root 1 Nov 25 13:23 cnv
-dr-xr-xr-x 2 root root 55356 Nov 25 13:23 seq
+ # mkzonefs -o aggr_cnv /dev/sdX
+ # mount -t zonefs /dev/sdX /mnt
+ # ls -l /mnt/
+ total 0
+ dr-xr-xr-x 2 root root 1 Nov 25 13:23 cnv
+ dr-xr-xr-x 2 root root 55356 Nov 25 13:23 seq
The size of the zone files sub-directories indicate the number of files
existing for each type of zones. In this example, there is only one
conventional zone file (all conventional zones are aggregated under a single
-file).
+file)::
-# ls -l /mnt/cnv
-total 137101312
--rw-r----- 1 root root 140391743488 Nov 25 13:23 0
+ # ls -l /mnt/cnv
+ total 137101312
+ -rw-r----- 1 root root 140391743488 Nov 25 13:23 0
-This aggregated conventional zone file can be used as a regular file.
+This aggregated conventional zone file can be used as a regular file::
-# mkfs.ext4 /mnt/cnv/0
-# mount -o loop /mnt/cnv/0 /data
+ # mkfs.ext4 /mnt/cnv/0
+ # mount -o loop /mnt/cnv/0 /data
The "seq" sub-directory grouping files for sequential write zones has in this
-example 55356 zones.
+example 55356 zones::
-# ls -lv /mnt/seq
-total 14511243264
--rw-r----- 1 root root 0 Nov 25 13:23 0
--rw-r----- 1 root root 0 Nov 25 13:23 1
--rw-r----- 1 root root 0 Nov 25 13:23 2
-...
--rw-r----- 1 root root 0 Nov 25 13:23 55354
--rw-r----- 1 root root 0 Nov 25 13:23 55355
+ # ls -lv /mnt/seq
+ total 14511243264
+ -rw-r----- 1 root root 0 Nov 25 13:23 0
+ -rw-r----- 1 root root 0 Nov 25 13:23 1
+ -rw-r----- 1 root root 0 Nov 25 13:23 2
+ ...
+ -rw-r----- 1 root root 0 Nov 25 13:23 55354
+ -rw-r----- 1 root root 0 Nov 25 13:23 55355
For sequential write zone files, the file size changes as data is appended at
-the end of the file, similarly to any regular file system.
+the end of the file, similarly to any regular file system::
-# dd if=/dev/zero of=/mnt/seq/0 bs=4096 count=1 conv=notrunc oflag=direct
-1+0 records in
-1+0 records out
-4096 bytes (4.1 kB, 4.0 KiB) copied, 0.00044121 s, 9.3 MB/s
+ # dd if=/dev/zero of=/mnt/seq/0 bs=4096 count=1 conv=notrunc oflag=direct
+ 1+0 records in
+ 1+0 records out
+ 4096 bytes (4.1 kB, 4.0 KiB) copied, 0.00044121 s, 9.3 MB/s
-# ls -l /mnt/seq/0
--rw-r----- 1 root root 4096 Nov 25 13:23 /mnt/seq/0
+ # ls -l /mnt/seq/0
+ -rw-r----- 1 root root 4096 Nov 25 13:23 /mnt/seq/0
The written file can be truncated to the zone size, preventing any further
-write operation.
+write operation::
-# truncate -s 268435456 /mnt/seq/0
-# ls -l /mnt/seq/0
--rw-r----- 1 root root 268435456 Nov 25 13:49 /mnt/seq/0
+ # truncate -s 268435456 /mnt/seq/0
+ # ls -l /mnt/seq/0
+ -rw-r----- 1 root root 268435456 Nov 25 13:49 /mnt/seq/0
Truncation to 0 size allows freeing the file zone storage space and restart
-append-writes to the file.
+append-writes to the file::
-# truncate -s 0 /mnt/seq/0
-# ls -l /mnt/seq/0
--rw-r----- 1 root root 0 Nov 25 13:49 /mnt/seq/0
+ # truncate -s 0 /mnt/seq/0
+ # ls -l /mnt/seq/0
+ -rw-r----- 1 root root 0 Nov 25 13:49 /mnt/seq/0
Since files are statically mapped to zones on the disk, the number of blocks of
-a file as reported by stat() and fstat() indicates the size of the file zone.
+a file as reported by stat() and fstat() indicates the size of the file zone::
-# stat /mnt/seq/0
- File: /mnt/seq/0
- Size: 0 Blocks: 524288 IO Block: 4096 regular empty file
-Device: 870h/2160d Inode: 50431 Links: 1
-Access: (0640/-rw-r-----) Uid: ( 0/ root) Gid: ( 0/ root)
-Access: 2019-11-25 13:23:57.048971997 +0900
-Modify: 2019-11-25 13:52:25.553805765 +0900
-Change: 2019-11-25 13:52:25.553805765 +0900
- Birth: -
+ # stat /mnt/seq/0
+ File: /mnt/seq/0
+ Size: 0 Blocks: 524288 IO Block: 4096 regular empty file
+ Device: 870h/2160d Inode: 50431 Links: 1
+ Access: (0640/-rw-r-----) Uid: ( 0/ root) Gid: ( 0/ root)
+ Access: 2019-11-25 13:23:57.048971997 +0900
+ Modify: 2019-11-25 13:52:25.553805765 +0900
+ Change: 2019-11-25 13:52:25.553805765 +0900
+ Birth: -
The number of blocks of the file ("Blocks") in units of 512B blocks gives the
maximum file size of 524288 * 512 B = 256 MB, corresponding to the device zone
diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index e539c42..cc74e24c 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -207,10 +207,10 @@
CSR firmware support for DMC
----------------------------
-.. kernel-doc:: drivers/gpu/drm/i915/intel_csr.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_csr.c
:doc: csr support for dmc
-.. kernel-doc:: drivers/gpu/drm/i915/intel_csr.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_csr.c
:internal:
Video BIOS Table (VBT)
diff --git a/Documentation/hwmon/adm1177.rst b/Documentation/hwmon/adm1177.rst
index c81e0b4..471be1e 100644
--- a/Documentation/hwmon/adm1177.rst
+++ b/Documentation/hwmon/adm1177.rst
@@ -20,8 +20,7 @@
-----------
This driver does not auto-detect devices. You will have to instantiate the
-devices explicitly. Please see Documentation/i2c/instantiating-devices for
-details.
+devices explicitly. Please see :doc:`/i2c/instantiating-devices` for details.
Sysfs entries
diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index b24adb6..8ef62fd 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -162,6 +162,7 @@
tmp421
tmp513
tps40422
+ tps53679
twl4030-madc-hwmon
ucd9000
ucd9200
diff --git a/Documentation/hwmon/isl68137.rst b/Documentation/hwmon/isl68137.rst
index a5a7c85..cc4b614 100644
--- a/Documentation/hwmon/isl68137.rst
+++ b/Documentation/hwmon/isl68137.rst
@@ -3,7 +3,7 @@
Supported chips:
- * Intersil ISL68137
+ * Renesas ISL68137
Prefix: 'isl68137'
@@ -11,19 +11,405 @@
Datasheet:
- Publicly available at the Intersil website
- https://www.intersil.com/content/dam/Intersil/documents/isl6/isl68137.pdf
+ Publicly available at the Renesas website
+ https://www.renesas.com/us/en/www/doc/datasheet/isl68137.pdf
+
+ * Renesas ISL68220
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL68221
+
+ Prefix: 'raa_dmpvr2_3rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL68222
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL68223
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL68224
+
+ Prefix: 'raa_dmpvr2_3rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL68225
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL68226
+
+ Prefix: 'raa_dmpvr2_3rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL68227
+
+ Prefix: 'raa_dmpvr2_1rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL68229
+
+ Prefix: 'raa_dmpvr2_3rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL68233
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL68239
+
+ Prefix: 'raa_dmpvr2_3rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69222
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69223
+
+ Prefix: 'raa_dmpvr2_3rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69224
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69225
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69227
+
+ Prefix: 'raa_dmpvr2_3rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69228
+
+ Prefix: 'raa_dmpvr2_3rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69234
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69236
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69239
+
+ Prefix: 'raa_dmpvr2_3rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69242
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69243
+
+ Prefix: 'raa_dmpvr2_1rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69247
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69248
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69254
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69255
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69256
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69259
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69260
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69268
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69269
+
+ Prefix: 'raa_dmpvr2_3rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas ISL69298
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas RAA228000
+
+ Prefix: 'raa_dmpvr2_hv'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas RAA228004
+
+ Prefix: 'raa_dmpvr2_hv'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas RAA228006
+
+ Prefix: 'raa_dmpvr2_hv'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas RAA228228
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas RAA229001
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
+
+ * Renesas RAA229004
+
+ Prefix: 'raa_dmpvr2_2rail'
+
+ Addresses scanned: -
+
+ Datasheet:
+
+ Publicly available (after August 2020 launch) at the Renesas website
Authors:
- Maxim Sloyko <maxims@google.com>
- Robert Lippert <rlippert@google.com>
- Patrick Venture <venture@google.com>
+ - Grant Peltier <grant.peltier.jg@renesas.com>
Description
-----------
-Intersil ISL68137 is a digital output 7-phase configurable PWM
-controller with an AVSBus interface.
+This driver supports the Renesas ISL68137 and all 2nd generation Renesas
+digital multiphase voltage regulators (raa_dmpvr2). The ISL68137 is a digital
+output 7-phase configurable PWM controller with an AVSBus interface. 2nd
+generation devices are grouped into 4 distinct configurations: '1rail' for
+single-rail devices, '2rail' for dual-rail devices, '3rail' for 3-rail devices,
+and 'hv' for high voltage single-rail devices. Consult the individual datasheets
+for more information.
Usage Notes
-----------
@@ -33,10 +419,14 @@
The ISL68137 AVS operation mode must be enabled/disabled at runtime.
-Beyond the normal sysfs pmbus attributes, the driver exposes a control attribute.
+Beyond the normal sysfs pmbus attributes, the driver exposes a control attribute
+for the ISL68137.
-Additional Sysfs attributes
----------------------------
+For 2nd generation Renesas digital multiphase voltage regulators, only the
+normal sysfs pmbus attributes are supported.
+
+ISL68137 sysfs attributes
+-------------------------
======================= ====================================
avs(0|1)_enable Controls the AVS state of each rail.
@@ -78,3 +468,138 @@
temp[1-3]_max Maximum temperature
temp[1-3]_max_alarm Chip temperature high alarm
======================= ====================================
+
+raa_dmpvr2_1rail/hv sysfs attributes
+------------------------------------
+
+======================= ==========================================
+curr1_label "iin"
+curr1_input Measured input current
+curr1_crit Critical maximum current
+curr1_crit_alarm Current critical high alarm
+
+curr2_label "iout"
+curr2_input Measured output current
+curr2_crit Critical maximum current
+curr2_crit_alarm Current critical high alarm
+
+in1_label "vin"
+in1_input Measured input voltage
+in1_lcrit Critical minimum input voltage
+in1_lcrit_alarm Input voltage critical low alarm
+in1_crit Critical maximum input voltage
+in1_crit_alarm Input voltage critical high alarm
+
+in2_label "vmon"
+in2_input Scaled VMON voltage read from the VMON pin
+
+in3_label "vout"
+in3_input Measured output voltage
+in3_lcrit Critical minimum output voltage
+in3_lcrit_alarm Output voltage critical low alarm
+in3_crit Critical maximum output voltage
+in3_crit_alarm Output voltage critical high alarm
+
+power1_label "pin"
+power1_input Measured input power
+power1_alarm Input power high alarm
+
+power2_label "pout"
+power2_input Measured output power
+
+temp[1-3]_input Measured temperature
+temp[1-3]_crit Critical high temperature
+temp[1-3]_crit_alarm Chip temperature critical high alarm
+temp[1-3]_max Maximum temperature
+temp[1-3]_max_alarm Chip temperature high alarm
+======================= ==========================================
+
+raa_dmpvr2_2rail sysfs attributes
+---------------------------------
+
+======================= ==========================================
+curr[1-2]_label "iin[1-2]"
+curr[1-2]_input Measured input current
+curr[1-2]_crit Critical maximum current
+curr[1-2]_crit_alarm Current critical high alarm
+
+curr[3-4]_label "iout[1-2]"
+curr[3-4]_input Measured output current
+curr[3-4]_crit Critical maximum current
+curr[3-4]_crit_alarm Current critical high alarm
+
+in1_label "vin"
+in1_input Measured input voltage
+in1_lcrit Critical minimum input voltage
+in1_lcrit_alarm Input voltage critical low alarm
+in1_crit Critical maximum input voltage
+in1_crit_alarm Input voltage critical high alarm
+
+in2_label "vmon"
+in2_input Scaled VMON voltage read from the VMON pin
+
+in[3-4]_label "vout[1-2]"
+in[3-4]_input Measured output voltage
+in[3-4]_lcrit Critical minimum output voltage
+in[3-4]_lcrit_alarm Output voltage critical low alarm
+in[3-4]_crit Critical maximum output voltage
+in[3-4]_crit_alarm Output voltage critical high alarm
+
+power[1-2]_label "pin[1-2]"
+power[1-2]_input Measured input power
+power[1-2]_alarm Input power high alarm
+
+power[3-4]_label "pout[1-2]"
+power[3-4]_input Measured output power
+
+temp[1-5]_input Measured temperature
+temp[1-5]_crit Critical high temperature
+temp[1-5]_crit_alarm Chip temperature critical high alarm
+temp[1-5]_max Maximum temperature
+temp[1-5]_max_alarm Chip temperature high alarm
+======================= ==========================================
+
+raa_dmpvr2_3rail sysfs attributes
+---------------------------------
+
+======================= ==========================================
+curr[1-3]_label "iin[1-3]"
+curr[1-3]_input Measured input current
+curr[1-3]_crit Critical maximum current
+curr[1-3]_crit_alarm Current critical high alarm
+
+curr[4-6]_label "iout[1-3]"
+curr[4-6]_input Measured output current
+curr[4-6]_crit Critical maximum current
+curr[4-6]_crit_alarm Current critical high alarm
+
+in1_label "vin"
+in1_input Measured input voltage
+in1_lcrit Critical minimum input voltage
+in1_lcrit_alarm Input voltage critical low alarm
+in1_crit Critical maximum input voltage
+in1_crit_alarm Input voltage critical high alarm
+
+in2_label "vmon"
+in2_input Scaled VMON voltage read from the VMON pin
+
+in[3-5]_label "vout[1-3]"
+in[3-5]_input Measured output voltage
+in[3-5]_lcrit Critical minimum output voltage
+in[3-5]_lcrit_alarm Output voltage critical low alarm
+in[3-5]_crit Critical maximum output voltage
+in[3-5]_crit_alarm Output voltage critical high alarm
+
+power[1-3]_label "pin[1-3]"
+power[1-3]_input Measured input power
+power[1-3]_alarm Input power high alarm
+
+power[4-6]_label "pout[1-3]"
+power[4-6]_input Measured output power
+
+temp[1-7]_input Measured temperature
+temp[1-7]_crit Critical high temperature
+temp[1-7]_crit_alarm Chip temperature critical high alarm
+temp[1-7]_max Maximum temperature
+temp[1-7]_max_alarm Chip temperature high alarm
+======================= ==========================================
diff --git a/Documentation/hwmon/k10temp.rst b/Documentation/hwmon/k10temp.rst
index 4451d59..8557e26 100644
--- a/Documentation/hwmon/k10temp.rst
+++ b/Documentation/hwmon/k10temp.rst
@@ -100,9 +100,10 @@
are using an AM3 processor on an AM2+ mainboard, you can safely use the
"force=1" parameter.
-There is one temperature measurement value, available as temp1_input in
-sysfs. It is measured in degrees Celsius with a resolution of 1/8th degree.
-Please note that it is defined as a relative value; to quote the AMD manual::
+For CPUs older than Family 17h, there is one temperature measurement value,
+available as temp1_input in sysfs. It is measured in degrees Celsius with a
+resolution of 1/8th degree. Please note that it is defined as a relative
+value; to quote the AMD manual::
Tctl is the processor temperature control value, used by the platform to
control cooling systems. Tctl is a non-physical temperature on an
@@ -126,3 +127,25 @@
Models from 17h family report relative temperature, the driver aims to
compensate and report the real temperature.
+
+On Family 17h and Family 18h CPUs, additional temperature sensors may report
+Core Complex Die (CCD) temperatures. Up to 8 such temperatures are reported
+as temp{3..10}_input, labeled Tccd{1..8}. Actual support depends on the CPU
+variant.
+
+Various Family 17h and 18h CPUs report voltage and current telemetry
+information. The following attributes may be reported.
+
+Attribute Label Description
+=============== ======= ================
+in0_input Vcore Core voltage
+in1_input Vsoc SoC voltage
+curr1_input Icore Core current
+curr2_input Isoc SoC current
+=============== ======= ================
+
+Current values are raw (unscaled) as reported by the CPU. Core current is
+reported as multiples of 1A / LSB. SoC is reported as multiples of 0.25A
+/ LSB. The real current is board specific. Reported currents should be seen
+as rough guidance, and should be scaled using sensors3.conf as appropriate
+for a given board.
diff --git a/Documentation/hwmon/ltc2978.rst b/Documentation/hwmon/ltc2978.rst
index 01a24fd..bc5270e 100644
--- a/Documentation/hwmon/ltc2978.rst
+++ b/Documentation/hwmon/ltc2978.rst
@@ -3,13 +3,21 @@
Supported chips:
+ * Linear Technology LTC2972
+
+ Prefix: 'ltc2972'
+
+ Addresses scanned: -
+
+ Datasheet: https://www.analog.com/en/products/ltc2972.html
+
* Linear Technology LTC2974
Prefix: 'ltc2974'
Addresses scanned: -
- Datasheet: http://www.linear.com/product/ltc2974
+ Datasheet: https://www.analog.com/en/products/ltc2974
* Linear Technology LTC2975
@@ -17,7 +25,7 @@
Addresses scanned: -
- Datasheet: http://www.linear.com/product/ltc2975
+ Datasheet: https://www.analog.com/en/products/ltc2975
* Linear Technology LTC2977
@@ -25,7 +33,7 @@
Addresses scanned: -
- Datasheet: http://www.linear.com/product/ltc2977
+ Datasheet: https://www.analog.com/en/products/ltc2977
* Linear Technology LTC2978, LTC2978A
@@ -33,9 +41,17 @@
Addresses scanned: -
- Datasheet: http://www.linear.com/product/ltc2978
+ Datasheet: https://www.analog.com/en/products/ltc2978
- http://www.linear.com/product/ltc2978a
+ https://www.analog.com/en/products/ltc2978a
+
+ * Linear Technology LTC2979
+
+ Prefix: 'ltc2979'
+
+ Addresses scanned: -
+
+ Datasheet: https://www.analog.com/en/products/ltc2979
* Linear Technology LTC2980
@@ -43,7 +59,7 @@
Addresses scanned: -
- Datasheet: http://www.linear.com/product/ltc2980
+ Datasheet: https://www.analog.com/en/products/ltc2980
* Linear Technology LTC3880
@@ -51,7 +67,7 @@
Addresses scanned: -
- Datasheet: http://www.linear.com/product/ltc3880
+ Datasheet: https://www.analog.com/en/products/ltc3880
* Linear Technology LTC3882
@@ -59,7 +75,7 @@
Addresses scanned: -
- Datasheet: http://www.linear.com/product/ltc3882
+ Datasheet: https://www.analog.com/en/products/ltc3882
* Linear Technology LTC3883
@@ -67,7 +83,15 @@
Addresses scanned: -
- Datasheet: http://www.linear.com/product/ltc3883
+ Datasheet: https://www.analog.com/en/products/ltc3883
+
+ * Linear Technology LTC3884
+
+ Prefix: 'ltc3884'
+
+ Addresses scanned: -
+
+ Datasheet: https://www.analog.com/en/products/ltc3884
* Linear Technology LTC3886
@@ -75,7 +99,7 @@
Addresses scanned: -
- Datasheet: http://www.linear.com/product/ltc3886
+ Datasheet: https://www.analog.com/en/products/ltc3886
* Linear Technology LTC3887
@@ -83,7 +107,23 @@
Addresses scanned: -
- Datasheet: http://www.linear.com/product/ltc3887
+ Datasheet: https://www.analog.com/en/products/ltc3887
+
+ * Linear Technology LTC3889
+
+ Prefix: 'ltc3889'
+
+ Addresses scanned: -
+
+ Datasheet: https://www.analog.com/en/products/ltc3889
+
+ * Linear Technology LTC7880
+
+ Prefix: 'ltc7880'
+
+ Addresses scanned: -
+
+ Datasheet: https://www.analog.com/en/products/ltc7880
* Linear Technology LTM2987
@@ -91,15 +131,23 @@
Addresses scanned: -
- Datasheet: http://www.linear.com/product/ltm2987
+ Datasheet: https://www.analog.com/en/products/ltm2987
- * Linear Technology LTM4675
+ * Linear Technology LTM4644
+
+ Prefix: 'ltm4644'
+
+ Addresses scanned: -
+
+ Datasheet: https://www.analog.com/en/products/ltm4644
+
+ * Linear Technology LTM4675
Prefix: 'ltm4675'
Addresses scanned: -
- Datasheet: http://www.linear.com/product/ltm4675
+ Datasheet: https://www.analog.com/en/products/ltm4675
* Linear Technology LTM4676
@@ -107,7 +155,31 @@
Addresses scanned: -
- Datasheet: http://www.linear.com/product/ltm4676
+ Datasheet: https://www.analog.com/en/products/ltm4676
+
+ * Linear Technology LTM4677
+
+ Prefix: 'ltm4677'
+
+ Addresses scanned: -
+
+ Datasheet: https://www.analog.com/en/products/ltm4677
+
+ * Linear Technology LTM4678
+
+ Prefix: 'ltm4678'
+
+ Addresses scanned: -
+
+ Datasheet: https://www.analog.com/en/products/ltm4678
+
+ * Analog Devices LTM4680
+
+ Prefix: 'ltm4680'
+
+ Addresses scanned: -
+
+ Datasheet: http://www.analog.com/ltm4680
* Analog Devices LTM4686
@@ -117,6 +189,15 @@
Datasheet: http://www.analog.com/ltm4686
+ * Analog Devices LTM4700
+
+ Prefix: 'ltm4700'
+
+ Addresses scanned: -
+
+ Datasheet: http://www.analog.com/ltm4700
+
+
Author: Guenter Roeck <linux@roeck-us.net>
@@ -166,13 +247,13 @@
in1_max Maximum input voltage.
- LTC2974, LTC2975, LTC2977, LTC2980, LTC2978, and
- LTM2987 only.
+ LTC2974, LTC2975, LTC2977, LTC2980, LTC2978,
+ LTC2979 and LTM2987 only.
in1_lcrit Critical minimum input voltage.
- LTC2974, LTC2975, LTC2977, LTC2980, LTC2978, and
- LTM2987 only.
+ LTC2972, LTC2974, LTC2975, LTC2977, LTC2980, LTC2978,
+ LTC2979 and LTM2987 only.
in1_crit Critical maximum input voltage.
@@ -180,29 +261,34 @@
in1_max_alarm Input voltage high alarm.
- LTC2974, LTC2975, LTC2977, LTC2980, LTC2978, and
- LTM2987 only.
+ LTC2972, LTC2974, LTC2975, LTC2977, LTC2980, LTC2978,
+ LTC2979 and LTM2987 only.
+
in1_lcrit_alarm Input voltage critical low alarm.
- LTC2974, LTC2975, LTC2977, LTC2980, LTC2978, and
- LTM2987 only.
+ LTC2972, LTC2974, LTC2975, LTC2977, LTC2980, LTC2978,
+ LTC2979 and LTM2987 only.
+
in1_crit_alarm Input voltage critical high alarm.
in1_lowest Lowest input voltage.
- LTC2974, LTC2975, LTC2977, LTC2980, LTC2978, and
- LTM2987 only.
+ LTC2972, LTC2974, LTC2975, LTC2977, LTC2980, LTC2978,
+ and LTM2987 only.
+
in1_highest Highest input voltage.
in1_reset_history Reset input voltage history.
in[N]_label "vout[1-8]".
+ - LTC2972: N=2-3
- LTC2974, LTC2975: N=2-5
- - LTC2977, LTC2980, LTM2987: N=2-9
+ - LTC2977, LTC2979, LTC2980, LTM2987: N=2-9
- LTC2978: N=2-9
- - LTC3880, LTC3882, LTC23886 LTC3887, LTM4675, LTM4676:
- N=2-3
+ - LTC3880, LTC3882, LTC3884, LTC23886 LTC3887, LTC3889,
+ LTC7880, LTM4644, LTM4675, LTM4676, LTM4677, LTM4678,
+ LTM4680, LTM4700: N=2-3
- LTC3883: N=2
in[N]_input Measured output voltage.
@@ -225,8 +311,7 @@
in[N]_lowest Lowest output voltage.
-
- LTC2974, LTC2975,and LTC2978 only.
+ LTC2972, LTC2974, LTC2975,and LTC2978 only.
in[N]_highest Highest output voltage.
@@ -234,20 +319,24 @@
temp[N]_input Measured temperature.
+ - On LTC2972, temp[1-2] report external temperatures,
+ and temp 3 reports the chip temperature.
- On LTC2974 and LTC2975, temp[1-4] report external
temperatures, and temp5 reports the chip temperature.
- - On LTC2977, LTC2980, LTC2978, and LTM2987, only one
- temperature measurement is supported and reports
- the chip temperature.
- - On LTC3880, LTC3882, LTC3887, LTM4675, and LTM4676,
- temp1 and temp2 report external temperatures, and
- temp3 reports the chip temperature.
+ - On LTC2977, LTC2979, LTC2980, LTC2978, and LTM2987,
+ only one temperature measurement is supported and
+ reports the chip temperature.
+ - On LTC3880, LTC3882, LTC3886, LTC3887, LTC3889,
+ LTM4664, LTM4675, LTM4676, LTM4677, LTM4678, LTM4680,
+ and LTM4700, temp1 and temp2 report external
+ temperatures, and temp3 reports the chip temperature.
- On LTC3883, temp1 reports an external temperature,
and temp2 reports the chip temperature.
temp[N]_min Mimimum temperature.
- LTC2974, LCT2977, LTM2980, LTC2978, and LTM2987 only.
+ LTC2972, LTC2974, LCT2977, LTM2980, LTC2978,
+ LTC2979, and LTM2987 only.
temp[N]_max Maximum temperature.
@@ -257,8 +346,8 @@
temp[N]_min_alarm Temperature low alarm.
- LTC2974, LTC2975, LTC2977, LTM2980, LTC2978, and
- LTM2987 only.
+ LTC2972, LTC2974, LTC2975, LTC2977, LTM2980, LTC2978,
+ LTC2979, and LTM2987 only.
temp[N]_max_alarm Temperature high alarm.
@@ -269,8 +358,8 @@
temp[N]_lowest Lowest measured temperature.
- - LTC2974, LTC2975, LTC2977, LTM2980, LTC2978, and
- LTM2987 only.
+ - LTC2972, LTC2974, LTC2975, LTC2977, LTM2980, LTC2978,
+ LTC2979, and LTM2987 only.
- Not supported for chip temperature sensor on LTC2974
and LTC2975.
@@ -290,19 +379,22 @@
power[N]_label "pout[1-4]".
+ - LTC2972: N=1-2
- LTC2974, LTC2975: N=1-4
- - LTC2977, LTC2980, LTM2987: Not supported
+ - LTC2977, LTC2979, LTC2980, LTM2987: Not supported
- LTC2978: Not supported
- - LTC3880, LTC3882, LTC3886, LTC3887, LTM4675, LTM4676:
- N=1-2
+ - LTC3880, LTC3882, LTC3884, LTC3886, LTC3887, LTC3889,
+ LTM4664, LTM4675, LTM4676, LTM4677, LTM4678, LTM4680,
+ LTM4700: N=1-2
- LTC3883: N=2
power[N]_input Measured output power.
curr1_label "iin".
- LTC3880, LTC3883, LTC3886, LTC3887, LTM4675,
- and LTM4676 only.
+ LTC3880, LTC3883, LTC3884, LTC3886, LTC3887, LTC3889,
+ LTM4644, LTM4675, LTM4676, LTM4677, LTM4678, LTM4680,
+ and LTM4700 only.
curr1_input Measured input current.
@@ -320,11 +412,13 @@
curr[N]_label "iout[1-4]".
+ - LTC2972: N-1-2
- LTC2974, LTC2975: N=1-4
- - LTC2977, LTC2980, LTM2987: not supported
+ - LTC2977, LTC2979, LTC2980, LTM2987: not supported
- LTC2978: not supported
- - LTC3880, LTC3882, LTC3886, LTC3887, LTM4675, LTM4676:
- N=2-3
+ - LTC3880, LTC3882, LTC3884, LTC3886, LTC3887, LTC3889,
+ LTM4664, LTM4675, LTM4676, LTM4677, LTM4678, LTM4680,
+ LTM4700: N=2-3
- LTC3883: N=2
curr[N]_input Measured output current.
@@ -335,7 +429,7 @@
curr[N]_lcrit Critical low output current.
- LTC2974 and LTC2975 only.
+ LTC2972, LTC2974 and LTC2975 only.
curr[N]_max_alarm Output current high alarm.
@@ -343,11 +437,11 @@
curr[N]_lcrit_alarm Output current critical low alarm.
- LTC2974 and LTC2975 only.
+ LTC2972, LTC2974 and LTC2975 only.
curr[N]_lowest Lowest output current.
- LTC2974 and LTC2975 only.
+ LTC2972, LTC2974 and LTC2975 only.
curr[N]_highest Highest output current.
diff --git a/Documentation/hwmon/pmbus-core.rst b/Documentation/hwmon/pmbus-core.rst
index 92515c4..501b37b 100644
--- a/Documentation/hwmon/pmbus-core.rst
+++ b/Documentation/hwmon/pmbus-core.rst
@@ -162,9 +162,12 @@
::
- int (*read_word_data)(struct i2c_client *client, int page, int reg);
+ int (*read_word_data)(struct i2c_client *client, int page, int phase,
+ int reg);
-Read word from page <page>, register <reg>.
+Read word from page <page>, phase <pase>, register <reg>. If the chip does not
+support multiple phases, the phase parameter can be ignored. If the chip
+supports multiple phases, a phase value of 0xff indicates all phases.
::
@@ -201,16 +204,21 @@
::
- int pmbus_set_page(struct i2c_client *client, u8 page);
+ int pmbus_set_page(struct i2c_client *client, u8 page, u8 phase);
-Set PMBus page register to <page> for subsequent commands.
+Set PMBus page register to <page> and <phase> for subsequent commands.
+If the chip does not support multiple phases, the phase parameter is
+ignored. Otherwise, a phase value of 0xff selects all phases.
::
- int pmbus_read_word_data(struct i2c_client *client, u8 page, u8 reg);
+ int pmbus_read_word_data(struct i2c_client *client, u8 page, u8 phase,
+ u8 reg);
-Read word data from <page>, <reg>. Similar to i2c_smbus_read_word_data(), but
-selects page first.
+Read word data from <page>, <phase>, <reg>. Similar to
+i2c_smbus_read_word_data(), but selects page and phase first. If the chip does
+not support multiple phases, the phase parameter is ignored. Otherwise, a phase
+value of 0xff selects all phases.
::
diff --git a/Documentation/hwmon/pmbus.rst b/Documentation/hwmon/pmbus.rst
index f787984..2658dde 100644
--- a/Documentation/hwmon/pmbus.rst
+++ b/Documentation/hwmon/pmbus.rst
@@ -227,7 +227,9 @@
From IOUT_UC_FAULT status.
currX_crit_alarm Current critical high alarm.
From IIN_OC_FAULT or IOUT_OC_FAULT status.
-currX_label "iin" or "ioutY"
+currX_label "iin", "iinY", "iinY.Z", "ioutY", or "ioutY.Z",
+ where Y reflects the page number and Z reflects the
+ phase.
powerX_input Measured power. From READ_PIN or READ_POUT register.
powerX_cap Output power cap. From POUT_MAX register.
@@ -239,7 +241,9 @@
From PIN_OP_WARNING or POUT_OP_WARNING status.
powerX_crit_alarm Output power critical high alarm.
From POUT_OP_FAULT status.
-powerX_label "pin" or "poutY"
+powerX_label "pin", "pinY", "pinY.Z", "poutY", or "poutY.Z",
+ where Y reflects the page number and Z reflects the
+ phase.
tempX_input Measured temperature.
From READ_TEMPERATURE_X register.
diff --git a/Documentation/hwmon/tps53679.rst b/Documentation/hwmon/tps53679.rst
new file mode 100644
index 0000000..be94cab
--- /dev/null
+++ b/Documentation/hwmon/tps53679.rst
@@ -0,0 +1,178 @@
+Kernel driver tps53679
+======================
+
+Supported chips:
+
+ * Texas Instruments TPS53647
+
+ Prefix: 'tps53647'
+
+ Addresses scanned: -
+
+ Datasheet: http://www.ti.com/lit/gpn/tps53647
+
+ * Texas Instruments TPS53667
+
+ Prefix: 'tps53667'
+
+ Addresses scanned: -
+
+ Datasheet: http://www.ti.com/lit/gpn/TPS53667
+
+ * Texas Instruments TPS53679
+
+ Prefix: 'tps53679'
+
+ Addresses scanned: -
+
+ Datasheet: http://www.ti.com/lit/gpn/TPS53679 (short version)
+
+ * Texas Instruments TPS53681
+
+ Prefix: 'tps53681'
+
+ Addresses scanned: -
+
+ Datasheet: http://www.ti.com/lit/gpn/TPS53681
+
+ * Texas Instruments TPS53688
+
+ Prefix: 'tps53688'
+
+ Addresses scanned: -
+
+ Datasheet: Available under NDA
+
+
+Authors:
+ Vadim Pasternak <vadimp@mellanox.com>
+ Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+Chips in this series are multi-phase step-down converters with one or two
+output channels and up to 8 phases per channel.
+
+
+Usage Notes
+-----------
+
+This driver does not probe for PMBus devices. You will have to instantiate
+devices explicitly.
+
+Example: the following commands will load the driver for an TPS53681 at address
+0x60 on I2C bus #1::
+
+ # modprobe tps53679
+ # echo tps53681 0x60 > /sys/bus/i2c/devices/i2c-1/new_device
+
+
+Sysfs attributes
+----------------
+
+======================= ========================================================
+in1_label "vin"
+
+in1_input Measured input voltage.
+
+in1_lcrit Critical minimum input voltage
+
+ TPS53679, TPS53681, TPS53688 only.
+
+in1_lcrit_alarm Input voltage critical low alarm.
+
+ TPS53679, TPS53681, TPS53688 only.
+
+in1_crit Critical maximum input voltage.
+
+in1_crit_alarm Input voltage critical high alarm.
+
+in[N]_label "vout[1-2]"
+
+ - TPS53647, TPS53667: N=2
+ - TPS53679, TPS53588: N=2,3
+
+in[N]_input Measured output voltage.
+
+in[N]_lcrit Critical minimum input voltage.
+
+ TPS53679, TPS53681, TPS53688 only.
+
+in[N]_lcrit_alarm Critical minimum voltage alarm.
+
+ TPS53679, TPS53681, TPS53688 only.
+
+in[N]_alarm Output voltage alarm.
+
+ TPS53647, TPS53667 only.
+
+in[N]_crit Critical maximum output voltage.
+
+ TPS53679, TPS53681, TPS53688 only.
+
+in[N]_crit_alarm Output voltage critical high alarm.
+
+ TPS53679, TPS53681, TPS53688 only.
+
+temp[N]_input Measured temperature.
+
+ - TPS53647, TPS53667: N=1
+ - TPS53679, TPS53681, TPS53588: N=1,2
+
+temp[N]_max Maximum temperature.
+
+temp[N]_crit Critical high temperature.
+
+temp[N]_max_alarm Temperature high alarm.
+
+temp[N]_crit_alarm Temperature critical high alarm.
+
+power1_label "pin".
+
+power1_input Measured input power.
+
+power[N]_label "pout[1-2]".
+
+ - TPS53647, TPS53667: N=2
+ - TPS53679, TPS53681, TPS53588: N=2,3
+
+power[N]_input Measured output power.
+
+curr1_label "iin".
+
+curr1_input Measured input current.
+
+curr1_max Maximum input current.
+
+curr1_max_alarm Input current high alarm.
+
+curr1_crit Critical input current.
+
+curr1_crit_alarm Input current critical alarm.
+
+curr[N]_label "iout[1-2]" or "iout1.[0-5]".
+
+ The first digit is the output channel, the second
+ digit is the phase within the channel. Per-phase
+ telemetry supported on TPS53681 only.
+
+ - TPS53647, TPS53667: N=2
+ - TPS53679, TPS53588: N=2,3
+ - TPS53681: N=2-9
+
+curr[N]_input Measured output current.
+
+curr[N]_max Maximum output current.
+
+curr[N]_crit Critical high output current.
+
+curr[N]_max_alarm Output current high alarm.
+
+curr[N]_crit_alarm Output current critical high alarm.
+
+ Limit and alarm attributes are only available for
+ non-phase telemetry (iout1, iout2).
+
+======================= ========================================================
diff --git a/Documentation/hwmon/xdpe12284.rst b/Documentation/hwmon/xdpe12284.rst
index 6b7ae98..67d1f87 100644
--- a/Documentation/hwmon/xdpe12284.rst
+++ b/Documentation/hwmon/xdpe12284.rst
@@ -24,6 +24,7 @@
dual loop voltage regulators.
The family includes XDPE12284 and XDPE12254 devices.
The devices from this family complaint with:
+
- Intel VR13 and VR13HC rev 1.3, IMVP8 rev 1.2 and IMPVP9 rev 1.3 DC-DC
converter specification.
- Intel SVID rev 1.9. protocol.
diff --git a/Documentation/index.rst b/Documentation/index.rst
index e99d0bd..6fdad61 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -131,7 +131,6 @@
usb/index
PCI/index
misc-devices/index
- mic/index
scheduler/index
Architecture-agnostic documentation
diff --git a/Documentation/core-api/gcc-plugins.rst b/Documentation/kbuild/gcc-plugins.rst
similarity index 98%
rename from Documentation/core-api/gcc-plugins.rst
rename to Documentation/kbuild/gcc-plugins.rst
index 8502f24..4b1c10f 100644
--- a/Documentation/core-api/gcc-plugins.rst
+++ b/Documentation/kbuild/gcc-plugins.rst
@@ -72,6 +72,10 @@
apt-get install gcc-4.9-plugin-dev
+Or on Fedora::
+
+ dnf install gcc-plugin-devel
+
Enable a GCC plugin based feature in the kernel config::
CONFIG_GCC_PLUGIN_CYC_COMPLEXITY = y
diff --git a/Documentation/kbuild/index.rst b/Documentation/kbuild/index.rst
index 0f144fa..82daf2e 100644
--- a/Documentation/kbuild/index.rst
+++ b/Documentation/kbuild/index.rst
@@ -19,6 +19,7 @@
issues
reproducible-builds
+ gcc-plugins
.. only:: subproject and html
diff --git a/Documentation/kbuild/kbuild.rst b/Documentation/kbuild/kbuild.rst
index f1e5dce..510f38d 100644
--- a/Documentation/kbuild/kbuild.rst
+++ b/Documentation/kbuild/kbuild.rst
@@ -237,7 +237,7 @@
KBUILD_EXTRA_SYMBOLS
--------------------
For modules that use symbols from other modules.
-See more details in modules.txt.
+See more details in modules.rst.
ALLSOURCE_ARCHS
---------------
diff --git a/Documentation/kbuild/kconfig-macro-language.rst b/Documentation/kbuild/kconfig-macro-language.rst
index 35b3263..8b413ef 100644
--- a/Documentation/kbuild/kconfig-macro-language.rst
+++ b/Documentation/kbuild/kconfig-macro-language.rst
@@ -44,7 +44,7 @@
def_bool y
Then, Kconfig moves onto the evaluation stage to resolve inter-symbol
-dependency as explained in kconfig-language.txt.
+dependency as explained in kconfig-language.rst.
Variables
diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst
index 0e0eb2c..04d5c01 100644
--- a/Documentation/kbuild/makefiles.rst
+++ b/Documentation/kbuild/makefiles.rst
@@ -765,7 +765,7 @@
Example::
#arch/x86/boot/Makefile
- subdir- := compressed/
+ subdir- := compressed
The above assignment instructs kbuild to descend down in the
directory compressed/ when "make clean" is executed.
@@ -924,7 +924,7 @@
$(KBUILD_AFLAGS_MODULE) is used to add arch-specific options that
are used for assembler.
- From commandline AFLAGS_MODULE shall be used (see kbuild.txt).
+ From commandline AFLAGS_MODULE shall be used (see kbuild.rst).
KBUILD_CFLAGS_KERNEL
$(CC) options specific for built-in
@@ -937,7 +937,7 @@
$(KBUILD_CFLAGS_MODULE) is used to add arch-specific options that
are used for $(CC).
- From commandline CFLAGS_MODULE shall be used (see kbuild.txt).
+ From commandline CFLAGS_MODULE shall be used (see kbuild.rst).
KBUILD_LDFLAGS_MODULE
Options for $(LD) when linking modules
@@ -945,7 +945,7 @@
$(KBUILD_LDFLAGS_MODULE) is used to add arch-specific options
used when linking modules. This is often a linker script.
- From commandline LDFLAGS_MODULE shall be used (see kbuild.txt).
+ From commandline LDFLAGS_MODULE shall be used (see kbuild.rst).
KBUILD_LDS
@@ -1379,9 +1379,6 @@
in arch/$(ARCH)/include/(uapi/)/asm, Kbuild will automatically generate
a wrapper of the asm-generic one.
- The convention is to list one subdir per line and
- preferably in alphabetic order.
-
8 Kbuild Variables
==================
diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst
index 69fa48e..e0b45a2 100644
--- a/Documentation/kbuild/modules.rst
+++ b/Documentation/kbuild/modules.rst
@@ -470,9 +470,9 @@
The syntax of the Module.symvers file is::
- <CRC> <Symbol> <Namespace> <Module> <Export Type>
+ <CRC> <Symbol> <Module> <Export Type> <Namespace>
- 0xe1cc2a05 usb_stor_suspend USB_STORAGE drivers/usb/storage/usb-storage EXPORT_SYMBOL_GPL
+ 0xe1cc2a05 usb_stor_suspend drivers/usb/storage/usb-storage EXPORT_SYMBOL_GPL USB_STORAGE
The fields are separated by tabs and values may be empty (e.g.
if no namespace is defined for an exported symbol).
diff --git a/Documentation/kernel-hacking/hacking.rst b/Documentation/kernel-hacking/hacking.rst
index d62aacb..eed2136 100644
--- a/Documentation/kernel-hacking/hacking.rst
+++ b/Documentation/kernel-hacking/hacking.rst
@@ -601,7 +601,7 @@
This is the variant of `EXPORT_SYMBOL()` that allows specifying a symbol
namespace. Symbol Namespaces are documented in
-``Documentation/core-api/symbol-namespaces.rst``.
+:doc:`../core-api/symbol-namespaces`
:c:func:`EXPORT_SYMBOL_NS_GPL()`
--------------------------------
@@ -610,7 +610,7 @@
This is the variant of `EXPORT_SYMBOL_GPL()` that allows specifying a symbol
namespace. Symbol Namespaces are documented in
-``Documentation/core-api/symbol-namespaces.rst``.
+:doc:`../core-api/symbol-namespaces`
Routines and Conventions
========================
diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst
index a8518ac0..6ed806e 100644
--- a/Documentation/kernel-hacking/locking.rst
+++ b/Documentation/kernel-hacking/locking.rst
@@ -150,17 +150,17 @@
If you have a data structure which is only ever accessed from user
context, then you can use a simple mutex (``include/linux/mutex.h``) to
protect it. This is the most trivial case: you initialize the mutex.
-Then you can call :c:func:`mutex_lock_interruptible()` to grab the
-mutex, and :c:func:`mutex_unlock()` to release it. There is also a
-:c:func:`mutex_lock()`, which should be avoided, because it will
+Then you can call mutex_lock_interruptible() to grab the
+mutex, and mutex_unlock() to release it. There is also a
+mutex_lock(), which should be avoided, because it will
not return if a signal is received.
Example: ``net/netfilter/nf_sockopt.c`` allows registration of new
-:c:func:`setsockopt()` and :c:func:`getsockopt()` calls, with
-:c:func:`nf_register_sockopt()`. Registration and de-registration
+setsockopt() and getsockopt() calls, with
+nf_register_sockopt(). Registration and de-registration
are only done on module load and unload (and boot time, where there is
no concurrency), and the list of registrations is only consulted for an
-unknown :c:func:`setsockopt()` or :c:func:`getsockopt()` system
+unknown setsockopt() or getsockopt() system
call. The ``nf_sockopt_mutex`` is perfect to protect this, especially
since the setsockopt and getsockopt calls may well sleep.
@@ -170,19 +170,19 @@
If a softirq shares data with user context, you have two problems.
Firstly, the current user context can be interrupted by a softirq, and
secondly, the critical region could be entered from another CPU. This is
-where :c:func:`spin_lock_bh()` (``include/linux/spinlock.h``) is
+where spin_lock_bh() (``include/linux/spinlock.h``) is
used. It disables softirqs on that CPU, then grabs the lock.
-:c:func:`spin_unlock_bh()` does the reverse. (The '_bh' suffix is
+spin_unlock_bh() does the reverse. (The '_bh' suffix is
a historical reference to "Bottom Halves", the old name for software
interrupts. It should really be called spin_lock_softirq()' in a
perfect world).
-Note that you can also use :c:func:`spin_lock_irq()` or
-:c:func:`spin_lock_irqsave()` here, which stop hardware interrupts
+Note that you can also use spin_lock_irq() or
+spin_lock_irqsave() here, which stop hardware interrupts
as well: see `Hard IRQ Context <#hard-irq-context>`__.
This works perfectly for UP as well: the spin lock vanishes, and this
-macro simply becomes :c:func:`local_bh_disable()`
+macro simply becomes local_bh_disable()
(``include/linux/interrupt.h``), which protects you from the softirq
being run.
@@ -216,8 +216,8 @@
~~~~~~~~~~~~~~~~~~~~~~~~~
If another tasklet/timer wants to share data with your tasklet or timer
-, you will both need to use :c:func:`spin_lock()` and
-:c:func:`spin_unlock()` calls. :c:func:`spin_lock_bh()` is
+, you will both need to use spin_lock() and
+spin_unlock() calls. spin_lock_bh() is
unnecessary here, as you are already in a tasklet, and none will be run
on the same CPU.
@@ -234,14 +234,14 @@
going so far as to use a softirq, you probably care about scalable
performance enough to justify the extra complexity.
-You'll need to use :c:func:`spin_lock()` and
-:c:func:`spin_unlock()` for shared data.
+You'll need to use spin_lock() and
+spin_unlock() for shared data.
Different Softirqs
~~~~~~~~~~~~~~~~~~
-You'll need to use :c:func:`spin_lock()` and
-:c:func:`spin_unlock()` for shared data, whether it be a timer,
+You'll need to use spin_lock() and
+spin_unlock() for shared data, whether it be a timer,
tasklet, different softirq or the same or another softirq: any of them
could be running on a different CPU.
@@ -259,38 +259,38 @@
concerns. Firstly, the softirq processing can be interrupted by a
hardware interrupt, and secondly, the critical region could be entered
by a hardware interrupt on another CPU. This is where
-:c:func:`spin_lock_irq()` is used. It is defined to disable
+spin_lock_irq() is used. It is defined to disable
interrupts on that cpu, then grab the lock.
-:c:func:`spin_unlock_irq()` does the reverse.
+spin_unlock_irq() does the reverse.
-The irq handler does not to use :c:func:`spin_lock_irq()`, because
+The irq handler does not need to use spin_lock_irq(), because
the softirq cannot run while the irq handler is running: it can use
-:c:func:`spin_lock()`, which is slightly faster. The only exception
+spin_lock(), which is slightly faster. The only exception
would be if a different hardware irq handler uses the same lock:
-:c:func:`spin_lock_irq()` will stop that from interrupting us.
+spin_lock_irq() will stop that from interrupting us.
This works perfectly for UP as well: the spin lock vanishes, and this
-macro simply becomes :c:func:`local_irq_disable()`
+macro simply becomes local_irq_disable()
(``include/asm/smp.h``), which protects you from the softirq/tasklet/BH
being run.
-:c:func:`spin_lock_irqsave()` (``include/linux/spinlock.h``) is a
+spin_lock_irqsave() (``include/linux/spinlock.h``) is a
variant which saves whether interrupts were on or off in a flags word,
-which is passed to :c:func:`spin_unlock_irqrestore()`. This means
+which is passed to spin_unlock_irqrestore(). This means
that the same code can be used inside an hard irq handler (where
interrupts are already off) and in softirqs (where the irq disabling is
required).
Note that softirqs (and hence tasklets and timers) are run on return
-from hardware interrupts, so :c:func:`spin_lock_irq()` also stops
-these. In that sense, :c:func:`spin_lock_irqsave()` is the most
+from hardware interrupts, so spin_lock_irq() also stops
+these. In that sense, spin_lock_irqsave() is the most
general and powerful locking function.
Locking Between Two Hard IRQ Handlers
-------------------------------------
It is rare to have to share data between two IRQ handlers, but if you
-do, :c:func:`spin_lock_irqsave()` should be used: it is
+do, spin_lock_irqsave() should be used: it is
architecture-specific whether all interrupts are disabled inside irq
handlers themselves.
@@ -304,11 +304,11 @@
(``copy_from_user*(`` or ``kmalloc(x,GFP_KERNEL)``).
- Otherwise (== data can be touched in an interrupt), use
- :c:func:`spin_lock_irqsave()` and
- :c:func:`spin_unlock_irqrestore()`.
+ spin_lock_irqsave() and
+ spin_unlock_irqrestore().
- Avoid holding spinlock for more than 5 lines of code and across any
- function call (except accessors like :c:func:`readb()`).
+ function call (except accessors like readb()).
Table of Minimum Requirements
-----------------------------
@@ -320,7 +320,7 @@
shares data with another thread, locking is required).
Remember the advice above: you can always use
-:c:func:`spin_lock_irqsave()`, which is a superset of all other
+spin_lock_irqsave(), which is a superset of all other
spinlock primitives.
============== ============= ============= ========= ========= ========= ========= ======= ======= ============== ==============
@@ -363,13 +363,13 @@
lock when some other thread is holding the lock. You should acquire the
lock later if you then need access to the data protected with the lock.
-:c:func:`spin_trylock()` does not spin but returns non-zero if it
+spin_trylock() does not spin but returns non-zero if it
acquires the spinlock on the first try or 0 if not. This function can be
-used in all contexts like :c:func:`spin_lock()`: you must have
+used in all contexts like spin_lock(): you must have
disabled the contexts that might interrupt you and acquire the spin
lock.
-:c:func:`mutex_trylock()` does not suspend your task but returns
+mutex_trylock() does not suspend your task but returns
non-zero if it could lock the mutex on the first try or 0 if not. This
function cannot be safely used in hardware or software interrupt
contexts despite not sleeping.
@@ -490,14 +490,14 @@
objects directly.
There is a slight (and common) optimization here: in
-:c:func:`cache_add()` we set up the fields of the object before
+cache_add() we set up the fields of the object before
grabbing the lock. This is safe, as no-one else can access it until we
put it in cache.
Accessing From Interrupt Context
--------------------------------
-Now consider the case where :c:func:`cache_find()` can be called
+Now consider the case where cache_find() can be called
from interrupt context: either a hardware interrupt or a softirq. An
example would be a timer which deletes object from the cache.
@@ -566,16 +566,16 @@
return ret;
}
-Note that the :c:func:`spin_lock_irqsave()` will turn off
+Note that the spin_lock_irqsave() will turn off
interrupts if they are on, otherwise does nothing (if we are already in
an interrupt handler), hence these functions are safe to call from any
context.
-Unfortunately, :c:func:`cache_add()` calls :c:func:`kmalloc()`
+Unfortunately, cache_add() calls kmalloc()
with the ``GFP_KERNEL`` flag, which is only legal in user context. I
-have assumed that :c:func:`cache_add()` is still only called in
+have assumed that cache_add() is still only called in
user context, otherwise this should become a parameter to
-:c:func:`cache_add()`.
+cache_add().
Exposing Objects Outside This File
----------------------------------
@@ -592,7 +592,7 @@
The second problem is the lifetime problem: if another structure keeps a
pointer to an object, it presumably expects that pointer to remain
valid. Unfortunately, this is only guaranteed while you hold the lock,
-otherwise someone might call :c:func:`cache_delete()` and even
+otherwise someone might call cache_delete() and even
worse, add another object, re-using the same address.
As there is only one lock, you can't hold it forever: no-one else would
@@ -693,8 +693,8 @@
We encapsulate the reference counting in the standard 'get' and 'put'
functions. Now we can return the object itself from
-:c:func:`cache_find()` which has the advantage that the user can
-now sleep holding the object (eg. to :c:func:`copy_to_user()` to
+cache_find() which has the advantage that the user can
+now sleep holding the object (eg. to copy_to_user() to
name to userspace).
The other point to note is that I said a reference should be held for
@@ -710,7 +710,7 @@
are guaranteed to be seen atomically from all CPUs in the system, so no
lock is required. In this case, it is simpler than using spinlocks,
although for anything non-trivial using spinlocks is clearer. The
-:c:func:`atomic_inc()` and :c:func:`atomic_dec_and_test()`
+atomic_inc() and atomic_dec_and_test()
are used instead of the standard increment and decrement operators, and
the lock is no longer used to protect the reference count itself.
@@ -802,7 +802,7 @@
- You can make ``cache_lock`` non-static, and tell people to grab that
lock before changing the name in any object.
-- You can provide a :c:func:`cache_obj_rename()` which grabs this
+- You can provide a cache_obj_rename() which grabs this
lock and changes the name for the caller, and tell everyone to use
that function.
@@ -861,11 +861,11 @@
``cache_lock`` rather than the per-object lock: this is because it (like
the :c:type:`struct list_head <list_head>` inside the object)
is logically part of the infrastructure. This way, I don't need to grab
-the lock of every object in :c:func:`__cache_add()` when seeking
+the lock of every object in __cache_add() when seeking
the least popular.
I also decided that the id member is unchangeable, so I don't need to
-grab each object lock in :c:func:`__cache_find()` to examine the
+grab each object lock in __cache_find() to examine the
id: the object lock is only used by a caller who wants to read or write
the name field.
@@ -887,7 +887,7 @@
stay-up-five-nights-talk-to-fluffy-code-bunnies kind of problem.
For a slightly more complex case, imagine you have a region shared by a
-softirq and user context. If you use a :c:func:`spin_lock()` call
+softirq and user context. If you use a spin_lock() call
to protect it, it is possible that the user context will be interrupted
by the softirq while it holds the lock, and the softirq will then spin
forever trying to get the same lock.
@@ -985,12 +985,12 @@
Sooner or later, this will crash on SMP, because a timer can have just
-gone off before the :c:func:`spin_lock_bh()`, and it will only get
-the lock after we :c:func:`spin_unlock_bh()`, and then try to free
+gone off before the spin_lock_bh(), and it will only get
+the lock after we spin_unlock_bh(), and then try to free
the element (which has already been freed!).
This can be avoided by checking the result of
-:c:func:`del_timer()`: if it returns 1, the timer has been deleted.
+del_timer(): if it returns 1, the timer has been deleted.
If 0, it means (in this case) that it is currently running, so we can
do::
@@ -1012,9 +1012,9 @@
Another common problem is deleting timers which restart themselves (by
-calling :c:func:`add_timer()` at the end of their timer function).
+calling add_timer() at the end of their timer function).
Because this is a fairly common case which is prone to races, you should
-use :c:func:`del_timer_sync()` (``include/linux/timer.h``) to
+use del_timer_sync() (``include/linux/timer.h``) to
handle this case. It returns the number of times the timer had to be
deleted before we finally stopped it from adding itself back in.
@@ -1086,7 +1086,7 @@
list->next = new;
-The :c:func:`wmb()` is a write memory barrier. It ensures that the
+The wmb() is a write memory barrier. It ensures that the
first operation (setting the new element's ``next`` pointer) is complete
and will be seen by all CPUs, before the second operation is (putting
the new element into the list). This is important, since modern
@@ -1097,7 +1097,7 @@
Fortunately, there is a function to do this for standard
:c:type:`struct list_head <list_head>` lists:
-:c:func:`list_add_rcu()` (``include/linux/list.h``).
+list_add_rcu() (``include/linux/list.h``).
Removing an element from the list is even simpler: we replace the
pointer to the old element with a pointer to its successor, and readers
@@ -1108,7 +1108,7 @@
list->next = old->next;
-There is :c:func:`list_del_rcu()` (``include/linux/list.h``) which
+There is list_del_rcu() (``include/linux/list.h``) which
does this (the normal version poisons the old object, which we don't
want).
@@ -1116,9 +1116,9 @@
pointer to start reading the contents of the next element early, but
don't realize that the pre-fetched contents is wrong when the ``next``
pointer changes underneath them. Once again, there is a
-:c:func:`list_for_each_entry_rcu()` (``include/linux/list.h``)
+list_for_each_entry_rcu() (``include/linux/list.h``)
to help you. Of course, writers can just use
-:c:func:`list_for_each_entry()`, since there cannot be two
+list_for_each_entry(), since there cannot be two
simultaneous writers.
Our final dilemma is this: when can we actually destroy the removed
@@ -1127,14 +1127,14 @@
changes, the reader will jump off into garbage and crash. We need to
wait until we know that all the readers who were traversing the list
when we deleted the element are finished. We use
-:c:func:`call_rcu()` to register a callback which will actually
+call_rcu() to register a callback which will actually
destroy the object once all pre-existing readers are finished.
-Alternatively, :c:func:`synchronize_rcu()` may be used to block
+Alternatively, synchronize_rcu() may be used to block
until all pre-existing are finished.
But how does Read Copy Update know when the readers are finished? The
method is this: firstly, the readers always traverse the list inside
-:c:func:`rcu_read_lock()`/:c:func:`rcu_read_unlock()` pairs:
+rcu_read_lock()/rcu_read_unlock() pairs:
these simply disable preemption so the reader won't go to sleep while
reading the list.
@@ -1223,12 +1223,12 @@
}
Note that the reader will alter the popularity member in
-:c:func:`__cache_find()`, and now it doesn't hold a lock. One
+__cache_find(), and now it doesn't hold a lock. One
solution would be to make it an ``atomic_t``, but for this usage, we
don't really care about races: an approximate result is good enough, so
I didn't change it.
-The result is that :c:func:`cache_find()` requires no
+The result is that cache_find() requires no
synchronization with any other functions, so is almost as fast on SMP as
it would be on UP.
@@ -1240,9 +1240,9 @@
Now, because the 'read lock' in RCU is simply disabling preemption, a
caller which always has preemption disabled between calling
-:c:func:`cache_find()` and :c:func:`object_put()` does not
+cache_find() and object_put() does not
need to actually get and put the reference count: we could expose
-:c:func:`__cache_find()` by making it non-static, and such
+__cache_find() by making it non-static, and such
callers could simply call that.
The benefit here is that the reference count is not written to: the
@@ -1260,11 +1260,11 @@
If that was too slow (it's usually not, but if you've got a really big
machine to test on and can show that it is), you could instead use a
counter for each CPU, then none of them need an exclusive lock. See
-:c:func:`DEFINE_PER_CPU()`, :c:func:`get_cpu_var()` and
-:c:func:`put_cpu_var()` (``include/linux/percpu.h``).
+DEFINE_PER_CPU(), get_cpu_var() and
+put_cpu_var() (``include/linux/percpu.h``).
Of particular use for simple per-cpu counters is the ``local_t`` type,
-and the :c:func:`cpu_local_inc()` and related functions, which are
+and the cpu_local_inc() and related functions, which are
more efficient than simple code on some architectures
(``include/asm/local.h``).
@@ -1289,10 +1289,10 @@
enable_irq(irq);
spin_unlock(&lock);
-The :c:func:`disable_irq()` prevents the irq handler from running
+The disable_irq() prevents the irq handler from running
(and waits for it to finish if it's currently running on other CPUs).
The spinlock prevents any other accesses happening at the same time.
-Naturally, this is slower than just a :c:func:`spin_lock_irq()`
+Naturally, this is slower than just a spin_lock_irq()
call, so it only makes sense if this type of access happens extremely
rarely.
@@ -1315,22 +1315,22 @@
- Accesses to userspace:
- - :c:func:`copy_from_user()`
+ - copy_from_user()
- - :c:func:`copy_to_user()`
+ - copy_to_user()
- - :c:func:`get_user()`
+ - get_user()
- - :c:func:`put_user()`
+ - put_user()
-- :c:func:`kmalloc(GFP_KERNEL) <kmalloc>`
+- kmalloc(GP_KERNEL) <kmalloc>`
-- :c:func:`mutex_lock_interruptible()` and
- :c:func:`mutex_lock()`
+- mutex_lock_interruptible() and
+ mutex_lock()
- There is a :c:func:`mutex_trylock()` which does not sleep.
+ There is a mutex_trylock() which does not sleep.
Still, it must not be used inside interrupt context since its
- implementation is not safe for that. :c:func:`mutex_unlock()`
+ implementation is not safe for that. mutex_unlock()
will also never sleep. It cannot be used in interrupt context either
since a mutex must be released by the same task that acquired it.
@@ -1340,11 +1340,11 @@
Some functions are safe to call from any context, or holding almost any
lock.
-- :c:func:`printk()`
+- printk()
-- :c:func:`kfree()`
+- kfree()
-- :c:func:`add_timer()` and :c:func:`del_timer()`
+- add_timer() and del_timer()
Mutex API reference
===================
@@ -1400,26 +1400,26 @@
bh
Bottom Half: for historical reasons, functions with '_bh' in them often
- now refer to any software interrupt, e.g. :c:func:`spin_lock_bh()`
+ now refer to any software interrupt, e.g. spin_lock_bh()
blocks any software interrupt on the current CPU. Bottom halves are
deprecated, and will eventually be replaced by tasklets. Only one bottom
half will be running at any time.
Hardware Interrupt / Hardware IRQ
- Hardware interrupt request. :c:func:`in_irq()` returns true in a
+ Hardware interrupt request. in_irq() returns true in a
hardware interrupt handler.
Interrupt Context
Not user context: processing a hardware irq or software irq. Indicated
- by the :c:func:`in_interrupt()` macro returning true.
+ by the in_interrupt() macro returning true.
SMP
Symmetric Multi-Processor: kernels compiled for multiple-CPU machines.
(``CONFIG_SMP=y``).
Software Interrupt / softirq
- Software interrupt handler. :c:func:`in_irq()` returns false;
- :c:func:`in_softirq()` returns true. Tasklets and softirqs both
+ Software interrupt handler. in_irq() returns false;
+ in_softirq() returns true. Tasklets and softirqs both
fall into the category of 'software interrupts'.
Strictly speaking a softirq is one of up to 32 enumerated software
diff --git a/Documentation/kref.txt b/Documentation/kref.txt
index 3af3841..c61eea6 100644
--- a/Documentation/kref.txt
+++ b/Documentation/kref.txt
@@ -128,6 +128,10 @@
put needs no lock because nothing tries to get the data without
already holding a pointer.
+In the above example, kref_put() will be called 2 times in both success
+and error paths. This is necessary because the reference count got
+incremented 2 times by kref_init() and kref_get().
+
Note that the "before" in rule 1 is very important. You should never
do something like::
diff --git a/Documentation/media/kapi/v4l2-controls.rst b/Documentation/media/kapi/v4l2-controls.rst
index b20800c..5129019 100644
--- a/Documentation/media/kapi/v4l2-controls.rst
+++ b/Documentation/media/kapi/v4l2-controls.rst
@@ -291,8 +291,8 @@
In practice the basic usage as described above is sufficient for most drivers.
-Inheriting Controls
--------------------
+Inheriting Sub-device Controls
+------------------------------
When a sub-device is registered with a V4L2 driver by calling
v4l2_device_register_subdev() and the ctrl_handler fields of both v4l2_subdev
@@ -757,8 +757,8 @@
It is recommended not to use this function from inside the control ops.
-Inheriting Controls
--------------------
+Preventing Controls inheritance
+-------------------------------
When one control handler is added to another using v4l2_ctrl_add_handler, then
by default all controls from one are merged to the other. But a subdev might
diff --git a/Documentation/misc-devices/index.rst b/Documentation/misc-devices/index.rst
index f11c5da..c1dcd26 100644
--- a/Documentation/misc-devices/index.rst
+++ b/Documentation/misc-devices/index.rst
@@ -20,4 +20,5 @@
isl29003
lis3lv02d
max6875
+ mic/index
xilinx_sdfec
diff --git a/Documentation/mic/index.rst b/Documentation/misc-devices/mic/index.rst
similarity index 100%
rename from Documentation/mic/index.rst
rename to Documentation/misc-devices/mic/index.rst
diff --git a/Documentation/mic/mic_overview.rst b/Documentation/misc-devices/mic/mic_overview.rst
similarity index 100%
rename from Documentation/mic/mic_overview.rst
rename to Documentation/misc-devices/mic/mic_overview.rst
diff --git a/Documentation/mic/scif_overview.rst b/Documentation/misc-devices/mic/scif_overview.rst
similarity index 100%
rename from Documentation/mic/scif_overview.rst
rename to Documentation/misc-devices/mic/scif_overview.rst
diff --git a/Documentation/networking/devlink/devlink-region.rst b/Documentation/networking/devlink/devlink-region.rst
index 1a7683e..8b46e85 100644
--- a/Documentation/networking/devlink/devlink-region.rst
+++ b/Documentation/networking/devlink/devlink-region.rst
@@ -40,9 +40,6 @@
# Delete a snapshot using:
$ devlink region del pci/0000:00:05.0/cr-space snapshot 1
- # Trigger (request) a snapshot be taken:
- $ devlink region trigger pci/0000:00:05.0/cr-space
-
# Dump a snapshot:
$ devlink region dump pci/0000:00:05.0/fw-health snapshot 1
0000000000000000 0014 95dc 0014 9514 0035 1670 0034 db30
diff --git a/Documentation/networking/net_failover.rst b/Documentation/networking/net_failover.rst
index 06c97dc..e143ab7 100644
--- a/Documentation/networking/net_failover.rst
+++ b/Documentation/networking/net_failover.rst
@@ -8,9 +8,9 @@
========
The net_failover driver provides an automated failover mechanism via APIs
-to create and destroy a failover master netdev and mananges a primary and
+to create and destroy a failover master netdev and manages a primary and
standby slave netdevs that get registered via the generic failover
-infrastructrure.
+infrastructure.
The failover netdev acts a master device and controls 2 slave devices. The
original paravirtual interface is registered as 'standby' slave netdev and
@@ -29,7 +29,7 @@
=============================================
net_failover enables hypervisor controlled accelerated datapath to virtio-net
-enabled VMs in a transparent manner with no/minimal guest userspace chanages.
+enabled VMs in a transparent manner with no/minimal guest userspace changes.
To support this, the hypervisor needs to enable VIRTIO_NET_F_STANDBY
feature on the virtio-net interface and assign the same MAC address to both
diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst
index 1e4735c..2561060 100644
--- a/Documentation/networking/phy.rst
+++ b/Documentation/networking/phy.rst
@@ -487,8 +487,9 @@
The stubs set one of the two matching criteria, and set the other one to
match anything.
-When phy_register_fixup() or \*_for_uid()/\*_for_id() is called at module,
-unregister fixup and free allocate memory are required.
+When phy_register_fixup() or \*_for_uid()/\*_for_id() is called at module load
+time, the module needs to unregister the fixup and free allocated memory when
+it's unloaded.
Call one of following function before unloading module::
diff --git a/Documentation/networking/rds.txt b/Documentation/networking/rds.txt
index f2a0147..eec6169 100644
--- a/Documentation/networking/rds.txt
+++ b/Documentation/networking/rds.txt
@@ -159,7 +159,7 @@
set SO_RDS_TRANSPORT on a socket for which the transport has
been previously attached explicitly (by SO_RDS_TRANSPORT) or
implicitly (via bind(2)) will return an error of EOPNOTSUPP.
- An attempt to set SO_RDS_TRANSPPORT to RDS_TRANS_NONE will
+ An attempt to set SO_RDS_TRANSPORT to RDS_TRANS_NONE will
always return EINVAL.
RDMA for RDS
diff --git a/Documentation/networking/snmp_counter.rst b/Documentation/networking/snmp_counter.rst
index 38a4edc..10e1109 100644
--- a/Documentation/networking/snmp_counter.rst
+++ b/Documentation/networking/snmp_counter.rst
@@ -908,8 +908,8 @@
A packet loss is detected and recovered by TLP.
-TCP Fast Open
-=============
+TCP Fast Open description
+=========================
TCP Fast Open is a technology which allows data transfer before the
3-way handshake complete. Please refer the `TCP Fast Open wiki`_ for a
general description.
diff --git a/Documentation/power/index.rst b/Documentation/power/index.rst
index 002e427..ced8a80 100644
--- a/Documentation/power/index.rst
+++ b/Documentation/power/index.rst
@@ -13,7 +13,6 @@
drivers-testing
energy-model
freezing-of-tasks
- interface
opp
pci
pm_qos_interface
diff --git a/Documentation/powerpc/ultravisor.rst b/Documentation/powerpc/ultravisor.rst
index 363736d..df136c8 100644
--- a/Documentation/powerpc/ultravisor.rst
+++ b/Documentation/powerpc/ultravisor.rst
@@ -8,8 +8,8 @@
.. contents::
:depth: 3
-Protected Execution Facility
-############################
+Introduction
+############
Protected Execution Facility (PEF) is an architectural change for
POWER 9 that enables Secure Virtual Machines (SVMs). DD2.3 chips
diff --git a/Documentation/process/2.Process.rst b/Documentation/process/2.Process.rst
index ae020d8..b21b5b2 100644
--- a/Documentation/process/2.Process.rst
+++ b/Documentation/process/2.Process.rst
@@ -18,18 +18,18 @@
release history looks like this:
====== =================
- 4.11 April 30, 2017
- 4.12 July 2, 2017
- 4.13 September 3, 2017
- 4.14 November 12, 2017
- 4.15 January 28, 2018
- 4.16 April 1, 2018
+ 5.0 March 3, 2019
+ 5.1 May 5, 2019
+ 5.2 July 7, 2019
+ 5.3 September 15, 2019
+ 5.4 November 24, 2019
+ 5.5 January 6, 2020
====== =================
-Every 4.x release is a major kernel release with new features, internal
-API changes, and more. A typical 4.x release contain about 13,000
-changesets with changes to several hundred thousand lines of code. 4.x is
-thus the leading edge of Linux kernel development; the kernel uses a
+Every 5.x release is a major kernel release with new features, internal
+API changes, and more. A typical release can contain about 13,000
+changesets with changes to several hundred thousand lines of code. 5.x is
+the leading edge of Linux kernel development; the kernel uses a
rolling development model which is continually integrating major changes.
A relatively straightforward discipline is followed with regard to the
@@ -48,9 +48,9 @@
The merge window lasts for approximately two weeks. At the end of this
time, Linus Torvalds will declare that the window is closed and release the
-first of the "rc" kernels. For the kernel which is destined to be 2.6.40,
+first of the "rc" kernels. For the kernel which is destined to be 5.6,
for example, the release which happens at the end of the merge window will
-be called 2.6.40-rc1. The -rc1 release is the signal that the time to
+be called 5.6-rc1. The -rc1 release is the signal that the time to
merge new features has passed, and that the time to stabilize the next
kernel has begun.
@@ -67,22 +67,23 @@
As fixes make their way into the mainline, the patch rate will slow over
time. Linus releases new -rc kernels about once a week; a normal series
will get up to somewhere between -rc6 and -rc9 before the kernel is
-considered to be sufficiently stable and the final 2.6.x release is made.
+considered to be sufficiently stable and the final release is made.
At that point the whole process starts over again.
-As an example, here is how the 4.16 development cycle went (all dates in
-2018):
+As an example, here is how the 5.4 development cycle went (all dates in
+2019):
============== ===============================
- January 28 4.15 stable release
- February 11 4.16-rc1, merge window closes
- February 18 4.16-rc2
- February 25 4.16-rc3
- March 4 4.16-rc4
- March 11 4.16-rc5
- March 18 4.16-rc6
- March 25 4.16-rc7
- April 1 4.16 stable release
+ September 15 5.3 stable release
+ September 30 5.4-rc1, merge window closes
+ October 6 5.4-rc2
+ October 13 5.4-rc3
+ October 20 5.4-rc4
+ October 27 5.4-rc5
+ November 3 5.4-rc6
+ November 10 5.4-rc7
+ November 17 5.4-rc8
+ November 24 5.4 stable release
============== ===============================
How do the developers decide when to close the development cycle and create
@@ -98,43 +99,44 @@
achieve; there are just too many variables in a project of this size.
There comes a point where delaying the final release just makes the problem
worse; the pile of changes waiting for the next merge window will grow
-larger, creating even more regressions the next time around. So most 4.x
+larger, creating even more regressions the next time around. So most 5.x
kernels go out with a handful of known regressions though, hopefully, none
of them are serious.
Once a stable release is made, its ongoing maintenance is passed off to the
-"stable team," currently consisting of Greg Kroah-Hartman. The stable team
-will release occasional updates to the stable release using the 4.x.y
-numbering scheme. To be considered for an update release, a patch must (1)
-fix a significant bug, and (2) already be merged into the mainline for the
-next development kernel. Kernels will typically receive stable updates for
-a little more than one development cycle past their initial release. So,
-for example, the 4.13 kernel's history looked like:
+"stable team," currently Greg Kroah-Hartman. The stable team will release
+occasional updates to the stable release using the 5.x.y numbering scheme.
+To be considered for an update release, a patch must (1) fix a significant
+bug, and (2) already be merged into the mainline for the next development
+kernel. Kernels will typically receive stable updates for a little more
+than one development cycle past their initial release. So, for example, the
+5.2 kernel's history looked like this (all dates in 2019):
============== ===============================
- September 3 4.13 stable release
- September 13 4.13.1
- September 20 4.13.2
- September 27 4.13.3
- October 5 4.13.4
- October 12 4.13.5
+ September 15 5.2 stable release
+ July 14 5.2.1
+ July 21 5.2.2
+ July 26 5.2.3
+ July 28 5.2.4
+ July 31 5.2.5
... ...
- November 24 4.13.16
+ October 11 5.2.21
============== ===============================
-4.13.16 was the final stable update of the 4.13 release.
+5.2.21 was the final stable update of the 5.2 release.
Some kernels are designated "long term" kernels; they will receive support
for a longer period. As of this writing, the current long term kernels
and their maintainers are:
- ====== ====================== ==============================
- 3.16 Ben Hutchings (very long-term stable kernel)
- 4.1 Sasha Levin
- 4.4 Greg Kroah-Hartman (very long-term stable kernel)
- 4.9 Greg Kroah-Hartman
- 4.14 Greg Kroah-Hartman
- ====== ====================== ==============================
+ ====== ================================ =======================
+ 3.16 Ben Hutchings (very long-term kernel)
+ 4.4 Greg Kroah-Hartman & Sasha Levin (very long-term kernel)
+ 4.9 Greg Kroah-Hartman & Sasha Levin
+ 4.14 Greg Kroah-Hartman & Sasha Levin
+ 4.19 Greg Kroah-Hartman & Sasha Levin
+ 5.4 Greg Kroah-Hartman & Sasha Levin
+ ====== ================================ =======================
The selection of a kernel for long-term support is purely a matter of a
maintainer having the need and the time to maintain that release. There
@@ -215,12 +217,12 @@
-------------------------------
There is exactly one person who can merge patches into the mainline kernel
-repository: Linus Torvalds. But, of the over 9,500 patches which went
-into the 2.6.38 kernel, only 112 (around 1.3%) were directly chosen by Linus
-himself. The kernel project has long since grown to a size where no single
-developer could possibly inspect and select every patch unassisted. The
-way the kernel developers have addressed this growth is through the use of
-a lieutenant system built around a chain of trust.
+repository: Linus Torvalds. But, for example, of the over 9,500 patches
+which went into the 2.6.38 kernel, only 112 (around 1.3%) were directly
+chosen by Linus himself. The kernel project has long since grown to a size
+where no single developer could possibly inspect and select every patch
+unassisted. The way the kernel developers have addressed this growth is
+through the use of a lieutenant system built around a chain of trust.
The kernel code base is logically broken down into a set of subsystems:
networking, specific architecture support, memory management, video
diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst
index edb296c..acb2f1b 100644
--- a/Documentation/process/coding-style.rst
+++ b/Documentation/process/coding-style.rst
@@ -284,9 +284,9 @@
4) Naming
---------
-C is a Spartan language, and so should your naming be. Unlike Modula-2
-and Pascal programmers, C programmers do not use cute names like
-ThisVariableIsATemporaryCounter. A C programmer would call that
+C is a Spartan language, and your naming conventions should follow suit.
+Unlike Modula-2 and Pascal programmers, C programmers do not use cute
+names like ThisVariableIsATemporaryCounter. A C programmer would call that
variable ``tmp``, which is much easier to write, and not the least more
difficult to understand.
@@ -300,9 +300,9 @@
``count_active_users()`` or similar, you should **not** call it ``cntusr()``.
Encoding the type of a function into the name (so-called Hungarian
-notation) is brain damaged - the compiler knows the types anyway and can
-check those, and it only confuses the programmer. No wonder MicroSoft
-makes buggy programs.
+notation) is asinine - the compiler knows the types anyway and can check
+those, and it only confuses the programmer. No wonder Microsoft makes buggy
+programs.
LOCAL variable names should be short, and to the point. If you have
some random integer loop counter, it should probably be called ``i``.
@@ -806,9 +806,9 @@
----------------------------
Kernel developers like to be seen as literate. Do mind the spelling
-of kernel messages to make a good impression. Do not use crippled
-words like ``dont``; use ``do not`` or ``don't`` instead. Make the messages
-concise, clear, and unambiguous.
+of kernel messages to make a good impression. Do not use incorrect
+contractions like ``dont``; use ``do not`` or ``don't`` instead. Make the
+messages concise, clear, and unambiguous.
Kernel messages do not have to be terminated with a period.
diff --git a/Documentation/process/deprecated.rst b/Documentation/process/deprecated.rst
index 179f2a5..652e2aa 100644
--- a/Documentation/process/deprecated.rst
+++ b/Documentation/process/deprecated.rst
@@ -29,6 +29,28 @@
be fully removed from the kernel, or added to this file to discourage
others from using them in the future.
+BUG() and BUG_ON()
+------------------
+Use WARN() and WARN_ON() instead, and handle the "impossible"
+error condition as gracefully as possible. While the BUG()-family
+of APIs were originally designed to act as an "impossible situation"
+assert and to kill a kernel thread "safely", they turn out to just be
+too risky. (e.g. "In what order do locks need to be released? Have
+various states been restored?") Very commonly, using BUG() will
+destabilize a system or entirely break it, which makes it impossible
+to debug or even get viable crash reports. Linus has `very strong
+<https://lore.kernel.org/lkml/CA+55aFy6jNLsywVYdGp83AMrXBo_P-pkjkphPGrO=82SPKCpLQ@mail.gmail.com/>`_
+feelings `about this
+<https://lore.kernel.org/lkml/CAHk-=whDHsbK3HTOpTF=ue_o04onRwTEaK_ZoJp_fjbqq4+=Jw@mail.gmail.com/>`_.
+
+Note that the WARN()-family should only be used for "expected to
+be unreachable" situations. If you want to warn about "reachable
+but undesirable" situations, please use the pr_warn()-family of
+functions. System owners may have set the *panic_on_warn* sysctl,
+to make sure their systems do not continue running in the face of
+"unreachable" conditions. (For example, see commits like `this one
+<https://git.kernel.org/linus/d4689846881d160a4d12a514e991a740bcb5d65a>`_.)
+
open-coded arithmetic in allocator arguments
--------------------------------------------
Dynamic size calculations (especially multiplication) should not be
@@ -63,51 +85,73 @@
header = kzalloc(struct_size(header, item, count), GFP_KERNEL);
-See :c:func:`array_size`, :c:func:`array3_size`, and :c:func:`struct_size`,
-for more details as well as the related :c:func:`check_add_overflow` and
-:c:func:`check_mul_overflow` family of functions.
+See array_size(), array3_size(), and struct_size(),
+for more details as well as the related check_add_overflow() and
+check_mul_overflow() family of functions.
simple_strtol(), simple_strtoll(), simple_strtoul(), simple_strtoull()
----------------------------------------------------------------------
-The :c:func:`simple_strtol`, :c:func:`simple_strtoll`,
-:c:func:`simple_strtoul`, and :c:func:`simple_strtoull` functions
+The simple_strtol(), simple_strtoll(),
+simple_strtoul(), and simple_strtoull() functions
explicitly ignore overflows, which may lead to unexpected results
-in callers. The respective :c:func:`kstrtol`, :c:func:`kstrtoll`,
-:c:func:`kstrtoul`, and :c:func:`kstrtoull` functions tend to be the
+in callers. The respective kstrtol(), kstrtoll(),
+kstrtoul(), and kstrtoull() functions tend to be the
correct replacements, though note that those require the string to be
NUL or newline terminated.
strcpy()
--------
-:c:func:`strcpy` performs no bounds checking on the destination
+strcpy() performs no bounds checking on the destination
buffer. This could result in linear overflows beyond the
end of the buffer, leading to all kinds of misbehaviors. While
`CONFIG_FORTIFY_SOURCE=y` and various compiler flags help reduce the
risk of using this function, there is no good reason to add new uses of
-this function. The safe replacement is :c:func:`strscpy`.
+this function. The safe replacement is strscpy().
strncpy() on NUL-terminated strings
-----------------------------------
-Use of :c:func:`strncpy` does not guarantee that the destination buffer
+Use of strncpy() does not guarantee that the destination buffer
will be NUL terminated. This can lead to various linear read overflows
and other misbehavior due to the missing termination. It also NUL-pads the
destination buffer if the source contents are shorter than the destination
buffer size, which may be a needless performance penalty for callers using
-only NUL-terminated strings. The safe replacement is :c:func:`strscpy`.
-(Users of :c:func:`strscpy` still needing NUL-padding will need an
-explicit :c:func:`memset` added.)
+only NUL-terminated strings. The safe replacement is strscpy().
+(Users of strscpy() still needing NUL-padding should instead
+use strscpy_pad().)
-If a caller is using non-NUL-terminated strings, :c:func:`strncpy()` can
+If a caller is using non-NUL-terminated strings, strncpy()() can
still be used, but destinations should be marked with the `__nonstring
<https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html>`_
attribute to avoid future compiler warnings.
strlcpy()
---------
-:c:func:`strlcpy` reads the entire source buffer first, possibly exceeding
+strlcpy() reads the entire source buffer first, possibly exceeding
the given limit of bytes to copy. This is inefficient and can lead to
linear read overflows if a source string is not NUL-terminated. The
-safe replacement is :c:func:`strscpy`.
+safe replacement is strscpy().
+
+%p format specifier
+-------------------
+Traditionally, using "%p" in format strings would lead to regular address
+exposure flaws in dmesg, proc, sysfs, etc. Instead of leaving these to
+be exploitable, all "%p" uses in the kernel are being printed as a hashed
+value, rendering them unusable for addressing. New uses of "%p" should not
+be added to the kernel. For text addresses, using "%pS" is likely better,
+as it produces the more useful symbol name instead. For nearly everything
+else, just do not add "%p" at all.
+
+Paraphrasing Linus's current `guidance <https://lore.kernel.org/lkml/CA+55aFwQEd_d40g4mUCSsVRZzrFPUJt74vc6PPpb675hYNXcKw@mail.gmail.com/>`_:
+
+- If the hashed "%p" value is pointless, ask yourself whether the pointer
+ itself is important. Maybe it should be removed entirely?
+- If you really think the true pointer value is important, why is some
+ system state or user privilege level considered "special"? If you think
+ you can justify it (in comments and commit log) well enough to stand
+ up to Linus's scrutiny, maybe you can use "%px", along with making sure
+ you have sensible permissions.
+
+And finally, know that a toggle for "%p" hashing will `not be accepted <https://lore.kernel.org/lkml/CA+55aFwieC1-nAs+NFq9RTwaR8ef9hWa4MjNBWL41F-8wM49eA@mail.gmail.com/>`_.
Variable Length Arrays (VLAs)
-----------------------------
@@ -122,27 +166,37 @@
Implicit switch case fall-through
---------------------------------
-The C language allows switch cases to "fall-through" when a "break" statement
-is missing at the end of a case. This, however, introduces ambiguity in the
-code, as it's not always clear if the missing break is intentional or a bug.
+The C language allows switch cases to fall through to the next case
+when a "break" statement is missing at the end of a case. This, however,
+introduces ambiguity in the code, as it's not always clear if the missing
+break is intentional or a bug. For example, it's not obvious just from
+looking at the code if `STATE_ONE` is intentionally designed to fall
+through into `STATE_TWO`::
+
+ switch (value) {
+ case STATE_ONE:
+ do_something();
+ case STATE_TWO:
+ do_other();
+ break;
+ default:
+ WARN("unknown state");
+ }
As there have been a long list of flaws `due to missing "break" statements
<https://cwe.mitre.org/data/definitions/484.html>`_, we no longer allow
-"implicit fall-through".
-
-In order to identify intentional fall-through cases, we have adopted a
-pseudo-keyword macro 'fallthrough' which expands to gcc's extension
-__attribute__((__fallthrough__)). `Statement Attributes
-<https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html>`_
-
-When the C17/C18 [[fallthrough]] syntax is more commonly supported by
+implicit fall-through. In order to identify intentional fall-through
+cases, we have adopted a pseudo-keyword macro "fallthrough" which
+expands to gcc's extension `__attribute__((__fallthrough__))
+<https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html>`_.
+(When the C17/C18 `[[fallthrough]]` syntax is more commonly supported by
C compilers, static analyzers, and IDEs, we can switch to using that syntax
-for the macro pseudo-keyword.
+for the macro pseudo-keyword.)
All switch/case blocks must end in one of:
- break;
- fallthrough;
- continue;
- goto <label>;
- return [expression];
+* break;
+* fallthrough;
+* continue;
+* goto <label>;
+* return [expression];
diff --git a/Documentation/process/email-clients.rst b/Documentation/process/email-clients.rst
index 5273d06..c9e4ce2 100644
--- a/Documentation/process/email-clients.rst
+++ b/Documentation/process/email-clients.rst
@@ -237,9 +237,9 @@
The Mutt docs have lots more information:
- http://dev.mutt.org/trac/wiki/UseCases/Gmail
+ https://gitlab.com/muttmua/mutt/-/wikis/UseCases/Gmail
- http://dev.mutt.org/doc/manual.html
+ http://www.mutt.org/doc/manual/
Pine (TUI)
**********
diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst
index 33edae6..a19d084 100644
--- a/Documentation/process/embargoed-hardware-issues.rst
+++ b/Documentation/process/embargoed-hardware-issues.rst
@@ -244,23 +244,23 @@
an involved disclosed party. The current ambassadors list:
============= ========================================================
- ARM
+ ARM Grant Likely <grant.likely@arm.com>
AMD Tom Lendacky <tom.lendacky@amd.com>
IBM
Intel Tony Luck <tony.luck@intel.com>
Qualcomm Trilok Soni <tsoni@codeaurora.org>
- Microsoft Sasha Levin <sashal@kernel.org>
+ Microsoft James Morris <jamorris@linux.microsoft.com>
VMware
Xen Andrew Cooper <andrew.cooper3@citrix.com>
- Canonical Tyler Hicks <tyhicks@canonical.com>
+ Canonical John Johansen <john.johansen@canonical.com>
Debian Ben Hutchings <ben@decadent.org.uk>
Oracle Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Red Hat Josh Poimboeuf <jpoimboe@redhat.com>
SUSE Jiri Kosina <jkosina@suse.cz>
- Amazon Peter Bowen <pzb@amzn.com>
+ Amazon
Google Kees Cook <keescook@chromium.org>
============= ========================================================
diff --git a/Documentation/process/howto.rst b/Documentation/process/howto.rst
index b6f5a37..70791e1 100644
--- a/Documentation/process/howto.rst
+++ b/Documentation/process/howto.rst
@@ -243,10 +243,10 @@
Mainline tree
~~~~~~~~~~~~~
-Mainline tree are maintained by Linus Torvalds, and can be found at
+The mainline tree is maintained by Linus Torvalds, and can be found at
https://kernel.org or in the repo. Its development process is as follows:
- - As soon as a new kernel is released a two weeks window is open,
+ - As soon as a new kernel is released a two week window is open,
during this period of time maintainers can submit big diffs to
Linus, usually the patches that have already been included in the
linux-next for a few weeks. The preferred way to submit big changes
@@ -281,8 +281,9 @@
Kernels with 3-part versions are -stable kernels. They contain
relatively small and critical fixes for security problems or significant
-regressions discovered in a given major mainline release, with the first
-2-part of version number are the same correspondingly.
+regressions discovered in a given major mainline release. Each release
+in a major stable series increments the third part of the version
+number, keeping the first two parts the same.
This is the recommended branch for users who want the most recent stable
kernel and are not interested in helping test development/experimental
@@ -359,10 +360,10 @@
One of the best ways to put into practice your hacking skills is by fixing
bugs reported by other people. Not only you will help to make the kernel
-more stable, you'll learn to fix real world problems and you will improve
-your skills, and other developers will be aware of your presence. Fixing
-bugs is one of the best ways to get merits among other developers, because
-not many people like wasting time fixing other people's bugs.
+more stable, but you'll also learn to fix real world problems and you will
+improve your skills, and other developers will be aware of your presence.
+Fixing bugs is one of the best ways to get merits among other developers,
+because not many people like wasting time fixing other people's bugs.
To work in the already reported bug reports, go to https://bugzilla.kernel.org.
diff --git a/Documentation/process/kernel-docs.rst b/Documentation/process/kernel-docs.rst
index 7a45a8e..9d6d0ac 100644
--- a/Documentation/process/kernel-docs.rst
+++ b/Documentation/process/kernel-docs.rst
@@ -313,7 +313,7 @@
:URL: http://www.linuxjournal.com/article.php?sid=2391
:Date: 1997
:Keywords: RAID, MD driver.
- :Description: Linux Journal Kernel Korner article. Here is its
+ :Description: Linux Journal Kernel Korner article.
:Abstract: *A description of the implementation of the RAID-1,
RAID-4 and RAID-5 personalities of the MD device driver in the
Linux kernel, providing users with high performance and reliable,
@@ -338,7 +338,7 @@
:Date: 1996
:Keywords: device driver, module, loading/unloading modules,
allocating resources.
- :Description: Linux Journal Kernel Korner article. Here is its
+ :Description: Linux Journal Kernel Korner article.
:Abstract: *This is the first of a series of four articles
co-authored by Alessandro Rubini and Georg Zezchwitz which present
a practical approach to writing Linux device drivers as kernel
@@ -354,7 +354,7 @@
:Keywords: character driver, init_module, clean_up module,
autodetection, mayor number, minor number, file operations,
open(), close().
- :Description: Linux Journal Kernel Korner article. Here is its
+ :Description: Linux Journal Kernel Korner article.
:Abstract: *This article, the second of four, introduces part of
the actual code to create custom module implementing a character
device driver. It describes the code for module initialization and
@@ -367,7 +367,7 @@
:Date: 1996
:Keywords: read(), write(), select(), ioctl(), blocking/non
blocking mode, interrupt handler.
- :Description: Linux Journal Kernel Korner article. Here is its
+ :Description: Linux Journal Kernel Korner article.
:Abstract: *This article, the third of four on writing character
device drivers, introduces concepts of reading, writing, and using
ioctl-calls*.
@@ -378,7 +378,7 @@
:URL: http://www.linuxjournal.com/article.php?sid=1222
:Date: 1996
:Keywords: interrupts, irqs, DMA, bottom halves, task queues.
- :Description: Linux Journal Kernel Korner article. Here is its
+ :Description: Linux Journal Kernel Korner article.
:Abstract: *This is the fourth in a series of articles about
writing character device drivers as loadable kernel modules. This
month, we further investigate the field of interrupt handling.
diff --git a/Documentation/process/management-style.rst b/Documentation/process/management-style.rst
index 186753f..dfbc69b 100644
--- a/Documentation/process/management-style.rst
+++ b/Documentation/process/management-style.rst
@@ -227,7 +227,7 @@
out of it.
Then make the developer who really screwed up (if you can find them) know
-**in_private** that they screwed up. Not just so they can avoid it in the
+**in private** that they screwed up. Not just so they can avoid it in the
future, but so that they know they owe you one. And, perhaps even more
importantly, they're also likely the person who can fix it. Because, let's
face it, it sure ain't you.
diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt
index 2a4be1c..537f047 100644
--- a/Documentation/scsi/scsi_mid_low_api.txt
+++ b/Documentation/scsi/scsi_mid_low_api.txt
@@ -299,7 +299,6 @@
scsi_host_alloc - return a new scsi_host instance whose refcount==1
scsi_host_get - increments Scsi_Host instance's refcount
scsi_host_put - decrements Scsi_Host instance's refcount (free if 0)
- scsi_partsize - parse partition table into cylinders, heads + sectors
scsi_register - create and register a scsi host adapter instance.
scsi_remove_device - detach and remove a SCSI device
scsi_remove_host - detach and remove all SCSI devices owned by host
@@ -473,26 +472,6 @@
/**
- * scsi_partsize - parse partition table into cylinders, heads + sectors
- * @buf: pointer to partition table
- * @capacity: size of (total) disk in 512 byte sectors
- * @cyls: outputs number of cylinders calculated via this pointer
- * @hds: outputs number of heads calculated via this pointer
- * @secs: outputs number of sectors calculated via this pointer
- *
- * Returns 0 on success, -1 on failure
- *
- * Might block: no
- *
- * Notes: Caller owns memory returned (free with kfree() )
- *
- * Defined in: drivers/scsi/scsicam.c
- **/
-int scsi_partsize(unsigned char *buf, unsigned long capacity,
- unsigned int *cyls, unsigned int *hds, unsigned int *secs)
-
-
-/**
* scsi_register - create and register a scsi host adapter instance.
* @sht: pointer to scsi host template
* @privsize: extra bytes to allocate in hostdata array (which is the
diff --git a/Documentation/security/siphash.rst b/Documentation/security/siphash.rst
index 9965821..4eba68c 100644
--- a/Documentation/security/siphash.rst
+++ b/Documentation/security/siphash.rst
@@ -128,8 +128,8 @@
transmitted out of the kernel. This is only remotely useful over `jhash` as a
means of mitigating hashtable flooding denial of service attacks.
-Generating a key
-================
+Generating a HalfSipHash key
+============================
Keys should always be generated from a cryptographically secure source of
random numbers, either using get_random_bytes or get_random_once:
@@ -139,8 +139,8 @@
If you're not deriving your key from here, you're doing it wrong.
-Using the functions
-===================
+Using the HalfSipHash functions
+===============================
There are two variants of the function, one that takes a list of integers, and
one that takes a buffer::
diff --git a/Documentation/sphinx/parallel-wrapper.sh b/Documentation/sphinx/parallel-wrapper.sh
index 7daf513..e54c44c 100644
--- a/Documentation/sphinx/parallel-wrapper.sh
+++ b/Documentation/sphinx/parallel-wrapper.sh
@@ -30,4 +30,4 @@
parallel="-j$parallel"
fi
-exec "$sphinx" "$parallel" "$@"
+exec "$sphinx" $parallel "$@"
diff --git a/Documentation/target/tcmu-design.rst b/Documentation/target/tcmu-design.rst
index a7b4267..e47047e 100644
--- a/Documentation/target/tcmu-design.rst
+++ b/Documentation/target/tcmu-design.rst
@@ -5,7 +5,7 @@
.. Contents:
- 1) TCM Userspace Design
+ 1) Design
a) Background
b) Benefits
c) Design constraints
@@ -23,8 +23,8 @@
3) A final note
-TCM Userspace Design
-====================
+Design
+======
TCM is another name for LIO, an in-kernel iSCSI target (server).
Existing TCM targets run in the kernel. TCMU (TCM in Userspace)
diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
index ed79b22..4a2ebe0 100644
--- a/Documentation/trace/events.rst
+++ b/Documentation/trace/events.rst
@@ -342,7 +342,8 @@
differences and the implementation isn't currently tied to it in any
way, so beware about making generalizations between the two.
-Note: Writing into trace_marker (See Documentation/trace/ftrace.rst)
+.. Note::
+ Writing into trace_marker (See Documentation/trace/ftrace.rst)
can also enable triggers that are written into
/sys/kernel/tracing/events/ftrace/print/trigger
@@ -569,14 +570,14 @@
In this method, the name of the event to create and an array defining
the fields is supplied to synth_event_create(). If successful, a
synthetic event with that name and fields will exist following that
-call. For example, to create a new "schedtest" synthetic event:
+call. For example, to create a new "schedtest" synthetic event::
ret = synth_event_create("schedtest", sched_fields,
ARRAY_SIZE(sched_fields), THIS_MODULE);
The sched_fields param in this example points to an array of struct
synth_field_desc, each of which describes an event field by type and
-name:
+name::
static struct synth_field_desc sched_fields[] = {
{ .type = "pid_t", .name = "next_pid_field" },
@@ -615,7 +616,7 @@
initialize a dynevent_cmd object using synth_event_cmd_init().
For example, to create a new "schedtest" synthetic event with two
-fields:
+fields::
struct dynevent_cmd cmd;
char *buf;
@@ -631,7 +632,7 @@
"u64", "ts_ns");
Alternatively, using an array of struct synth_field_desc fields
-containing the same information:
+containing the same information::
ret = synth_event_gen_cmd_array_start(&cmd, "schedtest", THIS_MODULE,
fields, n_fields);
@@ -640,7 +641,7 @@
populated with more fields. Fields are added one by one using
synth_event_add_field(), supplying the dynevent_cmd object, a field
type, and a field name. For example, to add a new int field named
-"intfield", the following call should be made:
+"intfield", the following call should be made::
ret = synth_event_add_field(&cmd, "int", "intfield");
@@ -649,7 +650,7 @@
A group of fields can also be added all at once using an array of
synth_field_desc with add_synth_fields(). For example, this would add
-just the first four sched_fields:
+just the first four sched_fields::
ret = synth_event_add_fields(&cmd, sched_fields, 4);
@@ -658,7 +659,7 @@
also automatically append a ';' to the string.
Once all the fields have been added, the event should be finalized and
-registered by calling the synth_event_gen_cmd_end() function:
+registered by calling the synth_event_gen_cmd_end() function::
ret = synth_event_gen_cmd_end(&cmd);
@@ -691,7 +692,7 @@
synthetic event field, and the number of values being passed.
So, to trace an event corresponding to the synthetic event definition
-above, code like the following could be used:
+above, code like the following could be used::
ret = synth_event_trace(create_synth_test, 7, /* number of values */
444, /* next_pid_field */
@@ -715,7 +716,7 @@
event field.
To trace an event corresponding to the synthetic event definition
-above, code like the following could be used:
+above, code like the following could be used::
u64 vals[7];
@@ -739,7 +740,7 @@
is needed. The trace_get_event_file() function can be used to get
it - it will find the file in the given trace instance (in this case
NULL since the top trace array is being used) while at the same time
-preventing the instance containing it from going away:
+preventing the instance containing it from going away::
schedtest_event_file = trace_get_event_file(NULL, "synthetic",
"schedtest");
@@ -751,31 +752,31 @@
can be used (which is not specific to synthetic events, so does need
the "synthetic" system name to be specified explicitly).
-To enable the event, pass 'true' to it:
+To enable the event, pass 'true' to it::
trace_array_set_clr_event(schedtest_event_file->tr,
"synthetic", "schedtest", true);
-To disable it pass false:
+To disable it pass false::
trace_array_set_clr_event(schedtest_event_file->tr,
"synthetic", "schedtest", false);
Finally, synth_event_trace_array() can be used to actually trace the
-event, which should be visible in the trace buffer afterwards:
+event, which should be visible in the trace buffer afterwards::
ret = synth_event_trace_array(schedtest_event_file, vals,
ARRAY_SIZE(vals));
To remove the synthetic event, the event should be disabled, and the
-trace instance should be 'put' back using trace_put_event_file():
+trace instance should be 'put' back using trace_put_event_file()::
trace_array_set_clr_event(schedtest_event_file->tr,
"synthetic", "schedtest", false);
trace_put_event_file(schedtest_event_file);
If those have been successful, synth_event_delete() can be called to
-remove the event:
+remove the event::
ret = synth_event_delete("schedtest");
@@ -784,7 +785,7 @@
To trace a synthetic using the piecewise method described above, the
synth_event_trace_start() function is used to 'open' the synthetic
-event trace:
+event trace::
struct synth_trace_state trace_state;
@@ -809,7 +810,7 @@
field is set, the 'cursor' points to the next field, which will be set
by the subsequent call, continuing until all the fields have been set
in order. The same sequence of calls as in the above examples using
-this method would be (without error-handling code):
+this method would be (without error-handling code)::
/* next_pid_field */
ret = synth_event_add_next_val(777, &trace_state);
@@ -837,7 +838,7 @@
the synth_event_trace_start(), along with the field name of the field
to set and the value to set it to. The same sequence of calls as in
the above examples using this method would be (without error-handling
-code):
+code)::
ret = synth_event_add_val("next_pid_field", 777, &trace_state);
ret = synth_event_add_val("next_comm_field", (u64)"silly putty",
@@ -855,7 +856,7 @@
Finally, the event won't be actually traced until it's 'closed',
which is done using synth_event_trace_end(), which takes only the
-struct synth_trace_state object used in the previous calls:
+struct synth_trace_state object used in the previous calls::
ret = synth_event_trace_end(&trace_state);
@@ -878,7 +879,7 @@
should create and initialize a dynevent_cmd object using
kprobe_event_cmd_init().
-For example, to create a new "schedtest" kprobe event with two fields:
+For example, to create a new "schedtest" kprobe event with two fields::
struct dynevent_cmd cmd;
char *buf;
@@ -900,18 +901,18 @@
populated with more fields. Fields can be added using
kprobe_event_add_fields(), supplying the dynevent_cmd object along
with a variable arg list of probe fields. For example, to add a
-couple additional fields, the following call could be made:
+couple additional fields, the following call could be made::
ret = kprobe_event_add_fields(&cmd, "flags=%cx", "mode=+4($stack)");
Once all the fields have been added, the event should be finalized and
registered by calling the kprobe_event_gen_cmd_end() or
kretprobe_event_gen_cmd_end() functions, depending on whether a kprobe
-or kretprobe command was started:
+or kretprobe command was started::
ret = kprobe_event_gen_cmd_end(&cmd);
-or
+or::
ret = kretprobe_event_gen_cmd_end(&cmd);
@@ -920,13 +921,13 @@
Similarly, a kretprobe event can be created using
kretprobe_event_gen_cmd_start() with a probe name and location and
-additional params such as $retval:
+additional params such as $retval::
ret = kretprobe_event_gen_cmd_start(&cmd, "gen_kretprobe_test",
"do_sys_open", "$retval");
Similar to the synthetic event case, code like the following can be
-used to enable the newly created kprobe event:
+used to enable the newly created kprobe event::
gen_kprobe_test = trace_get_event_file(NULL, "kprobes", "gen_kprobe_test");
@@ -934,7 +935,7 @@
"kprobes", "gen_kprobe_test", true);
Finally, also similar to synthetic events, the following code can be
-used to give the kprobe event file back and delete the event:
+used to give the kprobe event file back and delete the event::
trace_put_event_file(gen_kprobe_test);
@@ -963,7 +964,7 @@
The first step in building a new command string is to create and
initialize an instance of a dynevent_cmd. Here, for instance, we
-create a dynevent_cmd on the stack and initialize it:
+create a dynevent_cmd on the stack and initialize it::
struct dynevent_cmd cmd;
char *buf;
@@ -989,7 +990,7 @@
To add a single argument, define and initialize a struct dynevent_arg
or struct dynevent_arg_pair object. Here's an example of the simplest
possible arg addition, which is simply to append the given string as
-a whitespace-separated argument to the command:
+a whitespace-separated argument to the command::
struct dynevent_arg arg;
@@ -1007,7 +1008,7 @@
Here's another more complicated example using an 'arg pair', which is
used to create an argument that consists of a couple components added
together as a unit, for example, a 'type field_name;' arg or a simple
-expression arg e.g. 'flags=%cx':
+expression arg e.g. 'flags=%cx'::
struct dynevent_arg_pair arg_pair;
@@ -1031,7 +1032,7 @@
(until its length surpasses cmd->maxlen). When all the arguments have
been added and the command string is complete, the only thing left to
do is run the command, which happens by simply calling
-dynevent_create():
+dynevent_create()::
ret = dynevent_create(&cmd);
diff --git a/Documentation/translations/it_IT/networking/netdev-FAQ.rst b/Documentation/translations/it_IT/networking/netdev-FAQ.rst
index 8489ead..7e2456b 100644
--- a/Documentation/translations/it_IT/networking/netdev-FAQ.rst
+++ b/Documentation/translations/it_IT/networking/netdev-FAQ.rst
@@ -1,6 +1,6 @@
.. include:: ../disclaimer-ita.rst
-:Original: :ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`
+:Original: :ref:`Documentation/networking/netdev-FAQ.rst <netdev-FAQ>`
.. _it_netdev-FAQ:
diff --git a/Documentation/translations/it_IT/process/programming-language.rst b/Documentation/translations/it_IT/process/programming-language.rst
index f4b0063..c4fc9d3 100644
--- a/Documentation/translations/it_IT/process/programming-language.rst
+++ b/Documentation/translations/it_IT/process/programming-language.rst
@@ -8,26 +8,26 @@
Linguaggio di programmazione
============================
-Il kernel è scritto nel linguaggio di programmazione C [c-language]_.
-Più precisamente, il kernel viene compilato con ``gcc`` [gcc]_ usando
-l'opzione ``-std=gnu89`` [gcc-c-dialect-options]_: il dialetto GNU
+Il kernel è scritto nel linguaggio di programmazione C [it-c-language]_.
+Più precisamente, il kernel viene compilato con ``gcc`` [it-gcc]_ usando
+l'opzione ``-std=gnu89`` [it-gcc-c-dialect-options]_: il dialetto GNU
dello standard ISO C90 (con l'aggiunta di alcune funzionalità da C99)
-Questo dialetto contiene diverse estensioni al linguaggio [gnu-extensions]_,
+Questo dialetto contiene diverse estensioni al linguaggio [it-gnu-extensions]_,
e molte di queste vengono usate sistematicamente dal kernel.
Il kernel offre un certo livello di supporto per la compilazione con ``clang``
-[clang]_ e ``icc`` [icc]_ su diverse architetture, tuttavia in questo momento
+[it-clang]_ e ``icc`` [it-icc]_ su diverse architetture, tuttavia in questo momento
il supporto non è completo e richiede delle patch aggiuntive.
Attributi
---------
Una delle estensioni più comuni e usate nel kernel sono gli attributi
-[gcc-attribute-syntax]_. Gli attributi permettono di aggiungere una semantica,
+[it-gcc-attribute-syntax]_. Gli attributi permettono di aggiungere una semantica,
definita dell'implementazione, alle entità del linguaggio (come le variabili,
le funzioni o i tipi) senza dover fare importanti modifiche sintattiche al
-linguaggio stesso (come l'aggiunta di nuove parole chiave) [n2049]_.
+linguaggio stesso (come l'aggiunta di nuove parole chiave) [it-n2049]_.
In alcuni casi, gli attributi sono opzionali (ovvero un compilatore che non
dovesse supportarli dovrebbe produrre comunque codice corretto, anche se
@@ -41,11 +41,11 @@
Per maggiori informazioni consultate il file d'intestazione
``include/linux/compiler_attributes.h``.
-.. [c-language] http://www.open-std.org/jtc1/sc22/wg14/www/standards
-.. [gcc] https://gcc.gnu.org
-.. [clang] https://clang.llvm.org
-.. [icc] https://software.intel.com/en-us/c-compilers
-.. [gcc-c-dialect-options] https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html
-.. [gnu-extensions] https://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html
-.. [gcc-attribute-syntax] https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html
-.. [n2049] http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2049.pdf
+.. [it-c-language] http://www.open-std.org/jtc1/sc22/wg14/www/standards
+.. [it-gcc] https://gcc.gnu.org
+.. [it-clang] https://clang.llvm.org
+.. [it-icc] https://software.intel.com/en-us/c-compilers
+.. [it-gcc-c-dialect-options] https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html
+.. [it-gnu-extensions] https://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html
+.. [it-gcc-attribute-syntax] https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html
+.. [it-n2049] http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2049.pdf
diff --git a/Documentation/translations/zh_CN/filesystems/index.rst b/Documentation/translations/zh_CN/filesystems/index.rst
new file mode 100644
index 0000000..14f155e
--- /dev/null
+++ b/Documentation/translations/zh_CN/filesystems/index.rst
@@ -0,0 +1,27 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: :ref:`Documentation/filesystems/index.rst <filesystems_index>`
+:Translator: Wang Wenhu <wenhu.wang@vivo.com>
+
+.. _cn_filesystems_index:
+
+========================
+Linux Kernel中的文件系统
+========================
+
+这份正在开发的手册或许在未来某个辉煌的日子里以易懂的形式将Linux虚拟\
+文件系统(VFS)层以及基于其上的各种文件系统如何工作呈现给大家。当前\
+可以看到下面的内容。
+
+文件系统
+========
+
+文件系统实现文档。
+
+.. toctree::
+ :maxdepth: 2
+
+ virtiofs
+
diff --git a/Documentation/translations/zh_CN/filesystems/virtiofs.rst b/Documentation/translations/zh_CN/filesystems/virtiofs.rst
new file mode 100644
index 0000000..09bc9e0
--- /dev/null
+++ b/Documentation/translations/zh_CN/filesystems/virtiofs.rst
@@ -0,0 +1,58 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: :ref:`Documentation/filesystems/virtiofs.rst <virtiofs_index>`
+
+译者
+::
+
+ 中文版维护者: 王文虎 Wang Wenhu <wenhu.wang@vivo.com>
+ 中文版翻译者: 王文虎 Wang Wenhu <wenhu.wang@vivo.com>
+ 中文版校译者: 王文虎 Wang Wenhu <wenhu.wang@vivo.com>
+
+===========================================
+virtiofs: virtio-fs 主机<->客机共享文件系统
+===========================================
+
+- Copyright (C) 2020 Vivo Communication Technology Co. Ltd.
+
+介绍
+====
+Linux的virtiofs文件系统实现了一个半虚拟化VIRTIO类型“virtio-fs”设备的驱动,通过该\
+类型设备实现客机<->主机文件系统共享。它允许客机挂载一个已经导出到主机的目录。
+
+客机通常需要访问主机或者远程系统上的文件。使用场景包括:在新客机安装时让文件对其\
+可见;从主机上的根文件系统启动;对无状态或临时客机提供持久存储和在客机之间共享目录。
+
+尽管在某些任务可能通过使用已有的网络文件系统完成,但是却需要非常难以自动化的配置\
+步骤,且将存储网络暴露给客机。而virtio-fs设备通过提供不经过网络的文件系统访问文件\
+的设计方式解决了这些问题。
+
+另外,virto-fs设备发挥了主客机共存的优点提高了性能,并且提供了网络文件系统所不具备
+的一些语义功能。
+
+用法
+====
+以``myfs``标签将文件系统挂载到``/mnt``:
+
+.. code-block:: sh
+
+ guest# mount -t virtiofs myfs /mnt
+
+请查阅 https://virtio-fs.gitlab.io/ 了解配置QEMU和virtiofsd守护程序的详细信息。
+
+内幕
+====
+由于virtio-fs设备将FUSE协议用于文件系统请求,因此Linux的virtiofs文件系统与FUSE文\
+件系统客户端紧密集成在一起。客机充当FUSE客户端而主机充当FUSE服务器,内核与用户空\
+间之间的/dev/fuse接口由virtio-fs设备接口代替。
+
+FUSE请求被置于虚拟队列中由主机处理。主机填充缓冲区中的响应部分,而客机处理请求的完成部分。
+
+将/dev/fuse映射到虚拟队列需要解决/dev/fuse和虚拟队列之间语义上的差异。每次读取\
+/dev/fuse设备时,FUSE客户端都可以选择要传输的请求,从而可以使某些请求优先于其他\
+请求。虚拟队列有其队列语义,无法更改已入队请求的顺序。在虚拟队列已满的情况下尤
+其关键,因为此时不可能加入高优先级的请求。为了解决此差异,virtio-fs设备采用“hiprio”\
+(高优先级)虚拟队列,专门用于有别于普通请求的高优先级请求。
+
diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst
index d3165535..76850a5 100644
--- a/Documentation/translations/zh_CN/index.rst
+++ b/Documentation/translations/zh_CN/index.rst
@@ -14,6 +14,7 @@
:maxdepth: 2
process/index
+ filesystems/index
目录和表格
----------
diff --git a/Documentation/translations/zh_CN/io_ordering.txt b/Documentation/translations/zh_CN/io_ordering.txt
index 1f8127b..7bb3086 100644
--- a/Documentation/translations/zh_CN/io_ordering.txt
+++ b/Documentation/translations/zh_CN/io_ordering.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/io_ordering.txt
+Chinese translated version of Documentation/driver-api/io_ordering.rst
If you have any comment or update to the content, please contact the
original document maintainer directly. However, if you have a problem
@@ -8,7 +8,7 @@
Chinese maintainer: Lin Yongting <linyongting@gmail.com>
---------------------------------------------------------------------
-Documentation/io_ordering.txt 的中文翻译
+Documentation/driver-api/io_ordering.rst 的中文翻译
如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/translations/zh_CN/process/5.Posting.rst b/Documentation/translations/zh_CN/process/5.Posting.rst
index 41aba21..9ff9945 100644
--- a/Documentation/translations/zh_CN/process/5.Posting.rst
+++ b/Documentation/translations/zh_CN/process/5.Posting.rst
@@ -5,7 +5,7 @@
.. _cn_development_posting:
-发送补丁
+发布补丁
========
迟早,当您的工作准备好提交给社区进行审查,并最终包含到主线内核中时。不出所料,
diff --git a/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst b/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst
index b93f1af..88273eb 100644
--- a/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst
+++ b/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst
@@ -183,7 +183,7 @@
VMware
Xen Andrew Cooper <andrew.cooper3@citrix.com>
- Canonical Tyler Hicks <tyhicks@canonical.com>
+ Canonical John Johansen <john.johansen@canonical.com>
Debian Ben Hutchings <ben@decadent.org.uk>
Oracle Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Red Hat Josh Poimboeuf <jpoimboe@redhat.com>
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 2e91370..f759eda 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -266,7 +266,6 @@
'o' 01-A1 `linux/dvb/*.h` DVB
'p' 00-0F linux/phantom.h conflict! (OpenHaptics needs this)
'p' 00-1F linux/rtc.h conflict!
-'p' 00-3F linux/mc146818rtc.h conflict!
'p' 40-7F linux/nvram.h
'p' 80-9F linux/ppdev.h user-space parport
<mailto:tim@cyberelk.net>
diff --git a/Documentation/virtual/guest-halt-polling.txt b/Documentation/virt/guest-halt-polling.rst
similarity index 90%
rename from Documentation/virtual/guest-halt-polling.txt
rename to Documentation/virt/guest-halt-polling.rst
index b3a2a29..b4e7479 100644
--- a/Documentation/virtual/guest-halt-polling.txt
+++ b/Documentation/virt/guest-halt-polling.rst
@@ -1,9 +1,11 @@
+==================
Guest halt polling
==================
The cpuidle_haltpoll driver, with the haltpoll governor, allows
the guest vcpus to poll for a specified amount of time before
halting.
+
This provides the following benefits to host side polling:
1) The POLL flag is set while polling is performed, which allows
@@ -29,18 +31,21 @@
The haltpoll governor has 5 tunable module parameters:
1) guest_halt_poll_ns:
+
Maximum amount of time, in nanoseconds, that polling is
performed before halting.
Default: 200000
2) guest_halt_poll_shrink:
+
Division factor used to shrink per-cpu guest_halt_poll_ns when
wakeup event occurs after the global guest_halt_poll_ns.
Default: 2
3) guest_halt_poll_grow:
+
Multiplication factor used to grow per-cpu guest_halt_poll_ns
when event occurs after per-cpu guest_halt_poll_ns
but before global guest_halt_poll_ns.
@@ -48,6 +53,7 @@
Default: 2
4) guest_halt_poll_grow_start:
+
The per-cpu guest_halt_poll_ns eventually reaches zero
in case of an idle system. This value sets the initial
per-cpu guest_halt_poll_ns when growing. This can
@@ -66,7 +72,7 @@
Default: Y
-The module parameters can be set from the debugfs files in:
+The module parameters can be set from the debugfs files in::
/sys/module/haltpoll/parameters/
@@ -74,5 +80,5 @@
=============
- Care should be taken when setting the guest_halt_poll_ns parameter as a
-large value has the potential to drive the cpu usage to 100% on a machine which
-would be almost entirely idle otherwise.
+ large value has the potential to drive the cpu usage to 100% on a machine
+ which would be almost entirely idle otherwise.
diff --git a/Documentation/virt/index.rst b/Documentation/virt/index.rst
index 062ffb5..de1ab81 100644
--- a/Documentation/virt/index.rst
+++ b/Documentation/virt/index.rst
@@ -8,7 +8,9 @@
:maxdepth: 2
kvm/index
+ uml/user_mode_linux
paravirt_ops
+ guest-halt-polling
.. only:: html and subproject
diff --git a/Documentation/virt/kvm/amd-memory-encryption.rst b/Documentation/virt/kvm/amd-memory-encryption.rst
index d18c97b..c3129b9 100644
--- a/Documentation/virt/kvm/amd-memory-encryption.rst
+++ b/Documentation/virt/kvm/amd-memory-encryption.rst
@@ -53,6 +53,29 @@
encrypting bootstrap code, snapshot, migrating and debugging the guest. For more
information, see the SEV Key Management spec [api-spec]_
+The main ioctl to access SEV is KVM_MEM_ENCRYPT_OP. If the argument
+to KVM_MEM_ENCRYPT_OP is NULL, the ioctl returns 0 if SEV is enabled
+and ``ENOTTY` if it is disabled (on some older versions of Linux,
+the ioctl runs normally even with a NULL argument, and therefore will
+likely return ``EFAULT``). If non-NULL, the argument to KVM_MEM_ENCRYPT_OP
+must be a struct kvm_sev_cmd::
+
+ struct kvm_sev_cmd {
+ __u32 id;
+ __u64 data;
+ __u32 error;
+ __u32 sev_fd;
+ };
+
+
+The ``id`` field contains the subcommand, and the ``data`` field points to
+another struct containing arguments specific to command. The ``sev_fd``
+should point to a file descriptor that is opened on the ``/dev/sev``
+device, if needed (see individual commands).
+
+On output, ``error`` is zero on success, or an error code. Error codes
+are defined in ``<linux/psp-dev.h>`.
+
KVM implements the following commands to support common lifecycle events of SEV
guests, such as launching, running, snapshotting, migrating and decommissioning.
@@ -90,6 +113,8 @@
On success, the 'handle' field contains a new handle and on error, a negative value.
+KVM_SEV_LAUNCH_START requires the ``sev_fd`` field to be valid.
+
For more details, see SEV spec Section 6.2.
3. KVM_SEV_LAUNCH_UPDATE_DATA
diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.rst
similarity index 71%
rename from Documentation/virt/kvm/api.txt
rename to Documentation/virt/kvm/api.rst
index c6e1ce5..ebd383f 100644
--- a/Documentation/virt/kvm/api.txt
+++ b/Documentation/virt/kvm/api.rst
@@ -1,8 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================================================
The Definitive KVM (Kernel-based Virtual Machine) API Documentation
===================================================================
1. General description
-----------------------
+======================
The kvm API is a set of ioctls that are issued to control various aspects
of a virtual machine. The ioctls belong to the following classes:
@@ -33,7 +36,7 @@
was used to create the VM.
2. File descriptors
--------------------
+===================
The kvm API is centered around file descriptors. An initial
open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
@@ -70,7 +73,7 @@
3. Extensions
--------------
+=============
As of Linux 2.6.22, the KVM ABI has been stabilized: no backward
incompatible change are allowed. However, there is an extension
@@ -84,13 +87,14 @@
4. API description
-------------------
+==================
This section describes ioctls that can be used to control kvm guests.
For each ioctl, the following information is provided along with a
description:
- Capability: which KVM extension provides this ioctl. Can be 'basic',
+ Capability:
+ which KVM extension provides this ioctl. Can be 'basic',
which means that is will be provided by any kernel that supports
API version 12 (see section 4.1), a KVM_CAP_xyz constant, which
means availability needs to be checked with KVM_CHECK_EXTENSION
@@ -99,24 +103,29 @@
availability: for kernels that don't support the ioctl,
the ioctl returns -ENOTTY.
- Architectures: which instruction set architectures provide this ioctl.
+ Architectures:
+ which instruction set architectures provide this ioctl.
x86 includes both i386 and x86_64.
- Type: system, vm, or vcpu.
+ Type:
+ system, vm, or vcpu.
- Parameters: what parameters are accepted by the ioctl.
+ Parameters:
+ what parameters are accepted by the ioctl.
- Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL)
+ Returns:
+ the return value. General error numbers (EBADF, ENOMEM, EINVAL)
are not detailed, but errors with specific meanings are.
4.1 KVM_GET_API_VERSION
+-----------------------
-Capability: basic
-Architectures: all
-Type: system ioctl
-Parameters: none
-Returns: the constant KVM_API_VERSION (=12)
+:Capability: basic
+:Architectures: all
+:Type: system ioctl
+:Parameters: none
+:Returns: the constant KVM_API_VERSION (=12)
This identifies the API version as the stable kvm API. It is not
expected that this number will change. However, Linux 2.6.20 and
@@ -127,12 +136,13 @@
4.2 KVM_CREATE_VM
+-----------------
-Capability: basic
-Architectures: all
-Type: system ioctl
-Parameters: machine type identifier (KVM_VM_*)
-Returns: a VM fd that can be used to control the new virtual machine.
+:Capability: basic
+:Architectures: all
+:Type: system ioctl
+:Parameters: machine type identifier (KVM_VM_*)
+:Returns: a VM fd that can be used to control the new virtual machine.
The new VM has no virtual cpus and no memory.
You probably want to use 0 as machine type.
@@ -155,17 +165,17 @@
address used by the VM. The IPA_Bits is encoded in bits[7-0] of the
machine type identifier.
-e.g, to configure a guest to use 48bit physical address size :
+e.g, to configure a guest to use 48bit physical address size::
vm_fd = ioctl(dev_fd, KVM_CREATE_VM, KVM_VM_TYPE_ARM_IPA_SIZE(48));
-The requested size (IPA_Bits) must be :
- 0 - Implies default size, 40bits (for backward compatibility)
+The requested size (IPA_Bits) must be:
- or
-
- N - Implies N bits, where N is a positive integer such that,
+ == =========================================================
+ 0 Implies default size, 40bits (for backward compatibility)
+ N Implies N bits, where N is a positive integer such that,
32 <= N <= Host_IPA_Limit
+ == =========================================================
Host_IPA_Limit is the maximum possible value for IPA_Bits on the host and
is dependent on the CPU capability and the kernel configuration. The limit can
@@ -179,21 +189,28 @@
4.3 KVM_GET_MSR_INDEX_LIST, KVM_GET_MSR_FEATURE_INDEX_LIST
+----------------------------------------------------------
-Capability: basic, KVM_CAP_GET_MSR_FEATURES for KVM_GET_MSR_FEATURE_INDEX_LIST
-Architectures: x86
-Type: system ioctl
-Parameters: struct kvm_msr_list (in/out)
-Returns: 0 on success; -1 on error
+:Capability: basic, KVM_CAP_GET_MSR_FEATURES for KVM_GET_MSR_FEATURE_INDEX_LIST
+:Architectures: x86
+:Type: system ioctl
+:Parameters: struct kvm_msr_list (in/out)
+:Returns: 0 on success; -1 on error
+
Errors:
- EFAULT: the msr index list cannot be read from or written to
- E2BIG: the msr index list is to be to fit in the array specified by
- the user.
-struct kvm_msr_list {
+ ====== ============================================================
+ EFAULT the msr index list cannot be read from or written to
+ E2BIG the msr index list is to be to fit in the array specified by
+ the user.
+ ====== ============================================================
+
+::
+
+ struct kvm_msr_list {
__u32 nmsrs; /* number of msrs in entries */
__u32 indices[0];
-};
+ };
The user fills in the size of the indices array in nmsrs, and in return
kvm adjusts nmsrs to reflect the actual number of msrs and fills in the
@@ -214,12 +231,13 @@
4.4 KVM_CHECK_EXTENSION
+-----------------------
-Capability: basic, KVM_CAP_CHECK_EXTENSION_VM for vm ioctl
-Architectures: all
-Type: system ioctl, vm ioctl
-Parameters: extension identifier (KVM_CAP_*)
-Returns: 0 if unsupported; 1 (or some other positive integer) if supported
+:Capability: basic, KVM_CAP_CHECK_EXTENSION_VM for vm ioctl
+:Architectures: all
+:Type: system ioctl, vm ioctl
+:Parameters: extension identifier (KVM_CAP_*)
+:Returns: 0 if unsupported; 1 (or some other positive integer) if supported
The API allows the application to query about extensions to the core
kvm API. Userspace passes an extension identifier (an integer) and
@@ -232,12 +250,13 @@
with KVM_CAP_CHECK_EXTENSION_VM on the vm fd)
4.5 KVM_GET_VCPU_MMAP_SIZE
+--------------------------
-Capability: basic
-Architectures: all
-Type: system ioctl
-Parameters: none
-Returns: size of vcpu mmap area, in bytes
+:Capability: basic
+:Architectures: all
+:Type: system ioctl
+:Parameters: none
+:Returns: size of vcpu mmap area, in bytes
The KVM_RUN ioctl (cf.) communicates with userspace via a shared
memory region. This ioctl returns the size of that region. See the
@@ -245,23 +264,25 @@
4.6 KVM_SET_MEMORY_REGION
+-------------------------
-Capability: basic
-Architectures: all
-Type: vm ioctl
-Parameters: struct kvm_memory_region (in)
-Returns: 0 on success, -1 on error
+:Capability: basic
+:Architectures: all
+:Type: vm ioctl
+:Parameters: struct kvm_memory_region (in)
+:Returns: 0 on success, -1 on error
This ioctl is obsolete and has been removed.
4.7 KVM_CREATE_VCPU
+-------------------
-Capability: basic
-Architectures: all
-Type: vm ioctl
-Parameters: vcpu id (apic id on x86)
-Returns: vcpu fd on success, -1 on error
+:Capability: basic
+:Architectures: all
+:Type: vm ioctl
+:Parameters: vcpu id (apic id on x86)
+:Returns: vcpu fd on success, -1 on error
This API adds a vcpu to a virtual machine. No more than max_vcpus may be added.
The vcpu id is an integer in the range [0, max_vcpu_id).
@@ -302,22 +323,25 @@
4.8 KVM_GET_DIRTY_LOG (vm ioctl)
+--------------------------------
-Capability: basic
-Architectures: all
-Type: vm ioctl
-Parameters: struct kvm_dirty_log (in/out)
-Returns: 0 on success, -1 on error
+:Capability: basic
+:Architectures: all
+:Type: vm ioctl
+:Parameters: struct kvm_dirty_log (in/out)
+:Returns: 0 on success, -1 on error
-/* for KVM_GET_DIRTY_LOG */
-struct kvm_dirty_log {
+::
+
+ /* for KVM_GET_DIRTY_LOG */
+ struct kvm_dirty_log {
__u32 slot;
__u32 padding;
union {
void __user *dirty_bitmap; /* one bit per page */
__u64 padding;
};
-};
+ };
Given a memory slot, return a bitmap containing any pages dirtied
since the last call to this ioctl. Bit 0 is the first page in the
@@ -334,25 +358,31 @@
see the description of the capability.
4.9 KVM_SET_MEMORY_ALIAS
+------------------------
-Capability: basic
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_memory_alias (in)
-Returns: 0 (success), -1 (error)
+:Capability: basic
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_memory_alias (in)
+:Returns: 0 (success), -1 (error)
This ioctl is obsolete and has been removed.
4.10 KVM_RUN
+------------
-Capability: basic
-Architectures: all
-Type: vcpu ioctl
-Parameters: none
-Returns: 0 on success, -1 on error
+:Capability: basic
+:Architectures: all
+:Type: vcpu ioctl
+:Parameters: none
+:Returns: 0 on success, -1 on error
+
Errors:
- EINTR: an unmasked signal is pending
+
+ ===== =============================
+ EINTR an unmasked signal is pending
+ ===== =============================
This ioctl is used to run a guest virtual cpu. While there are no
explicit parameters, there is an implicit parameter block that can be
@@ -362,42 +392,46 @@
4.11 KVM_GET_REGS
+-----------------
-Capability: basic
-Architectures: all except ARM, arm64
-Type: vcpu ioctl
-Parameters: struct kvm_regs (out)
-Returns: 0 on success, -1 on error
+:Capability: basic
+:Architectures: all except ARM, arm64
+:Type: vcpu ioctl
+:Parameters: struct kvm_regs (out)
+:Returns: 0 on success, -1 on error
Reads the general purpose registers from the vcpu.
-/* x86 */
-struct kvm_regs {
+::
+
+ /* x86 */
+ struct kvm_regs {
/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
__u64 rax, rbx, rcx, rdx;
__u64 rsi, rdi, rsp, rbp;
__u64 r8, r9, r10, r11;
__u64 r12, r13, r14, r15;
__u64 rip, rflags;
-};
+ };
-/* mips */
-struct kvm_regs {
+ /* mips */
+ struct kvm_regs {
/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
__u64 gpr[32];
__u64 hi;
__u64 lo;
__u64 pc;
-};
+ };
4.12 KVM_SET_REGS
+-----------------
-Capability: basic
-Architectures: all except ARM, arm64
-Type: vcpu ioctl
-Parameters: struct kvm_regs (in)
-Returns: 0 on success, -1 on error
+:Capability: basic
+:Architectures: all except ARM, arm64
+:Type: vcpu ioctl
+:Parameters: struct kvm_regs (in)
+:Returns: 0 on success, -1 on error
Writes the general purpose registers into the vcpu.
@@ -405,17 +439,20 @@
4.13 KVM_GET_SREGS
+------------------
-Capability: basic
-Architectures: x86, ppc
-Type: vcpu ioctl
-Parameters: struct kvm_sregs (out)
-Returns: 0 on success, -1 on error
+:Capability: basic
+:Architectures: x86, ppc
+:Type: vcpu ioctl
+:Parameters: struct kvm_sregs (out)
+:Returns: 0 on success, -1 on error
Reads special registers from the vcpu.
-/* x86 */
-struct kvm_sregs {
+::
+
+ /* x86 */
+ struct kvm_sregs {
struct kvm_segment cs, ds, es, fs, gs, ss;
struct kvm_segment tr, ldt;
struct kvm_dtable gdt, idt;
@@ -423,9 +460,9 @@
__u64 efer;
__u64 apic_base;
__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
-};
+ };
-/* ppc -- see arch/powerpc/include/uapi/asm/kvm.h */
+ /* ppc -- see arch/powerpc/include/uapi/asm/kvm.h */
interrupt_bitmap is a bitmap of pending external interrupts. At most
one bit may be set. This interrupt has been acknowledged by the APIC
@@ -433,29 +470,33 @@
4.14 KVM_SET_SREGS
+------------------
-Capability: basic
-Architectures: x86, ppc
-Type: vcpu ioctl
-Parameters: struct kvm_sregs (in)
-Returns: 0 on success, -1 on error
+:Capability: basic
+:Architectures: x86, ppc
+:Type: vcpu ioctl
+:Parameters: struct kvm_sregs (in)
+:Returns: 0 on success, -1 on error
Writes special registers into the vcpu. See KVM_GET_SREGS for the
data structures.
4.15 KVM_TRANSLATE
+------------------
-Capability: basic
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_translation (in/out)
-Returns: 0 on success, -1 on error
+:Capability: basic
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_translation (in/out)
+:Returns: 0 on success, -1 on error
Translates a virtual address according to the vcpu's current address
translation mode.
-struct kvm_translation {
+::
+
+ struct kvm_translation {
/* in */
__u64 linear_address;
@@ -465,59 +506,68 @@
__u8 writeable;
__u8 usermode;
__u8 pad[5];
-};
+ };
4.16 KVM_INTERRUPT
+------------------
-Capability: basic
-Architectures: x86, ppc, mips
-Type: vcpu ioctl
-Parameters: struct kvm_interrupt (in)
-Returns: 0 on success, negative on failure.
+:Capability: basic
+:Architectures: x86, ppc, mips
+:Type: vcpu ioctl
+:Parameters: struct kvm_interrupt (in)
+:Returns: 0 on success, negative on failure.
Queues a hardware interrupt vector to be injected.
-/* for KVM_INTERRUPT */
-struct kvm_interrupt {
+::
+
+ /* for KVM_INTERRUPT */
+ struct kvm_interrupt {
/* in */
__u32 irq;
-};
+ };
X86:
+^^^^
-Returns: 0 on success,
- -EEXIST if an interrupt is already enqueued
- -EINVAL the the irq number is invalid
- -ENXIO if the PIC is in the kernel
- -EFAULT if the pointer is invalid
+:Returns:
+
+ ========= ===================================
+ 0 on success,
+ -EEXIST if an interrupt is already enqueued
+ -EINVAL the the irq number is invalid
+ -ENXIO if the PIC is in the kernel
+ -EFAULT if the pointer is invalid
+ ========= ===================================
Note 'irq' is an interrupt vector, not an interrupt pin or line. This
ioctl is useful if the in-kernel PIC is not used.
PPC:
+^^^^
Queues an external interrupt to be injected. This ioctl is overleaded
with 3 different irq values:
a) KVM_INTERRUPT_SET
- This injects an edge type external interrupt into the guest once it's ready
- to receive interrupts. When injected, the interrupt is done.
+ This injects an edge type external interrupt into the guest once it's ready
+ to receive interrupts. When injected, the interrupt is done.
b) KVM_INTERRUPT_UNSET
- This unsets any pending interrupt.
+ This unsets any pending interrupt.
- Only available with KVM_CAP_PPC_UNSET_IRQ.
+ Only available with KVM_CAP_PPC_UNSET_IRQ.
c) KVM_INTERRUPT_SET_LEVEL
- This injects a level type external interrupt into the guest context. The
- interrupt stays pending until a specific ioctl with KVM_INTERRUPT_UNSET
- is triggered.
+ This injects a level type external interrupt into the guest context. The
+ interrupt stays pending until a specific ioctl with KVM_INTERRUPT_UNSET
+ is triggered.
- Only available with KVM_CAP_PPC_IRQ_LEVEL.
+ Only available with KVM_CAP_PPC_IRQ_LEVEL.
Note that any value for 'irq' other than the ones stated above is invalid
and incurs unexpected behavior.
@@ -525,6 +575,7 @@
This is an asynchronous vcpu ioctl and can be invoked from any thread.
MIPS:
+^^^^^
Queues an external interrupt to be injected into the virtual CPU. A negative
interrupt number dequeues the interrupt.
@@ -533,24 +584,26 @@
4.17 KVM_DEBUG_GUEST
+--------------------
-Capability: basic
-Architectures: none
-Type: vcpu ioctl
-Parameters: none)
-Returns: -1 on error
+:Capability: basic
+:Architectures: none
+:Type: vcpu ioctl
+:Parameters: none)
+:Returns: -1 on error
Support for this has been removed. Use KVM_SET_GUEST_DEBUG instead.
4.18 KVM_GET_MSRS
+-----------------
-Capability: basic (vcpu), KVM_CAP_GET_MSR_FEATURES (system)
-Architectures: x86
-Type: system ioctl, vcpu ioctl
-Parameters: struct kvm_msrs (in/out)
-Returns: number of msrs successfully returned;
- -1 on error
+:Capability: basic (vcpu), KVM_CAP_GET_MSR_FEATURES (system)
+:Architectures: x86
+:Type: system ioctl, vcpu ioctl
+:Parameters: struct kvm_msrs (in/out)
+:Returns: number of msrs successfully returned;
+ -1 on error
When used as a system ioctl:
Reads the values of MSR-based features that are available for the VM. This
@@ -562,18 +615,20 @@
Reads model-specific registers from the vcpu. Supported msr indices can
be obtained using KVM_GET_MSR_INDEX_LIST in a system ioctl.
-struct kvm_msrs {
+::
+
+ struct kvm_msrs {
__u32 nmsrs; /* number of msrs in entries */
__u32 pad;
struct kvm_msr_entry entries[0];
-};
+ };
-struct kvm_msr_entry {
+ struct kvm_msr_entry {
__u32 index;
__u32 reserved;
__u64 data;
-};
+ };
Application code should set the 'nmsrs' member (which indicates the
size of the entries array) and the 'index' member of each array entry.
@@ -581,12 +636,13 @@
4.19 KVM_SET_MSRS
+-----------------
-Capability: basic
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_msrs (in)
-Returns: number of msrs successfully set (see below), -1 on error
+:Capability: basic
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_msrs (in)
+:Returns: number of msrs successfully set (see below), -1 on error
Writes model-specific registers to the vcpu. See KVM_GET_MSRS for the
data structures.
@@ -602,41 +658,44 @@
4.20 KVM_SET_CPUID
+------------------
-Capability: basic
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_cpuid (in)
-Returns: 0 on success, -1 on error
+:Capability: basic
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_cpuid (in)
+:Returns: 0 on success, -1 on error
Defines the vcpu responses to the cpuid instruction. Applications
should use the KVM_SET_CPUID2 ioctl if available.
+::
-struct kvm_cpuid_entry {
+ struct kvm_cpuid_entry {
__u32 function;
__u32 eax;
__u32 ebx;
__u32 ecx;
__u32 edx;
__u32 padding;
-};
+ };
-/* for KVM_SET_CPUID */
-struct kvm_cpuid {
+ /* for KVM_SET_CPUID */
+ struct kvm_cpuid {
__u32 nent;
__u32 padding;
struct kvm_cpuid_entry entries[0];
-};
+ };
4.21 KVM_SET_SIGNAL_MASK
+------------------------
-Capability: basic
-Architectures: all
-Type: vcpu ioctl
-Parameters: struct kvm_signal_mask (in)
-Returns: 0 on success, -1 on error
+:Capability: basic
+:Architectures: all
+:Type: vcpu ioctl
+:Parameters: struct kvm_signal_mask (in)
+:Returns: 0 on success, -1 on error
Defines which signals are blocked during execution of KVM_RUN. This
signal mask temporarily overrides the threads signal mask. Any
@@ -646,25 +705,30 @@
Note the signal will only be delivered if not blocked by the original
signal mask.
-/* for KVM_SET_SIGNAL_MASK */
-struct kvm_signal_mask {
+::
+
+ /* for KVM_SET_SIGNAL_MASK */
+ struct kvm_signal_mask {
__u32 len;
__u8 sigset[0];
-};
+ };
4.22 KVM_GET_FPU
+----------------
-Capability: basic
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_fpu (out)
-Returns: 0 on success, -1 on error
+:Capability: basic
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_fpu (out)
+:Returns: 0 on success, -1 on error
Reads the floating point state from the vcpu.
-/* for KVM_GET_FPU and KVM_SET_FPU */
-struct kvm_fpu {
+::
+
+ /* for KVM_GET_FPU and KVM_SET_FPU */
+ struct kvm_fpu {
__u8 fpr[8][16];
__u16 fcw;
__u16 fsw;
@@ -676,21 +740,24 @@
__u8 xmm[16][16];
__u32 mxcsr;
__u32 pad2;
-};
+ };
4.23 KVM_SET_FPU
+----------------
-Capability: basic
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_fpu (in)
-Returns: 0 on success, -1 on error
+:Capability: basic
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_fpu (in)
+:Returns: 0 on success, -1 on error
Writes the floating point state to the vcpu.
-/* for KVM_GET_FPU and KVM_SET_FPU */
-struct kvm_fpu {
+::
+
+ /* for KVM_GET_FPU and KVM_SET_FPU */
+ struct kvm_fpu {
__u8 fpr[8][16];
__u16 fcw;
__u16 fsw;
@@ -702,16 +769,17 @@
__u8 xmm[16][16];
__u32 mxcsr;
__u32 pad2;
-};
+ };
4.24 KVM_CREATE_IRQCHIP
+-----------------------
-Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390)
-Architectures: x86, ARM, arm64, s390
-Type: vm ioctl
-Parameters: none
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390)
+:Architectures: x86, ARM, arm64, s390
+:Type: vm ioctl
+:Parameters: none
+:Returns: 0 on success, -1 on error
Creates an interrupt controller model in the kernel.
On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up
@@ -727,12 +795,13 @@
4.25 KVM_IRQ_LINE
+-----------------
-Capability: KVM_CAP_IRQCHIP
-Architectures: x86, arm, arm64
-Type: vm ioctl
-Parameters: struct kvm_irq_level
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_IRQCHIP
+:Architectures: x86, arm, arm64
+:Type: vm ioctl
+:Parameters: struct kvm_irq_level
+:Returns: 0 on success, -1 on error
Sets the level of a GSI input to the interrupt controller model in the kernel.
On some architectures it is required that an interrupt controller model has
@@ -756,16 +825,20 @@
ARM/arm64 can signal an interrupt either at the CPU level, or at the
in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to
use PPIs designated for specific cpus. The irq field is interpreted
-like this:
+like this::
bits: | 31 ... 28 | 27 ... 24 | 23 ... 16 | 15 ... 0 |
field: | vcpu2_index | irq_type | vcpu_index | irq_id |
The irq_type field has the following values:
-- irq_type[0]: out-of-kernel GIC: irq_id 0 is IRQ, irq_id 1 is FIQ
-- irq_type[1]: in-kernel GIC: SPI, irq_id between 32 and 1019 (incl.)
+
+- irq_type[0]:
+ out-of-kernel GIC: irq_id 0 is IRQ, irq_id 1 is FIQ
+- irq_type[1]:
+ in-kernel GIC: SPI, irq_id between 32 and 1019 (incl.)
(the vcpu_index field is ignored)
-- irq_type[2]: in-kernel GIC: PPI, irq_id between 16 and 31 (incl.)
+- irq_type[2]:
+ in-kernel GIC: PPI, irq_id between 16 and 31 (incl.)
(The irq_id field thus corresponds nicely to the IRQ ID in the ARM GIC specs)
@@ -779,27 +852,32 @@
injection of interrupts for the in-kernel irqchip. KVM_IRQ_LINE can always
be used for a userspace interrupt controller.
-struct kvm_irq_level {
+::
+
+ struct kvm_irq_level {
union {
__u32 irq; /* GSI */
__s32 status; /* not used for KVM_IRQ_LEVEL */
};
__u32 level; /* 0 or 1 */
-};
+ };
4.26 KVM_GET_IRQCHIP
+--------------------
-Capability: KVM_CAP_IRQCHIP
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_irqchip (in/out)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_IRQCHIP
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_irqchip (in/out)
+:Returns: 0 on success, -1 on error
Reads the state of a kernel interrupt controller created with
KVM_CREATE_IRQCHIP into a buffer provided by the caller.
-struct kvm_irqchip {
+::
+
+ struct kvm_irqchip {
__u32 chip_id; /* 0 = PIC1, 1 = PIC2, 2 = IOAPIC */
__u32 pad;
union {
@@ -807,21 +885,24 @@
struct kvm_pic_state pic;
struct kvm_ioapic_state ioapic;
} chip;
-};
+ };
4.27 KVM_SET_IRQCHIP
+--------------------
-Capability: KVM_CAP_IRQCHIP
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_irqchip (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_IRQCHIP
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_irqchip (in)
+:Returns: 0 on success, -1 on error
Sets the state of a kernel interrupt controller created with
KVM_CREATE_IRQCHIP from a buffer provided by the caller.
-struct kvm_irqchip {
+::
+
+ struct kvm_irqchip {
__u32 chip_id; /* 0 = PIC1, 1 = PIC2, 2 = IOAPIC */
__u32 pad;
union {
@@ -829,16 +910,17 @@
struct kvm_pic_state pic;
struct kvm_ioapic_state ioapic;
} chip;
-};
+ };
4.28 KVM_XEN_HVM_CONFIG
+-----------------------
-Capability: KVM_CAP_XEN_HVM
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_xen_hvm_config (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_XEN_HVM
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_xen_hvm_config (in)
+:Returns: 0 on success, -1 on error
Sets the MSR that the Xen HVM guest uses to initialize its hypercall
page, and provides the starting address and size of the hypercall
@@ -846,7 +928,9 @@
page of a blob (32- or 64-bit, depending on the vcpu mode) to guest
memory.
-struct kvm_xen_hvm_config {
+::
+
+ struct kvm_xen_hvm_config {
__u32 flags;
__u32 msr;
__u64 blob_addr_32;
@@ -854,16 +938,17 @@
__u8 blob_size_32;
__u8 blob_size_64;
__u8 pad2[30];
-};
+ };
4.29 KVM_GET_CLOCK
+------------------
-Capability: KVM_CAP_ADJUST_CLOCK
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_clock_data (out)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_ADJUST_CLOCK
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_clock_data (out)
+:Returns: 0 on success, -1 on error
Gets the current timestamp of kvmclock as seen by the current guest. In
conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios
@@ -880,47 +965,56 @@
but the exact value read by each VCPU could differ, because the host
TSC is not stable.
-struct kvm_clock_data {
+::
+
+ struct kvm_clock_data {
__u64 clock; /* kvmclock current value */
__u32 flags;
__u32 pad[9];
-};
+ };
4.30 KVM_SET_CLOCK
+------------------
-Capability: KVM_CAP_ADJUST_CLOCK
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_clock_data (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_ADJUST_CLOCK
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_clock_data (in)
+:Returns: 0 on success, -1 on error
Sets the current timestamp of kvmclock to the value specified in its parameter.
In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios
such as migration.
-struct kvm_clock_data {
+::
+
+ struct kvm_clock_data {
__u64 clock; /* kvmclock current value */
__u32 flags;
__u32 pad[9];
-};
+ };
4.31 KVM_GET_VCPU_EVENTS
+------------------------
-Capability: KVM_CAP_VCPU_EVENTS
-Extended by: KVM_CAP_INTR_SHADOW
-Architectures: x86, arm, arm64
-Type: vcpu ioctl
-Parameters: struct kvm_vcpu_event (out)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_VCPU_EVENTS
+:Extended by: KVM_CAP_INTR_SHADOW
+:Architectures: x86, arm, arm64
+:Type: vcpu ioctl
+:Parameters: struct kvm_vcpu_event (out)
+:Returns: 0 on success, -1 on error
X86:
+^^^^
Gets currently pending exceptions, interrupts, and NMIs as well as related
states of the vcpu.
-struct kvm_vcpu_events {
+::
+
+ struct kvm_vcpu_events {
struct {
__u8 injected;
__u8 nr;
@@ -951,7 +1045,7 @@
__u8 reserved[27];
__u8 exception_has_payload;
__u64 exception_payload;
-};
+ };
The following bits are defined in the flags field:
@@ -967,6 +1061,7 @@
KVM_CAP_EXCEPTION_PAYLOAD is enabled.
ARM/ARM64:
+^^^^^^^^^^
If the guest accesses a device that is being emulated by the host kernel in
such a way that a real device would generate a physical SError, KVM may make
@@ -1006,8 +1101,9 @@
KVM_SET_VCPU_EVENTS or otherwise) because such an exception is always delivered
directly to the virtual CPU).
+::
-struct kvm_vcpu_events {
+ struct kvm_vcpu_events {
struct {
__u8 serror_pending;
__u8 serror_has_esr;
@@ -1017,18 +1113,20 @@
__u64 serror_esr;
} exception;
__u32 reserved[12];
-};
+ };
4.32 KVM_SET_VCPU_EVENTS
+------------------------
-Capability: KVM_CAP_VCPU_EVENTS
-Extended by: KVM_CAP_INTR_SHADOW
-Architectures: x86, arm, arm64
-Type: vcpu ioctl
-Parameters: struct kvm_vcpu_event (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_VCPU_EVENTS
+:Extended by: KVM_CAP_INTR_SHADOW
+:Architectures: x86, arm, arm64
+:Type: vcpu ioctl
+:Parameters: struct kvm_vcpu_event (in)
+:Returns: 0 on success, -1 on error
X86:
+^^^^
Set pending exceptions, interrupts, and NMIs as well as related states of the
vcpu.
@@ -1040,9 +1138,11 @@
smi.pending. Keep the corresponding bits in the flags field cleared to
suppress overwriting the current in-kernel state. The bits are:
-KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel
-KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector
-KVM_VCPUEVENT_VALID_SMM - transfer the smi sub-struct.
+=============================== ==================================
+KVM_VCPUEVENT_VALID_NMI_PENDING transfer nmi.pending to the kernel
+KVM_VCPUEVENT_VALID_SIPI_VECTOR transfer sipi_vector
+KVM_VCPUEVENT_VALID_SMM transfer the smi sub-struct.
+=============================== ==================================
If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in
the flags field to signal that interrupt.shadow contains a valid state and
@@ -1056,6 +1156,7 @@
contain a valid state and shall be written into the VCPU.
ARM/ARM64:
+^^^^^^^^^^
User space may need to inject several types of events to the guest.
@@ -1078,31 +1179,35 @@
4.33 KVM_GET_DEBUGREGS
+----------------------
-Capability: KVM_CAP_DEBUGREGS
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_debugregs (out)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_DEBUGREGS
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_debugregs (out)
+:Returns: 0 on success, -1 on error
Reads debug registers from the vcpu.
-struct kvm_debugregs {
+::
+
+ struct kvm_debugregs {
__u64 db[4];
__u64 dr6;
__u64 dr7;
__u64 flags;
__u64 reserved[9];
-};
+ };
4.34 KVM_SET_DEBUGREGS
+----------------------
-Capability: KVM_CAP_DEBUGREGS
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_debugregs (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_DEBUGREGS
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_debugregs (in)
+:Returns: 0 on success, -1 on error
Writes debug registers into the vcpu.
@@ -1111,24 +1216,27 @@
4.35 KVM_SET_USER_MEMORY_REGION
+-------------------------------
-Capability: KVM_CAP_USER_MEMORY
-Architectures: all
-Type: vm ioctl
-Parameters: struct kvm_userspace_memory_region (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_USER_MEMORY
+:Architectures: all
+:Type: vm ioctl
+:Parameters: struct kvm_userspace_memory_region (in)
+:Returns: 0 on success, -1 on error
-struct kvm_userspace_memory_region {
+::
+
+ struct kvm_userspace_memory_region {
__u32 slot;
__u32 flags;
__u64 guest_phys_addr;
__u64 memory_size; /* bytes */
__u64 userspace_addr; /* start of the userspace allocated memory */
-};
+ };
-/* for kvm_memory_region::flags */
-#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
-#define KVM_MEM_READONLY (1UL << 1)
+ /* for kvm_memory_region::flags */
+ #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
+ #define KVM_MEM_READONLY (1UL << 1)
This ioctl allows the user to create, modify or delete a guest physical
memory slot. Bits 0-15 of "slot" specify the slot id and this value
@@ -1174,12 +1282,13 @@
4.36 KVM_SET_TSS_ADDR
+---------------------
-Capability: KVM_CAP_SET_TSS_ADDR
-Architectures: x86
-Type: vm ioctl
-Parameters: unsigned long tss_address (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_SET_TSS_ADDR
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: unsigned long tss_address (in)
+:Returns: 0 on success, -1 on error
This ioctl defines the physical address of a three-page region in the guest
physical address space. The region must be within the first 4GB of the
@@ -1193,21 +1302,24 @@
4.37 KVM_ENABLE_CAP
+-------------------
-Capability: KVM_CAP_ENABLE_CAP
-Architectures: mips, ppc, s390
-Type: vcpu ioctl
-Parameters: struct kvm_enable_cap (in)
-Returns: 0 on success; -1 on error
+:Capability: KVM_CAP_ENABLE_CAP
+:Architectures: mips, ppc, s390
+:Type: vcpu ioctl
+:Parameters: struct kvm_enable_cap (in)
+:Returns: 0 on success; -1 on error
-Capability: KVM_CAP_ENABLE_CAP_VM
-Architectures: all
-Type: vcpu ioctl
-Parameters: struct kvm_enable_cap (in)
-Returns: 0 on success; -1 on error
+:Capability: KVM_CAP_ENABLE_CAP_VM
+:Architectures: all
+:Type: vcpu ioctl
+:Parameters: struct kvm_enable_cap (in)
+:Returns: 0 on success; -1 on error
-+Not all extensions are enabled by default. Using this ioctl the application
-can enable an extension, making it available to the guest.
+.. note::
+
+ Not all extensions are enabled by default. Using this ioctl the application
+ can enable an extension, making it available to the guest.
On systems that do not support this ioctl, it always fails. On systems that
do support it, it only works for extensions that are supported for enablement.
@@ -1215,76 +1327,91 @@
To check if a capability can be enabled, the KVM_CHECK_EXTENSION ioctl should
be used.
-struct kvm_enable_cap {
+::
+
+ struct kvm_enable_cap {
/* in */
__u32 cap;
The capability that is supposed to get enabled.
+::
+
__u32 flags;
A bitfield indicating future enhancements. Has to be 0 for now.
+::
+
__u64 args[4];
Arguments for enabling a feature. If a feature needs initial values to
function properly, this is the place to put them.
+::
+
__u8 pad[64];
-};
+ };
The vcpu ioctl should be used for vcpu-specific capabilities, the vm ioctl
for vm-wide capabilities.
4.38 KVM_GET_MP_STATE
+---------------------
-Capability: KVM_CAP_MP_STATE
-Architectures: x86, s390, arm, arm64
-Type: vcpu ioctl
-Parameters: struct kvm_mp_state (out)
-Returns: 0 on success; -1 on error
+:Capability: KVM_CAP_MP_STATE
+:Architectures: x86, s390, arm, arm64
+:Type: vcpu ioctl
+:Parameters: struct kvm_mp_state (out)
+:Returns: 0 on success; -1 on error
-struct kvm_mp_state {
+::
+
+ struct kvm_mp_state {
__u32 mp_state;
-};
+ };
Returns the vcpu's current "multiprocessing state" (though also valid on
uniprocessor guests).
Possible values are:
- - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86,arm/arm64]
- - KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP)
+ ========================== ===============================================
+ KVM_MP_STATE_RUNNABLE the vcpu is currently running [x86,arm/arm64]
+ KVM_MP_STATE_UNINITIALIZED the vcpu is an application processor (AP)
which has not yet received an INIT signal [x86]
- - KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is
+ KVM_MP_STATE_INIT_RECEIVED the vcpu has received an INIT signal, and is
now ready for a SIPI [x86]
- - KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and
+ KVM_MP_STATE_HALTED the vcpu has executed a HLT instruction and
is waiting for an interrupt [x86]
- - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector
+ KVM_MP_STATE_SIPI_RECEIVED the vcpu has just received a SIPI (vector
accessible via KVM_GET_VCPU_EVENTS) [x86]
- - KVM_MP_STATE_STOPPED: the vcpu is stopped [s390,arm/arm64]
- - KVM_MP_STATE_CHECK_STOP: the vcpu is in a special error state [s390]
- - KVM_MP_STATE_OPERATING: the vcpu is operating (running or halted)
+ KVM_MP_STATE_STOPPED the vcpu is stopped [s390,arm/arm64]
+ KVM_MP_STATE_CHECK_STOP the vcpu is in a special error state [s390]
+ KVM_MP_STATE_OPERATING the vcpu is operating (running or halted)
[s390]
- - KVM_MP_STATE_LOAD: the vcpu is in a special load/startup state
+ KVM_MP_STATE_LOAD the vcpu is in a special load/startup state
[s390]
+ ========================== ===============================================
On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
in-kernel irqchip, the multiprocessing state must be maintained by userspace on
these architectures.
For arm/arm64:
+^^^^^^^^^^^^^^
The only states that are valid are KVM_MP_STATE_STOPPED and
KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not.
4.39 KVM_SET_MP_STATE
+---------------------
-Capability: KVM_CAP_MP_STATE
-Architectures: x86, s390, arm, arm64
-Type: vcpu ioctl
-Parameters: struct kvm_mp_state (in)
-Returns: 0 on success; -1 on error
+:Capability: KVM_CAP_MP_STATE
+:Architectures: x86, s390, arm, arm64
+:Type: vcpu ioctl
+:Parameters: struct kvm_mp_state (in)
+:Returns: 0 on success; -1 on error
Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for
arguments.
@@ -1294,17 +1421,19 @@
these architectures.
For arm/arm64:
+^^^^^^^^^^^^^^
The only states that are valid are KVM_MP_STATE_STOPPED and
KVM_MP_STATE_RUNNABLE which reflect if the vcpu should be paused or not.
4.40 KVM_SET_IDENTITY_MAP_ADDR
+------------------------------
-Capability: KVM_CAP_SET_IDENTITY_MAP_ADDR
-Architectures: x86
-Type: vm ioctl
-Parameters: unsigned long identity (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_SET_IDENTITY_MAP_ADDR
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: unsigned long identity (in)
+:Returns: 0 on success, -1 on error
This ioctl defines the physical address of a one-page region in the guest
physical address space. The region must be within the first 4GB of the
@@ -1322,12 +1451,13 @@
Fails if any VCPU has already been created.
4.41 KVM_SET_BOOT_CPU_ID
+------------------------
-Capability: KVM_CAP_SET_BOOT_CPU_ID
-Architectures: x86
-Type: vm ioctl
-Parameters: unsigned long vcpu_id
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_SET_BOOT_CPU_ID
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: unsigned long vcpu_id
+:Returns: 0 on success, -1 on error
Define which vcpu is the Bootstrap Processor (BSP). Values are the same
as the vcpu id in KVM_CREATE_VCPU. If this ioctl is not called, the default
@@ -1335,102 +1465,119 @@
4.42 KVM_GET_XSAVE
+------------------
-Capability: KVM_CAP_XSAVE
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_xsave (out)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_XSAVE
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_xsave (out)
+:Returns: 0 on success, -1 on error
-struct kvm_xsave {
+
+::
+
+ struct kvm_xsave {
__u32 region[1024];
-};
+ };
This ioctl would copy current vcpu's xsave struct to the userspace.
4.43 KVM_SET_XSAVE
+------------------
-Capability: KVM_CAP_XSAVE
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_xsave (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_XSAVE
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_xsave (in)
+:Returns: 0 on success, -1 on error
-struct kvm_xsave {
+::
+
+
+ struct kvm_xsave {
__u32 region[1024];
-};
+ };
This ioctl would copy userspace's xsave struct to the kernel.
4.44 KVM_GET_XCRS
+-----------------
-Capability: KVM_CAP_XCRS
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_xcrs (out)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_XCRS
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_xcrs (out)
+:Returns: 0 on success, -1 on error
-struct kvm_xcr {
+::
+
+ struct kvm_xcr {
__u32 xcr;
__u32 reserved;
__u64 value;
-};
+ };
-struct kvm_xcrs {
+ struct kvm_xcrs {
__u32 nr_xcrs;
__u32 flags;
struct kvm_xcr xcrs[KVM_MAX_XCRS];
__u64 padding[16];
-};
+ };
This ioctl would copy current vcpu's xcrs to the userspace.
4.45 KVM_SET_XCRS
+-----------------
-Capability: KVM_CAP_XCRS
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_xcrs (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_XCRS
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_xcrs (in)
+:Returns: 0 on success, -1 on error
-struct kvm_xcr {
+::
+
+ struct kvm_xcr {
__u32 xcr;
__u32 reserved;
__u64 value;
-};
+ };
-struct kvm_xcrs {
+ struct kvm_xcrs {
__u32 nr_xcrs;
__u32 flags;
struct kvm_xcr xcrs[KVM_MAX_XCRS];
__u64 padding[16];
-};
+ };
This ioctl would set vcpu's xcr to the value userspace specified.
4.46 KVM_GET_SUPPORTED_CPUID
+----------------------------
-Capability: KVM_CAP_EXT_CPUID
-Architectures: x86
-Type: system ioctl
-Parameters: struct kvm_cpuid2 (in/out)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_EXT_CPUID
+:Architectures: x86
+:Type: system ioctl
+:Parameters: struct kvm_cpuid2 (in/out)
+:Returns: 0 on success, -1 on error
-struct kvm_cpuid2 {
+::
+
+ struct kvm_cpuid2 {
__u32 nent;
__u32 padding;
struct kvm_cpuid_entry2 entries[0];
-};
+ };
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
-#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
-#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
+ #define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
+ #define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
+ #define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
-struct kvm_cpuid_entry2 {
+ struct kvm_cpuid_entry2 {
__u32 function;
__u32 index;
__u32 flags;
@@ -1439,7 +1586,7 @@
__u32 ecx;
__u32 edx;
__u32 padding[3];
-};
+ };
This ioctl returns x86 cpuid features which are supported by both the
hardware and kvm in its default configuration. Userspace can use the
@@ -1467,10 +1614,16 @@
x2apic), may not be present in the host cpu, but are exposed by kvm if it can
emulate them efficiently. The fields in each entry are defined as follows:
- function: the eax value used to obtain the entry
- index: the ecx value used to obtain the entry (for entries that are
+ function:
+ the eax value used to obtain the entry
+
+ index:
+ the ecx value used to obtain the entry (for entries that are
affected by ecx)
- flags: an OR of zero or more of the following:
+
+ flags:
+ an OR of zero or more of the following:
+
KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
if the index field is valid
KVM_CPUID_FLAG_STATEFUL_FUNC:
@@ -1480,12 +1633,14 @@
KVM_CPUID_FLAG_STATE_READ_NEXT:
for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
the first entry to be read by a cpu
- eax, ebx, ecx, edx: the values returned by the cpuid instruction for
+
+ eax, ebx, ecx, edx:
+ the values returned by the cpuid instruction for
this function/index combination
The TSC deadline timer feature (CPUID leaf 1, ecx[24]) is always returned
as false, since the feature depends on KVM_CREATE_IRQCHIP for local APIC
-support. Instead it is reported via
+support. Instead it is reported via::
ioctl(KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER)
@@ -1494,18 +1649,21 @@
4.47 KVM_PPC_GET_PVINFO
+-----------------------
-Capability: KVM_CAP_PPC_GET_PVINFO
-Architectures: ppc
-Type: vm ioctl
-Parameters: struct kvm_ppc_pvinfo (out)
-Returns: 0 on success, !0 on error
+:Capability: KVM_CAP_PPC_GET_PVINFO
+:Architectures: ppc
+:Type: vm ioctl
+:Parameters: struct kvm_ppc_pvinfo (out)
+:Returns: 0 on success, !0 on error
-struct kvm_ppc_pvinfo {
+::
+
+ struct kvm_ppc_pvinfo {
__u32 flags;
__u32 hcall[4];
__u8 pad[108];
-};
+ };
This ioctl fetches PV specific information that need to be passed to the guest
using the device tree or other means from vm context.
@@ -1515,33 +1673,39 @@
If any additional field gets added to this structure later on, a bit for that
additional piece of information will be set in the flags bitmap.
-The flags bitmap is defined as:
+The flags bitmap is defined as::
/* the host supports the ePAPR idle hcall
#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
4.52 KVM_SET_GSI_ROUTING
+------------------------
-Capability: KVM_CAP_IRQ_ROUTING
-Architectures: x86 s390 arm arm64
-Type: vm ioctl
-Parameters: struct kvm_irq_routing (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_IRQ_ROUTING
+:Architectures: x86 s390 arm arm64
+:Type: vm ioctl
+:Parameters: struct kvm_irq_routing (in)
+:Returns: 0 on success, -1 on error
Sets the GSI routing table entries, overwriting any previously set entries.
On arm/arm64, GSI routing has the following limitation:
+
- GSI routing does not apply to KVM_IRQ_LINE but only to KVM_IRQFD.
-struct kvm_irq_routing {
+::
+
+ struct kvm_irq_routing {
__u32 nr;
__u32 flags;
struct kvm_irq_routing_entry entries[0];
-};
+ };
No flags are specified so far, the corresponding field must be set to zero.
-struct kvm_irq_routing_entry {
+::
+
+ struct kvm_irq_routing_entry {
__u32 gsi;
__u32 type;
__u32 flags;
@@ -1553,15 +1717,16 @@
struct kvm_irq_routing_hv_sint hv_sint;
__u32 pad[8];
} u;
-};
+ };
-/* gsi routing entry types */
-#define KVM_IRQ_ROUTING_IRQCHIP 1
-#define KVM_IRQ_ROUTING_MSI 2
-#define KVM_IRQ_ROUTING_S390_ADAPTER 3
-#define KVM_IRQ_ROUTING_HV_SINT 4
+ /* gsi routing entry types */
+ #define KVM_IRQ_ROUTING_IRQCHIP 1
+ #define KVM_IRQ_ROUTING_MSI 2
+ #define KVM_IRQ_ROUTING_S390_ADAPTER 3
+ #define KVM_IRQ_ROUTING_HV_SINT 4
flags:
+
- KVM_MSI_VALID_DEVID: used along with KVM_IRQ_ROUTING_MSI routing entry
type, specifies that the devid field contains a valid value. The per-VM
KVM_CAP_MSI_DEVID capability advertises the requirement to provide
@@ -1569,12 +1734,14 @@
never set the KVM_MSI_VALID_DEVID flag as the ioctl might fail.
- zero otherwise
-struct kvm_irq_routing_irqchip {
+::
+
+ struct kvm_irq_routing_irqchip {
__u32 irqchip;
__u32 pin;
-};
+ };
-struct kvm_irq_routing_msi {
+ struct kvm_irq_routing_msi {
__u32 address_lo;
__u32 address_hi;
__u32 data;
@@ -1582,7 +1749,7 @@
__u32 pad;
__u32 devid;
};
-};
+ };
If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier
for the device that wrote the MSI message. For PCI, this is usually a
@@ -1593,39 +1760,43 @@
address_hi bits 31-8 provide bits 31-8 of the destination id. Bits 7-0 of
address_hi must be zero.
-struct kvm_irq_routing_s390_adapter {
+::
+
+ struct kvm_irq_routing_s390_adapter {
__u64 ind_addr;
__u64 summary_addr;
__u64 ind_offset;
__u32 summary_offset;
__u32 adapter_id;
-};
+ };
-struct kvm_irq_routing_hv_sint {
+ struct kvm_irq_routing_hv_sint {
__u32 vcpu;
__u32 sint;
-};
+ };
4.55 KVM_SET_TSC_KHZ
+--------------------
-Capability: KVM_CAP_TSC_CONTROL
-Architectures: x86
-Type: vcpu ioctl
-Parameters: virtual tsc_khz
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_TSC_CONTROL
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: virtual tsc_khz
+:Returns: 0 on success, -1 on error
Specifies the tsc frequency for the virtual machine. The unit of the
frequency is KHz.
4.56 KVM_GET_TSC_KHZ
+--------------------
-Capability: KVM_CAP_GET_TSC_KHZ
-Architectures: x86
-Type: vcpu ioctl
-Parameters: none
-Returns: virtual tsc-khz on success, negative value on error
+:Capability: KVM_CAP_GET_TSC_KHZ
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: none
+:Returns: virtual tsc-khz on success, negative value on error
Returns the tsc frequency of the guest. The unit of the return value is
KHz. If the host has unstable tsc this ioctl returns -EIO instead as an
@@ -1633,17 +1804,20 @@
4.57 KVM_GET_LAPIC
+------------------
-Capability: KVM_CAP_IRQCHIP
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_lapic_state (out)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_IRQCHIP
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_lapic_state (out)
+:Returns: 0 on success, -1 on error
-#define KVM_APIC_REG_SIZE 0x400
-struct kvm_lapic_state {
+::
+
+ #define KVM_APIC_REG_SIZE 0x400
+ struct kvm_lapic_state {
char regs[KVM_APIC_REG_SIZE];
-};
+ };
Reads the Local APIC registers and copies them into the input argument. The
data format and layout are the same as documented in the architecture manual.
@@ -1661,17 +1835,20 @@
4.58 KVM_SET_LAPIC
+------------------
-Capability: KVM_CAP_IRQCHIP
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_lapic_state (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_IRQCHIP
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_lapic_state (in)
+:Returns: 0 on success, -1 on error
-#define KVM_APIC_REG_SIZE 0x400
-struct kvm_lapic_state {
+::
+
+ #define KVM_APIC_REG_SIZE 0x400
+ struct kvm_lapic_state {
char regs[KVM_APIC_REG_SIZE];
-};
+ };
Copies the input argument into the Local APIC registers. The data format
and layout are the same as documented in the architecture manual.
@@ -1682,35 +1859,38 @@
4.59 KVM_IOEVENTFD
+------------------
-Capability: KVM_CAP_IOEVENTFD
-Architectures: all
-Type: vm ioctl
-Parameters: struct kvm_ioeventfd (in)
-Returns: 0 on success, !0 on error
+:Capability: KVM_CAP_IOEVENTFD
+:Architectures: all
+:Type: vm ioctl
+:Parameters: struct kvm_ioeventfd (in)
+:Returns: 0 on success, !0 on error
This ioctl attaches or detaches an ioeventfd to a legal pio/mmio address
within the guest. A guest write in the registered address will signal the
provided event instead of triggering an exit.
-struct kvm_ioeventfd {
+::
+
+ struct kvm_ioeventfd {
__u64 datamatch;
__u64 addr; /* legal pio/mmio address */
__u32 len; /* 0, 1, 2, 4, or 8 bytes */
__s32 fd;
__u32 flags;
__u8 pad[36];
-};
+ };
For the special case of virtio-ccw devices on s390, the ioevent is matched
to a subchannel/virtqueue tuple instead.
-The following flags are defined:
+The following flags are defined::
-#define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
-#define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio)
-#define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign)
-#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \
+ #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
+ #define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio)
+ #define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign)
+ #define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \
(1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify)
If datamatch flag is set, the event will be signaled only if the written value
@@ -1725,17 +1905,20 @@
work anyway.
4.60 KVM_DIRTY_TLB
+------------------
-Capability: KVM_CAP_SW_TLB
-Architectures: ppc
-Type: vcpu ioctl
-Parameters: struct kvm_dirty_tlb (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_SW_TLB
+:Architectures: ppc
+:Type: vcpu ioctl
+:Parameters: struct kvm_dirty_tlb (in)
+:Returns: 0 on success, -1 on error
-struct kvm_dirty_tlb {
+::
+
+ struct kvm_dirty_tlb {
__u64 bitmap;
__u32 num_dirty;
-};
+ };
This must be called whenever userspace has changed an entry in the shared
TLB, prior to calling KVM_RUN on the associated vcpu.
@@ -1758,23 +1941,26 @@
4.62 KVM_CREATE_SPAPR_TCE
+-------------------------
-Capability: KVM_CAP_SPAPR_TCE
-Architectures: powerpc
-Type: vm ioctl
-Parameters: struct kvm_create_spapr_tce (in)
-Returns: file descriptor for manipulating the created TCE table
+:Capability: KVM_CAP_SPAPR_TCE
+:Architectures: powerpc
+:Type: vm ioctl
+:Parameters: struct kvm_create_spapr_tce (in)
+:Returns: file descriptor for manipulating the created TCE table
This creates a virtual TCE (translation control entry) table, which
is an IOMMU for PAPR-style virtual I/O. It is used to translate
logical addresses used in virtual I/O into guest physical addresses,
and provides a scatter/gather capability for PAPR virtual I/O.
-/* for KVM_CAP_SPAPR_TCE */
-struct kvm_create_spapr_tce {
+::
+
+ /* for KVM_CAP_SPAPR_TCE */
+ struct kvm_create_spapr_tce {
__u64 liobn;
__u32 window_size;
-};
+ };
The liobn field gives the logical IO bus number for which to create a
TCE table. The window_size field specifies the size of the DMA window
@@ -1794,12 +1980,13 @@
4.63 KVM_ALLOCATE_RMA
+---------------------
-Capability: KVM_CAP_PPC_RMA
-Architectures: powerpc
-Type: vm ioctl
-Parameters: struct kvm_allocate_rma (out)
-Returns: file descriptor for mapping the allocated RMA
+:Capability: KVM_CAP_PPC_RMA
+:Architectures: powerpc
+:Type: vm ioctl
+:Parameters: struct kvm_allocate_rma (out)
+:Returns: file descriptor for mapping the allocated RMA
This allocates a Real Mode Area (RMA) from the pool allocated at boot
time by the kernel. An RMA is a physically-contiguous, aligned region
@@ -1808,10 +1995,12 @@
POWER processors support a set of sizes for the RMA that usually
includes 64MB, 128MB, 256MB and some larger powers of two.
-/* for KVM_ALLOCATE_RMA */
-struct kvm_allocate_rma {
+::
+
+ /* for KVM_ALLOCATE_RMA */
+ struct kvm_allocate_rma {
__u64 rma_size;
-};
+ };
The return value is a file descriptor which can be passed to mmap(2)
to map the allocated RMA into userspace. The mapped area can then be
@@ -1827,12 +2016,13 @@
4.64 KVM_NMI
+------------
-Capability: KVM_CAP_USER_NMI
-Architectures: x86
-Type: vcpu ioctl
-Parameters: none
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_USER_NMI
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: none
+:Returns: 0 on success, -1 on error
Queues an NMI on the thread's vcpu. Note this is well defined only
when KVM_CREATE_IRQCHIP has not been called, since this is an interface
@@ -1853,14 +2043,16 @@
4.65 KVM_S390_UCAS_MAP
+----------------------
-Capability: KVM_CAP_S390_UCONTROL
-Architectures: s390
-Type: vcpu ioctl
-Parameters: struct kvm_s390_ucas_mapping (in)
-Returns: 0 in case of success
+:Capability: KVM_CAP_S390_UCONTROL
+:Architectures: s390
+:Type: vcpu ioctl
+:Parameters: struct kvm_s390_ucas_mapping (in)
+:Returns: 0 in case of success
-The parameter is defined like this:
+The parameter is defined like this::
+
struct kvm_s390_ucas_mapping {
__u64 user_addr;
__u64 vcpu_addr;
@@ -1873,14 +2065,16 @@
4.66 KVM_S390_UCAS_UNMAP
+------------------------
-Capability: KVM_CAP_S390_UCONTROL
-Architectures: s390
-Type: vcpu ioctl
-Parameters: struct kvm_s390_ucas_mapping (in)
-Returns: 0 in case of success
+:Capability: KVM_CAP_S390_UCONTROL
+:Architectures: s390
+:Type: vcpu ioctl
+:Parameters: struct kvm_s390_ucas_mapping (in)
+:Returns: 0 in case of success
-The parameter is defined like this:
+The parameter is defined like this::
+
struct kvm_s390_ucas_mapping {
__u64 user_addr;
__u64 vcpu_addr;
@@ -1893,12 +2087,13 @@
4.67 KVM_S390_VCPU_FAULT
+------------------------
-Capability: KVM_CAP_S390_UCONTROL
-Architectures: s390
-Type: vcpu ioctl
-Parameters: vcpu absolute address (in)
-Returns: 0 in case of success
+:Capability: KVM_CAP_S390_UCONTROL
+:Architectures: s390
+:Type: vcpu ioctl
+:Parameters: vcpu absolute address (in)
+:Returns: 0 in case of success
This call creates a page table entry on the virtual cpu's address space
(for user controlled virtual machines) or the virtual machine's address
@@ -1910,23 +2105,31 @@
4.68 KVM_SET_ONE_REG
+--------------------
-Capability: KVM_CAP_ONE_REG
-Architectures: all
-Type: vcpu ioctl
-Parameters: struct kvm_one_reg (in)
-Returns: 0 on success, negative value on failure
+:Capability: KVM_CAP_ONE_REG
+:Architectures: all
+:Type: vcpu ioctl
+:Parameters: struct kvm_one_reg (in)
+:Returns: 0 on success, negative value on failure
+
Errors:
- ENOENT: no such register
- EINVAL: invalid register ID, or no such register
- EPERM: (arm64) register access not allowed before vcpu finalization
+
+ ====== ============================================================
+ ENOENT no such register
+ EINVAL invalid register ID, or no such register
+ EPERM (arm64) register access not allowed before vcpu finalization
+ ====== ============================================================
+
(These error codes are indicative only: do not rely on a specific error
code being returned in a specific situation.)
-struct kvm_one_reg {
+::
+
+ struct kvm_one_reg {
__u64 id;
__u64 addr;
-};
+ };
Using this ioctl, a single vcpu register can be set to a specific value
defined by user space with the passed in struct kvm_one_reg, where id
@@ -1936,217 +2139,226 @@
and their own constants and width. To keep track of the implemented
registers, find a list below:
- Arch | Register | Width (bits)
- | |
- PPC | KVM_REG_PPC_HIOR | 64
- PPC | KVM_REG_PPC_IAC1 | 64
- PPC | KVM_REG_PPC_IAC2 | 64
- PPC | KVM_REG_PPC_IAC3 | 64
- PPC | KVM_REG_PPC_IAC4 | 64
- PPC | KVM_REG_PPC_DAC1 | 64
- PPC | KVM_REG_PPC_DAC2 | 64
- PPC | KVM_REG_PPC_DABR | 64
- PPC | KVM_REG_PPC_DSCR | 64
- PPC | KVM_REG_PPC_PURR | 64
- PPC | KVM_REG_PPC_SPURR | 64
- PPC | KVM_REG_PPC_DAR | 64
- PPC | KVM_REG_PPC_DSISR | 32
- PPC | KVM_REG_PPC_AMR | 64
- PPC | KVM_REG_PPC_UAMOR | 64
- PPC | KVM_REG_PPC_MMCR0 | 64
- PPC | KVM_REG_PPC_MMCR1 | 64
- PPC | KVM_REG_PPC_MMCRA | 64
- PPC | KVM_REG_PPC_MMCR2 | 64
- PPC | KVM_REG_PPC_MMCRS | 64
- PPC | KVM_REG_PPC_SIAR | 64
- PPC | KVM_REG_PPC_SDAR | 64
- PPC | KVM_REG_PPC_SIER | 64
- PPC | KVM_REG_PPC_PMC1 | 32
- PPC | KVM_REG_PPC_PMC2 | 32
- PPC | KVM_REG_PPC_PMC3 | 32
- PPC | KVM_REG_PPC_PMC4 | 32
- PPC | KVM_REG_PPC_PMC5 | 32
- PPC | KVM_REG_PPC_PMC6 | 32
- PPC | KVM_REG_PPC_PMC7 | 32
- PPC | KVM_REG_PPC_PMC8 | 32
- PPC | KVM_REG_PPC_FPR0 | 64
- ...
- PPC | KVM_REG_PPC_FPR31 | 64
- PPC | KVM_REG_PPC_VR0 | 128
- ...
- PPC | KVM_REG_PPC_VR31 | 128
- PPC | KVM_REG_PPC_VSR0 | 128
- ...
- PPC | KVM_REG_PPC_VSR31 | 128
- PPC | KVM_REG_PPC_FPSCR | 64
- PPC | KVM_REG_PPC_VSCR | 32
- PPC | KVM_REG_PPC_VPA_ADDR | 64
- PPC | KVM_REG_PPC_VPA_SLB | 128
- PPC | KVM_REG_PPC_VPA_DTL | 128
- PPC | KVM_REG_PPC_EPCR | 32
- PPC | KVM_REG_PPC_EPR | 32
- PPC | KVM_REG_PPC_TCR | 32
- PPC | KVM_REG_PPC_TSR | 32
- PPC | KVM_REG_PPC_OR_TSR | 32
- PPC | KVM_REG_PPC_CLEAR_TSR | 32
- PPC | KVM_REG_PPC_MAS0 | 32
- PPC | KVM_REG_PPC_MAS1 | 32
- PPC | KVM_REG_PPC_MAS2 | 64
- PPC | KVM_REG_PPC_MAS7_3 | 64
- PPC | KVM_REG_PPC_MAS4 | 32
- PPC | KVM_REG_PPC_MAS6 | 32
- PPC | KVM_REG_PPC_MMUCFG | 32
- PPC | KVM_REG_PPC_TLB0CFG | 32
- PPC | KVM_REG_PPC_TLB1CFG | 32
- PPC | KVM_REG_PPC_TLB2CFG | 32
- PPC | KVM_REG_PPC_TLB3CFG | 32
- PPC | KVM_REG_PPC_TLB0PS | 32
- PPC | KVM_REG_PPC_TLB1PS | 32
- PPC | KVM_REG_PPC_TLB2PS | 32
- PPC | KVM_REG_PPC_TLB3PS | 32
- PPC | KVM_REG_PPC_EPTCFG | 32
- PPC | KVM_REG_PPC_ICP_STATE | 64
- PPC | KVM_REG_PPC_VP_STATE | 128
- PPC | KVM_REG_PPC_TB_OFFSET | 64
- PPC | KVM_REG_PPC_SPMC1 | 32
- PPC | KVM_REG_PPC_SPMC2 | 32
- PPC | KVM_REG_PPC_IAMR | 64
- PPC | KVM_REG_PPC_TFHAR | 64
- PPC | KVM_REG_PPC_TFIAR | 64
- PPC | KVM_REG_PPC_TEXASR | 64
- PPC | KVM_REG_PPC_FSCR | 64
- PPC | KVM_REG_PPC_PSPB | 32
- PPC | KVM_REG_PPC_EBBHR | 64
- PPC | KVM_REG_PPC_EBBRR | 64
- PPC | KVM_REG_PPC_BESCR | 64
- PPC | KVM_REG_PPC_TAR | 64
- PPC | KVM_REG_PPC_DPDES | 64
- PPC | KVM_REG_PPC_DAWR | 64
- PPC | KVM_REG_PPC_DAWRX | 64
- PPC | KVM_REG_PPC_CIABR | 64
- PPC | KVM_REG_PPC_IC | 64
- PPC | KVM_REG_PPC_VTB | 64
- PPC | KVM_REG_PPC_CSIGR | 64
- PPC | KVM_REG_PPC_TACR | 64
- PPC | KVM_REG_PPC_TCSCR | 64
- PPC | KVM_REG_PPC_PID | 64
- PPC | KVM_REG_PPC_ACOP | 64
- PPC | KVM_REG_PPC_VRSAVE | 32
- PPC | KVM_REG_PPC_LPCR | 32
- PPC | KVM_REG_PPC_LPCR_64 | 64
- PPC | KVM_REG_PPC_PPR | 64
- PPC | KVM_REG_PPC_ARCH_COMPAT | 32
- PPC | KVM_REG_PPC_DABRX | 32
- PPC | KVM_REG_PPC_WORT | 64
- PPC | KVM_REG_PPC_SPRG9 | 64
- PPC | KVM_REG_PPC_DBSR | 32
- PPC | KVM_REG_PPC_TIDR | 64
- PPC | KVM_REG_PPC_PSSCR | 64
- PPC | KVM_REG_PPC_DEC_EXPIRY | 64
- PPC | KVM_REG_PPC_PTCR | 64
- PPC | KVM_REG_PPC_TM_GPR0 | 64
- ...
- PPC | KVM_REG_PPC_TM_GPR31 | 64
- PPC | KVM_REG_PPC_TM_VSR0 | 128
- ...
- PPC | KVM_REG_PPC_TM_VSR63 | 128
- PPC | KVM_REG_PPC_TM_CR | 64
- PPC | KVM_REG_PPC_TM_LR | 64
- PPC | KVM_REG_PPC_TM_CTR | 64
- PPC | KVM_REG_PPC_TM_FPSCR | 64
- PPC | KVM_REG_PPC_TM_AMR | 64
- PPC | KVM_REG_PPC_TM_PPR | 64
- PPC | KVM_REG_PPC_TM_VRSAVE | 64
- PPC | KVM_REG_PPC_TM_VSCR | 32
- PPC | KVM_REG_PPC_TM_DSCR | 64
- PPC | KVM_REG_PPC_TM_TAR | 64
- PPC | KVM_REG_PPC_TM_XER | 64
- | |
- MIPS | KVM_REG_MIPS_R0 | 64
- ...
- MIPS | KVM_REG_MIPS_R31 | 64
- MIPS | KVM_REG_MIPS_HI | 64
- MIPS | KVM_REG_MIPS_LO | 64
- MIPS | KVM_REG_MIPS_PC | 64
- MIPS | KVM_REG_MIPS_CP0_INDEX | 32
- MIPS | KVM_REG_MIPS_CP0_ENTRYLO0 | 64
- MIPS | KVM_REG_MIPS_CP0_ENTRYLO1 | 64
- MIPS | KVM_REG_MIPS_CP0_CONTEXT | 64
- MIPS | KVM_REG_MIPS_CP0_CONTEXTCONFIG| 32
- MIPS | KVM_REG_MIPS_CP0_USERLOCAL | 64
- MIPS | KVM_REG_MIPS_CP0_XCONTEXTCONFIG| 64
- MIPS | KVM_REG_MIPS_CP0_PAGEMASK | 32
- MIPS | KVM_REG_MIPS_CP0_PAGEGRAIN | 32
- MIPS | KVM_REG_MIPS_CP0_SEGCTL0 | 64
- MIPS | KVM_REG_MIPS_CP0_SEGCTL1 | 64
- MIPS | KVM_REG_MIPS_CP0_SEGCTL2 | 64
- MIPS | KVM_REG_MIPS_CP0_PWBASE | 64
- MIPS | KVM_REG_MIPS_CP0_PWFIELD | 64
- MIPS | KVM_REG_MIPS_CP0_PWSIZE | 64
- MIPS | KVM_REG_MIPS_CP0_WIRED | 32
- MIPS | KVM_REG_MIPS_CP0_PWCTL | 32
- MIPS | KVM_REG_MIPS_CP0_HWRENA | 32
- MIPS | KVM_REG_MIPS_CP0_BADVADDR | 64
- MIPS | KVM_REG_MIPS_CP0_BADINSTR | 32
- MIPS | KVM_REG_MIPS_CP0_BADINSTRP | 32
- MIPS | KVM_REG_MIPS_CP0_COUNT | 32
- MIPS | KVM_REG_MIPS_CP0_ENTRYHI | 64
- MIPS | KVM_REG_MIPS_CP0_COMPARE | 32
- MIPS | KVM_REG_MIPS_CP0_STATUS | 32
- MIPS | KVM_REG_MIPS_CP0_INTCTL | 32
- MIPS | KVM_REG_MIPS_CP0_CAUSE | 32
- MIPS | KVM_REG_MIPS_CP0_EPC | 64
- MIPS | KVM_REG_MIPS_CP0_PRID | 32
- MIPS | KVM_REG_MIPS_CP0_EBASE | 64
- MIPS | KVM_REG_MIPS_CP0_CONFIG | 32
- MIPS | KVM_REG_MIPS_CP0_CONFIG1 | 32
- MIPS | KVM_REG_MIPS_CP0_CONFIG2 | 32
- MIPS | KVM_REG_MIPS_CP0_CONFIG3 | 32
- MIPS | KVM_REG_MIPS_CP0_CONFIG4 | 32
- MIPS | KVM_REG_MIPS_CP0_CONFIG5 | 32
- MIPS | KVM_REG_MIPS_CP0_CONFIG7 | 32
- MIPS | KVM_REG_MIPS_CP0_XCONTEXT | 64
- MIPS | KVM_REG_MIPS_CP0_ERROREPC | 64
- MIPS | KVM_REG_MIPS_CP0_KSCRATCH1 | 64
- MIPS | KVM_REG_MIPS_CP0_KSCRATCH2 | 64
- MIPS | KVM_REG_MIPS_CP0_KSCRATCH3 | 64
- MIPS | KVM_REG_MIPS_CP0_KSCRATCH4 | 64
- MIPS | KVM_REG_MIPS_CP0_KSCRATCH5 | 64
- MIPS | KVM_REG_MIPS_CP0_KSCRATCH6 | 64
- MIPS | KVM_REG_MIPS_CP0_MAAR(0..63) | 64
- MIPS | KVM_REG_MIPS_COUNT_CTL | 64
- MIPS | KVM_REG_MIPS_COUNT_RESUME | 64
- MIPS | KVM_REG_MIPS_COUNT_HZ | 64
- MIPS | KVM_REG_MIPS_FPR_32(0..31) | 32
- MIPS | KVM_REG_MIPS_FPR_64(0..31) | 64
- MIPS | KVM_REG_MIPS_VEC_128(0..31) | 128
- MIPS | KVM_REG_MIPS_FCR_IR | 32
- MIPS | KVM_REG_MIPS_FCR_CSR | 32
- MIPS | KVM_REG_MIPS_MSA_IR | 32
- MIPS | KVM_REG_MIPS_MSA_CSR | 32
+ ======= =============================== ============
+ Arch Register Width (bits)
+ ======= =============================== ============
+ PPC KVM_REG_PPC_HIOR 64
+ PPC KVM_REG_PPC_IAC1 64
+ PPC KVM_REG_PPC_IAC2 64
+ PPC KVM_REG_PPC_IAC3 64
+ PPC KVM_REG_PPC_IAC4 64
+ PPC KVM_REG_PPC_DAC1 64
+ PPC KVM_REG_PPC_DAC2 64
+ PPC KVM_REG_PPC_DABR 64
+ PPC KVM_REG_PPC_DSCR 64
+ PPC KVM_REG_PPC_PURR 64
+ PPC KVM_REG_PPC_SPURR 64
+ PPC KVM_REG_PPC_DAR 64
+ PPC KVM_REG_PPC_DSISR 32
+ PPC KVM_REG_PPC_AMR 64
+ PPC KVM_REG_PPC_UAMOR 64
+ PPC KVM_REG_PPC_MMCR0 64
+ PPC KVM_REG_PPC_MMCR1 64
+ PPC KVM_REG_PPC_MMCRA 64
+ PPC KVM_REG_PPC_MMCR2 64
+ PPC KVM_REG_PPC_MMCRS 64
+ PPC KVM_REG_PPC_SIAR 64
+ PPC KVM_REG_PPC_SDAR 64
+ PPC KVM_REG_PPC_SIER 64
+ PPC KVM_REG_PPC_PMC1 32
+ PPC KVM_REG_PPC_PMC2 32
+ PPC KVM_REG_PPC_PMC3 32
+ PPC KVM_REG_PPC_PMC4 32
+ PPC KVM_REG_PPC_PMC5 32
+ PPC KVM_REG_PPC_PMC6 32
+ PPC KVM_REG_PPC_PMC7 32
+ PPC KVM_REG_PPC_PMC8 32
+ PPC KVM_REG_PPC_FPR0 64
+ ...
+ PPC KVM_REG_PPC_FPR31 64
+ PPC KVM_REG_PPC_VR0 128
+ ...
+ PPC KVM_REG_PPC_VR31 128
+ PPC KVM_REG_PPC_VSR0 128
+ ...
+ PPC KVM_REG_PPC_VSR31 128
+ PPC KVM_REG_PPC_FPSCR 64
+ PPC KVM_REG_PPC_VSCR 32
+ PPC KVM_REG_PPC_VPA_ADDR 64
+ PPC KVM_REG_PPC_VPA_SLB 128
+ PPC KVM_REG_PPC_VPA_DTL 128
+ PPC KVM_REG_PPC_EPCR 32
+ PPC KVM_REG_PPC_EPR 32
+ PPC KVM_REG_PPC_TCR 32
+ PPC KVM_REG_PPC_TSR 32
+ PPC KVM_REG_PPC_OR_TSR 32
+ PPC KVM_REG_PPC_CLEAR_TSR 32
+ PPC KVM_REG_PPC_MAS0 32
+ PPC KVM_REG_PPC_MAS1 32
+ PPC KVM_REG_PPC_MAS2 64
+ PPC KVM_REG_PPC_MAS7_3 64
+ PPC KVM_REG_PPC_MAS4 32
+ PPC KVM_REG_PPC_MAS6 32
+ PPC KVM_REG_PPC_MMUCFG 32
+ PPC KVM_REG_PPC_TLB0CFG 32
+ PPC KVM_REG_PPC_TLB1CFG 32
+ PPC KVM_REG_PPC_TLB2CFG 32
+ PPC KVM_REG_PPC_TLB3CFG 32
+ PPC KVM_REG_PPC_TLB0PS 32
+ PPC KVM_REG_PPC_TLB1PS 32
+ PPC KVM_REG_PPC_TLB2PS 32
+ PPC KVM_REG_PPC_TLB3PS 32
+ PPC KVM_REG_PPC_EPTCFG 32
+ PPC KVM_REG_PPC_ICP_STATE 64
+ PPC KVM_REG_PPC_VP_STATE 128
+ PPC KVM_REG_PPC_TB_OFFSET 64
+ PPC KVM_REG_PPC_SPMC1 32
+ PPC KVM_REG_PPC_SPMC2 32
+ PPC KVM_REG_PPC_IAMR 64
+ PPC KVM_REG_PPC_TFHAR 64
+ PPC KVM_REG_PPC_TFIAR 64
+ PPC KVM_REG_PPC_TEXASR 64
+ PPC KVM_REG_PPC_FSCR 64
+ PPC KVM_REG_PPC_PSPB 32
+ PPC KVM_REG_PPC_EBBHR 64
+ PPC KVM_REG_PPC_EBBRR 64
+ PPC KVM_REG_PPC_BESCR 64
+ PPC KVM_REG_PPC_TAR 64
+ PPC KVM_REG_PPC_DPDES 64
+ PPC KVM_REG_PPC_DAWR 64
+ PPC KVM_REG_PPC_DAWRX 64
+ PPC KVM_REG_PPC_CIABR 64
+ PPC KVM_REG_PPC_IC 64
+ PPC KVM_REG_PPC_VTB 64
+ PPC KVM_REG_PPC_CSIGR 64
+ PPC KVM_REG_PPC_TACR 64
+ PPC KVM_REG_PPC_TCSCR 64
+ PPC KVM_REG_PPC_PID 64
+ PPC KVM_REG_PPC_ACOP 64
+ PPC KVM_REG_PPC_VRSAVE 32
+ PPC KVM_REG_PPC_LPCR 32
+ PPC KVM_REG_PPC_LPCR_64 64
+ PPC KVM_REG_PPC_PPR 64
+ PPC KVM_REG_PPC_ARCH_COMPAT 32
+ PPC KVM_REG_PPC_DABRX 32
+ PPC KVM_REG_PPC_WORT 64
+ PPC KVM_REG_PPC_SPRG9 64
+ PPC KVM_REG_PPC_DBSR 32
+ PPC KVM_REG_PPC_TIDR 64
+ PPC KVM_REG_PPC_PSSCR 64
+ PPC KVM_REG_PPC_DEC_EXPIRY 64
+ PPC KVM_REG_PPC_PTCR 64
+ PPC KVM_REG_PPC_TM_GPR0 64
+ ...
+ PPC KVM_REG_PPC_TM_GPR31 64
+ PPC KVM_REG_PPC_TM_VSR0 128
+ ...
+ PPC KVM_REG_PPC_TM_VSR63 128
+ PPC KVM_REG_PPC_TM_CR 64
+ PPC KVM_REG_PPC_TM_LR 64
+ PPC KVM_REG_PPC_TM_CTR 64
+ PPC KVM_REG_PPC_TM_FPSCR 64
+ PPC KVM_REG_PPC_TM_AMR 64
+ PPC KVM_REG_PPC_TM_PPR 64
+ PPC KVM_REG_PPC_TM_VRSAVE 64
+ PPC KVM_REG_PPC_TM_VSCR 32
+ PPC KVM_REG_PPC_TM_DSCR 64
+ PPC KVM_REG_PPC_TM_TAR 64
+ PPC KVM_REG_PPC_TM_XER 64
+
+ MIPS KVM_REG_MIPS_R0 64
+ ...
+ MIPS KVM_REG_MIPS_R31 64
+ MIPS KVM_REG_MIPS_HI 64
+ MIPS KVM_REG_MIPS_LO 64
+ MIPS KVM_REG_MIPS_PC 64
+ MIPS KVM_REG_MIPS_CP0_INDEX 32
+ MIPS KVM_REG_MIPS_CP0_ENTRYLO0 64
+ MIPS KVM_REG_MIPS_CP0_ENTRYLO1 64
+ MIPS KVM_REG_MIPS_CP0_CONTEXT 64
+ MIPS KVM_REG_MIPS_CP0_CONTEXTCONFIG 32
+ MIPS KVM_REG_MIPS_CP0_USERLOCAL 64
+ MIPS KVM_REG_MIPS_CP0_XCONTEXTCONFIG 64
+ MIPS KVM_REG_MIPS_CP0_PAGEMASK 32
+ MIPS KVM_REG_MIPS_CP0_PAGEGRAIN 32
+ MIPS KVM_REG_MIPS_CP0_SEGCTL0 64
+ MIPS KVM_REG_MIPS_CP0_SEGCTL1 64
+ MIPS KVM_REG_MIPS_CP0_SEGCTL2 64
+ MIPS KVM_REG_MIPS_CP0_PWBASE 64
+ MIPS KVM_REG_MIPS_CP0_PWFIELD 64
+ MIPS KVM_REG_MIPS_CP0_PWSIZE 64
+ MIPS KVM_REG_MIPS_CP0_WIRED 32
+ MIPS KVM_REG_MIPS_CP0_PWCTL 32
+ MIPS KVM_REG_MIPS_CP0_HWRENA 32
+ MIPS KVM_REG_MIPS_CP0_BADVADDR 64
+ MIPS KVM_REG_MIPS_CP0_BADINSTR 32
+ MIPS KVM_REG_MIPS_CP0_BADINSTRP 32
+ MIPS KVM_REG_MIPS_CP0_COUNT 32
+ MIPS KVM_REG_MIPS_CP0_ENTRYHI 64
+ MIPS KVM_REG_MIPS_CP0_COMPARE 32
+ MIPS KVM_REG_MIPS_CP0_STATUS 32
+ MIPS KVM_REG_MIPS_CP0_INTCTL 32
+ MIPS KVM_REG_MIPS_CP0_CAUSE 32
+ MIPS KVM_REG_MIPS_CP0_EPC 64
+ MIPS KVM_REG_MIPS_CP0_PRID 32
+ MIPS KVM_REG_MIPS_CP0_EBASE 64
+ MIPS KVM_REG_MIPS_CP0_CONFIG 32
+ MIPS KVM_REG_MIPS_CP0_CONFIG1 32
+ MIPS KVM_REG_MIPS_CP0_CONFIG2 32
+ MIPS KVM_REG_MIPS_CP0_CONFIG3 32
+ MIPS KVM_REG_MIPS_CP0_CONFIG4 32
+ MIPS KVM_REG_MIPS_CP0_CONFIG5 32
+ MIPS KVM_REG_MIPS_CP0_CONFIG7 32
+ MIPS KVM_REG_MIPS_CP0_XCONTEXT 64
+ MIPS KVM_REG_MIPS_CP0_ERROREPC 64
+ MIPS KVM_REG_MIPS_CP0_KSCRATCH1 64
+ MIPS KVM_REG_MIPS_CP0_KSCRATCH2 64
+ MIPS KVM_REG_MIPS_CP0_KSCRATCH3 64
+ MIPS KVM_REG_MIPS_CP0_KSCRATCH4 64
+ MIPS KVM_REG_MIPS_CP0_KSCRATCH5 64
+ MIPS KVM_REG_MIPS_CP0_KSCRATCH6 64
+ MIPS KVM_REG_MIPS_CP0_MAAR(0..63) 64
+ MIPS KVM_REG_MIPS_COUNT_CTL 64
+ MIPS KVM_REG_MIPS_COUNT_RESUME 64
+ MIPS KVM_REG_MIPS_COUNT_HZ 64
+ MIPS KVM_REG_MIPS_FPR_32(0..31) 32
+ MIPS KVM_REG_MIPS_FPR_64(0..31) 64
+ MIPS KVM_REG_MIPS_VEC_128(0..31) 128
+ MIPS KVM_REG_MIPS_FCR_IR 32
+ MIPS KVM_REG_MIPS_FCR_CSR 32
+ MIPS KVM_REG_MIPS_MSA_IR 32
+ MIPS KVM_REG_MIPS_MSA_CSR 32
+ ======= =============================== ============
ARM registers are mapped using the lower 32 bits. The upper 16 of that
is the register group type, or coprocessor number:
-ARM core registers have the following id bit patterns:
+ARM core registers have the following id bit patterns::
+
0x4020 0000 0010 <index into the kvm_regs struct:16>
-ARM 32-bit CP15 registers have the following id bit patterns:
+ARM 32-bit CP15 registers have the following id bit patterns::
+
0x4020 0000 000F <zero:1> <crn:4> <crm:4> <opc1:4> <opc2:3>
-ARM 64-bit CP15 registers have the following id bit patterns:
+ARM 64-bit CP15 registers have the following id bit patterns::
+
0x4030 0000 000F <zero:1> <zero:4> <crm:4> <opc1:4> <zero:3>
-ARM CCSIDR registers are demultiplexed by CSSELR value:
+ARM CCSIDR registers are demultiplexed by CSSELR value::
+
0x4020 0000 0011 00 <csselr:8>
-ARM 32-bit VFP control registers have the following id bit patterns:
+ARM 32-bit VFP control registers have the following id bit patterns::
+
0x4020 0000 0012 1 <regno:12>
-ARM 64-bit FP registers have the following id bit patterns:
+ARM 64-bit FP registers have the following id bit patterns::
+
0x4030 0000 0012 0 <regno:12>
-ARM firmware pseudo-registers have the following bit pattern:
+ARM firmware pseudo-registers have the following bit pattern::
+
0x4030 0000 0014 <regno:16>
@@ -2156,15 +2368,18 @@
arm64 core/FP-SIMD registers have the following id bit patterns. Note
that the size of the access is variable, as the kvm_regs structure
contains elements ranging from 32 to 128 bits. The index is a 32bit
-value in the kvm_regs structure seen as a 32bit array.
+value in the kvm_regs structure seen as a 32bit array::
+
0x60x0 0000 0010 <index into the kvm_regs struct:16>
Specifically:
+
+======================= ========= ===== =======================================
Encoding Register Bits kvm_regs member
-----------------------------------------------------------------
+======================= ========= ===== =======================================
0x6030 0000 0010 0000 X0 64 regs.regs[0]
0x6030 0000 0010 0002 X1 64 regs.regs[1]
- ...
+ ...
0x6030 0000 0010 003c X30 64 regs.regs[30]
0x6030 0000 0010 003e SP 64 regs.sp
0x6030 0000 0010 0040 PC 64 regs.pc
@@ -2176,27 +2391,31 @@
0x6030 0000 0010 004c SPSR_UND 64 spsr[KVM_SPSR_UND]
0x6030 0000 0010 004e SPSR_IRQ 64 spsr[KVM_SPSR_IRQ]
0x6060 0000 0010 0050 SPSR_FIQ 64 spsr[KVM_SPSR_FIQ]
- 0x6040 0000 0010 0054 V0 128 fp_regs.vregs[0] (*)
- 0x6040 0000 0010 0058 V1 128 fp_regs.vregs[1] (*)
- ...
- 0x6040 0000 0010 00d0 V31 128 fp_regs.vregs[31] (*)
+ 0x6040 0000 0010 0054 V0 128 fp_regs.vregs[0] [1]_
+ 0x6040 0000 0010 0058 V1 128 fp_regs.vregs[1] [1]_
+ ...
+ 0x6040 0000 0010 00d0 V31 128 fp_regs.vregs[31] [1]_
0x6020 0000 0010 00d4 FPSR 32 fp_regs.fpsr
0x6020 0000 0010 00d5 FPCR 32 fp_regs.fpcr
+======================= ========= ===== =======================================
-(*) These encodings are not accepted for SVE-enabled vcpus. See
- KVM_ARM_VCPU_INIT.
+.. [1] These encodings are not accepted for SVE-enabled vcpus. See
+ KVM_ARM_VCPU_INIT.
- The equivalent register content can be accessed via bits [127:0] of
- the corresponding SVE Zn registers instead for vcpus that have SVE
- enabled (see below).
+ The equivalent register content can be accessed via bits [127:0] of
+ the corresponding SVE Zn registers instead for vcpus that have SVE
+ enabled (see below).
-arm64 CCSIDR registers are demultiplexed by CSSELR value:
+arm64 CCSIDR registers are demultiplexed by CSSELR value::
+
0x6020 0000 0011 00 <csselr:8>
-arm64 system registers have the following id bit patterns:
+arm64 system registers have the following id bit patterns::
+
0x6030 0000 0013 <op0:2> <op1:3> <crn:4> <crm:4> <op2:3>
-WARNING:
+.. warning::
+
Two system register IDs do not follow the specified pattern. These
are KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT, which map to
system registers CNTV_CVAL_EL0 and CNTVCT_EL0 respectively. These
@@ -2205,10 +2424,12 @@
derived from the register encoding for CNTV_CVAL_EL0. As this is
API, it must remain this way.
-arm64 firmware pseudo-registers have the following bit pattern:
+arm64 firmware pseudo-registers have the following bit pattern::
+
0x6030 0000 0014 <regno:16>
-arm64 SVE registers have the following bit patterns:
+arm64 SVE registers have the following bit patterns::
+
0x6080 0000 0015 00 <n:5> <slice:5> Zn bits[2048*slice + 2047 : 2048*slice]
0x6050 0000 0015 04 <n:4> <slice:5> Pn bits[256*slice + 255 : 256*slice]
0x6050 0000 0015 060 <slice:5> FFR bits[256*slice + 255 : 256*slice]
@@ -2216,7 +2437,7 @@
Access to register IDs where 2048 * slice >= 128 * max_vq will fail with
ENOENT. max_vq is the vcpu's maximum supported vector length in 128-bit
-quadwords: see (**) below.
+quadwords: see [2]_ below.
These registers are only accessible on vcpus for which SVE is enabled.
See KVM_ARM_VCPU_INIT for details.
@@ -2231,21 +2452,21 @@
userspace. When transferred to or from user memory via KVM_GET_ONE_REG
or KVM_SET_ONE_REG, the value of this register is of type
__u64[KVM_ARM64_SVE_VLS_WORDS], and encodes the set of vector lengths as
-follows:
+follows::
-__u64 vector_lengths[KVM_ARM64_SVE_VLS_WORDS];
+ __u64 vector_lengths[KVM_ARM64_SVE_VLS_WORDS];
-if (vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX &&
- ((vector_lengths[(vq - KVM_ARM64_SVE_VQ_MIN) / 64] >>
+ if (vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX &&
+ ((vector_lengths[(vq - KVM_ARM64_SVE_VQ_MIN) / 64] >>
((vq - KVM_ARM64_SVE_VQ_MIN) % 64)) & 1))
/* Vector length vq * 16 bytes supported */
-else
+ else
/* Vector length vq * 16 bytes not supported */
-(**) The maximum value vq for which the above condition is true is
-max_vq. This is the maximum vector length available to the guest on
-this vcpu, and determines which register slices are visible through
-this ioctl interface.
+.. [2] The maximum value vq for which the above condition is true is
+ max_vq. This is the maximum vector length available to the guest on
+ this vcpu, and determines which register slices are visible through
+ this ioctl interface.
(See Documentation/arm64/sve.rst for an explanation of the "vq"
nomenclature.)
@@ -2270,11 +2491,13 @@
MIPS registers are mapped using the lower 32 bits. The upper 16 of that is
the register group type:
-MIPS core registers (see above) have the following id bit patterns:
+MIPS core registers (see above) have the following id bit patterns::
+
0x7030 0000 0000 <reg:16>
MIPS CP0 registers (see KVM_REG_MIPS_CP0_* above) have the following id bit
-patterns depending on whether they're 32-bit or 64-bit registers:
+patterns depending on whether they're 32-bit or 64-bit registers::
+
0x7020 0000 0001 00 <reg:5> <sel:3> (32-bit)
0x7030 0000 0001 00 <reg:5> <sel:3> (64-bit)
@@ -2285,10 +2508,12 @@
the PFNX field starting at bit 30.
MIPS MAARs (see KVM_REG_MIPS_CP0_MAAR(*) above) have the following id bit
-patterns:
+patterns::
+
0x7030 0000 0001 01 <reg:8>
-MIPS KVM control registers (see above) have the following id bit patterns:
+MIPS KVM control registers (see above) have the following id bit patterns::
+
0x7030 0000 0002 <reg:16>
MIPS FPU registers (see KVM_REG_MIPS_FPR_{32,64}() above) have the following
@@ -2297,31 +2522,40 @@
Config5.FRE), i.e. as the guest would see them, and they become unpredictable
if the guest FPU mode is changed. MIPS SIMD Architecture (MSA) vector
registers (see KVM_REG_MIPS_VEC_128() above) have similar patterns as they
-overlap the FPU registers:
+overlap the FPU registers::
+
0x7020 0000 0003 00 <0:3> <reg:5> (32-bit FPU registers)
0x7030 0000 0003 00 <0:3> <reg:5> (64-bit FPU registers)
0x7040 0000 0003 00 <0:3> <reg:5> (128-bit MSA vector registers)
MIPS FPU control registers (see KVM_REG_MIPS_FCR_{IR,CSR} above) have the
-following id bit patterns:
+following id bit patterns::
+
0x7020 0000 0003 01 <0:3> <reg:5>
MIPS MSA control registers (see KVM_REG_MIPS_MSA_{IR,CSR} above) have the
-following id bit patterns:
+following id bit patterns::
+
0x7020 0000 0003 02 <0:3> <reg:5>
4.69 KVM_GET_ONE_REG
+--------------------
-Capability: KVM_CAP_ONE_REG
-Architectures: all
-Type: vcpu ioctl
-Parameters: struct kvm_one_reg (in and out)
-Returns: 0 on success, negative value on failure
+:Capability: KVM_CAP_ONE_REG
+:Architectures: all
+:Type: vcpu ioctl
+:Parameters: struct kvm_one_reg (in and out)
+:Returns: 0 on success, negative value on failure
+
Errors include:
- ENOENT: no such register
- EINVAL: invalid register ID, or no such register
- EPERM: (arm64) register access not allowed before vcpu finalization
+
+ ======== ============================================================
+ ENOENT no such register
+ EINVAL invalid register ID, or no such register
+ EPERM (arm64) register access not allowed before vcpu finalization
+ ======== ============================================================
+
(These error codes are indicative only: do not rely on a specific error
code being returned in a specific situation.)
@@ -2335,12 +2569,13 @@
4.70 KVM_KVMCLOCK_CTRL
+----------------------
-Capability: KVM_CAP_KVMCLOCK_CTRL
-Architectures: Any that implement pvclocks (currently x86 only)
-Type: vcpu ioctl
-Parameters: None
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_KVMCLOCK_CTRL
+:Architectures: Any that implement pvclocks (currently x86 only)
+:Type: vcpu ioctl
+:Parameters: None
+:Returns: 0 on success, -1 on error
This signals to the host kernel that the specified guest is being paused by
userspace. The host will set a flag in the pvclock structure that is checked
@@ -2356,26 +2591,30 @@
4.71 KVM_SIGNAL_MSI
+-------------------
-Capability: KVM_CAP_SIGNAL_MSI
-Architectures: x86 arm arm64
-Type: vm ioctl
-Parameters: struct kvm_msi (in)
-Returns: >0 on delivery, 0 if guest blocked the MSI, and -1 on error
+:Capability: KVM_CAP_SIGNAL_MSI
+:Architectures: x86 arm arm64
+:Type: vm ioctl
+:Parameters: struct kvm_msi (in)
+:Returns: >0 on delivery, 0 if guest blocked the MSI, and -1 on error
Directly inject a MSI message. Only valid with in-kernel irqchip that handles
MSI messages.
-struct kvm_msi {
+::
+
+ struct kvm_msi {
__u32 address_lo;
__u32 address_hi;
__u32 data;
__u32 flags;
__u32 devid;
__u8 pad[12];
-};
+ };
-flags: KVM_MSI_VALID_DEVID: devid contains a valid value. The per-VM
+flags:
+ KVM_MSI_VALID_DEVID: devid contains a valid value. The per-VM
KVM_CAP_MSI_DEVID capability advertises the requirement to provide
the device ID. If this capability is not available, userspace
should never set the KVM_MSI_VALID_DEVID flag as the ioctl might fail.
@@ -2391,30 +2630,31 @@
4.71 KVM_CREATE_PIT2
+--------------------
-Capability: KVM_CAP_PIT2
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_pit_config (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_PIT2
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_pit_config (in)
+:Returns: 0 on success, -1 on error
Creates an in-kernel device model for the i8254 PIT. This call is only valid
after enabling in-kernel irqchip support via KVM_CREATE_IRQCHIP. The following
-parameters have to be passed:
+parameters have to be passed::
-struct kvm_pit_config {
+ struct kvm_pit_config {
__u32 flags;
__u32 pad[15];
-};
+ };
-Valid flags are:
+Valid flags are::
-#define KVM_PIT_SPEAKER_DUMMY 1 /* emulate speaker port stub */
+ #define KVM_PIT_SPEAKER_DUMMY 1 /* emulate speaker port stub */
PIT timer interrupts may use a per-VM kernel thread for injection. If it
-exists, this thread will have a name of the following pattern:
+exists, this thread will have a name of the following pattern::
-kvm-pit/<owner-process-pid>
+ kvm-pit/<owner-process-pid>
When running a guest with elevated priorities, the scheduling parameters of
this thread may have to be adjusted accordingly.
@@ -2423,37 +2663,39 @@
4.72 KVM_GET_PIT2
+-----------------
-Capability: KVM_CAP_PIT_STATE2
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_pit_state2 (out)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_PIT_STATE2
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_pit_state2 (out)
+:Returns: 0 on success, -1 on error
Retrieves the state of the in-kernel PIT model. Only valid after
-KVM_CREATE_PIT2. The state is returned in the following structure:
+KVM_CREATE_PIT2. The state is returned in the following structure::
-struct kvm_pit_state2 {
+ struct kvm_pit_state2 {
struct kvm_pit_channel_state channels[3];
__u32 flags;
__u32 reserved[9];
-};
+ };
-Valid flags are:
+Valid flags are::
-/* disable PIT in HPET legacy mode */
-#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001
+ /* disable PIT in HPET legacy mode */
+ #define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001
This IOCTL replaces the obsolete KVM_GET_PIT.
4.73 KVM_SET_PIT2
+-----------------
-Capability: KVM_CAP_PIT_STATE2
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_pit_state2 (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_PIT_STATE2
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_pit_state2 (in)
+:Returns: 0 on success, -1 on error
Sets the state of the in-kernel PIT model. Only valid after KVM_CREATE_PIT2.
See KVM_GET_PIT2 for details on struct kvm_pit_state2.
@@ -2462,12 +2704,13 @@
4.74 KVM_PPC_GET_SMMU_INFO
+--------------------------
-Capability: KVM_CAP_PPC_GET_SMMU_INFO
-Architectures: powerpc
-Type: vm ioctl
-Parameters: None
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_PPC_GET_SMMU_INFO
+:Architectures: powerpc
+:Type: vm ioctl
+:Parameters: None
+:Returns: 0 on success, -1 on error
This populates and returns a structure describing the features of
the "Server" class MMU emulation supported by KVM.
@@ -2475,7 +2718,7 @@
device-tree properties for the guest operating system.
The structure contains some global information, followed by an
-array of supported segment page sizes:
+array of supported segment page sizes::
struct kvm_ppc_smmu_info {
__u64 flags;
@@ -2503,7 +2746,7 @@
The "sps" array contains 8 entries indicating the supported base
page sizes for a segment in increasing order. Each entry is defined
-as follow:
+as follow::
struct kvm_ppc_one_seg_page_size {
__u32 page_shift; /* Base page shift of segment (or 0) */
@@ -2524,7 +2767,7 @@
only larger or equal to the base page size), along with the
corresponding encoding in the hash PTE. Similarly, the array is
8 entries sorted by increasing sizes and an entry with a "0" shift
-is an empty entry and a terminator:
+is an empty entry and a terminator::
struct kvm_ppc_one_page_size {
__u32 page_shift; /* Page shift (or 0) */
@@ -2536,12 +2779,13 @@
into the hash PTE second double word).
4.75 KVM_IRQFD
+--------------
-Capability: KVM_CAP_IRQFD
-Architectures: x86 s390 arm arm64
-Type: vm ioctl
-Parameters: struct kvm_irqfd (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_IRQFD
+:Architectures: x86 s390 arm arm64
+:Type: vm ioctl
+:Parameters: struct kvm_irqfd (in)
+:Returns: 0 on success, -1 on error
Allows setting an eventfd to directly trigger a guest interrupt.
kvm_irqfd.fd specifies the file descriptor to use as the eventfd and
@@ -2565,6 +2809,7 @@
and need not be specified with KVM_IRQFD_FLAG_DEASSIGN.
On arm/arm64, gsi routing being supported, the following can happen:
+
- in case no routing entry is associated to this gsi, injection fails
- in case the gsi is associated to an irqchip routing entry,
irqchip.pin + 32 corresponds to the injected SPI ID.
@@ -2573,12 +2818,13 @@
to GICv3 ITS in-kernel emulation).
4.76 KVM_PPC_ALLOCATE_HTAB
+--------------------------
-Capability: KVM_CAP_PPC_ALLOC_HTAB
-Architectures: powerpc
-Type: vm ioctl
-Parameters: Pointer to u32 containing hash table order (in/out)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_PPC_ALLOC_HTAB
+:Architectures: powerpc
+:Type: vm ioctl
+:Parameters: Pointer to u32 containing hash table order (in/out)
+:Returns: 0 on success, -1 on error
This requests the host kernel to allocate an MMU hash table for a
guest using the PAPR paravirtualization interface. This only does
@@ -2609,75 +2855,88 @@
HPTEs on the next KVM_RUN of any vcpu.
4.77 KVM_S390_INTERRUPT
+-----------------------
-Capability: basic
-Architectures: s390
-Type: vm ioctl, vcpu ioctl
-Parameters: struct kvm_s390_interrupt (in)
-Returns: 0 on success, -1 on error
+:Capability: basic
+:Architectures: s390
+:Type: vm ioctl, vcpu ioctl
+:Parameters: struct kvm_s390_interrupt (in)
+:Returns: 0 on success, -1 on error
Allows to inject an interrupt to the guest. Interrupts can be floating
(vm ioctl) or per cpu (vcpu ioctl), depending on the interrupt type.
-Interrupt parameters are passed via kvm_s390_interrupt:
+Interrupt parameters are passed via kvm_s390_interrupt::
-struct kvm_s390_interrupt {
+ struct kvm_s390_interrupt {
__u32 type;
__u32 parm;
__u64 parm64;
-};
+ };
type can be one of the following:
-KVM_S390_SIGP_STOP (vcpu) - sigp stop; optional flags in parm
-KVM_S390_PROGRAM_INT (vcpu) - program check; code in parm
-KVM_S390_SIGP_SET_PREFIX (vcpu) - sigp set prefix; prefix address in parm
-KVM_S390_RESTART (vcpu) - restart
-KVM_S390_INT_CLOCK_COMP (vcpu) - clock comparator interrupt
-KVM_S390_INT_CPU_TIMER (vcpu) - CPU timer interrupt
-KVM_S390_INT_VIRTIO (vm) - virtio external interrupt; external interrupt
- parameters in parm and parm64
-KVM_S390_INT_SERVICE (vm) - sclp external interrupt; sclp parameter in parm
-KVM_S390_INT_EMERGENCY (vcpu) - sigp emergency; source cpu in parm
-KVM_S390_INT_EXTERNAL_CALL (vcpu) - sigp external call; source cpu in parm
-KVM_S390_INT_IO(ai,cssid,ssid,schid) (vm) - compound value to indicate an
- I/O interrupt (ai - adapter interrupt; cssid,ssid,schid - subchannel);
- I/O interruption parameters in parm (subchannel) and parm64 (intparm,
- interruption subclass)
-KVM_S390_MCHK (vm, vcpu) - machine check interrupt; cr 14 bits in parm,
- machine check interrupt code in parm64 (note that
- machine checks needing further payload are not
- supported by this ioctl)
+KVM_S390_SIGP_STOP (vcpu)
+ - sigp stop; optional flags in parm
+KVM_S390_PROGRAM_INT (vcpu)
+ - program check; code in parm
+KVM_S390_SIGP_SET_PREFIX (vcpu)
+ - sigp set prefix; prefix address in parm
+KVM_S390_RESTART (vcpu)
+ - restart
+KVM_S390_INT_CLOCK_COMP (vcpu)
+ - clock comparator interrupt
+KVM_S390_INT_CPU_TIMER (vcpu)
+ - CPU timer interrupt
+KVM_S390_INT_VIRTIO (vm)
+ - virtio external interrupt; external interrupt
+ parameters in parm and parm64
+KVM_S390_INT_SERVICE (vm)
+ - sclp external interrupt; sclp parameter in parm
+KVM_S390_INT_EMERGENCY (vcpu)
+ - sigp emergency; source cpu in parm
+KVM_S390_INT_EXTERNAL_CALL (vcpu)
+ - sigp external call; source cpu in parm
+KVM_S390_INT_IO(ai,cssid,ssid,schid) (vm)
+ - compound value to indicate an
+ I/O interrupt (ai - adapter interrupt; cssid,ssid,schid - subchannel);
+ I/O interruption parameters in parm (subchannel) and parm64 (intparm,
+ interruption subclass)
+KVM_S390_MCHK (vm, vcpu)
+ - machine check interrupt; cr 14 bits in parm, machine check interrupt
+ code in parm64 (note that machine checks needing further payload are not
+ supported by this ioctl)
This is an asynchronous vcpu ioctl and can be invoked from any thread.
4.78 KVM_PPC_GET_HTAB_FD
+------------------------
-Capability: KVM_CAP_PPC_HTAB_FD
-Architectures: powerpc
-Type: vm ioctl
-Parameters: Pointer to struct kvm_get_htab_fd (in)
-Returns: file descriptor number (>= 0) on success, -1 on error
+:Capability: KVM_CAP_PPC_HTAB_FD
+:Architectures: powerpc
+:Type: vm ioctl
+:Parameters: Pointer to struct kvm_get_htab_fd (in)
+:Returns: file descriptor number (>= 0) on success, -1 on error
This returns a file descriptor that can be used either to read out the
entries in the guest's hashed page table (HPT), or to write entries to
initialize the HPT. The returned fd can only be written to if the
KVM_GET_HTAB_WRITE bit is set in the flags field of the argument, and
can only be read if that bit is clear. The argument struct looks like
-this:
+this::
-/* For KVM_PPC_GET_HTAB_FD */
-struct kvm_get_htab_fd {
+ /* For KVM_PPC_GET_HTAB_FD */
+ struct kvm_get_htab_fd {
__u64 flags;
__u64 start_index;
__u64 reserved[2];
-};
+ };
-/* Values for kvm_get_htab_fd.flags */
-#define KVM_GET_HTAB_BOLTED_ONLY ((__u64)0x1)
-#define KVM_GET_HTAB_WRITE ((__u64)0x2)
+ /* Values for kvm_get_htab_fd.flags */
+ #define KVM_GET_HTAB_BOLTED_ONLY ((__u64)0x1)
+ #define KVM_GET_HTAB_WRITE ((__u64)0x2)
-The `start_index' field gives the index in the HPT of the entry at
+The 'start_index' field gives the index in the HPT of the entry at
which to start reading. It is ignored when writing.
Reads on the fd will initially supply information about all
@@ -2692,29 +2951,34 @@
series of valid HPT entries (16 bytes) each. The header indicates how
many valid HPT entries there are and how many invalid entries follow
the valid entries. The invalid entries are not represented explicitly
-in the stream. The header format is:
+in the stream. The header format is::
-struct kvm_get_htab_header {
+ struct kvm_get_htab_header {
__u32 index;
__u16 n_valid;
__u16 n_invalid;
-};
+ };
Writes to the fd create HPT entries starting at the index given in the
-header; first `n_valid' valid entries with contents from the data
-written, then `n_invalid' invalid entries, invalidating any previously
+header; first 'n_valid' valid entries with contents from the data
+written, then 'n_invalid' invalid entries, invalidating any previously
valid entries found.
4.79 KVM_CREATE_DEVICE
+----------------------
-Capability: KVM_CAP_DEVICE_CTRL
-Type: vm ioctl
-Parameters: struct kvm_create_device (in/out)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_DEVICE_CTRL
+:Type: vm ioctl
+:Parameters: struct kvm_create_device (in/out)
+:Returns: 0 on success, -1 on error
+
Errors:
- ENODEV: The device type is unknown or unsupported
- EEXIST: Device already created, and this type of device may not
+
+ ====== =======================================================
+ ENODEV The device type is unknown or unsupported
+ EEXIST Device already created, and this type of device may not
be instantiated multiple times
+ ====== =======================================================
Other error conditions may be defined by individual device types or
have their standard meanings.
@@ -2730,25 +2994,32 @@
for specifying any behavior that is not implied by the device type
number.
-struct kvm_create_device {
+::
+
+ struct kvm_create_device {
__u32 type; /* in: KVM_DEV_TYPE_xxx */
__u32 fd; /* out: device handle */
__u32 flags; /* in: KVM_CREATE_DEVICE_xxx */
-};
+ };
4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
+--------------------------------------------
-Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
- KVM_CAP_VCPU_ATTRIBUTES for vcpu device
-Type: device ioctl, vm ioctl, vcpu ioctl
-Parameters: struct kvm_device_attr
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
+ KVM_CAP_VCPU_ATTRIBUTES for vcpu device
+:Type: device ioctl, vm ioctl, vcpu ioctl
+:Parameters: struct kvm_device_attr
+:Returns: 0 on success, -1 on error
+
Errors:
- ENXIO: The group or attribute is unknown/unsupported for this device
+
+ ===== =============================================================
+ ENXIO The group or attribute is unknown/unsupported for this device
or hardware support is missing.
- EPERM: The attribute cannot (currently) be accessed this way
+ EPERM The attribute cannot (currently) be accessed this way
(e.g. read-only attribute, or attribute that only makes
sense when the device is in a different state)
+ ===== =============================================================
Other error conditions may be defined by individual device types.
@@ -2757,23 +3028,30 @@
the "devices" directory. As with ONE_REG, the size of the data
transferred is defined by the particular attribute.
-struct kvm_device_attr {
+::
+
+ struct kvm_device_attr {
__u32 flags; /* no flags currently defined */
__u32 group; /* device-defined */
__u64 attr; /* group-defined */
__u64 addr; /* userspace address of attr data */
-};
+ };
4.81 KVM_HAS_DEVICE_ATTR
+------------------------
-Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
- KVM_CAP_VCPU_ATTRIBUTES for vcpu device
-Type: device ioctl, vm ioctl, vcpu ioctl
-Parameters: struct kvm_device_attr
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
+ KVM_CAP_VCPU_ATTRIBUTES for vcpu device
+:Type: device ioctl, vm ioctl, vcpu ioctl
+:Parameters: struct kvm_device_attr
+:Returns: 0 on success, -1 on error
+
Errors:
- ENXIO: The group or attribute is unknown/unsupported for this device
+
+ ===== =============================================================
+ ENXIO The group or attribute is unknown/unsupported for this device
or hardware support is missing.
+ ===== =============================================================
Tests whether a device supports a particular attribute. A successful
return indicates the attribute is implemented. It does not necessarily
@@ -2781,15 +3059,20 @@
current state. "addr" is ignored.
4.82 KVM_ARM_VCPU_INIT
+----------------------
-Capability: basic
-Architectures: arm, arm64
-Type: vcpu ioctl
-Parameters: struct kvm_vcpu_init (in)
-Returns: 0 on success; -1 on error
+:Capability: basic
+:Architectures: arm, arm64
+:Type: vcpu ioctl
+:Parameters: struct kvm_vcpu_init (in)
+:Returns: 0 on success; -1 on error
+
Errors:
- EINVAL: the target is unknown, or the combination of features is invalid.
- ENOENT: a features bit specified is unknown.
+
+ ====== =================================================================
+ EINVAL the target is unknown, or the combination of features is invalid.
+ ENOENT a features bit specified is unknown.
+ ====== =================================================================
This tells KVM what type of CPU to present to the guest, and what
optional features it should have. This will cause a reset of the cpu
@@ -2805,6 +3088,7 @@
target and same set of feature flags, otherwise EINVAL will be returned.
Possible features:
+
- KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state.
Depends on KVM_CAP_ARM_PSCI. If not set, the CPU will be powered on
and execute guest code when KVM_RUN is called.
@@ -2861,14 +3145,19 @@
no longer be written using KVM_SET_ONE_REG.
4.83 KVM_ARM_PREFERRED_TARGET
+-----------------------------
-Capability: basic
-Architectures: arm, arm64
-Type: vm ioctl
-Parameters: struct struct kvm_vcpu_init (out)
-Returns: 0 on success; -1 on error
+:Capability: basic
+:Architectures: arm, arm64
+:Type: vm ioctl
+:Parameters: struct struct kvm_vcpu_init (out)
+:Returns: 0 on success; -1 on error
+
Errors:
- ENODEV: no preferred target available for the host
+
+ ====== ==========================================
+ ENODEV no preferred target available for the host
+ ====== ==========================================
This queries KVM for preferred CPU target type which can be emulated
by KVM on underlying host.
@@ -2885,43 +3174,57 @@
4.84 KVM_GET_REG_LIST
+---------------------
-Capability: basic
-Architectures: arm, arm64, mips
-Type: vcpu ioctl
-Parameters: struct kvm_reg_list (in/out)
-Returns: 0 on success; -1 on error
+:Capability: basic
+:Architectures: arm, arm64, mips
+:Type: vcpu ioctl
+:Parameters: struct kvm_reg_list (in/out)
+:Returns: 0 on success; -1 on error
+
Errors:
- E2BIG: the reg index list is too big to fit in the array specified by
- the user (the number required will be written into n).
-struct kvm_reg_list {
+ ===== ==============================================================
+ E2BIG the reg index list is too big to fit in the array specified by
+ the user (the number required will be written into n).
+ ===== ==============================================================
+
+::
+
+ struct kvm_reg_list {
__u64 n; /* number of registers in reg[] */
__u64 reg[0];
-};
+ };
This ioctl returns the guest registers that are supported for the
KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
4.85 KVM_ARM_SET_DEVICE_ADDR (deprecated)
+-----------------------------------------
-Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
-Architectures: arm, arm64
-Type: vm ioctl
-Parameters: struct kvm_arm_device_address (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
+:Architectures: arm, arm64
+:Type: vm ioctl
+:Parameters: struct kvm_arm_device_address (in)
+:Returns: 0 on success, -1 on error
+
Errors:
- ENODEV: The device id is unknown
- ENXIO: Device not supported on current system
- EEXIST: Address already set
- E2BIG: Address outside guest physical address space
- EBUSY: Address overlaps with other device range
-struct kvm_arm_device_addr {
+ ====== ============================================
+ ENODEV The device id is unknown
+ ENXIO Device not supported on current system
+ EEXIST Address already set
+ E2BIG Address outside guest physical address space
+ EBUSY Address overlaps with other device range
+ ====== ============================================
+
+::
+
+ struct kvm_arm_device_addr {
__u64 id;
__u64 addr;
-};
+ };
Specify a device address in the guest's physical address space where guests
can access emulated or directly exposed devices, which the host kernel needs
@@ -2929,7 +3232,7 @@
specific device.
ARM/arm64 divides the id field into two parts, a device id and an
-address type id specific to the individual device.
+address type id specific to the individual device::
bits: | 63 ... 32 | 31 ... 16 | 15 ... 0 |
field: | 0x00000000 | device id | addr type id |
@@ -2947,12 +3250,13 @@
4.86 KVM_PPC_RTAS_DEFINE_TOKEN
+------------------------------
-Capability: KVM_CAP_PPC_RTAS
-Architectures: ppc
-Type: vm ioctl
-Parameters: struct kvm_rtas_token_args
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_PPC_RTAS
+:Architectures: ppc
+:Type: vm ioctl
+:Parameters: struct kvm_rtas_token_args
+:Returns: 0 on success, -1 on error
Defines a token value for a RTAS (Run Time Abstraction Services)
service in order to allow it to be handled in the kernel. The
@@ -2966,18 +3270,21 @@
handled.
4.87 KVM_SET_GUEST_DEBUG
+------------------------
-Capability: KVM_CAP_SET_GUEST_DEBUG
-Architectures: x86, s390, ppc, arm64
-Type: vcpu ioctl
-Parameters: struct kvm_guest_debug (in)
-Returns: 0 on success; -1 on error
+:Capability: KVM_CAP_SET_GUEST_DEBUG
+:Architectures: x86, s390, ppc, arm64
+:Type: vcpu ioctl
+:Parameters: struct kvm_guest_debug (in)
+:Returns: 0 on success; -1 on error
-struct kvm_guest_debug {
+::
+
+ struct kvm_guest_debug {
__u32 control;
__u32 pad;
struct kvm_guest_debug_arch arch;
-};
+ };
Set up the processor specific debug registers and configure vcpu for
handling guest debug events. There are two parts to the structure, the
@@ -3019,26 +3326,31 @@
structure containing architecture specific debug information.
4.88 KVM_GET_EMULATED_CPUID
+---------------------------
-Capability: KVM_CAP_EXT_EMUL_CPUID
-Architectures: x86
-Type: system ioctl
-Parameters: struct kvm_cpuid2 (in/out)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_EXT_EMUL_CPUID
+:Architectures: x86
+:Type: system ioctl
+:Parameters: struct kvm_cpuid2 (in/out)
+:Returns: 0 on success, -1 on error
-struct kvm_cpuid2 {
+::
+
+ struct kvm_cpuid2 {
__u32 nent;
__u32 flags;
struct kvm_cpuid_entry2 entries[0];
-};
+ };
The member 'flags' is used for passing flags from userspace.
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
-#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
-#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
+::
-struct kvm_cpuid_entry2 {
+ #define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
+ #define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
+ #define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
+
+ struct kvm_cpuid_entry2 {
__u32 function;
__u32 index;
__u32 flags;
@@ -3047,7 +3359,7 @@
__u32 ecx;
__u32 edx;
__u32 padding[3];
-};
+ };
This ioctl returns x86 cpuid features which are emulated by
kvm.Userspace can use the information returned by this ioctl to query
@@ -3072,10 +3384,14 @@
The fields in each entry are defined as follows:
- function: the eax value used to obtain the entry
- index: the ecx value used to obtain the entry (for entries that are
+ function:
+ the eax value used to obtain the entry
+ index:
+ the ecx value used to obtain the entry (for entries that are
affected by ecx)
- flags: an OR of zero or more of the following:
+ flags:
+ an OR of zero or more of the following:
+
KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
if the index field is valid
KVM_CPUID_FLAG_STATEFUL_FUNC:
@@ -3085,24 +3401,28 @@
KVM_CPUID_FLAG_STATE_READ_NEXT:
for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
the first entry to be read by a cpu
- eax, ebx, ecx, edx: the values returned by the cpuid instruction for
+
+ eax, ebx, ecx, edx:
+
+ the values returned by the cpuid instruction for
this function/index combination
4.89 KVM_S390_MEM_OP
+--------------------
-Capability: KVM_CAP_S390_MEM_OP
-Architectures: s390
-Type: vcpu ioctl
-Parameters: struct kvm_s390_mem_op (in)
-Returns: = 0 on success,
- < 0 on generic error (e.g. -EFAULT or -ENOMEM),
- > 0 if an exception occurred while walking the page tables
+:Capability: KVM_CAP_S390_MEM_OP
+:Architectures: s390
+:Type: vcpu ioctl
+:Parameters: struct kvm_s390_mem_op (in)
+:Returns: = 0 on success,
+ < 0 on generic error (e.g. -EFAULT or -ENOMEM),
+ > 0 if an exception occurred while walking the page tables
Read or write data from/to the logical (virtual) memory of a VCPU.
-Parameters are specified via the following structure:
+Parameters are specified via the following structure::
-struct kvm_s390_mem_op {
+ struct kvm_s390_mem_op {
__u64 gaddr; /* the guest address */
__u64 flags; /* flags */
__u32 size; /* amount of bytes */
@@ -3110,7 +3430,7 @@
__u64 buf; /* buffer in userspace */
__u8 ar; /* the access register number */
__u8 reserved[31]; /* should be set to 0 */
-};
+ };
The type of operation is specified in the "op" field. It is either
KVM_S390_MEMOP_LOGICAL_READ for reading from logical memory space or
@@ -3137,24 +3457,25 @@
KVM with the currently defined set of flags.
4.90 KVM_S390_GET_SKEYS
+-----------------------
-Capability: KVM_CAP_S390_SKEYS
-Architectures: s390
-Type: vm ioctl
-Parameters: struct kvm_s390_skeys
-Returns: 0 on success, KVM_S390_GET_KEYS_NONE if guest is not using storage
- keys, negative value on error
+:Capability: KVM_CAP_S390_SKEYS
+:Architectures: s390
+:Type: vm ioctl
+:Parameters: struct kvm_s390_skeys
+:Returns: 0 on success, KVM_S390_GET_KEYS_NONE if guest is not using storage
+ keys, negative value on error
This ioctl is used to get guest storage key values on the s390
-architecture. The ioctl takes parameters via the kvm_s390_skeys struct.
+architecture. The ioctl takes parameters via the kvm_s390_skeys struct::
-struct kvm_s390_skeys {
+ struct kvm_s390_skeys {
__u64 start_gfn;
__u64 count;
__u64 skeydata_addr;
__u32 flags;
__u32 reserved[9];
-};
+ };
The start_gfn field is the number of the first guest frame whose storage keys
you want to get.
@@ -3168,12 +3489,13 @@
bytes. This buffer will be filled with storage key data by the ioctl.
4.91 KVM_S390_SET_SKEYS
+-----------------------
-Capability: KVM_CAP_S390_SKEYS
-Architectures: s390
-Type: vm ioctl
-Parameters: struct kvm_s390_skeys
-Returns: 0 on success, negative value on error
+:Capability: KVM_CAP_S390_SKEYS
+:Architectures: s390
+:Type: vm ioctl
+:Parameters: struct kvm_s390_skeys
+:Returns: 0 on success, negative value on error
This ioctl is used to set guest storage key values on the s390
architecture. The ioctl takes parameters via the kvm_s390_skeys struct.
@@ -3195,21 +3517,27 @@
the ioctl will return -EINVAL.
4.92 KVM_S390_IRQ
+-----------------
-Capability: KVM_CAP_S390_INJECT_IRQ
-Architectures: s390
-Type: vcpu ioctl
-Parameters: struct kvm_s390_irq (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_S390_INJECT_IRQ
+:Architectures: s390
+:Type: vcpu ioctl
+:Parameters: struct kvm_s390_irq (in)
+:Returns: 0 on success, -1 on error
+
Errors:
- EINVAL: interrupt type is invalid
- type is KVM_S390_SIGP_STOP and flag parameter is invalid value
+
+
+ ====== =================================================================
+ EINVAL interrupt type is invalid
+ type is KVM_S390_SIGP_STOP and flag parameter is invalid value,
type is KVM_S390_INT_EXTERNAL_CALL and code is bigger
- than the maximum of VCPUs
- EBUSY: type is KVM_S390_SIGP_SET_PREFIX and vcpu is not stopped
- type is KVM_S390_SIGP_STOP and a stop irq is already pending
+ than the maximum of VCPUs
+ EBUSY type is KVM_S390_SIGP_SET_PREFIX and vcpu is not stopped,
+ type is KVM_S390_SIGP_STOP and a stop irq is already pending,
type is KVM_S390_INT_EXTERNAL_CALL and an external call interrupt
- is already pending
+ is already pending
+ ====== =================================================================
Allows to inject an interrupt to the guest.
@@ -3217,9 +3545,9 @@
to inject additional payload which is not
possible via KVM_S390_INTERRUPT.
-Interrupt parameters are passed via kvm_s390_irq:
+Interrupt parameters are passed via kvm_s390_irq::
-struct kvm_s390_irq {
+ struct kvm_s390_irq {
__u64 type;
union {
struct kvm_s390_io_info io;
@@ -3232,44 +3560,45 @@
struct kvm_s390_mchk_info mchk;
char reserved[64];
} u;
-};
+ };
type can be one of the following:
-KVM_S390_SIGP_STOP - sigp stop; parameter in .stop
-KVM_S390_PROGRAM_INT - program check; parameters in .pgm
-KVM_S390_SIGP_SET_PREFIX - sigp set prefix; parameters in .prefix
-KVM_S390_RESTART - restart; no parameters
-KVM_S390_INT_CLOCK_COMP - clock comparator interrupt; no parameters
-KVM_S390_INT_CPU_TIMER - CPU timer interrupt; no parameters
-KVM_S390_INT_EMERGENCY - sigp emergency; parameters in .emerg
-KVM_S390_INT_EXTERNAL_CALL - sigp external call; parameters in .extcall
-KVM_S390_MCHK - machine check interrupt; parameters in .mchk
+- KVM_S390_SIGP_STOP - sigp stop; parameter in .stop
+- KVM_S390_PROGRAM_INT - program check; parameters in .pgm
+- KVM_S390_SIGP_SET_PREFIX - sigp set prefix; parameters in .prefix
+- KVM_S390_RESTART - restart; no parameters
+- KVM_S390_INT_CLOCK_COMP - clock comparator interrupt; no parameters
+- KVM_S390_INT_CPU_TIMER - CPU timer interrupt; no parameters
+- KVM_S390_INT_EMERGENCY - sigp emergency; parameters in .emerg
+- KVM_S390_INT_EXTERNAL_CALL - sigp external call; parameters in .extcall
+- KVM_S390_MCHK - machine check interrupt; parameters in .mchk
This is an asynchronous vcpu ioctl and can be invoked from any thread.
4.94 KVM_S390_GET_IRQ_STATE
+---------------------------
-Capability: KVM_CAP_S390_IRQ_STATE
-Architectures: s390
-Type: vcpu ioctl
-Parameters: struct kvm_s390_irq_state (out)
-Returns: >= number of bytes copied into buffer,
- -EINVAL if buffer size is 0,
- -ENOBUFS if buffer size is too small to fit all pending interrupts,
- -EFAULT if the buffer address was invalid
+:Capability: KVM_CAP_S390_IRQ_STATE
+:Architectures: s390
+:Type: vcpu ioctl
+:Parameters: struct kvm_s390_irq_state (out)
+:Returns: >= number of bytes copied into buffer,
+ -EINVAL if buffer size is 0,
+ -ENOBUFS if buffer size is too small to fit all pending interrupts,
+ -EFAULT if the buffer address was invalid
This ioctl allows userspace to retrieve the complete state of all currently
pending interrupts in a single buffer. Use cases include migration
and introspection. The parameter structure contains the address of a
-userspace buffer and its length:
+userspace buffer and its length::
-struct kvm_s390_irq_state {
+ struct kvm_s390_irq_state {
__u64 buf;
__u32 flags; /* will stay unused for compatibility reasons */
__u32 len;
__u32 reserved[4]; /* will stay unused for compatibility reasons */
-};
+ };
Userspace passes in the above struct and for each pending interrupt a
struct kvm_s390_irq is copied to the provided buffer.
@@ -3283,29 +3612,30 @@
may retry with a bigger buffer.
4.95 KVM_S390_SET_IRQ_STATE
+---------------------------
-Capability: KVM_CAP_S390_IRQ_STATE
-Architectures: s390
-Type: vcpu ioctl
-Parameters: struct kvm_s390_irq_state (in)
-Returns: 0 on success,
- -EFAULT if the buffer address was invalid,
- -EINVAL for an invalid buffer length (see below),
- -EBUSY if there were already interrupts pending,
- errors occurring when actually injecting the
+:Capability: KVM_CAP_S390_IRQ_STATE
+:Architectures: s390
+:Type: vcpu ioctl
+:Parameters: struct kvm_s390_irq_state (in)
+:Returns: 0 on success,
+ -EFAULT if the buffer address was invalid,
+ -EINVAL for an invalid buffer length (see below),
+ -EBUSY if there were already interrupts pending,
+ errors occurring when actually injecting the
interrupt. See KVM_S390_IRQ.
This ioctl allows userspace to set the complete state of all cpu-local
interrupts currently pending for the vcpu. It is intended for restoring
interrupt state after a migration. The input parameter is a userspace buffer
-containing a struct kvm_s390_irq_state:
+containing a struct kvm_s390_irq_state::
-struct kvm_s390_irq_state {
+ struct kvm_s390_irq_state {
__u64 buf;
__u32 flags; /* will stay unused for compatibility reasons */
__u32 len;
__u32 reserved[4]; /* will stay unused for compatibility reasons */
-};
+ };
The restrictions for flags and reserved apply as well.
(see KVM_S390_GET_IRQ_STATE)
@@ -3320,20 +3650,22 @@
which is the maximum number of possibly pending cpu-local interrupts.
4.96 KVM_SMI
+------------
-Capability: KVM_CAP_X86_SMM
-Architectures: x86
-Type: vcpu ioctl
-Parameters: none
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_X86_SMM
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: none
+:Returns: 0 on success, -1 on error
Queues an SMI on the thread's vcpu.
4.97 KVM_CAP_PPC_MULTITCE
+-------------------------
-Capability: KVM_CAP_PPC_MULTITCE
-Architectures: ppc
-Type: vm
+:Capability: KVM_CAP_PPC_MULTITCE
+:Architectures: ppc
+:Type: vm
This capability means the kernel is capable of handling hypercalls
H_PUT_TCE_INDIRECT and H_STUFF_TCE without passing those into the user
@@ -3355,26 +3687,27 @@
This capability is always enabled.
4.98 KVM_CREATE_SPAPR_TCE_64
+----------------------------
-Capability: KVM_CAP_SPAPR_TCE_64
-Architectures: powerpc
-Type: vm ioctl
-Parameters: struct kvm_create_spapr_tce_64 (in)
-Returns: file descriptor for manipulating the created TCE table
+:Capability: KVM_CAP_SPAPR_TCE_64
+:Architectures: powerpc
+:Type: vm ioctl
+:Parameters: struct kvm_create_spapr_tce_64 (in)
+:Returns: file descriptor for manipulating the created TCE table
This is an extension for KVM_CAP_SPAPR_TCE which only supports 32bit
windows, described in 4.62 KVM_CREATE_SPAPR_TCE
-This capability uses extended struct in ioctl interface:
+This capability uses extended struct in ioctl interface::
-/* for KVM_CAP_SPAPR_TCE_64 */
-struct kvm_create_spapr_tce_64 {
+ /* for KVM_CAP_SPAPR_TCE_64 */
+ struct kvm_create_spapr_tce_64 {
__u64 liobn;
__u32 page_shift;
__u32 flags;
__u64 offset; /* in pages */
__u64 size; /* in pages */
-};
+ };
The aim of extension is to support an additional bigger DMA window with
a variable page size.
@@ -3387,12 +3720,13 @@
The rest of functionality is identical to KVM_CREATE_SPAPR_TCE.
4.99 KVM_REINJECT_CONTROL
+-------------------------
-Capability: KVM_CAP_REINJECT_CONTROL
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_reinject_control (in)
-Returns: 0 on success,
+:Capability: KVM_CAP_REINJECT_CONTROL
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_reinject_control (in)
+:Returns: 0 on success,
-EFAULT if struct kvm_reinject_control cannot be read,
-ENXIO if KVM_CREATE_PIT or KVM_CREATE_PIT2 didn't succeed earlier.
@@ -3402,21 +3736,24 @@
interrupt whenever there isn't a pending interrupt from i8254.
!reinject mode injects an interrupt as soon as a tick arrives.
-struct kvm_reinject_control {
+::
+
+ struct kvm_reinject_control {
__u8 pit_reinject;
__u8 reserved[31];
-};
+ };
pit_reinject = 0 (!reinject mode) is recommended, unless running an old
operating system that uses the PIT for timing (e.g. Linux 2.4.x).
4.100 KVM_PPC_CONFIGURE_V3_MMU
+------------------------------
-Capability: KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3
-Architectures: ppc
-Type: vm ioctl
-Parameters: struct kvm_ppc_mmuv3_cfg (in)
-Returns: 0 on success,
+:Capability: KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3
+:Architectures: ppc
+:Type: vm ioctl
+:Parameters: struct kvm_ppc_mmuv3_cfg (in)
+:Returns: 0 on success,
-EFAULT if struct kvm_ppc_mmuv3_cfg cannot be read,
-EINVAL if the configuration is invalid
@@ -3424,10 +3761,12 @@
page table) translation, and sets the pointer to the process table for
the guest.
-struct kvm_ppc_mmuv3_cfg {
+::
+
+ struct kvm_ppc_mmuv3_cfg {
__u64 flags;
__u64 process_table;
-};
+ };
There are two bits that can be set in flags; KVM_PPC_MMUV3_RADIX and
KVM_PPC_MMUV3_GTSE. KVM_PPC_MMUV3_RADIX, if set, configures the guest
@@ -3442,12 +3781,13 @@
the Power ISA V3.00, Book III section 5.7.6.1.
4.101 KVM_PPC_GET_RMMU_INFO
+---------------------------
-Capability: KVM_CAP_PPC_RADIX_MMU
-Architectures: ppc
-Type: vm ioctl
-Parameters: struct kvm_ppc_rmmu_info (out)
-Returns: 0 on success,
+:Capability: KVM_CAP_PPC_RADIX_MMU
+:Architectures: ppc
+:Type: vm ioctl
+:Parameters: struct kvm_ppc_rmmu_info (out)
+:Returns: 0 on success,
-EFAULT if struct kvm_ppc_rmmu_info cannot be written,
-EINVAL if no useful information can be returned
@@ -3456,14 +3796,16 @@
page sizes to put in the "AP" (actual page size) field for the tlbie
(TLB invalidate entry) instruction.
-struct kvm_ppc_rmmu_info {
+::
+
+ struct kvm_ppc_rmmu_info {
struct kvm_ppc_radix_geom {
__u8 page_shift;
__u8 level_bits[4];
__u8 pad[3];
} geometries[8];
__u32 ap_encodings[8];
-};
+ };
The geometries[] field gives up to 8 supported geometries for the
radix page table, in terms of the log base 2 of the smallest page
@@ -3476,19 +3818,54 @@
base 2 of the page size in the bottom 6 bits.
4.102 KVM_PPC_RESIZE_HPT_PREPARE
+--------------------------------
-Capability: KVM_CAP_SPAPR_RESIZE_HPT
-Architectures: powerpc
-Type: vm ioctl
-Parameters: struct kvm_ppc_resize_hpt (in)
-Returns: 0 on successful completion,
+:Capability: KVM_CAP_SPAPR_RESIZE_HPT
+:Architectures: powerpc
+:Type: vm ioctl
+:Parameters: struct kvm_ppc_resize_hpt (in)
+:Returns: 0 on successful completion,
>0 if a new HPT is being prepared, the value is an estimated
- number of milliseconds until preparation is complete
+ number of milliseconds until preparation is complete,
-EFAULT if struct kvm_reinject_control cannot be read,
- -EINVAL if the supplied shift or flags are invalid
- -ENOMEM if unable to allocate the new HPT
- -ENOSPC if there was a hash collision when moving existing
- HPT entries to the new HPT
+ -EINVAL if the supplied shift or flags are invalid,
+ -ENOMEM if unable to allocate the new HPT,
+ -ENOSPC if there was a hash collision
+
+::
+
+ struct kvm_ppc_rmmu_info {
+ struct kvm_ppc_radix_geom {
+ __u8 page_shift;
+ __u8 level_bits[4];
+ __u8 pad[3];
+ } geometries[8];
+ __u32 ap_encodings[8];
+ };
+
+The geometries[] field gives up to 8 supported geometries for the
+radix page table, in terms of the log base 2 of the smallest page
+size, and the number of bits indexed at each level of the tree, from
+the PTE level up to the PGD level in that order. Any unused entries
+will have 0 in the page_shift field.
+
+The ap_encodings gives the supported page sizes and their AP field
+encodings, encoded with the AP value in the top 3 bits and the log
+base 2 of the page size in the bottom 6 bits.
+
+4.102 KVM_PPC_RESIZE_HPT_PREPARE
+--------------------------------
+
+:Capability: KVM_CAP_SPAPR_RESIZE_HPT
+:Architectures: powerpc
+:Type: vm ioctl
+:Parameters: struct kvm_ppc_resize_hpt (in)
+:Returns: 0 on successful completion,
+ >0 if a new HPT is being prepared, the value is an estimated
+ number of milliseconds until preparation is complete,
+ -EFAULT if struct kvm_reinject_control cannot be read,
+ -EINVAL if the supplied shift or flags are invalid,when moving existing
+ HPT entries to the new HPT,
-EIO on other error conditions
Used to implement the PAPR extension for runtime resizing of a guest's
@@ -3506,6 +3883,7 @@
creates a new one as above.
If called when there is a pending HPT of the size requested, will:
+
* If preparation of the pending HPT is already complete, return 0
* If preparation of the pending HPT has failed, return an error
code, then discard the pending HPT.
@@ -3522,26 +3900,29 @@
it returns <= 0. The first call will initiate preparation, subsequent
ones will monitor preparation until it completes or fails.
-struct kvm_ppc_resize_hpt {
+::
+
+ struct kvm_ppc_resize_hpt {
__u64 flags;
__u32 shift;
__u32 pad;
-};
+ };
4.103 KVM_PPC_RESIZE_HPT_COMMIT
+-------------------------------
-Capability: KVM_CAP_SPAPR_RESIZE_HPT
-Architectures: powerpc
-Type: vm ioctl
-Parameters: struct kvm_ppc_resize_hpt (in)
-Returns: 0 on successful completion,
+:Capability: KVM_CAP_SPAPR_RESIZE_HPT
+:Architectures: powerpc
+:Type: vm ioctl
+:Parameters: struct kvm_ppc_resize_hpt (in)
+:Returns: 0 on successful completion,
-EFAULT if struct kvm_reinject_control cannot be read,
- -EINVAL if the supplied shift or flags are invalid
+ -EINVAL if the supplied shift or flags are invalid,
-ENXIO is there is no pending HPT, or the pending HPT doesn't
- have the requested size
- -EBUSY if the pending HPT is not fully prepared
+ have the requested size,
+ -EBUSY if the pending HPT is not fully prepared,
-ENOSPC if there was a hash collision when moving existing
- HPT entries to the new HPT
+ HPT entries to the new HPT,
-EIO on other error conditions
Used to implement the PAPR extension for runtime resizing of a guest's
@@ -3564,31 +3945,35 @@
On failure, the guest will still be operating on its previous HPT.
-struct kvm_ppc_resize_hpt {
+::
+
+ struct kvm_ppc_resize_hpt {
__u64 flags;
__u32 shift;
__u32 pad;
-};
+ };
4.104 KVM_X86_GET_MCE_CAP_SUPPORTED
+-----------------------------------
-Capability: KVM_CAP_MCE
-Architectures: x86
-Type: system ioctl
-Parameters: u64 mce_cap (out)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_MCE
+:Architectures: x86
+:Type: system ioctl
+:Parameters: u64 mce_cap (out)
+:Returns: 0 on success, -1 on error
Returns supported MCE capabilities. The u64 mce_cap parameter
has the same format as the MSR_IA32_MCG_CAP register. Supported
capabilities will have the corresponding bits set.
4.105 KVM_X86_SETUP_MCE
+-----------------------
-Capability: KVM_CAP_MCE
-Architectures: x86
-Type: vcpu ioctl
-Parameters: u64 mcg_cap (in)
-Returns: 0 on success,
+:Capability: KVM_CAP_MCE
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: u64 mcg_cap (in)
+:Returns: 0 on success,
-EFAULT if u64 mcg_cap cannot be read,
-EINVAL if the requested number of banks is invalid,
-EINVAL if requested MCE capability is not supported.
@@ -3601,20 +3986,21 @@
retrieved with KVM_X86_GET_MCE_CAP_SUPPORTED.
4.106 KVM_X86_SET_MCE
+---------------------
-Capability: KVM_CAP_MCE
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_x86_mce (in)
-Returns: 0 on success,
+:Capability: KVM_CAP_MCE
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_x86_mce (in)
+:Returns: 0 on success,
-EFAULT if struct kvm_x86_mce cannot be read,
-EINVAL if the bank number is invalid,
-EINVAL if VAL bit is not set in status field.
Inject a machine check error (MCE) into the guest. The input
-parameter is:
+parameter is::
-struct kvm_x86_mce {
+ struct kvm_x86_mce {
__u64 status;
__u64 addr;
__u64 misc;
@@ -3622,7 +4008,7 @@
__u8 bank;
__u8 pad1[7];
__u64 pad2[3];
-};
+ };
If the MCE being reported is an uncorrected error, KVM will
inject it as an MCE exception into the guest. If the guest
@@ -3634,15 +4020,17 @@
not holding a previously reported uncorrected error).
4.107 KVM_S390_GET_CMMA_BITS
+----------------------------
-Capability: KVM_CAP_S390_CMMA_MIGRATION
-Architectures: s390
-Type: vm ioctl
-Parameters: struct kvm_s390_cmma_log (in, out)
-Returns: 0 on success, a negative value on error
+:Capability: KVM_CAP_S390_CMMA_MIGRATION
+:Architectures: s390
+:Type: vm ioctl
+:Parameters: struct kvm_s390_cmma_log (in, out)
+:Returns: 0 on success, a negative value on error
This ioctl is used to get the values of the CMMA bits on the s390
architecture. It is meant to be used in two scenarios:
+
- During live migration to save the CMMA values. Live migration needs
to be enabled via the KVM_REQ_START_MIGRATION VM property.
- To non-destructively peek at the CMMA values, with the flag
@@ -3652,9 +4040,12 @@
values are written to a buffer whose location is indicated via the "values"
member in the kvm_s390_cmma_log struct. The values in the input struct are
also updated as needed.
+
Each CMMA value takes up one byte.
-struct kvm_s390_cmma_log {
+::
+
+ struct kvm_s390_cmma_log {
__u64 start_gfn;
__u32 count;
__u32 flags;
@@ -3663,7 +4054,7 @@
__u64 mask;
};
__u64 values;
-};
+ };
start_gfn is the number of the first guest frame whose CMMA values are
to be retrieved,
@@ -3724,12 +4115,13 @@
present for the addresses (e.g. when using hugepages).
4.108 KVM_S390_SET_CMMA_BITS
+----------------------------
-Capability: KVM_CAP_S390_CMMA_MIGRATION
-Architectures: s390
-Type: vm ioctl
-Parameters: struct kvm_s390_cmma_log (in)
-Returns: 0 on success, a negative value on error
+:Capability: KVM_CAP_S390_CMMA_MIGRATION
+:Architectures: s390
+:Type: vm ioctl
+:Parameters: struct kvm_s390_cmma_log (in)
+:Returns: 0 on success, a negative value on error
This ioctl is used to set the values of the CMMA bits on the s390
architecture. It is meant to be used during live migration to restore
@@ -3737,16 +4129,18 @@
The ioctl takes parameters via the kvm_s390_cmma_values struct.
Each CMMA value takes up one byte.
-struct kvm_s390_cmma_log {
+::
+
+ struct kvm_s390_cmma_log {
__u64 start_gfn;
__u32 count;
__u32 flags;
union {
__u64 remaining;
__u64 mask;
- };
+ };
__u64 values;
-};
+ };
start_gfn indicates the starting guest frame number,
@@ -3769,26 +4163,27 @@
hugepages).
4.109 KVM_PPC_GET_CPU_CHAR
+--------------------------
-Capability: KVM_CAP_PPC_GET_CPU_CHAR
-Architectures: powerpc
-Type: vm ioctl
-Parameters: struct kvm_ppc_cpu_char (out)
-Returns: 0 on successful completion
+:Capability: KVM_CAP_PPC_GET_CPU_CHAR
+:Architectures: powerpc
+:Type: vm ioctl
+:Parameters: struct kvm_ppc_cpu_char (out)
+:Returns: 0 on successful completion,
-EFAULT if struct kvm_ppc_cpu_char cannot be written
This ioctl gives userspace information about certain characteristics
of the CPU relating to speculative execution of instructions and
possible information leakage resulting from speculative execution (see
CVE-2017-5715, CVE-2017-5753 and CVE-2017-5754). The information is
-returned in struct kvm_ppc_cpu_char, which looks like this:
+returned in struct kvm_ppc_cpu_char, which looks like this::
-struct kvm_ppc_cpu_char {
+ struct kvm_ppc_cpu_char {
__u64 character; /* characteristics of the CPU */
__u64 behaviour; /* recommended software behaviour */
__u64 character_mask; /* valid bits in character */
__u64 behaviour_mask; /* valid bits in behaviour */
-};
+ };
For extensibility, the character_mask and behaviour_mask fields
indicate which bits of character and behaviour have been filled in by
@@ -3815,12 +4210,13 @@
H_GET_CPU_CHARACTERISTICS hypercall.
4.110 KVM_MEMORY_ENCRYPT_OP
+---------------------------
-Capability: basic
-Architectures: x86
-Type: system
-Parameters: an opaque platform specific structure (in/out)
-Returns: 0 on success; -1 on error
+:Capability: basic
+:Architectures: x86
+:Type: system
+:Parameters: an opaque platform specific structure (in/out)
+:Returns: 0 on success; -1 on error
If the platform supports creating encrypted VMs then this ioctl can be used
for issuing platform-specific memory encryption commands to manage those
@@ -3831,12 +4227,13 @@
Documentation/virt/kvm/amd-memory-encryption.rst.
4.111 KVM_MEMORY_ENCRYPT_REG_REGION
+-----------------------------------
-Capability: basic
-Architectures: x86
-Type: system
-Parameters: struct kvm_enc_region (in)
-Returns: 0 on success; -1 on error
+:Capability: basic
+:Architectures: x86
+:Type: system
+:Parameters: struct kvm_enc_region (in)
+:Returns: 0 on success; -1 on error
This ioctl can be used to register a guest memory region which may
contain encrypted data (e.g. guest RAM, SMRAM etc).
@@ -3854,60 +4251,71 @@
memory region registered with the ioctl.
4.112 KVM_MEMORY_ENCRYPT_UNREG_REGION
+-------------------------------------
-Capability: basic
-Architectures: x86
-Type: system
-Parameters: struct kvm_enc_region (in)
-Returns: 0 on success; -1 on error
+:Capability: basic
+:Architectures: x86
+:Type: system
+:Parameters: struct kvm_enc_region (in)
+:Returns: 0 on success; -1 on error
This ioctl can be used to unregister the guest memory region registered
with KVM_MEMORY_ENCRYPT_REG_REGION ioctl above.
4.113 KVM_HYPERV_EVENTFD
+------------------------
-Capability: KVM_CAP_HYPERV_EVENTFD
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_hyperv_eventfd (in)
+:Capability: KVM_CAP_HYPERV_EVENTFD
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_hyperv_eventfd (in)
This ioctl (un)registers an eventfd to receive notifications from the guest on
the specified Hyper-V connection id through the SIGNAL_EVENT hypercall, without
causing a user exit. SIGNAL_EVENT hypercall with non-zero event flag number
(bits 24-31) still triggers a KVM_EXIT_HYPERV_HCALL user exit.
-struct kvm_hyperv_eventfd {
+::
+
+ struct kvm_hyperv_eventfd {
__u32 conn_id;
__s32 fd;
__u32 flags;
__u32 padding[3];
-};
+ };
-The conn_id field should fit within 24 bits:
+The conn_id field should fit within 24 bits::
-#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff
+ #define KVM_HYPERV_CONN_ID_MASK 0x00ffffff
-The acceptable values for the flags field are:
+The acceptable values for the flags field are::
-#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0)
+ #define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0)
-Returns: 0 on success,
- -EINVAL if conn_id or flags is outside the allowed range
- -ENOENT on deassign if the conn_id isn't registered
- -EEXIST on assign if the conn_id is already registered
+:Returns: 0 on success,
+ -EINVAL if conn_id or flags is outside the allowed range,
+ -ENOENT on deassign if the conn_id isn't registered,
+ -EEXIST on assign if the conn_id is already registered
4.114 KVM_GET_NESTED_STATE
+--------------------------
-Capability: KVM_CAP_NESTED_STATE
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_nested_state (in/out)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_NESTED_STATE
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_nested_state (in/out)
+:Returns: 0 on success, -1 on error
+
Errors:
- E2BIG: the total state size exceeds the value of 'size' specified by
- the user; the size required will be written into size.
-struct kvm_nested_state {
+ ===== =============================================================
+ E2BIG the total state size exceeds the value of 'size' specified by
+ the user; the size required will be written into size.
+ ===== =============================================================
+
+::
+
+ struct kvm_nested_state {
__u16 flags;
__u16 format;
__u32 size;
@@ -3924,33 +4332,33 @@
struct kvm_vmx_nested_state_data vmx[0];
struct kvm_svm_nested_state_data svm[0];
} data;
-};
+ };
-#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
-#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
-#define KVM_STATE_NESTED_EVMCS 0x00000004
+ #define KVM_STATE_NESTED_GUEST_MODE 0x00000001
+ #define KVM_STATE_NESTED_RUN_PENDING 0x00000002
+ #define KVM_STATE_NESTED_EVMCS 0x00000004
-#define KVM_STATE_NESTED_FORMAT_VMX 0
-#define KVM_STATE_NESTED_FORMAT_SVM 1
+ #define KVM_STATE_NESTED_FORMAT_VMX 0
+ #define KVM_STATE_NESTED_FORMAT_SVM 1
-#define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000
+ #define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000
-#define KVM_STATE_NESTED_VMX_SMM_GUEST_MODE 0x00000001
-#define KVM_STATE_NESTED_VMX_SMM_VMXON 0x00000002
+ #define KVM_STATE_NESTED_VMX_SMM_GUEST_MODE 0x00000001
+ #define KVM_STATE_NESTED_VMX_SMM_VMXON 0x00000002
-struct kvm_vmx_nested_state_hdr {
+ struct kvm_vmx_nested_state_hdr {
__u64 vmxon_pa;
__u64 vmcs12_pa;
struct {
__u16 flags;
} smm;
-};
+ };
-struct kvm_vmx_nested_state_data {
+ struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
-};
+ };
This ioctl copies the vcpu's nested virtualization state from the kernel to
userspace.
@@ -3959,24 +4367,26 @@
to the KVM_CHECK_EXTENSION ioctl().
4.115 KVM_SET_NESTED_STATE
+--------------------------
-Capability: KVM_CAP_NESTED_STATE
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_nested_state (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_NESTED_STATE
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_nested_state (in)
+:Returns: 0 on success, -1 on error
This copies the vcpu's kvm_nested_state struct from userspace to the kernel.
For the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE.
4.116 KVM_(UN)REGISTER_COALESCED_MMIO
+-------------------------------------
-Capability: KVM_CAP_COALESCED_MMIO (for coalesced mmio)
- KVM_CAP_COALESCED_PIO (for coalesced pio)
-Architectures: all
-Type: vm ioctl
-Parameters: struct kvm_coalesced_mmio_zone
-Returns: 0 on success, < 0 on error
+:Capability: KVM_CAP_COALESCED_MMIO (for coalesced mmio)
+ KVM_CAP_COALESCED_PIO (for coalesced pio)
+:Architectures: all
+:Type: vm ioctl
+:Parameters: struct kvm_coalesced_mmio_zone
+:Returns: 0 on success, < 0 on error
Coalesced I/O is a performance optimization that defers hardware
register write emulation so that userspace exits are avoided. It is
@@ -3998,15 +4408,18 @@
to I/O ports.
4.117 KVM_CLEAR_DIRTY_LOG (vm ioctl)
+------------------------------------
-Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
-Architectures: x86, arm, arm64, mips
-Type: vm ioctl
-Parameters: struct kvm_dirty_log (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
+:Architectures: x86, arm, arm64, mips
+:Type: vm ioctl
+:Parameters: struct kvm_dirty_log (in)
+:Returns: 0 on success, -1 on error
-/* for KVM_CLEAR_DIRTY_LOG */
-struct kvm_clear_dirty_log {
+::
+
+ /* for KVM_CLEAR_DIRTY_LOG */
+ struct kvm_clear_dirty_log {
__u32 slot;
__u32 num_pages;
__u64 first_page;
@@ -4014,7 +4427,7 @@
void __user *dirty_bitmap; /* one bit per page */
__u64 padding;
};
-};
+ };
The ioctl clears the dirty status of pages in a memory slot, according to
the bitmap that is passed in struct kvm_clear_dirty_log's dirty_bitmap
@@ -4038,20 +4451,23 @@
that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is present.
4.118 KVM_GET_SUPPORTED_HV_CPUID
+--------------------------------
-Capability: KVM_CAP_HYPERV_CPUID
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_cpuid2 (in/out)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_HYPERV_CPUID
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_cpuid2 (in/out)
+:Returns: 0 on success, -1 on error
-struct kvm_cpuid2 {
+::
+
+ struct kvm_cpuid2 {
__u32 nent;
__u32 padding;
struct kvm_cpuid_entry2 entries[0];
-};
+ };
-struct kvm_cpuid_entry2 {
+ struct kvm_cpuid_entry2 {
__u32 function;
__u32 index;
__u32 flags;
@@ -4060,7 +4476,7 @@
__u32 ecx;
__u32 edx;
__u32 padding[3];
-};
+ };
This ioctl returns x86 cpuid features leaves related to Hyper-V emulation in
KVM. Userspace can use the information returned by this ioctl to construct
@@ -4073,13 +4489,13 @@
leaves (0x40000000, 0x40000001).
Currently, the following list of CPUID leaves are returned:
- HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS
- HYPERV_CPUID_INTERFACE
- HYPERV_CPUID_VERSION
- HYPERV_CPUID_FEATURES
- HYPERV_CPUID_ENLIGHTMENT_INFO
- HYPERV_CPUID_IMPLEMENT_LIMITS
- HYPERV_CPUID_NESTED_FEATURES
+ - HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS
+ - HYPERV_CPUID_INTERFACE
+ - HYPERV_CPUID_VERSION
+ - HYPERV_CPUID_FEATURES
+ - HYPERV_CPUID_ENLIGHTMENT_INFO
+ - HYPERV_CPUID_IMPLEMENT_LIMITS
+ - HYPERV_CPUID_NESTED_FEATURES
HYPERV_CPUID_NESTED_FEATURES leaf is only exposed when Enlightened VMCS was
enabled on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS).
@@ -4095,17 +4511,25 @@
userspace should not expect to get any particular value there.
4.119 KVM_ARM_VCPU_FINALIZE
+---------------------------
-Architectures: arm, arm64
-Type: vcpu ioctl
-Parameters: int feature (in)
-Returns: 0 on success, -1 on error
+:Architectures: arm, arm64
+:Type: vcpu ioctl
+:Parameters: int feature (in)
+:Returns: 0 on success, -1 on error
+
Errors:
- EPERM: feature not enabled, needs configuration, or already finalized
- EINVAL: feature unknown or not present
+
+ ====== ==============================================================
+ EPERM feature not enabled, needs configuration, or already finalized
+ EINVAL feature unknown or not present
+ ====== ==============================================================
Recognised values for feature:
+
+ ===== ===========================================
arm64 KVM_ARM_VCPU_SVE (requires KVM_CAP_ARM_SVE)
+ ===== ===========================================
Finalizes the configuration of the specified vcpu feature.
@@ -4129,21 +4553,24 @@
using this ioctl.
4.120 KVM_SET_PMU_EVENT_FILTER
+------------------------------
-Capability: KVM_CAP_PMU_EVENT_FILTER
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_pmu_event_filter (in)
-Returns: 0 on success, -1 on error
+:Capability: KVM_CAP_PMU_EVENT_FILTER
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_pmu_event_filter (in)
+:Returns: 0 on success, -1 on error
-struct kvm_pmu_event_filter {
+::
+
+ struct kvm_pmu_event_filter {
__u32 action;
__u32 nevents;
__u32 fixed_counter_bitmap;
__u32 flags;
__u32 pad[4];
__u64 events[0];
-};
+ };
This ioctl restricts the set of PMU events that the guest can program.
The argument holds a list of events which will be allowed or denied.
@@ -4154,20 +4581,26 @@
No flags are defined yet, the field must be zero.
-Valid values for 'action':
-#define KVM_PMU_EVENT_ALLOW 0
-#define KVM_PMU_EVENT_DENY 1
+Valid values for 'action'::
+
+ #define KVM_PMU_EVENT_ALLOW 0
+ #define KVM_PMU_EVENT_DENY 1
4.121 KVM_PPC_SVM_OFF
+---------------------
-Capability: basic
-Architectures: powerpc
-Type: vm ioctl
-Parameters: none
-Returns: 0 on successful completion,
+:Capability: basic
+:Architectures: powerpc
+:Type: vm ioctl
+:Parameters: none
+:Returns: 0 on successful completion,
+
Errors:
- EINVAL: if ultravisor failed to terminate the secure guest
- ENOMEM: if hypervisor failed to allocate new radix page tables for guest
+
+ ====== ================================================================
+ EINVAL if ultravisor failed to terminate the secure guest
+ ENOMEM if hypervisor failed to allocate new radix page tables for guest
+ ====== ================================================================
This ioctl is used to turn off the secure mode of the guest or transition
the guest from secure mode to normal mode. This is invoked when the guest
@@ -4178,35 +4611,38 @@
track the secure pages by hypervisor.
4.122 KVM_S390_NORMAL_RESET
+---------------------------
-Capability: KVM_CAP_S390_VCPU_RESETS
-Architectures: s390
-Type: vcpu ioctl
-Parameters: none
-Returns: 0
+:Capability: KVM_CAP_S390_VCPU_RESETS
+:Architectures: s390
+:Type: vcpu ioctl
+:Parameters: none
+:Returns: 0
This ioctl resets VCPU registers and control structures according to
the cpu reset definition in the POP (Principles Of Operation).
4.123 KVM_S390_INITIAL_RESET
+----------------------------
-Capability: none
-Architectures: s390
-Type: vcpu ioctl
-Parameters: none
-Returns: 0
+:Capability: none
+:Architectures: s390
+:Type: vcpu ioctl
+:Parameters: none
+:Returns: 0
This ioctl resets VCPU registers and control structures according to
the initial cpu reset definition in the POP. However, the cpu is not
put into ESA mode. This reset is a superset of the normal reset.
4.124 KVM_S390_CLEAR_RESET
+--------------------------
-Capability: KVM_CAP_S390_VCPU_RESETS
-Architectures: s390
-Type: vcpu ioctl
-Parameters: none
-Returns: 0
+:Capability: KVM_CAP_S390_VCPU_RESETS
+:Architectures: s390
+:Type: vcpu ioctl
+:Parameters: none
+:Returns: 0
This ioctl resets VCPU registers and control structures according to
the clear cpu reset definition in the POP. However, the cpu is not put
@@ -4214,7 +4650,7 @@
5. The kvm_run structure
-------------------------
+========================
Application code obtains a pointer to the kvm_run structure by
mmap()ing a vcpu fd. From that point, application code can control
@@ -4222,13 +4658,17 @@
ioctl, and obtain information about the reason KVM_RUN returned by
looking up structure members.
-struct kvm_run {
+::
+
+ struct kvm_run {
/* in */
__u8 request_interrupt_window;
Request that KVM_RUN return when it becomes possible to inject external
interrupts into the guest. Useful in conjunction with KVM_INTERRUPT.
+::
+
__u8 immediate_exit;
This field is polled once when KVM_RUN starts; if non-zero, KVM_RUN
@@ -4240,6 +4680,8 @@
This field is ignored if KVM_CAP_IMMEDIATE_EXIT is not available.
+::
+
__u8 padding1[6];
/* out */
@@ -4249,16 +4691,22 @@
application code why KVM_RUN has returned. Allowable values for this
field are detailed below.
+::
+
__u8 ready_for_interrupt_injection;
If request_interrupt_window has been specified, this field indicates
an interrupt can be injected now with KVM_INTERRUPT.
+::
+
__u8 if_flag;
The value of the current interrupt flag. Only valid if in-kernel
local APIC is not used.
+::
+
__u16 flags;
More architecture-specific flags detailing state of the VCPU that may
@@ -4266,17 +4714,23 @@
KVM_RUN_X86_SMM, which is valid on x86 machines and is set if the
VCPU is in system management mode.
+::
+
/* in (pre_kvm_run), out (post_kvm_run) */
__u64 cr8;
The value of the cr8 register. Only valid if in-kernel local APIC is
not used. Both input and output.
+::
+
__u64 apic_base;
The value of the APIC BASE msr. Only valid if in-kernel local
APIC is not used. Both input and output.
+::
+
union {
/* KVM_EXIT_UNKNOWN */
struct {
@@ -4287,6 +4741,8 @@
reasons. Further architecture-specific information is available in
hardware_exit_reason.
+::
+
/* KVM_EXIT_FAIL_ENTRY */
struct {
__u64 hardware_entry_failure_reason;
@@ -4296,6 +4752,8 @@
to unknown reasons. Further architecture-specific information is
available in hardware_entry_failure_reason.
+::
+
/* KVM_EXIT_EXCEPTION */
struct {
__u32 exception;
@@ -4304,10 +4762,12 @@
Unused.
+::
+
/* KVM_EXIT_IO */
struct {
-#define KVM_EXIT_IO_IN 0
-#define KVM_EXIT_IO_OUT 1
+ #define KVM_EXIT_IO_IN 0
+ #define KVM_EXIT_IO_OUT 1
__u8 direction;
__u8 size; /* bytes */
__u16 port;
@@ -4321,6 +4781,8 @@
where kvm expects application code to place the data for the next
KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a packed array.
+::
+
/* KVM_EXIT_DEBUG */
struct {
struct kvm_debug_exit_arch arch;
@@ -4329,6 +4791,8 @@
If the exit_reason is KVM_EXIT_DEBUG, then a vcpu is processing a debug event
for which architecture specific information is returned.
+::
+
/* KVM_EXIT_MMIO */
struct {
__u64 phys_addr;
@@ -4346,14 +4810,19 @@
appear if the VCPU performed a load or store of the appropriate width directly
to the byte array.
-NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR and
+.. note::
+
+ For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR and
KVM_EXIT_EPR the corresponding
+
operations are complete (and guest state is consistent) only after userspace
has re-entered the kernel with KVM_RUN. The kernel side will first finish
incomplete operations and then check for pending signals. Userspace
can re-enter the guest with an unmasked signal pending to complete
pending operations.
+::
+
/* KVM_EXIT_HYPERCALL */
struct {
__u64 nr;
@@ -4365,7 +4834,10 @@
Unused. This was once used for 'hypercall to userspace'. To implement
such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
-Note KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO.
+
+.. note:: KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO.
+
+::
/* KVM_EXIT_TPR_ACCESS */
struct {
@@ -4376,6 +4848,8 @@
To be documented (KVM_TPR_ACCESS_REPORTING).
+::
+
/* KVM_EXIT_S390_SIEIC */
struct {
__u8 icptcode;
@@ -4387,16 +4861,20 @@
s390 specific.
+::
+
/* KVM_EXIT_S390_RESET */
-#define KVM_S390_RESET_POR 1
-#define KVM_S390_RESET_CLEAR 2
-#define KVM_S390_RESET_SUBSYSTEM 4
-#define KVM_S390_RESET_CPU_INIT 8
-#define KVM_S390_RESET_IPL 16
+ #define KVM_S390_RESET_POR 1
+ #define KVM_S390_RESET_CLEAR 2
+ #define KVM_S390_RESET_SUBSYSTEM 4
+ #define KVM_S390_RESET_CPU_INIT 8
+ #define KVM_S390_RESET_IPL 16
__u64 s390_reset_flags;
s390 specific.
+::
+
/* KVM_EXIT_S390_UCONTROL */
struct {
__u64 trans_exc_code;
@@ -4411,6 +4889,8 @@
Principles of Operation Book in the Chapter for Dynamic Address Translation
(DAT)
+::
+
/* KVM_EXIT_DCR */
struct {
__u32 dcrn;
@@ -4420,6 +4900,8 @@
Deprecated - was used for 440 KVM.
+::
+
/* KVM_EXIT_OSI */
struct {
__u64 gprs[32];
@@ -4433,6 +4915,8 @@
necessary. Upon guest entry all guest GPRs will then be replaced by the values
in this struct.
+::
+
/* KVM_EXIT_PAPR_HCALL */
struct {
__u64 nr;
@@ -4450,6 +4934,8 @@
Requirements (PAPR) document available from www.power.org (free
developer registration required to access it).
+::
+
/* KVM_EXIT_S390_TSCH */
struct {
__u16 subchannel_id;
@@ -4466,6 +4952,8 @@
subchannel_nr, io_int_parm and io_int_word contain the parameters for that
interrupt. ipb is needed for instruction parameter decoding.
+::
+
/* KVM_EXIT_EPR */
struct {
__u32 epr;
@@ -4485,11 +4973,13 @@
external interrupt has just been delivered into the guest. User space
should put the acknowledged interrupt vector into the 'epr' field.
+::
+
/* KVM_EXIT_SYSTEM_EVENT */
struct {
-#define KVM_SYSTEM_EVENT_SHUTDOWN 1
-#define KVM_SYSTEM_EVENT_RESET 2
-#define KVM_SYSTEM_EVENT_CRASH 3
+ #define KVM_SYSTEM_EVENT_SHUTDOWN 1
+ #define KVM_SYSTEM_EVENT_RESET 2
+ #define KVM_SYSTEM_EVENT_CRASH 3
__u32 type;
__u64 flags;
} system_event;
@@ -4502,18 +4992,21 @@
specific flags for the system-level event.
Valid values for 'type' are:
- KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the
+
+ - KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the
VM. Userspace is not obliged to honour this, and if it does honour
this does not need to destroy the VM synchronously (ie it may call
KVM_RUN again before shutdown finally occurs).
- KVM_SYSTEM_EVENT_RESET -- the guest has requested a reset of the VM.
+ - KVM_SYSTEM_EVENT_RESET -- the guest has requested a reset of the VM.
As with SHUTDOWN, userspace can choose to ignore the request, or
to schedule the reset to occur in the future and may call KVM_RUN again.
- KVM_SYSTEM_EVENT_CRASH -- the guest crash occurred and the guest
+ - KVM_SYSTEM_EVENT_CRASH -- the guest crash occurred and the guest
has requested a crash condition maintenance. Userspace can choose
to ignore the request, or to gather VM memory core dump and/or
reset/shutdown of the VM.
+::
+
/* KVM_EXIT_IOAPIC_EOI */
struct {
__u8 vector;
@@ -4526,9 +5019,11 @@
it is still asserted. Vector is the LAPIC interrupt vector for which the
EOI was received.
+::
+
struct kvm_hyperv_exit {
-#define KVM_EXIT_HYPERV_SYNIC 1
-#define KVM_EXIT_HYPERV_HCALL 2
+ #define KVM_EXIT_HYPERV_SYNIC 1
+ #define KVM_EXIT_HYPERV_HCALL 2
__u32 type;
union {
struct {
@@ -4546,14 +5041,20 @@
};
/* KVM_EXIT_HYPERV */
struct kvm_hyperv_exit hyperv;
+
Indicates that the VCPU exits into userspace to process some tasks
related to Hyper-V emulation.
+
Valid values for 'type' are:
- KVM_EXIT_HYPERV_SYNIC -- synchronously notify user-space about
+
+ - KVM_EXIT_HYPERV_SYNIC -- synchronously notify user-space about
+
Hyper-V SynIC state change. Notification is used to remap SynIC
event/message pages and to enable/disable SynIC messages/events processing
in userspace.
+::
+
/* KVM_EXIT_ARM_NISV */
struct {
__u64 esr_iss;
@@ -4587,6 +5088,8 @@
KVM_EXIT_MMIO, but userspace has to emulate any change to the processing state
if it decides to decode and emulate the instruction.
+::
+
/* Fix the size of the union. */
char padding[256];
};
@@ -4611,18 +5114,20 @@
Userspace can query the validity of the structure by checking
kvm_valid_regs for specific bits. These bits are architecture specific
and usually define the validity of a groups of registers. (e.g. one bit
- for general purpose registers)
+for general purpose registers)
Please note that the kernel is allowed to use the kvm_run structure as the
primary storage for certain register types. Therefore, the kernel may use the
values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set.
-};
+::
+
+ };
6. Capabilities that can be enabled on vCPUs
---------------------------------------------
+============================================
There are certain capabilities that change the behavior of the virtual CPU or
the virtual machine when enabled. To enable them, please see section 4.37.
@@ -4631,23 +5136,28 @@
The following information is provided along with the description:
- Architectures: which instruction set architectures provide this ioctl.
+ Architectures:
+ which instruction set architectures provide this ioctl.
x86 includes both i386 and x86_64.
- Target: whether this is a per-vcpu or per-vm capability.
+ Target:
+ whether this is a per-vcpu or per-vm capability.
- Parameters: what parameters are accepted by the capability.
+ Parameters:
+ what parameters are accepted by the capability.
- Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL)
+ Returns:
+ the return value. General error numbers (EBADF, ENOMEM, EINVAL)
are not detailed, but errors with specific meanings are.
6.1 KVM_CAP_PPC_OSI
+-------------------
-Architectures: ppc
-Target: vcpu
-Parameters: none
-Returns: 0 on success; -1 on error
+:Architectures: ppc
+:Target: vcpu
+:Parameters: none
+:Returns: 0 on success; -1 on error
This capability enables interception of OSI hypercalls that otherwise would
be treated as normal system calls to be injected into the guest. OSI hypercalls
@@ -4658,11 +5168,12 @@
6.2 KVM_CAP_PPC_PAPR
+--------------------
-Architectures: ppc
-Target: vcpu
-Parameters: none
-Returns: 0 on success; -1 on error
+:Architectures: ppc
+:Target: vcpu
+:Parameters: none
+:Returns: 0 on success; -1 on error
This capability enables interception of PAPR hypercalls. PAPR hypercalls are
done using the hypercall instruction "sc 1".
@@ -4678,18 +5189,21 @@
6.3 KVM_CAP_SW_TLB
+------------------
-Architectures: ppc
-Target: vcpu
-Parameters: args[0] is the address of a struct kvm_config_tlb
-Returns: 0 on success; -1 on error
+:Architectures: ppc
+:Target: vcpu
+:Parameters: args[0] is the address of a struct kvm_config_tlb
+:Returns: 0 on success; -1 on error
-struct kvm_config_tlb {
+::
+
+ struct kvm_config_tlb {
__u64 params;
__u64 array;
__u32 mmu_type;
__u32 array_len;
-};
+ };
Configures the virtual CPU's TLB array, establishing a shared memory area
between userspace and KVM. The "params" and "array" fields are userspace
@@ -4708,6 +5222,7 @@
on this vcpu.
For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
+
- The "params" field is of type "struct kvm_book3e_206_tlb_params".
- The "array" field points to an array of type "struct
kvm_book3e_206_tlb_entry".
@@ -4721,11 +5236,12 @@
hardware ignores this value for TLB0.
6.4 KVM_CAP_S390_CSS_SUPPORT
+----------------------------
-Architectures: s390
-Target: vcpu
-Parameters: none
-Returns: 0 on success; -1 on error
+:Architectures: s390
+:Target: vcpu
+:Parameters: none
+:Returns: 0 on success; -1 on error
This capability enables support for handling of channel I/O instructions.
@@ -4739,11 +5255,12 @@
virtual machine is affected.
6.5 KVM_CAP_PPC_EPR
+-------------------
-Architectures: ppc
-Target: vcpu
-Parameters: args[0] defines whether the proxy facility is active
-Returns: 0 on success; -1 on error
+:Architectures: ppc
+:Target: vcpu
+:Parameters: args[0] defines whether the proxy facility is active
+:Returns: 0 on success; -1 on error
This capability enables or disables the delivery of interrupts through the
external proxy facility.
@@ -4757,62 +5274,70 @@
When this capability is enabled, KVM_EXIT_EPR can occur.
6.6 KVM_CAP_IRQ_MPIC
+--------------------
-Architectures: ppc
-Parameters: args[0] is the MPIC device fd
- args[1] is the MPIC CPU number for this vcpu
+:Architectures: ppc
+:Parameters: args[0] is the MPIC device fd;
+ args[1] is the MPIC CPU number for this vcpu
This capability connects the vcpu to an in-kernel MPIC device.
6.7 KVM_CAP_IRQ_XICS
+--------------------
-Architectures: ppc
-Target: vcpu
-Parameters: args[0] is the XICS device fd
- args[1] is the XICS CPU number (server ID) for this vcpu
+:Architectures: ppc
+:Target: vcpu
+:Parameters: args[0] is the XICS device fd;
+ args[1] is the XICS CPU number (server ID) for this vcpu
This capability connects the vcpu to an in-kernel XICS device.
6.8 KVM_CAP_S390_IRQCHIP
+------------------------
-Architectures: s390
-Target: vm
-Parameters: none
+:Architectures: s390
+:Target: vm
+:Parameters: none
This capability enables the in-kernel irqchip for s390. Please refer to
"4.24 KVM_CREATE_IRQCHIP" for details.
6.9 KVM_CAP_MIPS_FPU
+--------------------
-Architectures: mips
-Target: vcpu
-Parameters: args[0] is reserved for future use (should be 0).
+:Architectures: mips
+:Target: vcpu
+:Parameters: args[0] is reserved for future use (should be 0).
This capability allows the use of the host Floating Point Unit by the guest. It
allows the Config1.FP bit to be set to enable the FPU in the guest. Once this is
-done the KVM_REG_MIPS_FPR_* and KVM_REG_MIPS_FCR_* registers can be accessed
-(depending on the current guest FPU register mode), and the Status.FR,
+done the ``KVM_REG_MIPS_FPR_*`` and ``KVM_REG_MIPS_FCR_*`` registers can be
+accessed (depending on the current guest FPU register mode), and the Status.FR,
Config5.FRE bits are accessible via the KVM API and also from the guest,
depending on them being supported by the FPU.
6.10 KVM_CAP_MIPS_MSA
+---------------------
-Architectures: mips
-Target: vcpu
-Parameters: args[0] is reserved for future use (should be 0).
+:Architectures: mips
+:Target: vcpu
+:Parameters: args[0] is reserved for future use (should be 0).
This capability allows the use of the MIPS SIMD Architecture (MSA) by the guest.
It allows the Config3.MSAP bit to be set to enable the use of MSA by the guest.
-Once this is done the KVM_REG_MIPS_VEC_* and KVM_REG_MIPS_MSA_* registers can be
-accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from
-the guest.
+Once this is done the ``KVM_REG_MIPS_VEC_*`` and ``KVM_REG_MIPS_MSA_*``
+registers can be accessed, and the Config5.MSAEn bit is accessible via the
+KVM API and also from the guest.
6.74 KVM_CAP_SYNC_REGS
-Architectures: s390, x86
-Target: s390: always enabled, x86: vcpu
-Parameters: none
-Returns: x86: KVM_CHECK_EXTENSION returns a bit-array indicating which register
-sets are supported (bitfields defined in arch/x86/include/uapi/asm/kvm.h).
+----------------------
+
+:Architectures: s390, x86
+:Target: s390: always enabled, x86: vcpu
+:Parameters: none
+:Returns: x86: KVM_CHECK_EXTENSION returns a bit-array indicating which register
+ sets are supported
+ (bitfields defined in arch/x86/include/uapi/asm/kvm.h).
As described above in the kvm_sync_regs struct info in section 5 (kvm_run):
KVM_CAP_SYNC_REGS "allow[s] userspace to access certain guest registers
@@ -4825,6 +5350,7 @@
For s390 specifics, please refer to the source code.
For x86:
+
- the register sets to be copied out to kvm_run are selectable
by userspace (rather that all sets being copied out for every exit).
- vcpu_events are available in addition to regs and sregs.
@@ -4841,23 +5367,26 @@
Unused bitfields in the bitarrays must be set to zero.
-struct kvm_sync_regs {
+::
+
+ struct kvm_sync_regs {
struct kvm_regs regs;
struct kvm_sregs sregs;
struct kvm_vcpu_events events;
-};
+ };
6.75 KVM_CAP_PPC_IRQ_XIVE
+-------------------------
-Architectures: ppc
-Target: vcpu
-Parameters: args[0] is the XIVE device fd
- args[1] is the XIVE CPU number (server ID) for this vcpu
+:Architectures: ppc
+:Target: vcpu
+:Parameters: args[0] is the XIVE device fd;
+ args[1] is the XIVE CPU number (server ID) for this vcpu
This capability connects the vcpu to an in-kernel XIVE device.
7. Capabilities that can be enabled on VMs
-------------------------------------------
+==========================================
There are certain capabilities that change the behavior of the virtual
machine when enabled. To enable them, please see section 4.37. Below
@@ -4866,20 +5395,24 @@
The following information is provided along with the description:
- Architectures: which instruction set architectures provide this ioctl.
+ Architectures:
+ which instruction set architectures provide this ioctl.
x86 includes both i386 and x86_64.
- Parameters: what parameters are accepted by the capability.
+ Parameters:
+ what parameters are accepted by the capability.
- Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL)
+ Returns:
+ the return value. General error numbers (EBADF, ENOMEM, EINVAL)
are not detailed, but errors with specific meanings are.
7.1 KVM_CAP_PPC_ENABLE_HCALL
+----------------------------
-Architectures: ppc
-Parameters: args[0] is the sPAPR hcall number
- args[1] is 0 to disable, 1 to enable in-kernel handling
+:Architectures: ppc
+:Parameters: args[0] is the sPAPR hcall number;
+ args[1] is 0 to disable, 1 to enable in-kernel handling
This capability controls whether individual sPAPR hypercalls (hcalls)
get handled by the kernel or not. Enabling or disabling in-kernel
@@ -4897,13 +5430,15 @@
error.
7.2 KVM_CAP_S390_USER_SIGP
+--------------------------
-Architectures: s390
-Parameters: none
+:Architectures: s390
+:Parameters: none
This capability controls which SIGP orders will be handled completely in user
space. With this capability enabled, all fast orders will be handled completely
in the kernel:
+
- SENSE
- SENSE RUNNING
- EXTERNAL CALL
@@ -4917,48 +5452,52 @@
old way of handling SIGP orders is used (partially in kernel and user space).
7.3 KVM_CAP_S390_VECTOR_REGISTERS
+---------------------------------
-Architectures: s390
-Parameters: none
-Returns: 0 on success, negative value on error
+:Architectures: s390
+:Parameters: none
+:Returns: 0 on success, negative value on error
Allows use of the vector registers introduced with z13 processor, and
provides for the synchronization between host and user space. Will
return -EINVAL if the machine does not support vectors.
7.4 KVM_CAP_S390_USER_STSI
+--------------------------
-Architectures: s390
-Parameters: none
+:Architectures: s390
+:Parameters: none
This capability allows post-handlers for the STSI instruction. After
initial handling in the kernel, KVM exits to user space with
KVM_EXIT_S390_STSI to allow user space to insert further data.
Before exiting to userspace, kvm handlers should fill in s390_stsi field of
-vcpu->run:
-struct {
+vcpu->run::
+
+ struct {
__u64 addr;
__u8 ar;
__u8 reserved;
__u8 fc;
__u8 sel1;
__u16 sel2;
-} s390_stsi;
+ } s390_stsi;
-@addr - guest address of STSI SYSIB
-@fc - function code
-@sel1 - selector 1
-@sel2 - selector 2
-@ar - access register number
+ @addr - guest address of STSI SYSIB
+ @fc - function code
+ @sel1 - selector 1
+ @sel2 - selector 2
+ @ar - access register number
KVM handlers should exit to userspace with rc = -EREMOTE.
7.5 KVM_CAP_SPLIT_IRQCHIP
+-------------------------
-Architectures: x86
-Parameters: args[0] - number of routes reserved for userspace IOAPICs
-Returns: 0 on success, -1 on error
+:Architectures: x86
+:Parameters: args[0] - number of routes reserved for userspace IOAPICs
+:Returns: 0 on success, -1 on error
Create a local apic for each processor in the kernel. This can be used
instead of KVM_CREATE_IRQCHIP if the userspace VMM wishes to emulate the
@@ -4975,24 +5514,26 @@
kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
7.6 KVM_CAP_S390_RI
+-------------------
-Architectures: s390
-Parameters: none
+:Architectures: s390
+:Parameters: none
Allows use of runtime-instrumentation introduced with zEC12 processor.
Will return -EINVAL if the machine does not support runtime-instrumentation.
Will return -EBUSY if a VCPU has already been created.
7.7 KVM_CAP_X2APIC_API
+----------------------
-Architectures: x86
-Parameters: args[0] - features that should be enabled
-Returns: 0 on success, -EINVAL when args[0] contains invalid features
+:Architectures: x86
+:Parameters: args[0] - features that should be enabled
+:Returns: 0 on success, -EINVAL when args[0] contains invalid features
-Valid feature flags in args[0] are
+Valid feature flags in args[0] are::
-#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
-#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
+ #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
+ #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
Enabling KVM_X2APIC_API_USE_32BIT_IDS changes the behavior of
KVM_SET_GSI_ROUTING, KVM_SIGNAL_MSI, KVM_SET_LAPIC, and KVM_GET_LAPIC,
@@ -5006,9 +5547,10 @@
where 0xff represents CPUs 0-7 in cluster 0.
7.8 KVM_CAP_S390_USER_INSTR0
+----------------------------
-Architectures: s390
-Parameters: none
+:Architectures: s390
+:Parameters: none
With this capability enabled, all illegal instructions 0x0000 (2 bytes) will
be intercepted and forwarded to user space. User space can use this
@@ -5020,26 +5562,29 @@
created and are running.
7.9 KVM_CAP_S390_GS
+-------------------
-Architectures: s390
-Parameters: none
-Returns: 0 on success; -EINVAL if the machine does not support
- guarded storage; -EBUSY if a VCPU has already been created.
+:Architectures: s390
+:Parameters: none
+:Returns: 0 on success; -EINVAL if the machine does not support
+ guarded storage; -EBUSY if a VCPU has already been created.
Allows use of guarded storage for the KVM guest.
7.10 KVM_CAP_S390_AIS
+---------------------
-Architectures: s390
-Parameters: none
+:Architectures: s390
+:Parameters: none
Allow use of adapter-interruption suppression.
-Returns: 0 on success; -EBUSY if a VCPU has already been created.
+:Returns: 0 on success; -EBUSY if a VCPU has already been created.
7.11 KVM_CAP_PPC_SMT
+--------------------
-Architectures: ppc
-Parameters: vsmt_mode, flags
+:Architectures: ppc
+:Parameters: vsmt_mode, flags
Enabling this capability on a VM provides userspace with a way to set
the desired virtual SMT mode (i.e. the number of virtual CPUs per
@@ -5054,9 +5599,10 @@
modes are available.
7.12 KVM_CAP_PPC_FWNMI
+----------------------
-Architectures: ppc
-Parameters: none
+:Architectures: ppc
+:Parameters: none
With this capability a machine check exception in the guest address
space will cause KVM to exit the guest with NMI exit reason. This
@@ -5065,17 +5611,18 @@
branch to guests' 0x200 interrupt vector.
7.13 KVM_CAP_X86_DISABLE_EXITS
+------------------------------
-Architectures: x86
-Parameters: args[0] defines which exits are disabled
-Returns: 0 on success, -EINVAL when args[0] contains invalid exits
+:Architectures: x86
+:Parameters: args[0] defines which exits are disabled
+:Returns: 0 on success, -EINVAL when args[0] contains invalid exits
-Valid bits in args[0] are
+Valid bits in args[0] are::
-#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0)
-#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
-#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
-#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
+ #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0)
+ #define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
+ #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
+ #define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
Enabling this capability on a VM provides userspace with a way to no
longer intercept some instructions for improved latency in some
@@ -5087,12 +5634,13 @@
Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
7.14 KVM_CAP_S390_HPAGE_1M
+--------------------------
-Architectures: s390
-Parameters: none
-Returns: 0 on success, -EINVAL if hpage module parameter was not set
- or cmma is enabled, or the VM has the KVM_VM_S390_UCONTROL
- flag set
+:Architectures: s390
+:Parameters: none
+:Returns: 0 on success, -EINVAL if hpage module parameter was not set
+ or cmma is enabled, or the VM has the KVM_VM_S390_UCONTROL
+ flag set
With this capability the KVM support for memory backing with 1m pages
through hugetlbfs can be enabled for a VM. After the capability is
@@ -5104,20 +5652,22 @@
this capability, the VM will not be able to run.
7.15 KVM_CAP_MSR_PLATFORM_INFO
+------------------------------
-Architectures: x86
-Parameters: args[0] whether feature should be enabled or not
+:Architectures: x86
+:Parameters: args[0] whether feature should be enabled or not
With this capability, a guest may read the MSR_PLATFORM_INFO MSR. Otherwise,
a #GP would be raised when the guest tries to access. Currently, this
capability does not enable write permissions of this MSR for the guest.
7.16 KVM_CAP_PPC_NESTED_HV
+--------------------------
-Architectures: ppc
-Parameters: none
-Returns: 0 on success, -EINVAL when the implementation doesn't support
- nested-HV virtualization.
+:Architectures: ppc
+:Parameters: none
+:Returns: 0 on success, -EINVAL when the implementation doesn't support
+ nested-HV virtualization.
HV-KVM on POWER9 and later systems allows for "nested-HV"
virtualization, which provides a way for a guest VM to run guests that
@@ -5127,9 +5677,10 @@
kvm-hv module parameter.
7.17 KVM_CAP_EXCEPTION_PAYLOAD
+------------------------------
-Architectures: x86
-Parameters: args[0] whether feature should be enabled or not
+:Architectures: x86
+:Parameters: args[0] whether feature should be enabled or not
With this capability enabled, CR2 will not be modified prior to the
emulated VM-exit when L1 intercepts a #PF exception that occurs in
@@ -5140,21 +5691,21 @@
faulting address (or the new DR6 bits*) will be reported in the
exception_payload field. Similarly, when userspace injects a #PF (or
#DB) into L2 using KVM_SET_VCPU_EVENTS, it is expected to set
-exception.has_payload and to put the faulting address (or the new DR6
-bits*) in the exception_payload field.
+exception.has_payload and to put the faulting address - or the new DR6
+bits\ [#]_ - in the exception_payload field.
This capability also enables exception.pending in struct
kvm_vcpu_events, which allows userspace to distinguish between pending
and injected exceptions.
-* For the new DR6 bits, note that bit 16 is set iff the #DB exception
- will clear DR6.RTM.
+.. [#] For the new DR6 bits, note that bit 16 is set iff the #DB exception
+ will clear DR6.RTM.
7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
-Architectures: x86, arm, arm64, mips
-Parameters: args[0] whether feature should be enabled or not
+:Architectures: x86, arm, arm64, mips
+:Parameters: args[0] whether feature should be enabled or not
With this capability enabled, KVM_GET_DIRTY_LOG will not automatically
clear and write-protect all pages that are returned as dirty.
@@ -5181,14 +5732,15 @@
Userspace should not try to use KVM_CAP_MANUAL_DIRTY_LOG_PROTECT.
8. Other capabilities.
-----------------------
+======================
This section lists capabilities that give information about other
features of the KVM implementation.
8.1 KVM_CAP_PPC_HWRNG
+---------------------
-Architectures: ppc
+:Architectures: ppc
This capability, if KVM_CHECK_EXTENSION indicates that it is
available, means that that the kernel has an implementation of the
@@ -5197,8 +5749,10 @@
with the KVM_CAP_PPC_ENABLE_HCALL capability.
8.2 KVM_CAP_HYPERV_SYNIC
+------------------------
-Architectures: x86
+:Architectures: x86
+
This capability, if KVM_CHECK_EXTENSION indicates that it is
available, means that that the kernel has an implementation of the
Hyper-V Synthetic interrupt controller(SynIC). Hyper-V SynIC is
@@ -5210,8 +5764,9 @@
by the CPU, as it's incompatible with SynIC auto-EOI behavior.
8.3 KVM_CAP_PPC_RADIX_MMU
+-------------------------
-Architectures: ppc
+:Architectures: ppc
This capability, if KVM_CHECK_EXTENSION indicates that it is
available, means that that the kernel can support guests using the
@@ -5219,8 +5774,9 @@
processor).
8.4 KVM_CAP_PPC_HASH_MMU_V3
+---------------------------
-Architectures: ppc
+:Architectures: ppc
This capability, if KVM_CHECK_EXTENSION indicates that it is
available, means that that the kernel can support guests using the
@@ -5228,8 +5784,9 @@
the POWER9 processor), including in-memory segment tables.
8.5 KVM_CAP_MIPS_VZ
+-------------------
-Architectures: mips
+:Architectures: mips
This capability, if KVM_CHECK_EXTENSION on the main kvm handle indicates that
it is available, means that full hardware assisted virtualization capabilities
@@ -5247,16 +5804,19 @@
possibility of other hardware assisted virtualization implementations which
may be incompatible with the MIPS VZ ASE.
- 0: The trap & emulate implementation is in use to run guest code in user
+== ==========================================================================
+ 0 The trap & emulate implementation is in use to run guest code in user
mode. Guest virtual memory segments are rearranged to fit the guest in the
user mode address space.
- 1: The MIPS VZ ASE is in use, providing full hardware assisted
+ 1 The MIPS VZ ASE is in use, providing full hardware assisted
virtualization, including standard guest virtual memory segments.
+== ==========================================================================
8.6 KVM_CAP_MIPS_TE
+-------------------
-Architectures: mips
+:Architectures: mips
This capability, if KVM_CHECK_EXTENSION on the main kvm handle indicates that
it is available, means that the trap & emulate implementation is available to
@@ -5268,8 +5828,9 @@
available, it means that the VM is using trap & emulate.
8.7 KVM_CAP_MIPS_64BIT
+----------------------
-Architectures: mips
+:Architectures: mips
This capability indicates the supported architecture type of the guest, i.e. the
supported register and address width.
@@ -5279,22 +5840,26 @@
be checked specifically against known values (see below). All other values are
reserved.
- 0: MIPS32 or microMIPS32.
+== ========================================================================
+ 0 MIPS32 or microMIPS32.
Both registers and addresses are 32-bits wide.
It will only be possible to run 32-bit guest code.
- 1: MIPS64 or microMIPS64 with access only to 32-bit compatibility segments.
+ 1 MIPS64 or microMIPS64 with access only to 32-bit compatibility segments.
Registers are 64-bits wide, but addresses are 32-bits wide.
64-bit guest code may run but cannot access MIPS64 memory segments.
It will also be possible to run 32-bit guest code.
- 2: MIPS64 or microMIPS64 with access to all address segments.
+ 2 MIPS64 or microMIPS64 with access to all address segments.
Both registers and addresses are 64-bits wide.
It will be possible to run 64-bit or 32-bit guest code.
+== ========================================================================
8.9 KVM_CAP_ARM_USER_IRQ
+------------------------
-Architectures: arm, arm64
+:Architectures: arm, arm64
+
This capability, if KVM_CHECK_EXTENSION indicates that it is available, means
that if userspace creates a VM without an in-kernel interrupt controller, it
will be notified of changes to the output level of in-kernel emulated devices,
@@ -5321,7 +5886,7 @@
number larger than 0 indicating the version of this capability is implemented
and thereby which bits in in run->s.regs.device_irq_level can signal values.
-Currently the following bits are defined for the device_irq_level bitmap:
+Currently the following bits are defined for the device_irq_level bitmap::
KVM_CAP_ARM_USER_IRQ >= 1:
@@ -5334,8 +5899,9 @@
listed above.
8.10 KVM_CAP_PPC_SMT_POSSIBLE
+-----------------------------
-Architectures: ppc
+:Architectures: ppc
Querying this capability returns a bitmap indicating the possible
virtual SMT modes that can be set using KVM_CAP_PPC_SMT. If bit N
@@ -5343,8 +5909,9 @@
available.
8.11 KVM_CAP_HYPERV_SYNIC2
+--------------------------
-Architectures: x86
+:Architectures: x86
This capability enables a newer version of Hyper-V Synthetic interrupt
controller (SynIC). The only difference with KVM_CAP_HYPERV_SYNIC is that KVM
@@ -5352,8 +5919,9 @@
writing to the respective MSRs.
8.12 KVM_CAP_HYPERV_VP_INDEX
+----------------------------
-Architectures: x86
+:Architectures: x86
This capability indicates that userspace can load HV_X64_MSR_VP_INDEX msr. Its
value is used to denote the target vcpu for a SynIC interrupt. For
@@ -5361,47 +5929,53 @@
capability is absent, userspace can still query this msr's value.
8.13 KVM_CAP_S390_AIS_MIGRATION
+-------------------------------
-Architectures: s390
-Parameters: none
+:Architectures: s390
+:Parameters: none
This capability indicates if the flic device will be able to get/set the
AIS states for migration via the KVM_DEV_FLIC_AISM_ALL attribute and allows
to discover this without having to create a flic device.
8.14 KVM_CAP_S390_PSW
+---------------------
-Architectures: s390
+:Architectures: s390
This capability indicates that the PSW is exposed via the kvm_run structure.
8.15 KVM_CAP_S390_GMAP
+----------------------
-Architectures: s390
+:Architectures: s390
This capability indicates that the user space memory used as guest mapping can
be anywhere in the user memory address space, as long as the memory slots are
aligned and sized to a segment (1MB) boundary.
8.16 KVM_CAP_S390_COW
+---------------------
-Architectures: s390
+:Architectures: s390
This capability indicates that the user space memory used as guest mapping can
use copy-on-write semantics as well as dirty pages tracking via read-only page
tables.
8.17 KVM_CAP_S390_BPB
+---------------------
-Architectures: s390
+:Architectures: s390
This capability indicates that kvm will implement the interfaces to handle
reset, migration and nested KVM for branch prediction blocking. The stfle
facility 82 should not be provided to the guest without this capability.
8.18 KVM_CAP_HYPERV_TLBFLUSH
+----------------------------
-Architectures: x86
+:Architectures: x86
This capability indicates that KVM supports paravirtualized Hyper-V TLB Flush
hypercalls:
@@ -5409,8 +5983,9 @@
HvFlushVirtualAddressList, HvFlushVirtualAddressListEx.
8.19 KVM_CAP_ARM_INJECT_SERROR_ESR
+----------------------------------
-Architectures: arm, arm64
+:Architectures: arm, arm64
This capability indicates that userspace can specify (via the
KVM_SET_VCPU_EVENTS ioctl) the syndrome value reported to the guest when it
@@ -5421,16 +5996,20 @@
AArch64, this value will be reported in the ISS field of ESR_ELx.
See KVM_CAP_VCPU_EVENTS for more details.
-8.20 KVM_CAP_HYPERV_SEND_IPI
-Architectures: x86
+8.20 KVM_CAP_HYPERV_SEND_IPI
+----------------------------
+
+:Architectures: x86
This capability indicates that KVM supports paravirtualized Hyper-V IPI send
hypercalls:
HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
-8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH
-Architecture: x86
+8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH
+-----------------------------------
+
+:Architecture: x86
This capability indicates that KVM running on top of Hyper-V hypervisor
enables Direct TLB flush for its guests meaning that TLB flush
diff --git a/Documentation/virt/kvm/arm/hyp-abi.txt b/Documentation/virt/kvm/arm/hyp-abi.rst
similarity index 78%
rename from Documentation/virt/kvm/arm/hyp-abi.txt
rename to Documentation/virt/kvm/arm/hyp-abi.rst
index a20a0be..d1fc27d 100644
--- a/Documentation/virt/kvm/arm/hyp-abi.txt
+++ b/Documentation/virt/kvm/arm/hyp-abi.rst
@@ -1,4 +1,8 @@
-* Internal ABI between the kernel and HYP
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================================
+Internal ABI between the kernel and HYP
+=======================================
This file documents the interaction between the Linux kernel and the
hypervisor layer when running Linux as a hypervisor (for example
@@ -19,25 +23,31 @@
Unless specified otherwise, any built-in hypervisor must implement
these functions (see arch/arm{,64}/include/asm/virt.h):
-* r0/x0 = HVC_SET_VECTORS
- r1/x1 = vectors
+* ::
+
+ r0/x0 = HVC_SET_VECTORS
+ r1/x1 = vectors
Set HVBAR/VBAR_EL2 to 'vectors' to enable a hypervisor. 'vectors'
must be a physical address, and respect the alignment requirements
of the architecture. Only implemented by the initial stubs, not by
Linux hypervisors.
-* r0/x0 = HVC_RESET_VECTORS
+* ::
+
+ r0/x0 = HVC_RESET_VECTORS
Turn HYP/EL2 MMU off, and reset HVBAR/VBAR_EL2 to the initials
stubs' exception vector value. This effectively disables an existing
hypervisor.
-* r0/x0 = HVC_SOFT_RESTART
- r1/x1 = restart address
- x2 = x0's value when entering the next payload (arm64)
- x3 = x1's value when entering the next payload (arm64)
- x4 = x2's value when entering the next payload (arm64)
+* ::
+
+ r0/x0 = HVC_SOFT_RESTART
+ r1/x1 = restart address
+ x2 = x0's value when entering the next payload (arm64)
+ x3 = x1's value when entering the next payload (arm64)
+ x4 = x2's value when entering the next payload (arm64)
Mask all exceptions, disable the MMU, move the arguments into place
(arm64 only), and jump to the restart address while at HYP/EL2. This
diff --git a/Documentation/virt/kvm/arm/index.rst b/Documentation/virt/kvm/arm/index.rst
new file mode 100644
index 0000000..3e2b2ab
--- /dev/null
+++ b/Documentation/virt/kvm/arm/index.rst
@@ -0,0 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===
+ARM
+===
+
+.. toctree::
+ :maxdepth: 2
+
+ hyp-abi
+ psci
+ pvtime
diff --git a/Documentation/virt/kvm/arm/psci.txt b/Documentation/virt/kvm/arm/psci.rst
similarity index 60%
rename from Documentation/virt/kvm/arm/psci.txt
rename to Documentation/virt/kvm/arm/psci.rst
index 559586f..d52c2e8 100644
--- a/Documentation/virt/kvm/arm/psci.txt
+++ b/Documentation/virt/kvm/arm/psci.rst
@@ -1,3 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================
+Power State Coordination Interface (PSCI)
+=========================================
+
KVM implements the PSCI (Power State Coordination Interface)
specification in order to provide services such as CPU on/off, reset
and power-off to the guest.
@@ -30,32 +36,42 @@
- Affects the whole VM (even if the register view is per-vcpu)
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
- Holds the state of the firmware support to mitigate CVE-2017-5715, as
- offered by KVM to the guest via a HVC call. The workaround is described
- under SMCCC_ARCH_WORKAROUND_1 in [1].
+ Holds the state of the firmware support to mitigate CVE-2017-5715, as
+ offered by KVM to the guest via a HVC call. The workaround is described
+ under SMCCC_ARCH_WORKAROUND_1 in [1].
+
Accepted values are:
- KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL: KVM does not offer
+
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL:
+ KVM does not offer
firmware support for the workaround. The mitigation status for the
guest is unknown.
- KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL: The workaround HVC call is
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL:
+ The workaround HVC call is
available to the guest and required for the mitigation.
- KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED: The workaround HVC call
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED:
+ The workaround HVC call
is available to the guest, but it is not needed on this VCPU.
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
- Holds the state of the firmware support to mitigate CVE-2018-3639, as
- offered by KVM to the guest via a HVC call. The workaround is described
- under SMCCC_ARCH_WORKAROUND_2 in [1].
+ Holds the state of the firmware support to mitigate CVE-2018-3639, as
+ offered by KVM to the guest via a HVC call. The workaround is described
+ under SMCCC_ARCH_WORKAROUND_2 in [1]_.
+
Accepted values are:
- KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: A workaround is not
+
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
+ A workaround is not
available. KVM does not offer firmware support for the workaround.
- KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: The workaround state is
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
+ The workaround state is
unknown. KVM does not offer firmware support for the workaround.
- KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: The workaround is available,
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
+ The workaround is available,
and can be disabled by a vCPU. If
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
this vCPU.
- KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: The workaround is
- always active on this vCPU or it is not needed.
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
+ The workaround is always active on this vCPU or it is not needed.
-[1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
+.. [1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
diff --git a/Documentation/virt/kvm/devices/arm-vgic-its.txt b/Documentation/virt/kvm/devices/arm-vgic-its.rst
similarity index 71%
rename from Documentation/virt/kvm/devices/arm-vgic-its.txt
rename to Documentation/virt/kvm/devices/arm-vgic-its.rst
index eeaa95b..6c304fd 100644
--- a/Documentation/virt/kvm/devices/arm-vgic-its.txt
+++ b/Documentation/virt/kvm/devices/arm-vgic-its.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================================
ARM Virtual Interrupt Translation Service (ITS)
===============================================
@@ -12,22 +15,32 @@
a separate, non-overlapping MMIO region.
-Groups:
- KVM_DEV_ARM_VGIC_GRP_ADDR
+Groups
+======
+
+KVM_DEV_ARM_VGIC_GRP_ADDR
+-------------------------
+
Attributes:
KVM_VGIC_ITS_ADDR_TYPE (rw, 64-bit)
Base address in the guest physical address space of the GICv3 ITS
control register frame.
This address needs to be 64K aligned and the region covers 128K.
+
Errors:
- -E2BIG: Address outside of addressable IPA range
- -EINVAL: Incorrectly aligned address
- -EEXIST: Address already configured
- -EFAULT: Invalid user pointer for attr->addr.
- -ENODEV: Incorrect attribute or the ITS is not supported.
+
+ ======= =================================================
+ -E2BIG Address outside of addressable IPA range
+ -EINVAL Incorrectly aligned address
+ -EEXIST Address already configured
+ -EFAULT Invalid user pointer for attr->addr.
+ -ENODEV Incorrect attribute or the ITS is not supported.
+ ======= =================================================
- KVM_DEV_ARM_VGIC_GRP_CTRL
+KVM_DEV_ARM_VGIC_GRP_CTRL
+-------------------------
+
Attributes:
KVM_DEV_ARM_VGIC_CTRL_INIT
request the initialization of the ITS, no additional parameter in
@@ -58,16 +71,21 @@
"ITS Restore Sequence".
Errors:
- -ENXIO: ITS not properly configured as required prior to setting
- this attribute
- -ENOMEM: Memory shortage when allocating ITS internal data
- -EINVAL: Inconsistent restored data
- -EFAULT: Invalid guest ram access
- -EBUSY: One or more VCPUS are running
- -EACCES: The virtual ITS is backed by a physical GICv4 ITS, and the
- state is not available
- KVM_DEV_ARM_VGIC_GRP_ITS_REGS
+ ======= ==========================================================
+ -ENXIO ITS not properly configured as required prior to setting
+ this attribute
+ -ENOMEM Memory shortage when allocating ITS internal data
+ -EINVAL Inconsistent restored data
+ -EFAULT Invalid guest ram access
+ -EBUSY One or more VCPUS are running
+ -EACCES The virtual ITS is backed by a physical GICv4 ITS, and the
+ state is not available
+ ======= ==========================================================
+
+KVM_DEV_ARM_VGIC_GRP_ITS_REGS
+-----------------------------
+
Attributes:
The attr field of kvm_device_attr encodes the offset of the
ITS register, relative to the ITS control frame base address
@@ -78,6 +96,7 @@
be accessed with full length.
Writes to read-only registers are ignored by the kernel except for:
+
- GITS_CREADR. It must be restored otherwise commands in the queue
will be re-executed after restoring CWRITER. GITS_CREADR must be
restored before restoring the GITS_CTLR which is likely to enable the
@@ -91,30 +110,36 @@
For other registers, getting or setting a register has the same
effect as reading/writing the register on real hardware.
- Errors:
- -ENXIO: Offset does not correspond to any supported register
- -EFAULT: Invalid user pointer for attr->addr
- -EINVAL: Offset is not 64-bit aligned
- -EBUSY: one or more VCPUS are running
- ITS Restore Sequence:
- -------------------------
+ Errors:
+
+ ======= ====================================================
+ -ENXIO Offset does not correspond to any supported register
+ -EFAULT Invalid user pointer for attr->addr
+ -EINVAL Offset is not 64-bit aligned
+ -EBUSY one or more VCPUS are running
+ ======= ====================================================
+
+ITS Restore Sequence:
+---------------------
The following ordering must be followed when restoring the GIC and the ITS:
+
a) restore all guest memory and create vcpus
b) restore all redistributors
c) provide the ITS base address
(KVM_DEV_ARM_VGIC_GRP_ADDR)
d) restore the ITS in the following order:
- 1. Restore GITS_CBASER
- 2. Restore all other GITS_ registers, except GITS_CTLR!
- 3. Load the ITS table data (KVM_DEV_ARM_ITS_RESTORE_TABLES)
- 4. Restore GITS_CTLR
+
+ 1. Restore GITS_CBASER
+ 2. Restore all other ``GITS_`` registers, except GITS_CTLR!
+ 3. Load the ITS table data (KVM_DEV_ARM_ITS_RESTORE_TABLES)
+ 4. Restore GITS_CTLR
Then vcpus can be started.
- ITS Table ABI REV0:
- -------------------
+ITS Table ABI REV0:
+-------------------
Revision 0 of the ABI only supports the features of a virtual GICv3, and does
not support a virtual GICv4 with support for direct injection of virtual
@@ -125,12 +150,13 @@
entries in the collection are listed in no particular order.
All entries are 8 bytes.
- Device Table Entry (DTE):
+ Device Table Entry (DTE)::
- bits: | 63| 62 ... 49 | 48 ... 5 | 4 ... 0 |
- values: | V | next | ITT_addr | Size |
+ bits: | 63| 62 ... 49 | 48 ... 5 | 4 ... 0 |
+ values: | V | next | ITT_addr | Size |
- where;
+ where:
+
- V indicates whether the entry is valid. If not, other fields
are not meaningful.
- next: equals to 0 if this entry is the last one; otherwise it
@@ -140,32 +166,34 @@
- Size specifies the supported number of bits for the EventID,
minus one
- Collection Table Entry (CTE):
+ Collection Table Entry (CTE)::
- bits: | 63| 62 .. 52 | 51 ... 16 | 15 ... 0 |
- values: | V | RES0 | RDBase | ICID |
+ bits: | 63| 62 .. 52 | 51 ... 16 | 15 ... 0 |
+ values: | V | RES0 | RDBase | ICID |
where:
+
- V indicates whether the entry is valid. If not, other fields are
not meaningful.
- RES0: reserved field with Should-Be-Zero-or-Preserved behavior.
- RDBase is the PE number (GICR_TYPER.Processor_Number semantic),
- ICID is the collection ID
- Interrupt Translation Entry (ITE):
+ Interrupt Translation Entry (ITE)::
- bits: | 63 ... 48 | 47 ... 16 | 15 ... 0 |
- values: | next | pINTID | ICID |
+ bits: | 63 ... 48 | 47 ... 16 | 15 ... 0 |
+ values: | next | pINTID | ICID |
where:
+
- next: equals to 0 if this entry is the last one; otherwise it corresponds
to the EventID offset to the next ITE capped by 2^16 -1.
- pINTID is the physical LPI ID; if zero, it means the entry is not valid
and other fields are not meaningful.
- ICID is the collection ID
- ITS Reset State:
- ----------------
+ITS Reset State:
+----------------
RESET returns the ITS to the same state that it was when first created and
initialized. When the RESET command returns, the following things are
diff --git a/Documentation/virt/kvm/devices/arm-vgic-v3.txt b/Documentation/virt/kvm/devices/arm-vgic-v3.rst
similarity index 76%
rename from Documentation/virt/kvm/devices/arm-vgic-v3.txt
rename to Documentation/virt/kvm/devices/arm-vgic-v3.rst
index ff290b4..5dd3bff 100644
--- a/Documentation/virt/kvm/devices/arm-vgic-v3.txt
+++ b/Documentation/virt/kvm/devices/arm-vgic-v3.rst
@@ -1,9 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================================================
ARM Virtual Generic Interrupt Controller v3 and later (VGICv3)
==============================================================
Device types supported:
- KVM_DEV_TYPE_ARM_VGIC_V3 ARM Generic Interrupt Controller v3.0
+ - KVM_DEV_TYPE_ARM_VGIC_V3 ARM Generic Interrupt Controller v3.0
Only one VGIC instance may be instantiated through this API. The created VGIC
will act as the VM interrupt controller, requiring emulated user-space devices
@@ -15,7 +18,8 @@
Groups:
KVM_DEV_ARM_VGIC_GRP_ADDR
- Attributes:
+ Attributes:
+
KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit)
Base address in the guest physical address space of the GICv3 distributor
register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
@@ -29,21 +33,25 @@
This address needs to be 64K aligned.
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION (rw, 64-bit)
- The attribute data pointed to by kvm_device_attr.addr is a __u64 value:
- bits: | 63 .... 52 | 51 .... 16 | 15 - 12 |11 - 0
- values: | count | base | flags | index
+ The attribute data pointed to by kvm_device_attr.addr is a __u64 value::
+
+ bits: | 63 .... 52 | 51 .... 16 | 15 - 12 |11 - 0
+ values: | count | base | flags | index
+
- index encodes the unique redistributor region index
- flags: reserved for future use, currently 0
- base field encodes bits [51:16] of the guest physical base address
of the first redistributor in the region.
- count encodes the number of redistributors in the region. Must be
greater than 0.
+
There are two 64K pages for each redistributor in the region and
redistributors are laid out contiguously within the region. Regions
are filled with redistributors in the index order. The sum of all
region count fields must be greater than or equal to the number of
VCPUs. Redistributor regions must be registered in the incremental
index order, starting from index 0.
+
The characteristics of a specific redistributor region can be read
by presetting the index field in the attr data.
Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
@@ -52,23 +60,27 @@
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attributes.
Errors:
- -E2BIG: Address outside of addressable IPA range
- -EINVAL: Incorrectly aligned address, bad redistributor region
+
+ ======= =============================================================
+ -E2BIG Address outside of addressable IPA range
+ -EINVAL Incorrectly aligned address, bad redistributor region
count/index, mixed redistributor region attribute usage
- -EEXIST: Address already configured
- -ENOENT: Attempt to read the characteristics of a non existing
+ -EEXIST Address already configured
+ -ENOENT Attempt to read the characteristics of a non existing
redistributor region
- -ENXIO: The group or attribute is unknown/unsupported for this device
+ -ENXIO The group or attribute is unknown/unsupported for this device
or hardware support is missing.
- -EFAULT: Invalid user pointer for attr->addr.
+ -EFAULT Invalid user pointer for attr->addr.
+ ======= =============================================================
- KVM_DEV_ARM_VGIC_GRP_DIST_REGS
- KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
- Attributes:
- The attr field of kvm_device_attr encodes two values:
- bits: | 63 .... 32 | 31 .... 0 |
- values: | mpidr | offset |
+ KVM_DEV_ARM_VGIC_GRP_DIST_REGS, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
+ Attributes:
+
+ The attr field of kvm_device_attr encodes two values::
+
+ bits: | 63 .... 32 | 31 .... 0 |
+ values: | mpidr | offset |
All distributor regs are (rw, 32-bit) and kvm_device_attr.addr points to a
__u32 value. 64-bit registers must be accessed by separately accessing the
@@ -93,7 +105,8 @@
redistributor is accessed. The mpidr is ignored for the distributor.
The mpidr encoding is based on the affinity information in the
- architecture defined MPIDR, and the field is encoded as follows:
+ architecture defined MPIDR, and the field is encoded as follows::
+
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
| Aff3 | Aff2 | Aff1 | Aff0 |
@@ -148,24 +161,30 @@
ignored.
Errors:
- -ENXIO: Getting or setting this register is not yet supported
- -EBUSY: One or more VCPUs are running
+
+ ====== =====================================================
+ -ENXIO Getting or setting this register is not yet supported
+ -EBUSY One or more VCPUs are running
+ ====== =====================================================
KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS
- Attributes:
- The attr field of kvm_device_attr encodes two values:
- bits: | 63 .... 32 | 31 .... 16 | 15 .... 0 |
- values: | mpidr | RES | instr |
+ Attributes:
+
+ The attr field of kvm_device_attr encodes two values::
+
+ bits: | 63 .... 32 | 31 .... 16 | 15 .... 0 |
+ values: | mpidr | RES | instr |
The mpidr field encodes the CPU ID based on the affinity information in the
- architecture defined MPIDR, and the field is encoded as follows:
+ architecture defined MPIDR, and the field is encoded as follows::
+
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
| Aff3 | Aff2 | Aff1 | Aff0 |
The instr field encodes the system register to access based on the fields
defined in the A64 instruction set encoding for system register access
- (RES means the bits are reserved for future use and should be zero):
+ (RES means the bits are reserved for future use and should be zero)::
| 15 ... 14 | 13 ... 11 | 10 ... 7 | 6 ... 3 | 2 ... 0 |
| Op 0 | Op1 | CRn | CRm | Op2 |
@@ -178,26 +197,35 @@
CPU interface registers access is not implemented for AArch32 mode.
Error -ENXIO is returned when accessed in AArch32 mode.
+
Errors:
- -ENXIO: Getting or setting this register is not yet supported
- -EBUSY: VCPU is running
- -EINVAL: Invalid mpidr or register value supplied
+
+ ======= =====================================================
+ -ENXIO Getting or setting this register is not yet supported
+ -EBUSY VCPU is running
+ -EINVAL Invalid mpidr or register value supplied
+ ======= =====================================================
KVM_DEV_ARM_VGIC_GRP_NR_IRQS
- Attributes:
+ Attributes:
+
A value describing the number of interrupts (SGI, PPI and SPI) for
this GIC instance, ranging from 64 to 1024, in increments of 32.
kvm_device_attr.addr points to a __u32 value.
Errors:
- -EINVAL: Value set is out of the expected range
- -EBUSY: Value has already be set.
+
+ ======= ======================================
+ -EINVAL Value set is out of the expected range
+ -EBUSY Value has already be set.
+ ======= ======================================
KVM_DEV_ARM_VGIC_GRP_CTRL
- Attributes:
+ Attributes:
+
KVM_DEV_ARM_VGIC_CTRL_INIT
request the initialization of the VGIC, no additional parameter in
kvm_device_attr.addr.
@@ -205,20 +233,26 @@
save all LPI pending bits into guest RAM pending tables.
The first kB of the pending table is not altered by this operation.
+
Errors:
- -ENXIO: VGIC not properly configured as required prior to calling
- this attribute
- -ENODEV: no online VCPU
- -ENOMEM: memory shortage when allocating vgic internal data
- -EFAULT: Invalid guest ram access
- -EBUSY: One or more VCPUS are running
+
+ ======= ========================================================
+ -ENXIO VGIC not properly configured as required prior to calling
+ this attribute
+ -ENODEV no online VCPU
+ -ENOMEM memory shortage when allocating vgic internal data
+ -EFAULT Invalid guest ram access
+ -EBUSY One or more VCPUS are running
+ ======= ========================================================
KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO
- Attributes:
- The attr field of kvm_device_attr encodes the following values:
- bits: | 63 .... 32 | 31 .... 10 | 9 .... 0 |
- values: | mpidr | info | vINTID |
+ Attributes:
+
+ The attr field of kvm_device_attr encodes the following values::
+
+ bits: | 63 .... 32 | 31 .... 10 | 9 .... 0 |
+ values: | mpidr | info | vINTID |
The vINTID specifies which set of IRQs is reported on.
@@ -228,6 +262,7 @@
VGIC_LEVEL_INFO_LINE_LEVEL:
Get/Set the input level of the IRQ line for a set of 32 contiguously
numbered interrupts.
+
vINTID must be a multiple of 32.
kvm_device_attr.addr points to a __u32 value which will contain a
@@ -243,9 +278,14 @@
reported with the same value regardless of the mpidr specified.
The mpidr field encodes the CPU ID based on the affinity information in the
- architecture defined MPIDR, and the field is encoded as follows:
+ architecture defined MPIDR, and the field is encoded as follows::
+
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
| Aff3 | Aff2 | Aff1 | Aff0 |
+
Errors:
- -EINVAL: vINTID is not multiple of 32 or
- info field is not VGIC_LEVEL_INFO_LINE_LEVEL
+
+ ======= =============================================
+ -EINVAL vINTID is not multiple of 32 or info field is
+ not VGIC_LEVEL_INFO_LINE_LEVEL
+ ======= =============================================
diff --git a/Documentation/virt/kvm/devices/arm-vgic.txt b/Documentation/virt/kvm/devices/arm-vgic.rst
similarity index 66%
rename from Documentation/virt/kvm/devices/arm-vgic.txt
rename to Documentation/virt/kvm/devices/arm-vgic.rst
index 97b6518..40bdeea 100644
--- a/Documentation/virt/kvm/devices/arm-vgic.txt
+++ b/Documentation/virt/kvm/devices/arm-vgic.rst
@@ -1,8 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================================================
ARM Virtual Generic Interrupt Controller v2 (VGIC)
==================================================
Device types supported:
- KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0
+
+ - KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0
Only one VGIC instance may be instantiated through either this API or the
legacy KVM_CREATE_IRQCHIP API. The created VGIC will act as the VM interrupt
@@ -17,7 +21,8 @@
Groups:
KVM_DEV_ARM_VGIC_GRP_ADDR
- Attributes:
+ Attributes:
+
KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
Base address in the guest physical address space of the GIC distributor
register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
@@ -27,19 +32,25 @@
Base address in the guest physical address space of the GIC virtual cpu
interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
This address needs to be 4K aligned and the region covers 4 KByte.
+
Errors:
- -E2BIG: Address outside of addressable IPA range
- -EINVAL: Incorrectly aligned address
- -EEXIST: Address already configured
- -ENXIO: The group or attribute is unknown/unsupported for this device
+
+ ======= =============================================================
+ -E2BIG Address outside of addressable IPA range
+ -EINVAL Incorrectly aligned address
+ -EEXIST Address already configured
+ -ENXIO The group or attribute is unknown/unsupported for this device
or hardware support is missing.
- -EFAULT: Invalid user pointer for attr->addr.
+ -EFAULT Invalid user pointer for attr->addr.
+ ======= =============================================================
KVM_DEV_ARM_VGIC_GRP_DIST_REGS
- Attributes:
- The attr field of kvm_device_attr encodes two values:
- bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
- values: | reserved | vcpu_index | offset |
+ Attributes:
+
+ The attr field of kvm_device_attr encodes two values::
+
+ bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
+ values: | reserved | vcpu_index | offset |
All distributor regs are (rw, 32-bit)
@@ -58,16 +69,22 @@
KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS) to ensure
the expected behavior. Unless GICD_IIDR has been set from userspace, writes
to the interrupt group registers (GICD_IGROUPR) are ignored.
+
Errors:
- -ENXIO: Getting or setting this register is not yet supported
- -EBUSY: One or more VCPUs are running
- -EINVAL: Invalid vcpu_index supplied
+
+ ======= =====================================================
+ -ENXIO Getting or setting this register is not yet supported
+ -EBUSY One or more VCPUs are running
+ -EINVAL Invalid vcpu_index supplied
+ ======= =====================================================
KVM_DEV_ARM_VGIC_GRP_CPU_REGS
- Attributes:
- The attr field of kvm_device_attr encodes two values:
- bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
- values: | reserved | vcpu_index | offset |
+ Attributes:
+
+ The attr field of kvm_device_attr encodes two values::
+
+ bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
+ values: | reserved | vcpu_index | offset |
All CPU interface regs are (rw, 32-bit)
@@ -101,27 +118,39 @@
value left by 3 places to obtain the actual priority mask level.
Errors:
- -ENXIO: Getting or setting this register is not yet supported
- -EBUSY: One or more VCPUs are running
- -EINVAL: Invalid vcpu_index supplied
+
+ ======= =====================================================
+ -ENXIO Getting or setting this register is not yet supported
+ -EBUSY One or more VCPUs are running
+ -EINVAL Invalid vcpu_index supplied
+ ======= =====================================================
KVM_DEV_ARM_VGIC_GRP_NR_IRQS
- Attributes:
+ Attributes:
+
A value describing the number of interrupts (SGI, PPI and SPI) for
this GIC instance, ranging from 64 to 1024, in increments of 32.
Errors:
- -EINVAL: Value set is out of the expected range
- -EBUSY: Value has already be set, or GIC has already been initialized
- with default values.
+
+ ======= =============================================================
+ -EINVAL Value set is out of the expected range
+ -EBUSY Value has already be set, or GIC has already been initialized
+ with default values.
+ ======= =============================================================
KVM_DEV_ARM_VGIC_GRP_CTRL
- Attributes:
+ Attributes:
+
KVM_DEV_ARM_VGIC_CTRL_INIT
request the initialization of the VGIC or ITS, no additional parameter
in kvm_device_attr.addr.
+
Errors:
- -ENXIO: VGIC not properly configured as required prior to calling
- this attribute
- -ENODEV: no online VCPU
- -ENOMEM: memory shortage when allocating vgic internal data
+
+ ======= =========================================================
+ -ENXIO VGIC not properly configured as required prior to calling
+ this attribute
+ -ENODEV no online VCPU
+ -ENOMEM memory shortage when allocating vgic internal data
+ ======= =========================================================
diff --git a/Documentation/virt/kvm/devices/index.rst b/Documentation/virt/kvm/devices/index.rst
new file mode 100644
index 0000000..192cda7
--- /dev/null
+++ b/Documentation/virt/kvm/devices/index.rst
@@ -0,0 +1,19 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======
+Devices
+=======
+
+.. toctree::
+ :maxdepth: 2
+
+ arm-vgic-its
+ arm-vgic
+ arm-vgic-v3
+ mpic
+ s390_flic
+ vcpu
+ vfio
+ vm
+ xics
+ xive
diff --git a/Documentation/virt/kvm/devices/mpic.txt b/Documentation/virt/kvm/devices/mpic.rst
similarity index 90%
rename from Documentation/virt/kvm/devices/mpic.txt
rename to Documentation/virt/kvm/devices/mpic.rst
index 8257397..55cefe0 100644
--- a/Documentation/virt/kvm/devices/mpic.txt
+++ b/Documentation/virt/kvm/devices/mpic.rst
@@ -1,9 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
MPIC interrupt controller
=========================
Device types supported:
- KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0
- KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2
+
+ - KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0
+ - KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2
Only one MPIC instance, of any type, may be instantiated. The created
MPIC will act as the system interrupt controller, connecting to each
@@ -11,7 +15,8 @@
Groups:
KVM_DEV_MPIC_GRP_MISC
- Attributes:
+ Attributes:
+
KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit)
Base address of the 256 KiB MPIC register space. Must be
naturally aligned. A value of zero disables the mapping.
diff --git a/Documentation/virt/kvm/devices/s390_flic.txt b/Documentation/virt/kvm/devices/s390_flic.rst
similarity index 87%
rename from Documentation/virt/kvm/devices/s390_flic.txt
rename to Documentation/virt/kvm/devices/s390_flic.rst
index a4e20a0..954190d 100644
--- a/Documentation/virt/kvm/devices/s390_flic.txt
+++ b/Documentation/virt/kvm/devices/s390_flic.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================================
FLIC (floating interrupt controller)
====================================
@@ -31,8 +34,10 @@
Copies all floating interrupts into a buffer provided by userspace.
When the buffer is too small it returns -ENOMEM, which is the indication
for userspace to try again with a bigger buffer.
+
-ENOBUFS is returned when the allocation of a kernelspace buffer has
failed.
+
-EFAULT is returned when copying data to userspace failed.
All interrupts remain pending, i.e. are not deleted from the list of
currently pending interrupts.
@@ -60,38 +65,41 @@
KVM_DEV_FLIC_ADAPTER_REGISTER
Register an I/O adapter interrupt source. Takes a kvm_s390_io_adapter
- describing the adapter to register:
+ describing the adapter to register::
-struct kvm_s390_io_adapter {
- __u32 id;
- __u8 isc;
- __u8 maskable;
- __u8 swap;
- __u8 flags;
-};
+ struct kvm_s390_io_adapter {
+ __u32 id;
+ __u8 isc;
+ __u8 maskable;
+ __u8 swap;
+ __u8 flags;
+ };
id contains the unique id for the adapter, isc the I/O interruption subclass
to use, maskable whether this adapter may be masked (interrupts turned off),
swap whether the indicators need to be byte swapped, and flags contains
further characteristics of the adapter.
+
Currently defined values for 'flags' are:
+
- KVM_S390_ADAPTER_SUPPRESSIBLE: adapter is subject to AIS
(adapter-interrupt-suppression) facility. This flag only has an effect if
the AIS capability is enabled.
+
Unknown flag values are ignored.
KVM_DEV_FLIC_ADAPTER_MODIFY
Modifies attributes of an existing I/O adapter interrupt source. Takes
- a kvm_s390_io_adapter_req specifying the adapter and the operation:
+ a kvm_s390_io_adapter_req specifying the adapter and the operation::
-struct kvm_s390_io_adapter_req {
- __u32 id;
- __u8 type;
- __u8 mask;
- __u16 pad0;
- __u64 addr;
-};
+ struct kvm_s390_io_adapter_req {
+ __u32 id;
+ __u8 type;
+ __u8 mask;
+ __u16 pad0;
+ __u64 addr;
+ };
id specifies the adapter and type the operation. The supported operations
are:
@@ -103,8 +111,9 @@
perform a gmap translation for the guest address provided in addr,
pin a userspace page for the translated address and add it to the
list of mappings
- Note: A new mapping will be created unconditionally; therefore,
- the calling code should avoid making duplicate mappings.
+
+ .. note:: A new mapping will be created unconditionally; therefore,
+ the calling code should avoid making duplicate mappings.
KVM_S390_IO_ADAPTER_UNMAP
release a userspace page for the translated address specified in addr
@@ -112,16 +121,17 @@
KVM_DEV_FLIC_AISM
modify the adapter-interruption-suppression mode for a given isc if the
- AIS capability is enabled. Takes a kvm_s390_ais_req describing:
+ AIS capability is enabled. Takes a kvm_s390_ais_req describing::
-struct kvm_s390_ais_req {
- __u8 isc;
- __u16 mode;
-};
+ struct kvm_s390_ais_req {
+ __u8 isc;
+ __u16 mode;
+ };
isc contains the target I/O interruption subclass, mode the target
adapter-interruption-suppression mode. The following modes are
currently supported:
+
- KVM_S390_AIS_MODE_ALL: ALL-Interruptions Mode, i.e. airq injection
is always allowed;
- KVM_S390_AIS_MODE_SINGLE: SINGLE-Interruption Mode, i.e. airq
@@ -139,12 +149,12 @@
KVM_DEV_FLIC_AISM_ALL
Gets or sets the adapter-interruption-suppression mode for all ISCs. Takes
- a kvm_s390_ais_all describing:
+ a kvm_s390_ais_all describing::
-struct kvm_s390_ais_all {
- __u8 simm; /* Single-Interruption-Mode mask */
- __u8 nimm; /* No-Interruption-Mode mask *
-};
+ struct kvm_s390_ais_all {
+ __u8 simm; /* Single-Interruption-Mode mask */
+ __u8 nimm; /* No-Interruption-Mode mask *
+ };
simm contains Single-Interruption-Mode mask for all ISCs, nimm contains
No-Interruption-Mode mask for all ISCs. Each bit in simm and nimm corresponds
@@ -159,5 +169,5 @@
that a FLIC operation is unavailable based on the error code resulting from a
usage attempt.
-Note: The KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl will return EINVAL in case a zero
-schid is specified.
+.. note:: The KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl will return EINVAL in case a
+ zero schid is specified.
diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst
new file mode 100644
index 0000000..9963e68
--- /dev/null
+++ b/Documentation/virt/kvm/devices/vcpu.rst
@@ -0,0 +1,114 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+Generic vcpu interface
+======================
+
+The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
+KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct
+kvm_device_attr as other devices, but targets VCPU-wide settings and controls.
+
+The groups and attributes per virtual cpu, if any, are architecture specific.
+
+1. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL
+==================================
+
+:Architectures: ARM64
+
+1.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ
+---------------------------------------
+
+:Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a
+ pointer to an int
+
+Returns:
+
+ ======= ========================================================
+ -EBUSY The PMU overflow interrupt is already set
+ -ENXIO The overflow interrupt not set when attempting to get it
+ -ENODEV PMUv3 not supported
+ -EINVAL Invalid PMU overflow interrupt number supplied or
+ trying to set the IRQ number without using an in-kernel
+ irqchip.
+ ======= ========================================================
+
+A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
+number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
+type must be same for each vcpu. As a PPI, the interrupt number is the same for
+all vcpus, while as an SPI it must be a separate number per vcpu.
+
+1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
+---------------------------------------
+
+:Parameters: no additional parameter in kvm_device_attr.addr
+
+Returns:
+
+ ======= ======================================================
+ -ENODEV PMUv3 not supported or GIC not initialized
+ -ENXIO PMUv3 not properly configured or in-kernel irqchip not
+ configured as required prior to calling this attribute
+ -EBUSY PMUv3 already initialized
+ ======= ======================================================
+
+Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel
+virtual GIC implementation, this must be done after initializing the in-kernel
+irqchip.
+
+
+2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
+=================================
+
+:Architectures: ARM, ARM64
+
+2.1. ATTRIBUTES: KVM_ARM_VCPU_TIMER_IRQ_VTIMER, KVM_ARM_VCPU_TIMER_IRQ_PTIMER
+-----------------------------------------------------------------------------
+
+:Parameters: in kvm_device_attr.addr the address for the timer interrupt is a
+ pointer to an int
+
+Returns:
+
+ ======= =================================
+ -EINVAL Invalid timer interrupt number
+ -EBUSY One or more VCPUs has already run
+ ======= =================================
+
+A value describing the architected timer interrupt number when connected to an
+in-kernel virtual GIC. These must be a PPI (16 <= intid < 32). Setting the
+attribute overrides the default values (see below).
+
+============================= ==========================================
+KVM_ARM_VCPU_TIMER_IRQ_VTIMER The EL1 virtual timer intid (default: 27)
+KVM_ARM_VCPU_TIMER_IRQ_PTIMER The EL1 physical timer intid (default: 30)
+============================= ==========================================
+
+Setting the same PPI for different timers will prevent the VCPUs from running.
+Setting the interrupt number on a VCPU configures all VCPUs created at that
+time to use the number provided for a given timer, overwriting any previously
+configured values on other VCPUs. Userspace should configure the interrupt
+numbers on at least one VCPU after creating all VCPUs and before running any
+VCPUs.
+
+3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL
+==================================
+
+:Architectures: ARM64
+
+3.1 ATTRIBUTE: KVM_ARM_VCPU_PVTIME_IPA
+--------------------------------------
+
+:Parameters: 64-bit base address
+
+Returns:
+
+ ======= ======================================
+ -ENXIO Stolen time not implemented
+ -EEXIST Base address already set for this VCPU
+ -EINVAL Base address not 64 byte aligned
+ ======= ======================================
+
+Specifies the base address of the stolen time structure for this VCPU. The
+base address must be 64 byte aligned and exist within a valid guest memory
+region. See Documentation/virt/kvm/arm/pvtime.txt for more information
+including the layout of the stolen time structure.
diff --git a/Documentation/virt/kvm/devices/vcpu.txt b/Documentation/virt/kvm/devices/vcpu.txt
deleted file mode 100644
index 6f3bd64..0000000
--- a/Documentation/virt/kvm/devices/vcpu.txt
+++ /dev/null
@@ -1,76 +0,0 @@
-Generic vcpu interface
-====================================
-
-The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
-KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct
-kvm_device_attr as other devices, but targets VCPU-wide settings and controls.
-
-The groups and attributes per virtual cpu, if any, are architecture specific.
-
-1. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL
-Architectures: ARM64
-
-1.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ
-Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a
- pointer to an int
-Returns: -EBUSY: The PMU overflow interrupt is already set
- -ENXIO: The overflow interrupt not set when attempting to get it
- -ENODEV: PMUv3 not supported
- -EINVAL: Invalid PMU overflow interrupt number supplied or
- trying to set the IRQ number without using an in-kernel
- irqchip.
-
-A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
-number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
-type must be same for each vcpu. As a PPI, the interrupt number is the same for
-all vcpus, while as an SPI it must be a separate number per vcpu.
-
-1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
-Parameters: no additional parameter in kvm_device_attr.addr
-Returns: -ENODEV: PMUv3 not supported or GIC not initialized
- -ENXIO: PMUv3 not properly configured or in-kernel irqchip not
- configured as required prior to calling this attribute
- -EBUSY: PMUv3 already initialized
-
-Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel
-virtual GIC implementation, this must be done after initializing the in-kernel
-irqchip.
-
-
-2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
-Architectures: ARM,ARM64
-
-2.1. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_VTIMER
-2.2. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_PTIMER
-Parameters: in kvm_device_attr.addr the address for the timer interrupt is a
- pointer to an int
-Returns: -EINVAL: Invalid timer interrupt number
- -EBUSY: One or more VCPUs has already run
-
-A value describing the architected timer interrupt number when connected to an
-in-kernel virtual GIC. These must be a PPI (16 <= intid < 32). Setting the
-attribute overrides the default values (see below).
-
-KVM_ARM_VCPU_TIMER_IRQ_VTIMER: The EL1 virtual timer intid (default: 27)
-KVM_ARM_VCPU_TIMER_IRQ_PTIMER: The EL1 physical timer intid (default: 30)
-
-Setting the same PPI for different timers will prevent the VCPUs from running.
-Setting the interrupt number on a VCPU configures all VCPUs created at that
-time to use the number provided for a given timer, overwriting any previously
-configured values on other VCPUs. Userspace should configure the interrupt
-numbers on at least one VCPU after creating all VCPUs and before running any
-VCPUs.
-
-3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL
-Architectures: ARM64
-
-3.1 ATTRIBUTE: KVM_ARM_VCPU_PVTIME_IPA
-Parameters: 64-bit base address
-Returns: -ENXIO: Stolen time not implemented
- -EEXIST: Base address already set for this VCPU
- -EINVAL: Base address not 64 byte aligned
-
-Specifies the base address of the stolen time structure for this VCPU. The
-base address must be 64 byte aligned and exist within a valid guest memory
-region. See Documentation/virt/kvm/arm/pvtime.txt for more information
-including the layout of the stolen time structure.
diff --git a/Documentation/virt/kvm/devices/vfio.txt b/Documentation/virt/kvm/devices/vfio.rst
similarity index 72%
rename from Documentation/virt/kvm/devices/vfio.txt
rename to Documentation/virt/kvm/devices/vfio.rst
index 528c77c..2d20dc5 100644
--- a/Documentation/virt/kvm/devices/vfio.txt
+++ b/Documentation/virt/kvm/devices/vfio.rst
@@ -1,8 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================
VFIO virtual device
===================
Device types supported:
- KVM_DEV_TYPE_VFIO
+
+ - KVM_DEV_TYPE_VFIO
Only one VFIO instance may be created per VM. The created device
tracks VFIO groups in use by the VM and features of those groups
@@ -23,14 +27,15 @@
for the VFIO group.
KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: attaches a guest visible TCE table
allocated by sPAPR KVM.
- kvm_device_attr.addr points to a struct:
+ kvm_device_attr.addr points to a struct::
- struct kvm_vfio_spapr_tce {
- __s32 groupfd;
- __s32 tablefd;
- };
+ struct kvm_vfio_spapr_tce {
+ __s32 groupfd;
+ __s32 tablefd;
+ };
- where
- @groupfd is a file descriptor for a VFIO group;
- @tablefd is a file descriptor for a TCE table allocated via
- KVM_CREATE_SPAPR_TCE.
+ where:
+
+ - @groupfd is a file descriptor for a VFIO group;
+ - @tablefd is a file descriptor for a TCE table allocated via
+ KVM_CREATE_SPAPR_TCE.
diff --git a/Documentation/virt/kvm/devices/vm.txt b/Documentation/virt/kvm/devices/vm.rst
similarity index 61%
rename from Documentation/virt/kvm/devices/vm.txt
rename to Documentation/virt/kvm/devices/vm.rst
index 4ffb82b..0aa5b1c 100644
--- a/Documentation/virt/kvm/devices/vm.txt
+++ b/Documentation/virt/kvm/devices/vm.rst
@@ -1,5 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
Generic vm interface
-====================================
+====================
The virtual machine "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same
@@ -10,30 +13,38 @@
specific.
1. GROUP: KVM_S390_VM_MEM_CTRL
-Architectures: s390
+==============================
+
+:Architectures: s390
1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
-Parameters: none
-Returns: -EBUSY if a vcpu is already defined, otherwise 0
+-------------------------------------------
+
+:Parameters: none
+:Returns: -EBUSY if a vcpu is already defined, otherwise 0
Enables Collaborative Memory Management Assist (CMMA) for the virtual machine.
1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
-Parameters: none
-Returns: -EINVAL if CMMA was not enabled
- 0 otherwise
+----------------------------------------
+
+:Parameters: none
+:Returns: -EINVAL if CMMA was not enabled;
+ 0 otherwise
Clear the CMMA status for all guest pages, so any pages the guest marked
as unused are again used any may not be reclaimed by the host.
1.3. ATTRIBUTE KVM_S390_VM_MEM_LIMIT_SIZE
-Parameters: in attr->addr the address for the new limit of guest memory
-Returns: -EFAULT if the given address is not accessible
- -EINVAL if the virtual machine is of type UCONTROL
- -E2BIG if the given guest memory is to big for that machine
- -EBUSY if a vcpu is already defined
- -ENOMEM if not enough memory is available for a new shadow guest mapping
- 0 otherwise
+-----------------------------------------
+
+:Parameters: in attr->addr the address for the new limit of guest memory
+:Returns: -EFAULT if the given address is not accessible;
+ -EINVAL if the virtual machine is of type UCONTROL;
+ -E2BIG if the given guest memory is to big for that machine;
+ -EBUSY if a vcpu is already defined;
+ -ENOMEM if not enough memory is available for a new shadow guest mapping;
+ 0 otherwise.
Allows userspace to query the actual limit and set a new limit for
the maximum guest memory size. The limit will be rounded up to
@@ -42,78 +53,92 @@
the limit to KVM_S390_NO_MEM_LIMIT (U64_MAX).
2. GROUP: KVM_S390_VM_CPU_MODEL
-Architectures: s390
+===============================
+
+:Architectures: s390
2.1. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE (r/o)
+---------------------------------------------
-Allows user space to retrieve machine and kvm specific cpu related information:
+Allows user space to retrieve machine and kvm specific cpu related information::
-struct kvm_s390_vm_cpu_machine {
+ struct kvm_s390_vm_cpu_machine {
__u64 cpuid; # CPUID of host
__u32 ibc; # IBC level range offered by host
__u8 pad[4];
__u64 fac_mask[256]; # set of cpu facilities enabled by KVM
__u64 fac_list[256]; # set of cpu facilities offered by host
-}
+ }
-Parameters: address of buffer to store the machine related cpu data
- of type struct kvm_s390_vm_cpu_machine*
-Returns: -EFAULT if the given address is not accessible from kernel space
- -ENOMEM if not enough memory is available to process the ioctl
- 0 in case of success
+:Parameters: address of buffer to store the machine related cpu data
+ of type struct kvm_s390_vm_cpu_machine*
+:Returns: -EFAULT if the given address is not accessible from kernel space;
+ -ENOMEM if not enough memory is available to process the ioctl;
+ 0 in case of success.
2.2. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR (r/w)
+===============================================
-Allows user space to retrieve or request to change cpu related information for a vcpu:
+Allows user space to retrieve or request to change cpu related information for a vcpu::
-struct kvm_s390_vm_cpu_processor {
+ struct kvm_s390_vm_cpu_processor {
__u64 cpuid; # CPUID currently (to be) used by this vcpu
__u16 ibc; # IBC level currently (to be) used by this vcpu
__u8 pad[6];
__u64 fac_list[256]; # set of cpu facilities currently (to be) used
- # by this vcpu
-}
+ # by this vcpu
+ }
KVM does not enforce or limit the cpu model data in any form. Take the information
retrieved by means of KVM_S390_VM_CPU_MACHINE as hint for reasonable configuration
setups. Instruction interceptions triggered by additionally set facility bits that
are not handled by KVM need to by imlemented in the VM driver code.
-Parameters: address of buffer to store/set the processor related cpu
- data of type struct kvm_s390_vm_cpu_processor*.
-Returns: -EBUSY in case 1 or more vcpus are already activated (only in write case)
- -EFAULT if the given address is not accessible from kernel space
- -ENOMEM if not enough memory is available to process the ioctl
- 0 in case of success
+:Parameters: address of buffer to store/set the processor related cpu
+ data of type struct kvm_s390_vm_cpu_processor*.
+:Returns: -EBUSY in case 1 or more vcpus are already activated (only in write case);
+ -EFAULT if the given address is not accessible from kernel space;
+ -ENOMEM if not enough memory is available to process the ioctl;
+ 0 in case of success.
+
+.. _KVM_S390_VM_CPU_MACHINE_FEAT:
2.3. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_FEAT (r/o)
+--------------------------------------------------
Allows user space to retrieve available cpu features. A feature is available if
provided by the hardware and supported by kvm. In theory, cpu features could
even be completely emulated by kvm.
-struct kvm_s390_vm_cpu_feat {
- __u64 feat[16]; # Bitmap (1 = feature available), MSB 0 bit numbering
-};
+::
-Parameters: address of a buffer to load the feature list from.
-Returns: -EFAULT if the given address is not accessible from kernel space.
- 0 in case of success.
+ struct kvm_s390_vm_cpu_feat {
+ __u64 feat[16]; # Bitmap (1 = feature available), MSB 0 bit numbering
+ };
+
+:Parameters: address of a buffer to load the feature list from.
+:Returns: -EFAULT if the given address is not accessible from kernel space;
+ 0 in case of success.
2.4. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_FEAT (r/w)
+----------------------------------------------------
Allows user space to retrieve or change enabled cpu features for all VCPUs of a
VM. Features that are not available cannot be enabled.
-See 2.3. for a description of the parameter struct.
+See :ref:`KVM_S390_VM_CPU_MACHINE_FEAT` for
+a description of the parameter struct.
-Parameters: address of a buffer to store/load the feature list from.
-Returns: -EFAULT if the given address is not accessible from kernel space.
- -EINVAL if a cpu feature that is not available is to be enabled.
- -EBUSY if at least one VCPU has already been defined.
+:Parameters: address of a buffer to store/load the feature list from.
+:Returns: -EFAULT if the given address is not accessible from kernel space;
+ -EINVAL if a cpu feature that is not available is to be enabled;
+ -EBUSY if at least one VCPU has already been defined;
0 in case of success.
+.. _KVM_S390_VM_CPU_MACHINE_SUBFUNC:
+
2.5. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_SUBFUNC (r/o)
+-----------------------------------------------------
Allows user space to retrieve available cpu subfunctions without any filtering
done by a set IBC. These subfunctions are indicated to the guest VCPU via
@@ -126,7 +151,9 @@
indicates subfunctions via a "test bit" mechanism, the subfunction codes are
contained in the returned struct in MSB 0 bit numbering.
-struct kvm_s390_vm_cpu_subfunc {
+::
+
+ struct kvm_s390_vm_cpu_subfunc {
u8 plo[32]; # always valid (ESA/390 feature)
u8 ptff[16]; # valid with TOD-clock steering
u8 kmac[16]; # valid with Message-Security-Assist
@@ -143,13 +170,14 @@
u8 kma[16]; # valid with Message-Security-Assist-Extension 8
u8 kdsa[16]; # valid with Message-Security-Assist-Extension 9
u8 reserved[1792]; # reserved for future instructions
-};
+ };
-Parameters: address of a buffer to load the subfunction blocks from.
-Returns: -EFAULT if the given address is not accessible from kernel space.
+:Parameters: address of a buffer to load the subfunction blocks from.
+:Returns: -EFAULT if the given address is not accessible from kernel space;
0 in case of success.
2.6. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_SUBFUNC (r/w)
+-------------------------------------------------------
Allows user space to retrieve or change cpu subfunctions to be indicated for
all VCPUs of a VM. This attribute will only be available if kernel and
@@ -164,107 +192,125 @@
to determine available subfunctions in this case, this will guarantee backward
compatibility.
-See 2.5. for a description of the parameter struct.
+See :ref:`KVM_S390_VM_CPU_MACHINE_SUBFUNC` for a
+description of the parameter struct.
-Parameters: address of a buffer to store/load the subfunction blocks from.
-Returns: -EFAULT if the given address is not accessible from kernel space.
- -EINVAL when reading, if there was no write yet.
- -EBUSY if at least one VCPU has already been defined.
+:Parameters: address of a buffer to store/load the subfunction blocks from.
+:Returns: -EFAULT if the given address is not accessible from kernel space;
+ -EINVAL when reading, if there was no write yet;
+ -EBUSY if at least one VCPU has already been defined;
0 in case of success.
3. GROUP: KVM_S390_VM_TOD
-Architectures: s390
+=========================
+
+:Architectures: s390
3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH
+------------------------------------
Allows user space to set/get the TOD clock extension (u8) (superseded by
KVM_S390_VM_TOD_EXT).
-Parameters: address of a buffer in user space to store the data (u8) to
-Returns: -EFAULT if the given address is not accessible from kernel space
+:Parameters: address of a buffer in user space to store the data (u8) to
+:Returns: -EFAULT if the given address is not accessible from kernel space;
-EINVAL if setting the TOD clock extension to != 0 is not supported
3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW
+-----------------------------------
Allows user space to set/get bits 0-63 of the TOD clock register as defined in
the POP (u64).
-Parameters: address of a buffer in user space to store the data (u64) to
-Returns: -EFAULT if the given address is not accessible from kernel space
+:Parameters: address of a buffer in user space to store the data (u64) to
+:Returns: -EFAULT if the given address is not accessible from kernel space
3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT
+-----------------------------------
+
Allows user space to set/get bits 0-63 of the TOD clock register as defined in
the POP (u64). If the guest CPU model supports the TOD clock extension (u8), it
also allows user space to get/set it. If the guest CPU model does not support
it, it is stored as 0 and not allowed to be set to a value != 0.
-Parameters: address of a buffer in user space to store the data
- (kvm_s390_vm_tod_clock) to
-Returns: -EFAULT if the given address is not accessible from kernel space
+:Parameters: address of a buffer in user space to store the data
+ (kvm_s390_vm_tod_clock) to
+:Returns: -EFAULT if the given address is not accessible from kernel space;
-EINVAL if setting the TOD clock extension to != 0 is not supported
4. GROUP: KVM_S390_VM_CRYPTO
-Architectures: s390
+============================
+
+:Architectures: s390
4.1. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_AES_KW (w/o)
+------------------------------------------------------
Allows user space to enable aes key wrapping, including generating a new
wrapping key.
-Parameters: none
-Returns: 0
+:Parameters: none
+:Returns: 0
4.2. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_DEA_KW (w/o)
+------------------------------------------------------
Allows user space to enable dea key wrapping, including generating a new
wrapping key.
-Parameters: none
-Returns: 0
+:Parameters: none
+:Returns: 0
4.3. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_AES_KW (w/o)
+-------------------------------------------------------
Allows user space to disable aes key wrapping, clearing the wrapping key.
-Parameters: none
-Returns: 0
+:Parameters: none
+:Returns: 0
4.4. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_DEA_KW (w/o)
+-------------------------------------------------------
Allows user space to disable dea key wrapping, clearing the wrapping key.
-Parameters: none
-Returns: 0
+:Parameters: none
+:Returns: 0
5. GROUP: KVM_S390_VM_MIGRATION
-Architectures: s390
+===============================
+
+:Architectures: s390
5.1. ATTRIBUTE: KVM_S390_VM_MIGRATION_STOP (w/o)
+------------------------------------------------
Allows userspace to stop migration mode, needed for PGSTE migration.
Setting this attribute when migration mode is not active will have no
effects.
-Parameters: none
-Returns: 0
+:Parameters: none
+:Returns: 0
5.2. ATTRIBUTE: KVM_S390_VM_MIGRATION_START (w/o)
+-------------------------------------------------
Allows userspace to start migration mode, needed for PGSTE migration.
Setting this attribute when migration mode is already active will have
no effects.
-Parameters: none
-Returns: -ENOMEM if there is not enough free memory to start migration mode
- -EINVAL if the state of the VM is invalid (e.g. no memory defined)
+:Parameters: none
+:Returns: -ENOMEM if there is not enough free memory to start migration mode;
+ -EINVAL if the state of the VM is invalid (e.g. no memory defined);
0 in case of success.
5.3. ATTRIBUTE: KVM_S390_VM_MIGRATION_STATUS (r/o)
+--------------------------------------------------
Allows userspace to query the status of migration mode.
-Parameters: address of a buffer in user space to store the data (u64) to;
- the data itself is either 0 if migration mode is disabled or 1
- if it is enabled
-Returns: -EFAULT if the given address is not accessible from kernel space
+:Parameters: address of a buffer in user space to store the data (u64) to;
+ the data itself is either 0 if migration mode is disabled or 1
+ if it is enabled
+:Returns: -EFAULT if the given address is not accessible from kernel space;
0 in case of success.
diff --git a/Documentation/virt/kvm/devices/xics.txt b/Documentation/virt/kvm/devices/xics.rst
similarity index 83%
rename from Documentation/virt/kvm/devices/xics.txt
rename to Documentation/virt/kvm/devices/xics.rst
index 423332d..2d6927e 100644
--- a/Documentation/virt/kvm/devices/xics.txt
+++ b/Documentation/virt/kvm/devices/xics.rst
@@ -1,20 +1,31 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
XICS interrupt controller
+=========================
Device type supported: KVM_DEV_TYPE_XICS
Groups:
1. KVM_DEV_XICS_GRP_SOURCES
- Attributes: One per interrupt source, indexed by the source number.
+ Attributes:
+ One per interrupt source, indexed by the source number.
2. KVM_DEV_XICS_GRP_CTRL
- Attributes:
- 2.1 KVM_DEV_XICS_NR_SERVERS (write only)
+ Attributes:
+
+ 2.1 KVM_DEV_XICS_NR_SERVERS (write only)
+
The kvm_device_attr.addr points to a __u32 value which is the number of
interrupt server numbers (ie, highest possible vcpu id plus one).
+
Errors:
- -EINVAL: Value greater than KVM_MAX_VCPU_ID.
- -EFAULT: Invalid user pointer for attr->addr.
- -EBUSY: A vcpu is already connected to the device.
+
+ ======= ==========================================
+ -EINVAL Value greater than KVM_MAX_VCPU_ID.
+ -EFAULT Invalid user pointer for attr->addr.
+ -EBUSY A vcpu is already connected to the device.
+ ======= ==========================================
This device emulates the XICS (eXternal Interrupt Controller
Specification) defined in PAPR. The XICS has a set of interrupt
@@ -53,24 +64,29 @@
bitfields, starting from the least-significant end of the word:
* Destination (server number), 32 bits
+
This specifies where the interrupt should be sent, and is the
interrupt server number specified for the destination vcpu.
* Priority, 8 bits
+
This is the priority specified for this interrupt source, where 0 is
the highest priority and 255 is the lowest. An interrupt with a
priority of 255 will never be delivered.
* Level sensitive flag, 1 bit
+
This bit is 1 for a level-sensitive interrupt source, or 0 for
edge-sensitive (or MSI).
* Masked flag, 1 bit
+
This bit is set to 1 if the interrupt is masked (cannot be delivered
regardless of its priority), for example by the ibm,int-off RTAS
call, or 0 if it is not masked.
* Pending flag, 1 bit
+
This bit is 1 if the source has a pending interrupt, otherwise 0.
Only one XICS instance may be created per VM.
diff --git a/Documentation/virt/kvm/devices/xive.txt b/Documentation/virt/kvm/devices/xive.rst
similarity index 62%
rename from Documentation/virt/kvm/devices/xive.txt
rename to Documentation/virt/kvm/devices/xive.rst
index f5d1d6b..8bdf3dc 100644
--- a/Documentation/virt/kvm/devices/xive.txt
+++ b/Documentation/virt/kvm/devices/xive.rst
@@ -1,8 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================================================
POWER9 eXternal Interrupt Virtualization Engine (XIVE Gen1)
-==========================================================
+===========================================================
Device types supported:
- KVM_DEV_TYPE_XIVE POWER9 XIVE Interrupt Controller generation 1
+ - KVM_DEV_TYPE_XIVE POWER9 XIVE Interrupt Controller generation 1
This device acts as a VM interrupt controller. It provides the KVM
interface to configure the interrupt sources of a VM in the underlying
@@ -64,72 +67,100 @@
* Groups:
- 1. KVM_DEV_XIVE_GRP_CTRL
- Provides global controls on the device
+1. KVM_DEV_XIVE_GRP_CTRL
+ Provides global controls on the device
+
Attributes:
1.1 KVM_DEV_XIVE_RESET (write only)
Resets the interrupt controller configuration for sources and event
queues. To be used by kexec and kdump.
+
Errors: none
1.2 KVM_DEV_XIVE_EQ_SYNC (write only)
Sync all the sources and queues and mark the EQ pages dirty. This
to make sure that a consistent memory state is captured when
migrating the VM.
+
Errors: none
1.3 KVM_DEV_XIVE_NR_SERVERS (write only)
The kvm_device_attr.addr points to a __u32 value which is the number of
interrupt server numbers (ie, highest possible vcpu id plus one).
- Errors:
- -EINVAL: Value greater than KVM_MAX_VCPU_ID.
- -EFAULT: Invalid user pointer for attr->addr.
- -EBUSY: A vCPU is already connected to the device.
- 2. KVM_DEV_XIVE_GRP_SOURCE (write only)
- Initializes a new source in the XIVE device and mask it.
+ Errors:
+
+ ======= ==========================================
+ -EINVAL Value greater than KVM_MAX_VCPU_ID.
+ -EFAULT Invalid user pointer for attr->addr.
+ -EBUSY A vCPU is already connected to the device.
+ ======= ==========================================
+
+2. KVM_DEV_XIVE_GRP_SOURCE (write only)
+ Initializes a new source in the XIVE device and mask it.
+
Attributes:
Interrupt source number (64-bit)
- The kvm_device_attr.addr points to a __u64 value:
- bits: | 63 .... 2 | 1 | 0
- values: | unused | level | type
+
+ The kvm_device_attr.addr points to a __u64 value::
+
+ bits: | 63 .... 2 | 1 | 0
+ values: | unused | level | type
+
- type: 0:MSI 1:LSI
- level: assertion level in case of an LSI.
- Errors:
- -E2BIG: Interrupt source number is out of range
- -ENOMEM: Could not create a new source block
- -EFAULT: Invalid user pointer for attr->addr.
- -ENXIO: Could not allocate underlying HW interrupt
- 3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only)
- Configures source targeting
+ Errors:
+
+ ======= ==========================================
+ -E2BIG Interrupt source number is out of range
+ -ENOMEM Could not create a new source block
+ -EFAULT Invalid user pointer for attr->addr.
+ -ENXIO Could not allocate underlying HW interrupt
+ ======= ==========================================
+
+3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only)
+ Configures source targeting
+
Attributes:
Interrupt source number (64-bit)
- The kvm_device_attr.addr points to a __u64 value:
- bits: | 63 .... 33 | 32 | 31 .. 3 | 2 .. 0
- values: | eisn | mask | server | priority
+
+ The kvm_device_attr.addr points to a __u64 value::
+
+ bits: | 63 .... 33 | 32 | 31 .. 3 | 2 .. 0
+ values: | eisn | mask | server | priority
+
- priority: 0-7 interrupt priority level
- server: CPU number chosen to handle the interrupt
- mask: mask flag (unused)
- eisn: Effective Interrupt Source Number
- Errors:
- -ENOENT: Unknown source number
- -EINVAL: Not initialized source number
- -EINVAL: Invalid priority
- -EINVAL: Invalid CPU number.
- -EFAULT: Invalid user pointer for attr->addr.
- -ENXIO: CPU event queues not configured or configuration of the
- underlying HW interrupt failed
- -EBUSY: No CPU available to serve interrupt
- 4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write)
- Configures an event queue of a CPU
+ Errors:
+
+ ======= =======================================================
+ -ENOENT Unknown source number
+ -EINVAL Not initialized source number
+ -EINVAL Invalid priority
+ -EINVAL Invalid CPU number.
+ -EFAULT Invalid user pointer for attr->addr.
+ -ENXIO CPU event queues not configured or configuration of the
+ underlying HW interrupt failed
+ -EBUSY No CPU available to serve interrupt
+ ======= =======================================================
+
+4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write)
+ Configures an event queue of a CPU
+
Attributes:
EQ descriptor identifier (64-bit)
- The EQ descriptor identifier is a tuple (server, priority) :
- bits: | 63 .... 32 | 31 .. 3 | 2 .. 0
- values: | unused | server | priority
- The kvm_device_attr.addr points to :
+
+ The EQ descriptor identifier is a tuple (server, priority)::
+
+ bits: | 63 .... 32 | 31 .. 3 | 2 .. 0
+ values: | unused | server | priority
+
+ The kvm_device_attr.addr points to::
+
struct kvm_ppc_xive_eq {
__u32 flags;
__u32 qshift;
@@ -138,8 +169,9 @@
__u32 qindex;
__u8 pad[40];
};
+
- flags: queue flags
- KVM_XIVE_EQ_ALWAYS_NOTIFY (required)
+ KVM_XIVE_EQ_ALWAYS_NOTIFY (required)
forces notification without using the coalescing mechanism
provided by the XIVE END ESBs.
- qshift: queue size (power of 2)
@@ -147,22 +179,31 @@
- qtoggle: current queue toggle bit
- qindex: current queue index
- pad: reserved for future use
- Errors:
- -ENOENT: Invalid CPU number
- -EINVAL: Invalid priority
- -EINVAL: Invalid flags
- -EINVAL: Invalid queue size
- -EINVAL: Invalid queue address
- -EFAULT: Invalid user pointer for attr->addr.
- -EIO: Configuration of the underlying HW failed
- 5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only)
- Synchronize the source to flush event notifications
+ Errors:
+
+ ======= =========================================
+ -ENOENT Invalid CPU number
+ -EINVAL Invalid priority
+ -EINVAL Invalid flags
+ -EINVAL Invalid queue size
+ -EINVAL Invalid queue address
+ -EFAULT Invalid user pointer for attr->addr.
+ -EIO Configuration of the underlying HW failed
+ ======= =========================================
+
+5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only)
+ Synchronize the source to flush event notifications
+
Attributes:
Interrupt source number (64-bit)
+
Errors:
- -ENOENT: Unknown source number
- -EINVAL: Not initialized source number
+
+ ======= =============================
+ -ENOENT Unknown source number
+ -EINVAL Not initialized source number
+ ======= =============================
* VCPU state
@@ -175,11 +216,12 @@
as it synthesizes the priorities of the pending interrupts. We
capture a bit more to report debug information.
- KVM_REG_PPC_VP_STATE (2 * 64bits)
- bits: | 63 .... 32 | 31 .... 0 |
- values: | TIMA word0 | TIMA word1 |
- bits: | 127 .......... 64 |
- values: | unused |
+ KVM_REG_PPC_VP_STATE (2 * 64bits)::
+
+ bits: | 63 .... 32 | 31 .... 0 |
+ values: | TIMA word0 | TIMA word1 |
+ bits: | 127 .......... 64 |
+ values: | unused |
* Migration:
@@ -196,7 +238,7 @@
3. Capture the state of the source targeting, the EQs configuration
and the state of thread interrupt context registers.
- Restore is similar :
+ Restore is similar:
1. Restore the EQ configuration. As targeting depends on it.
2. Restore targeting
diff --git a/Documentation/virt/kvm/halt-polling.txt b/Documentation/virt/kvm/halt-polling.rst
similarity index 64%
rename from Documentation/virt/kvm/halt-polling.txt
rename to Documentation/virt/kvm/halt-polling.rst
index 4f791b1..4922e4a 100644
--- a/Documentation/virt/kvm/halt-polling.txt
+++ b/Documentation/virt/kvm/halt-polling.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
The KVM halt polling system
===========================
@@ -68,7 +71,8 @@
which come at an approximately constant rate, otherwise there will be constant
adjustment of the polling interval.
-[0] total block time: the time between when the halt polling function is
+[0] total block time:
+ the time between when the halt polling function is
invoked and a wakeup source received (irrespective of
whether the scheduler is invoked within that function).
@@ -81,31 +85,32 @@
parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the
powerpc kvm-hv case.
-Module Parameter | Description | Default Value
---------------------------------------------------------------------------------
-halt_poll_ns | The global max polling | KVM_HALT_POLL_NS_DEFAULT
- | interval which defines |
- | the ceiling value of the |
- | polling interval for | (per arch value)
- | each vcpu. |
---------------------------------------------------------------------------------
-halt_poll_ns_grow | The value by which the | 2
- | halt polling interval is |
- | multiplied in the |
- | grow_halt_poll_ns() |
- | function. |
---------------------------------------------------------------------------------
-halt_poll_ns_grow_start | The initial value to grow | 10000
- | to from zero in the |
- | grow_halt_poll_ns() |
- | function. |
---------------------------------------------------------------------------------
-halt_poll_ns_shrink | The value by which the | 0
- | halt polling interval is |
- | divided in the |
- | shrink_halt_poll_ns() |
- | function. |
---------------------------------------------------------------------------------
++-----------------------+---------------------------+-------------------------+
+|Module Parameter | Description | Default Value |
++-----------------------+---------------------------+-------------------------+
+|halt_poll_ns | The global max polling | KVM_HALT_POLL_NS_DEFAULT|
+| | interval which defines | |
+| | the ceiling value of the | |
+| | polling interval for | (per arch value) |
+| | each vcpu. | |
++-----------------------+---------------------------+-------------------------+
+|halt_poll_ns_grow | The value by which the | 2 |
+| | halt polling interval is | |
+| | multiplied in the | |
+| | grow_halt_poll_ns() | |
+| | function. | |
++-----------------------+---------------------------+-------------------------+
+|halt_poll_ns_grow_start| The initial value to grow | 10000 |
+| | to from zero in the | |
+| | grow_halt_poll_ns() | |
+| | function. | |
++-----------------------+---------------------------+-------------------------+
+|halt_poll_ns_shrink | The value by which the | 0 |
+| | halt polling interval is | |
+| | divided in the | |
+| | shrink_halt_poll_ns() | |
+| | function. | |
++-----------------------+---------------------------+-------------------------+
These module parameters can be set from the debugfs files in:
@@ -117,20 +122,19 @@
Further Notes
=============
-- Care should be taken when setting the halt_poll_ns module parameter as a
-large value has the potential to drive the cpu usage to 100% on a machine which
-would be almost entirely idle otherwise. This is because even if a guest has
-wakeups during which very little work is done and which are quite far apart, if
-the period is shorter than the global max polling interval (halt_poll_ns) then
-the host will always poll for the entire block time and thus cpu utilisation
-will go to 100%.
+- Care should be taken when setting the halt_poll_ns module parameter as a large value
+ has the potential to drive the cpu usage to 100% on a machine which would be almost
+ entirely idle otherwise. This is because even if a guest has wakeups during which very
+ little work is done and which are quite far apart, if the period is shorter than the
+ global max polling interval (halt_poll_ns) then the host will always poll for the
+ entire block time and thus cpu utilisation will go to 100%.
-- Halt polling essentially presents a trade off between power usage and latency
-and the module parameters should be used to tune the affinity for this. Idle
-cpu time is essentially converted to host kernel time with the aim of decreasing
-latency when entering the guest.
+- Halt polling essentially presents a trade off between power usage and latency and
+ the module parameters should be used to tune the affinity for this. Idle cpu time is
+ essentially converted to host kernel time with the aim of decreasing latency when
+ entering the guest.
-- Halt polling will only be conducted by the host when no other tasks are
-runnable on that cpu, otherwise the polling will cease immediately and
-schedule will be invoked to allow that other task to run. Thus this doesn't
-allow a guest to denial of service the cpu.
+- Halt polling will only be conducted by the host when no other tasks are runnable on
+ that cpu, otherwise the polling will cease immediately and schedule will be invoked to
+ allow that other task to run. Thus this doesn't allow a guest to denial of service the
+ cpu.
diff --git a/Documentation/virt/kvm/hypercalls.rst b/Documentation/virt/kvm/hypercalls.rst
new file mode 100644
index 0000000..dbaf207
--- /dev/null
+++ b/Documentation/virt/kvm/hypercalls.rst
@@ -0,0 +1,171 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================
+Linux KVM Hypercall
+===================
+
+X86:
+ KVM Hypercalls have a three-byte sequence of either the vmcall or the vmmcall
+ instruction. The hypervisor can replace it with instructions that are
+ guaranteed to be supported.
+
+ Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively.
+ The hypercall number should be placed in rax and the return value will be
+ placed in rax. No other registers will be clobbered unless explicitly stated
+ by the particular hypercall.
+
+S390:
+ R2-R7 are used for parameters 1-6. In addition, R1 is used for hypercall
+ number. The return value is written to R2.
+
+ S390 uses diagnose instruction as hypercall (0x500) along with hypercall
+ number in R1.
+
+ For further information on the S390 diagnose call as supported by KVM,
+ refer to Documentation/virt/kvm/s390-diag.txt.
+
+PowerPC:
+ It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers.
+ Return value is placed in R3.
+
+ KVM hypercalls uses 4 byte opcode, that are patched with 'hypercall-instructions'
+ property inside the device tree's /hypervisor node.
+ For more information refer to Documentation/virt/kvm/ppc-pv.txt
+
+MIPS:
+ KVM hypercalls use the HYPCALL instruction with code 0 and the hypercall
+ number in $2 (v0). Up to four arguments may be placed in $4-$7 (a0-a3) and
+ the return value is placed in $2 (v0).
+
+KVM Hypercalls Documentation
+============================
+
+The template for each hypercall is:
+1. Hypercall name.
+2. Architecture(s)
+3. Status (deprecated, obsolete, active)
+4. Purpose
+
+1. KVM_HC_VAPIC_POLL_IRQ
+------------------------
+
+:Architecture: x86
+:Status: active
+:Purpose: Trigger guest exit so that the host can check for pending
+ interrupts on reentry.
+
+2. KVM_HC_MMU_OP
+----------------
+
+:Architecture: x86
+:Status: deprecated.
+:Purpose: Support MMU operations such as writing to PTE,
+ flushing TLB, release PT.
+
+3. KVM_HC_FEATURES
+------------------
+
+:Architecture: PPC
+:Status: active
+:Purpose: Expose hypercall availability to the guest. On x86 platforms, cpuid
+ used to enumerate which hypercalls are available. On PPC, either
+ device tree based lookup ( which is also what EPAPR dictates)
+ OR KVM specific enumeration mechanism (which is this hypercall)
+ can be used.
+
+4. KVM_HC_PPC_MAP_MAGIC_PAGE
+----------------------------
+
+:Architecture: PPC
+:Status: active
+:Purpose: To enable communication between the hypervisor and guest there is a
+ shared page that contains parts of supervisor visible register state.
+ The guest can map this shared page to access its supervisor register
+ through memory using this hypercall.
+
+5. KVM_HC_KICK_CPU
+------------------
+
+:Architecture: x86
+:Status: active
+:Purpose: Hypercall used to wakeup a vcpu from HLT state
+:Usage example:
+ A vcpu of a paravirtualized guest that is busywaiting in guest
+ kernel mode for an event to occur (ex: a spinlock to become available) can
+ execute HLT instruction once it has busy-waited for more than a threshold
+ time-interval. Execution of HLT instruction would cause the hypervisor to put
+ the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the
+ same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall,
+ specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0)
+ is used in the hypercall for future use.
+
+
+6. KVM_HC_CLOCK_PAIRING
+-----------------------
+:Architecture: x86
+:Status: active
+:Purpose: Hypercall used to synchronize host and guest clocks.
+
+Usage:
+
+a0: guest physical address where host copies
+"struct kvm_clock_offset" structure.
+
+a1: clock_type, ATM only KVM_CLOCK_PAIRING_WALLCLOCK (0)
+is supported (corresponding to the host's CLOCK_REALTIME clock).
+
+ ::
+
+ struct kvm_clock_pairing {
+ __s64 sec;
+ __s64 nsec;
+ __u64 tsc;
+ __u32 flags;
+ __u32 pad[9];
+ };
+
+ Where:
+ * sec: seconds from clock_type clock.
+ * nsec: nanoseconds from clock_type clock.
+ * tsc: guest TSC value used to calculate sec/nsec pair
+ * flags: flags, unused (0) at the moment.
+
+The hypercall lets a guest compute a precise timestamp across
+host and guest. The guest can use the returned TSC value to
+compute the CLOCK_REALTIME for its clock, at the same instant.
+
+Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
+or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
+
+6. KVM_HC_SEND_IPI
+------------------
+
+:Architecture: x86
+:Status: active
+:Purpose: Send IPIs to multiple vCPUs.
+
+- a0: lower part of the bitmap of destination APIC IDs
+- a1: higher part of the bitmap of destination APIC IDs
+- a2: the lowest APIC ID in bitmap
+- a3: APIC ICR
+
+The hypercall lets a guest send multicast IPIs, with at most 128
+128 destinations per hypercall in 64-bit mode and 64 vCPUs per
+hypercall in 32-bit mode. The destinations are represented by a
+bitmap contained in the first two arguments (a0 and a1). Bit 0 of
+a0 corresponds to the APIC ID in the third argument (a2), bit 1
+corresponds to the APIC ID a2+1, and so on.
+
+Returns the number of CPUs to which the IPIs were delivered successfully.
+
+7. KVM_HC_SCHED_YIELD
+---------------------
+
+:Architecture: x86
+:Status: active
+:Purpose: Hypercall used to yield if the IPI target vCPU is preempted
+
+a0: destination APIC ID
+
+:Usage example: When sending a call-function IPI-many to vCPUs, yield if
+ any of the IPI target vCPUs was preempted.
diff --git a/Documentation/virt/kvm/hypercalls.txt b/Documentation/virt/kvm/hypercalls.txt
deleted file mode 100644
index 5f6d291..0000000
--- a/Documentation/virt/kvm/hypercalls.txt
+++ /dev/null
@@ -1,154 +0,0 @@
-Linux KVM Hypercall:
-===================
-X86:
- KVM Hypercalls have a three-byte sequence of either the vmcall or the vmmcall
- instruction. The hypervisor can replace it with instructions that are
- guaranteed to be supported.
-
- Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively.
- The hypercall number should be placed in rax and the return value will be
- placed in rax. No other registers will be clobbered unless explicitly stated
- by the particular hypercall.
-
-S390:
- R2-R7 are used for parameters 1-6. In addition, R1 is used for hypercall
- number. The return value is written to R2.
-
- S390 uses diagnose instruction as hypercall (0x500) along with hypercall
- number in R1.
-
- For further information on the S390 diagnose call as supported by KVM,
- refer to Documentation/virt/kvm/s390-diag.txt.
-
- PowerPC:
- It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers.
- Return value is placed in R3.
-
- KVM hypercalls uses 4 byte opcode, that are patched with 'hypercall-instructions'
- property inside the device tree's /hypervisor node.
- For more information refer to Documentation/virt/kvm/ppc-pv.txt
-
-MIPS:
- KVM hypercalls use the HYPCALL instruction with code 0 and the hypercall
- number in $2 (v0). Up to four arguments may be placed in $4-$7 (a0-a3) and
- the return value is placed in $2 (v0).
-
-KVM Hypercalls Documentation
-===========================
-The template for each hypercall is:
-1. Hypercall name.
-2. Architecture(s)
-3. Status (deprecated, obsolete, active)
-4. Purpose
-
-1. KVM_HC_VAPIC_POLL_IRQ
-------------------------
-Architecture: x86
-Status: active
-Purpose: Trigger guest exit so that the host can check for pending
-interrupts on reentry.
-
-2. KVM_HC_MMU_OP
-------------------------
-Architecture: x86
-Status: deprecated.
-Purpose: Support MMU operations such as writing to PTE,
-flushing TLB, release PT.
-
-3. KVM_HC_FEATURES
-------------------------
-Architecture: PPC
-Status: active
-Purpose: Expose hypercall availability to the guest. On x86 platforms, cpuid
-used to enumerate which hypercalls are available. On PPC, either device tree
-based lookup ( which is also what EPAPR dictates) OR KVM specific enumeration
-mechanism (which is this hypercall) can be used.
-
-4. KVM_HC_PPC_MAP_MAGIC_PAGE
-------------------------
-Architecture: PPC
-Status: active
-Purpose: To enable communication between the hypervisor and guest there is a
-shared page that contains parts of supervisor visible register state.
-The guest can map this shared page to access its supervisor register through
-memory using this hypercall.
-
-5. KVM_HC_KICK_CPU
-------------------------
-Architecture: x86
-Status: active
-Purpose: Hypercall used to wakeup a vcpu from HLT state
-Usage example : A vcpu of a paravirtualized guest that is busywaiting in guest
-kernel mode for an event to occur (ex: a spinlock to become available) can
-execute HLT instruction once it has busy-waited for more than a threshold
-time-interval. Execution of HLT instruction would cause the hypervisor to put
-the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the
-same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall,
-specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0)
-is used in the hypercall for future use.
-
-
-6. KVM_HC_CLOCK_PAIRING
-------------------------
-Architecture: x86
-Status: active
-Purpose: Hypercall used to synchronize host and guest clocks.
-Usage:
-
-a0: guest physical address where host copies
-"struct kvm_clock_offset" structure.
-
-a1: clock_type, ATM only KVM_CLOCK_PAIRING_WALLCLOCK (0)
-is supported (corresponding to the host's CLOCK_REALTIME clock).
-
- struct kvm_clock_pairing {
- __s64 sec;
- __s64 nsec;
- __u64 tsc;
- __u32 flags;
- __u32 pad[9];
- };
-
- Where:
- * sec: seconds from clock_type clock.
- * nsec: nanoseconds from clock_type clock.
- * tsc: guest TSC value used to calculate sec/nsec pair
- * flags: flags, unused (0) at the moment.
-
-The hypercall lets a guest compute a precise timestamp across
-host and guest. The guest can use the returned TSC value to
-compute the CLOCK_REALTIME for its clock, at the same instant.
-
-Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
-or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
-
-6. KVM_HC_SEND_IPI
-------------------------
-Architecture: x86
-Status: active
-Purpose: Send IPIs to multiple vCPUs.
-
-a0: lower part of the bitmap of destination APIC IDs
-a1: higher part of the bitmap of destination APIC IDs
-a2: the lowest APIC ID in bitmap
-a3: APIC ICR
-
-The hypercall lets a guest send multicast IPIs, with at most 128
-128 destinations per hypercall in 64-bit mode and 64 vCPUs per
-hypercall in 32-bit mode. The destinations are represented by a
-bitmap contained in the first two arguments (a0 and a1). Bit 0 of
-a0 corresponds to the APIC ID in the third argument (a2), bit 1
-corresponds to the APIC ID a2+1, and so on.
-
-Returns the number of CPUs to which the IPIs were delivered successfully.
-
-7. KVM_HC_SCHED_YIELD
-------------------------
-Architecture: x86
-Status: active
-Purpose: Hypercall used to yield if the IPI target vCPU is preempted
-
-a0: destination APIC ID
-
-Usage example: When sending a call-function IPI-many to vCPUs, yield if
-any of the IPI target vCPUs was preempted.
diff --git a/Documentation/virt/kvm/index.rst b/Documentation/virt/kvm/index.rst
index ada224a..774deae 100644
--- a/Documentation/virt/kvm/index.rst
+++ b/Documentation/virt/kvm/index.rst
@@ -7,6 +7,22 @@
.. toctree::
:maxdepth: 2
+ api
amd-memory-encryption
cpuid
+ halt-polling
+ hypercalls
+ locking
+ mmu
+ msr
+ nested-vmx
+ ppc-pv
+ s390-diag
+ timekeeping
vcpu-requests
+
+ review-checklist
+
+ arm/index
+
+ devices/index
diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
new file mode 100644
index 0000000..c02291b
--- /dev/null
+++ b/Documentation/virt/kvm/locking.rst
@@ -0,0 +1,243 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+KVM Lock Overview
+=================
+
+1. Acquisition Orders
+---------------------
+
+The acquisition orders for mutexes are as follows:
+
+- kvm->lock is taken outside vcpu->mutex
+
+- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
+
+- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
+ them together is quite rare.
+
+On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
+
+Everything else is a leaf: no other lock is taken inside the critical
+sections.
+
+2. Exception
+------------
+
+Fast page fault:
+
+Fast page fault is the fast path which fixes the guest page fault out of
+the mmu-lock on x86. Currently, the page fault can be fast in one of the
+following two cases:
+
+1. Access Tracking: The SPTE is not present, but it is marked for access
+ tracking i.e. the SPTE_SPECIAL_MASK is set. That means we need to
+ restore the saved R/X bits. This is described in more detail later below.
+
+2. Write-Protection: The SPTE is present and the fault is
+ caused by write-protect. That means we just need to change the W bit of
+ the spte.
+
+What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and
+SPTE_MMU_WRITEABLE bit on the spte:
+
+- SPTE_HOST_WRITEABLE means the gfn is writable on host.
+- SPTE_MMU_WRITEABLE means the gfn is writable on mmu. The bit is set when
+ the gfn is writable on guest mmu and it is not write-protected by shadow
+ page write-protection.
+
+On fast page fault path, we will use cmpxchg to atomically set the spte W
+bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, or
+restore the saved R/X bits if VMX_EPT_TRACK_ACCESS mask is set, or both. This
+is safe because whenever changing these bits can be detected by cmpxchg.
+
+But we need carefully check these cases:
+
+1) The mapping from gfn to pfn
+
+The mapping from gfn to pfn may be changed since we can only ensure the pfn
+is not changed during cmpxchg. This is a ABA problem, for example, below case
+will happen:
+
++------------------------------------------------------------------------+
+| At the beginning:: |
+| |
+| gpte = gfn1 |
+| gfn1 is mapped to pfn1 on host |
+| spte is the shadow page table entry corresponding with gpte and |
+| spte = pfn1 |
++------------------------------------------------------------------------+
+| On fast page fault path: |
++------------------------------------+-----------------------------------+
+| CPU 0: | CPU 1: |
++------------------------------------+-----------------------------------+
+| :: | |
+| | |
+| old_spte = *spte; | |
++------------------------------------+-----------------------------------+
+| | pfn1 is swapped out:: |
+| | |
+| | spte = 0; |
+| | |
+| | pfn1 is re-alloced for gfn2. |
+| | |
+| | gpte is changed to point to |
+| | gfn2 by the guest:: |
+| | |
+| | spte = pfn1; |
++------------------------------------+-----------------------------------+
+| :: |
+| |
+| if (cmpxchg(spte, old_spte, old_spte+W) |
+| mark_page_dirty(vcpu->kvm, gfn1) |
+| OOPS!!! |
++------------------------------------------------------------------------+
+
+We dirty-log for gfn1, that means gfn2 is lost in dirty-bitmap.
+
+For direct sp, we can easily avoid it since the spte of direct sp is fixed
+to gfn. For indirect sp, before we do cmpxchg, we call gfn_to_pfn_atomic()
+to pin gfn to pfn, because after gfn_to_pfn_atomic():
+
+- We have held the refcount of pfn that means the pfn can not be freed and
+ be reused for another gfn.
+- The pfn is writable that means it can not be shared between different gfns
+ by KSM.
+
+Then, we can ensure the dirty bitmaps is correctly set for a gfn.
+
+Currently, to simplify the whole things, we disable fast page fault for
+indirect shadow page.
+
+2) Dirty bit tracking
+
+In the origin code, the spte can be fast updated (non-atomically) if the
+spte is read-only and the Accessed bit has already been set since the
+Accessed bit and Dirty bit can not be lost.
+
+But it is not true after fast page fault since the spte can be marked
+writable between reading spte and updating spte. Like below case:
+
++------------------------------------------------------------------------+
+| At the beginning:: |
+| |
+| spte.W = 0 |
+| spte.Accessed = 1 |
++------------------------------------+-----------------------------------+
+| CPU 0: | CPU 1: |
++------------------------------------+-----------------------------------+
+| In mmu_spte_clear_track_bits():: | |
+| | |
+| old_spte = *spte; | |
+| | |
+| | |
+| /* 'if' condition is satisfied. */| |
+| if (old_spte.Accessed == 1 && | |
+| old_spte.W == 0) | |
+| spte = 0ull; | |
++------------------------------------+-----------------------------------+
+| | on fast page fault path:: |
+| | |
+| | spte.W = 1 |
+| | |
+| | memory write on the spte:: |
+| | |
+| | spte.Dirty = 1 |
++------------------------------------+-----------------------------------+
+| :: | |
+| | |
+| else | |
+| old_spte = xchg(spte, 0ull) | |
+| if (old_spte.Accessed == 1) | |
+| kvm_set_pfn_accessed(spte.pfn);| |
+| if (old_spte.Dirty == 1) | |
+| kvm_set_pfn_dirty(spte.pfn); | |
+| OOPS!!! | |
++------------------------------------+-----------------------------------+
+
+The Dirty bit is lost in this case.
+
+In order to avoid this kind of issue, we always treat the spte as "volatile"
+if it can be updated out of mmu-lock, see spte_has_volatile_bits(), it means,
+the spte is always atomically updated in this case.
+
+3) flush tlbs due to spte updated
+
+If the spte is updated from writable to readonly, we should flush all TLBs,
+otherwise rmap_write_protect will find a read-only spte, even though the
+writable spte might be cached on a CPU's TLB.
+
+As mentioned before, the spte can be updated to writable out of mmu-lock on
+fast page fault path, in order to easily audit the path, we see if TLBs need
+be flushed caused by this reason in mmu_spte_update() since this is a common
+function to update spte (present -> present).
+
+Since the spte is "volatile" if it can be updated out of mmu-lock, we always
+atomically update the spte, the race caused by fast page fault can be avoided,
+See the comments in spte_has_volatile_bits() and mmu_spte_update().
+
+Lockless Access Tracking:
+
+This is used for Intel CPUs that are using EPT but do not support the EPT A/D
+bits. In this case, when the KVM MMU notifier is called to track accesses to a
+page (via kvm_mmu_notifier_clear_flush_young), it marks the PTE as not-present
+by clearing the RWX bits in the PTE and storing the original R & X bits in
+some unused/ignored bits. In addition, the SPTE_SPECIAL_MASK is also set on the
+PTE (using the ignored bit 62). When the VM tries to access the page later on,
+a fault is generated and the fast page fault mechanism described above is used
+to atomically restore the PTE to a Present state. The W bit is not saved when
+the PTE is marked for access tracking and during restoration to the Present
+state, the W bit is set depending on whether or not it was a write access. If
+it wasn't, then the W bit will remain clear until a write access happens, at
+which time it will be set using the Dirty tracking mechanism described above.
+
+3. Reference
+------------
+
+:Name: kvm_lock
+:Type: mutex
+:Arch: any
+:Protects: - vm_list
+
+:Name: kvm_count_lock
+:Type: raw_spinlock_t
+:Arch: any
+:Protects: - hardware virtualization enable/disable
+:Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
+ migration.
+
+:Name: kvm_arch::tsc_write_lock
+:Type: raw_spinlock
+:Arch: x86
+:Protects: - kvm_arch::{last_tsc_write,last_tsc_nsec,last_tsc_offset}
+ - tsc offset in vmcb
+:Comment: 'raw' because updating the tsc offsets must not be preempted.
+
+:Name: kvm->mmu_lock
+:Type: spinlock_t
+:Arch: any
+:Protects: -shadow page/shadow tlb entry
+:Comment: it is a spinlock since it is used in mmu notifier.
+
+:Name: kvm->srcu
+:Type: srcu lock
+:Arch: any
+:Protects: - kvm->memslots
+ - kvm->buses
+:Comment: The srcu read lock must be held while accessing memslots (e.g.
+ when using gfn_to_* functions) and while accessing in-kernel
+ MMIO/PIO address->device structure mapping (kvm->buses).
+ The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
+ if it is needed by multiple functions.
+
+:Name: blocked_vcpu_on_cpu_lock
+:Type: spinlock_t
+:Arch: x86
+:Protects: blocked_vcpu_on_cpu
+:Comment: This is a per-CPU lock and it is used for VT-d posted-interrupts.
+ When VT-d posted-interrupts is supported and the VM has assigned
+ devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
+ protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues
+ wakeup notification event since external interrupts from the
+ assigned devices happens, we will find the vCPU on the list to
+ wakeup.
diff --git a/Documentation/virt/kvm/locking.txt b/Documentation/virt/kvm/locking.txt
deleted file mode 100644
index 635cd6e..0000000
--- a/Documentation/virt/kvm/locking.txt
+++ /dev/null
@@ -1,215 +0,0 @@
-KVM Lock Overview
-=================
-
-1. Acquisition Orders
----------------------
-
-The acquisition orders for mutexes are as follows:
-
-- kvm->lock is taken outside vcpu->mutex
-
-- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
-
-- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
- them together is quite rare.
-
-On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
-
-Everything else is a leaf: no other lock is taken inside the critical
-sections.
-
-2: Exception
-------------
-
-Fast page fault:
-
-Fast page fault is the fast path which fixes the guest page fault out of
-the mmu-lock on x86. Currently, the page fault can be fast in one of the
-following two cases:
-
-1. Access Tracking: The SPTE is not present, but it is marked for access
-tracking i.e. the SPTE_SPECIAL_MASK is set. That means we need to
-restore the saved R/X bits. This is described in more detail later below.
-
-2. Write-Protection: The SPTE is present and the fault is
-caused by write-protect. That means we just need to change the W bit of the
-spte.
-
-What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and
-SPTE_MMU_WRITEABLE bit on the spte:
-- SPTE_HOST_WRITEABLE means the gfn is writable on host.
-- SPTE_MMU_WRITEABLE means the gfn is writable on mmu. The bit is set when
- the gfn is writable on guest mmu and it is not write-protected by shadow
- page write-protection.
-
-On fast page fault path, we will use cmpxchg to atomically set the spte W
-bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, or
-restore the saved R/X bits if VMX_EPT_TRACK_ACCESS mask is set, or both. This
-is safe because whenever changing these bits can be detected by cmpxchg.
-
-But we need carefully check these cases:
-1): The mapping from gfn to pfn
-The mapping from gfn to pfn may be changed since we can only ensure the pfn
-is not changed during cmpxchg. This is a ABA problem, for example, below case
-will happen:
-
-At the beginning:
-gpte = gfn1
-gfn1 is mapped to pfn1 on host
-spte is the shadow page table entry corresponding with gpte and
-spte = pfn1
-
- VCPU 0 VCPU0
-on fast page fault path:
-
- old_spte = *spte;
- pfn1 is swapped out:
- spte = 0;
-
- pfn1 is re-alloced for gfn2.
-
- gpte is changed to point to
- gfn2 by the guest:
- spte = pfn1;
-
- if (cmpxchg(spte, old_spte, old_spte+W)
- mark_page_dirty(vcpu->kvm, gfn1)
- OOPS!!!
-
-We dirty-log for gfn1, that means gfn2 is lost in dirty-bitmap.
-
-For direct sp, we can easily avoid it since the spte of direct sp is fixed
-to gfn. For indirect sp, before we do cmpxchg, we call gfn_to_pfn_atomic()
-to pin gfn to pfn, because after gfn_to_pfn_atomic():
-- We have held the refcount of pfn that means the pfn can not be freed and
- be reused for another gfn.
-- The pfn is writable that means it can not be shared between different gfns
- by KSM.
-
-Then, we can ensure the dirty bitmaps is correctly set for a gfn.
-
-Currently, to simplify the whole things, we disable fast page fault for
-indirect shadow page.
-
-2): Dirty bit tracking
-In the origin code, the spte can be fast updated (non-atomically) if the
-spte is read-only and the Accessed bit has already been set since the
-Accessed bit and Dirty bit can not be lost.
-
-But it is not true after fast page fault since the spte can be marked
-writable between reading spte and updating spte. Like below case:
-
-At the beginning:
-spte.W = 0
-spte.Accessed = 1
-
- VCPU 0 VCPU0
-In mmu_spte_clear_track_bits():
-
- old_spte = *spte;
-
- /* 'if' condition is satisfied. */
- if (old_spte.Accessed == 1 &&
- old_spte.W == 0)
- spte = 0ull;
- on fast page fault path:
- spte.W = 1
- memory write on the spte:
- spte.Dirty = 1
-
-
- else
- old_spte = xchg(spte, 0ull)
-
-
- if (old_spte.Accessed == 1)
- kvm_set_pfn_accessed(spte.pfn);
- if (old_spte.Dirty == 1)
- kvm_set_pfn_dirty(spte.pfn);
- OOPS!!!
-
-The Dirty bit is lost in this case.
-
-In order to avoid this kind of issue, we always treat the spte as "volatile"
-if it can be updated out of mmu-lock, see spte_has_volatile_bits(), it means,
-the spte is always atomically updated in this case.
-
-3): flush tlbs due to spte updated
-If the spte is updated from writable to readonly, we should flush all TLBs,
-otherwise rmap_write_protect will find a read-only spte, even though the
-writable spte might be cached on a CPU's TLB.
-
-As mentioned before, the spte can be updated to writable out of mmu-lock on
-fast page fault path, in order to easily audit the path, we see if TLBs need
-be flushed caused by this reason in mmu_spte_update() since this is a common
-function to update spte (present -> present).
-
-Since the spte is "volatile" if it can be updated out of mmu-lock, we always
-atomically update the spte, the race caused by fast page fault can be avoided,
-See the comments in spte_has_volatile_bits() and mmu_spte_update().
-
-Lockless Access Tracking:
-
-This is used for Intel CPUs that are using EPT but do not support the EPT A/D
-bits. In this case, when the KVM MMU notifier is called to track accesses to a
-page (via kvm_mmu_notifier_clear_flush_young), it marks the PTE as not-present
-by clearing the RWX bits in the PTE and storing the original R & X bits in
-some unused/ignored bits. In addition, the SPTE_SPECIAL_MASK is also set on the
-PTE (using the ignored bit 62). When the VM tries to access the page later on,
-a fault is generated and the fast page fault mechanism described above is used
-to atomically restore the PTE to a Present state. The W bit is not saved when
-the PTE is marked for access tracking and during restoration to the Present
-state, the W bit is set depending on whether or not it was a write access. If
-it wasn't, then the W bit will remain clear until a write access happens, at
-which time it will be set using the Dirty tracking mechanism described above.
-
-3. Reference
-------------
-
-Name: kvm_lock
-Type: mutex
-Arch: any
-Protects: - vm_list
-
-Name: kvm_count_lock
-Type: raw_spinlock_t
-Arch: any
-Protects: - hardware virtualization enable/disable
-Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
- migration.
-
-Name: kvm_arch::tsc_write_lock
-Type: raw_spinlock
-Arch: x86
-Protects: - kvm_arch::{last_tsc_write,last_tsc_nsec,last_tsc_offset}
- - tsc offset in vmcb
-Comment: 'raw' because updating the tsc offsets must not be preempted.
-
-Name: kvm->mmu_lock
-Type: spinlock_t
-Arch: any
-Protects: -shadow page/shadow tlb entry
-Comment: it is a spinlock since it is used in mmu notifier.
-
-Name: kvm->srcu
-Type: srcu lock
-Arch: any
-Protects: - kvm->memslots
- - kvm->buses
-Comment: The srcu read lock must be held while accessing memslots (e.g.
- when using gfn_to_* functions) and while accessing in-kernel
- MMIO/PIO address->device structure mapping (kvm->buses).
- The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
- if it is needed by multiple functions.
-
-Name: blocked_vcpu_on_cpu_lock
-Type: spinlock_t
-Arch: x86
-Protects: blocked_vcpu_on_cpu
-Comment: This is a per-CPU lock and it is used for VT-d posted-interrupts.
- When VT-d posted-interrupts is supported and the VM has assigned
- devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
- protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues
- wakeup notification event since external interrupts from the
- assigned devices happens, we will find the vCPU on the list to
- wakeup.
diff --git a/Documentation/virt/kvm/mmu.txt b/Documentation/virt/kvm/mmu.rst
similarity index 94%
rename from Documentation/virt/kvm/mmu.txt
rename to Documentation/virt/kvm/mmu.rst
index dadb29e..6098188 100644
--- a/Documentation/virt/kvm/mmu.txt
+++ b/Documentation/virt/kvm/mmu.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
The x86 kvm shadow mmu
======================
@@ -7,27 +10,37 @@
The mmu code attempts to satisfy the following requirements:
-- correctness: the guest should not be able to determine that it is running
+- correctness:
+ the guest should not be able to determine that it is running
on an emulated mmu except for timing (we attempt to comply
with the specification, not emulate the characteristics of
a particular implementation such as tlb size)
-- security: the guest must not be able to touch host memory not assigned
+- security:
+ the guest must not be able to touch host memory not assigned
to it
-- performance: minimize the performance penalty imposed by the mmu
-- scaling: need to scale to large memory and large vcpu guests
-- hardware: support the full range of x86 virtualization hardware
-- integration: Linux memory management code must be in control of guest memory
+- performance:
+ minimize the performance penalty imposed by the mmu
+- scaling:
+ need to scale to large memory and large vcpu guests
+- hardware:
+ support the full range of x86 virtualization hardware
+- integration:
+ Linux memory management code must be in control of guest memory
so that swapping, page migration, page merging, transparent
hugepages, and similar features work without change
-- dirty tracking: report writes to guest memory to enable live migration
+- dirty tracking:
+ report writes to guest memory to enable live migration
and framebuffer-based displays
-- footprint: keep the amount of pinned kernel memory low (most memory
+- footprint:
+ keep the amount of pinned kernel memory low (most memory
should be shrinkable)
-- reliability: avoid multipage or GFP_ATOMIC allocations
+- reliability:
+ avoid multipage or GFP_ATOMIC allocations
Acronyms
========
+==== ====================================================================
pfn host page frame number
hpa host physical address
hva host virtual address
@@ -41,6 +54,7 @@
gpte guest pte (referring to gfns)
spte shadow pte (referring to pfns)
tdp two dimensional paging (vendor neutral term for NPT and EPT)
+==== ====================================================================
Virtual and real hardware supported
===================================
@@ -90,11 +104,13 @@
The mmu is driven by events, some from the guest, some from the host.
Guest generated events:
+
- writes to control registers (especially cr3)
- invlpg/invlpga instruction execution
- access to missing or protected translations
Host generated events:
+
- changes in the gpa->hpa translation (either through gpa->hva changes or
through hva->hpa changes)
- memory pressure (the shrinker)
@@ -117,16 +133,19 @@
The following table shows translations encoded by leaf ptes, with higher-level
translations in parentheses:
- Non-nested guests:
+ Non-nested guests::
+
nonpaging: gpa->hpa
paging: gva->gpa->hpa
paging, tdp: (gva->)gpa->hpa
- Nested guests:
+
+ Nested guests::
+
non-tdp: ngva->gpa->hpa (*)
tdp: (ngva->)ngpa->gpa->hpa
-(*) the guest hypervisor will encode the ngva->gpa translation into its page
- tables if npt is not present
+ (*) the guest hypervisor will encode the ngva->gpa translation into its page
+ tables if npt is not present
Shadow pages contain the following information:
role.level:
@@ -291,28 +310,41 @@
- if the RSV bit of the error code is set, the page fault is caused by guest
accessing MMIO and cached MMIO information is available.
+
- walk shadow page table
- check for valid generation number in the spte (see "Fast invalidation of
MMIO sptes" below)
- cache the information to vcpu->arch.mmio_gva, vcpu->arch.mmio_access and
vcpu->arch.mmio_gfn, and call the emulator
+
- If both P bit and R/W bit of error code are set, this could possibly
be handled as a "fast page fault" (fixed without taking the MMU lock). See
the description in Documentation/virt/kvm/locking.txt.
+
- if needed, walk the guest page tables to determine the guest translation
(gva->gpa or ngpa->gpa)
+
- if permissions are insufficient, reflect the fault back to the guest
+
- determine the host page
+
- if this is an mmio request, there is no host page; cache the info to
vcpu->arch.mmio_gva, vcpu->arch.mmio_access and vcpu->arch.mmio_gfn
+
- walk the shadow page table to find the spte for the translation,
instantiating missing intermediate page tables as necessary
+
- If this is an mmio request, cache the mmio info to the spte and set some
reserved bit on the spte (see callers of kvm_mmu_set_mmio_spte_mask)
+
- try to unsynchronize the page
+
- if successful, we can let the guest continue and modify the gpte
+
- emulate the instruction
+
- if failed, unshadow the page and let the guest continue
+
- update any translations that were modified by the instruction
invlpg handling:
@@ -324,10 +356,12 @@
Guest control register updates:
- mov to cr3
+
- look up new shadow roots
- synchronize newly reachable shadow pages
- mov to cr0/cr4/efer
+
- set up mmu context for new paging mode
- look up new shadow roots
- synchronize newly reachable shadow pages
@@ -358,6 +392,7 @@
(user write faults generate a #PF)
In the first case there are two additional complications:
+
- if CR4.SMEP is enabled: since we've turned the page into a kernel page,
the kernel may now execute it. We handle this by also setting spte.nx.
If we get a user fetch or read fault, we'll change spte.u=1 and
@@ -446,4 +481,3 @@
- NPT presentation from KVM Forum 2008
http://www.linux-kvm.org/images/c/c8/KvmForum2008%24kdf2008_21.pdf
-
diff --git a/Documentation/virt/kvm/msr.txt b/Documentation/virt/kvm/msr.rst
similarity index 74%
rename from Documentation/virt/kvm/msr.txt
rename to Documentation/virt/kvm/msr.rst
index df1f433..3389203 100644
--- a/Documentation/virt/kvm/msr.txt
+++ b/Documentation/virt/kvm/msr.rst
@@ -1,6 +1,10 @@
-KVM-specific MSRs.
-Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
-=====================================================
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+KVM-specific MSRs
+=================
+
+:Author: Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
KVM makes use of some custom MSRs to service some requests.
@@ -9,34 +13,39 @@
but they are deprecated and their use is discouraged.
Custom MSR list
---------
+---------------
The current supported Custom MSR list is:
-MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00
+MSR_KVM_WALL_CLOCK_NEW:
+ 0x4b564d00
- data: 4-byte alignment physical address of a memory area which must be
+data:
+ 4-byte alignment physical address of a memory area which must be
in guest RAM. This memory is expected to hold a copy of the following
- structure:
+ structure::
- struct pvclock_wall_clock {
+ struct pvclock_wall_clock {
u32 version;
u32 sec;
u32 nsec;
- } __attribute__((__packed__));
+ } __attribute__((__packed__));
whose data will be filled in by the hypervisor. The hypervisor is only
guaranteed to update this data at the moment of MSR write.
Users that want to reliably query this information more than once have
to write more than once to this MSR. Fields have the following meanings:
- version: guest has to check version before and after grabbing
+ version:
+ guest has to check version before and after grabbing
time information and check that they are both equal and even.
An odd version indicates an in-progress update.
- sec: number of seconds for wallclock at time of boot.
+ sec:
+ number of seconds for wallclock at time of boot.
- nsec: number of nanoseconds for wallclock at time of boot.
+ nsec:
+ number of nanoseconds for wallclock at time of boot.
In order to get the current wallclock time, the system_time from
MSR_KVM_SYSTEM_TIME_NEW needs to be added.
@@ -47,13 +56,15 @@
Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid
leaf prior to usage.
-MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
+MSR_KVM_SYSTEM_TIME_NEW:
+ 0x4b564d01
- data: 4-byte aligned physical address of a memory area which must be in
+data:
+ 4-byte aligned physical address of a memory area which must be in
guest RAM, plus an enable bit in bit 0. This memory is expected to hold
- a copy of the following structure:
+ a copy of the following structure::
- struct pvclock_vcpu_time_info {
+ struct pvclock_vcpu_time_info {
u32 version;
u32 pad0;
u64 tsc_timestamp;
@@ -62,7 +73,7 @@
s8 tsc_shift;
u8 flags;
u8 pad[2];
- } __attribute__((__packed__)); /* 32 bytes */
+ } __attribute__((__packed__)); /* 32 bytes */
whose data will be filled in by the hypervisor periodically. Only one
write, or registration, is needed for each VCPU. The interval between
@@ -72,23 +83,28 @@
Fields have the following meanings:
- version: guest has to check version before and after grabbing
+ version:
+ guest has to check version before and after grabbing
time information and check that they are both equal and even.
An odd version indicates an in-progress update.
- tsc_timestamp: the tsc value at the current VCPU at the time
+ tsc_timestamp:
+ the tsc value at the current VCPU at the time
of the update of this structure. Guests can subtract this value
from current tsc to derive a notion of elapsed time since the
structure update.
- system_time: a host notion of monotonic time, including sleep
+ system_time:
+ a host notion of monotonic time, including sleep
time at the time this structure was last updated. Unit is
nanoseconds.
- tsc_to_system_mul: multiplier to be used when converting
+ tsc_to_system_mul:
+ multiplier to be used when converting
tsc-related quantity to nanoseconds
- tsc_shift: shift to be used when converting tsc-related
+ tsc_shift:
+ shift to be used when converting tsc-related
quantity to nanoseconds. This shift will ensure that
multiplication with tsc_to_system_mul does not overflow.
A positive value denotes a left shift, a negative value
@@ -96,7 +112,7 @@
The conversion from tsc to nanoseconds involves an additional
right shift by 32 bits. With this information, guests can
- derive per-CPU time by doing:
+ derive per-CPU time by doing::
time = (current_tsc - tsc_timestamp)
if (tsc_shift >= 0)
@@ -106,29 +122,34 @@
time = (time * tsc_to_system_mul) >> 32
time = time + system_time
- flags: bits in this field indicate extended capabilities
+ flags:
+ bits in this field indicate extended capabilities
coordinated between the guest and the hypervisor. Availability
of specific flags has to be checked in 0x40000001 cpuid leaf.
Current flags are:
- flag bit | cpuid bit | meaning
- -------------------------------------------------------------
- | | time measures taken across
- 0 | 24 | multiple cpus are guaranteed to
- | | be monotonic
- -------------------------------------------------------------
- | | guest vcpu has been paused by
- 1 | N/A | the host
- | | See 4.70 in api.txt
- -------------------------------------------------------------
+
+ +-----------+--------------+----------------------------------+
+ | flag bit | cpuid bit | meaning |
+ +-----------+--------------+----------------------------------+
+ | | | time measures taken across |
+ | 0 | 24 | multiple cpus are guaranteed to |
+ | | | be monotonic |
+ +-----------+--------------+----------------------------------+
+ | | | guest vcpu has been paused by |
+ | 1 | N/A | the host |
+ | | | See 4.70 in api.txt |
+ +-----------+--------------+----------------------------------+
Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid
leaf prior to usage.
-MSR_KVM_WALL_CLOCK: 0x11
+MSR_KVM_WALL_CLOCK:
+ 0x11
- data and functioning: same as MSR_KVM_WALL_CLOCK_NEW. Use that instead.
+data and functioning:
+ same as MSR_KVM_WALL_CLOCK_NEW. Use that instead.
This MSR falls outside the reserved KVM range and may be removed in the
future. Its usage is deprecated.
@@ -136,9 +157,11 @@
Availability of this MSR must be checked via bit 0 in 0x4000001 cpuid
leaf prior to usage.
-MSR_KVM_SYSTEM_TIME: 0x12
+MSR_KVM_SYSTEM_TIME:
+ 0x12
- data and functioning: same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead.
+data and functioning:
+ same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead.
This MSR falls outside the reserved KVM range and may be removed in the
future. Its usage is deprecated.
@@ -146,7 +169,7 @@
Availability of this MSR must be checked via bit 0 in 0x4000001 cpuid
leaf prior to usage.
- The suggested algorithm for detecting kvmclock presence is then:
+ The suggested algorithm for detecting kvmclock presence is then::
if (!kvm_para_available()) /* refer to cpuid.txt */
return NON_PRESENT;
@@ -163,8 +186,11 @@
} else
return NON_PRESENT;
-MSR_KVM_ASYNC_PF_EN: 0x4b564d02
- data: Bits 63-6 hold 64-byte aligned physical address of a
+MSR_KVM_ASYNC_PF_EN:
+ 0x4b564d02
+
+data:
+ Bits 63-6 hold 64-byte aligned physical address of a
64 byte memory area which must be in guest RAM and must be
zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
when asynchronous page faults are enabled on the vcpu 0 when
@@ -200,20 +226,22 @@
Currently type 2 APF will be always delivered on the same vcpu as
type 1 was, but guest should not rely on that.
-MSR_KVM_STEAL_TIME: 0x4b564d03
+MSR_KVM_STEAL_TIME:
+ 0x4b564d03
- data: 64-byte alignment physical address of a memory area which must be
+data:
+ 64-byte alignment physical address of a memory area which must be
in guest RAM, plus an enable bit in bit 0. This memory is expected to
- hold a copy of the following structure:
+ hold a copy of the following structure::
- struct kvm_steal_time {
+ struct kvm_steal_time {
__u64 steal;
__u32 version;
__u32 flags;
__u8 preempted;
__u8 u8_pad[3];
__u32 pad[11];
- }
+ }
whose data will be filled in by the hypervisor periodically. Only one
write, or registration, is needed for each VCPU. The interval between
@@ -224,25 +252,32 @@
Fields have the following meanings:
- version: a sequence counter. In other words, guest has to check
+ version:
+ a sequence counter. In other words, guest has to check
this field before and after grabbing time information and make
sure they are both equal and even. An odd version indicates an
in-progress update.
- flags: At this point, always zero. May be used to indicate
+ flags:
+ At this point, always zero. May be used to indicate
changes in this structure in the future.
- steal: the amount of time in which this vCPU did not run, in
+ steal:
+ the amount of time in which this vCPU did not run, in
nanoseconds. Time during which the vcpu is idle, will not be
reported as steal time.
- preempted: indicate the vCPU who owns this struct is running or
+ preempted:
+ indicate the vCPU who owns this struct is running or
not. Non-zero values mean the vCPU has been preempted. Zero
means the vCPU is not preempted. NOTE, it is always zero if the
the hypervisor doesn't support this field.
-MSR_KVM_EOI_EN: 0x4b564d04
- data: Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0
+MSR_KVM_EOI_EN:
+ 0x4b564d04
+
+data:
+ Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0
when disabled. Bit 1 is reserved and must be zero. When PV end of
interrupt is enabled (bit 0 set), bits 63-2 hold a 4-byte aligned
physical address of a 4 byte memory area which must be in guest RAM and
@@ -274,11 +309,13 @@
clear it using a single CPU instruction, such as test and clear, or
compare and exchange.
-MSR_KVM_POLL_CONTROL: 0x4b564d05
+MSR_KVM_POLL_CONTROL:
+ 0x4b564d05
+
Control host-side polling.
- data: Bit 0 enables (1) or disables (0) host-side HLT polling logic.
+data:
+ Bit 0 enables (1) or disables (0) host-side HLT polling logic.
KVM guests can request the host not to poll on HLT, for example if
they are performing polling themselves.
-
diff --git a/Documentation/virt/kvm/nested-vmx.txt b/Documentation/virt/kvm/nested-vmx.rst
similarity index 89%
rename from Documentation/virt/kvm/nested-vmx.txt
rename to Documentation/virt/kvm/nested-vmx.rst
index 97eb135..592b0ab 100644
--- a/Documentation/virt/kvm/nested-vmx.txt
+++ b/Documentation/virt/kvm/nested-vmx.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========
Nested VMX
==========
@@ -41,9 +44,9 @@
emulated CPU type (qemu64) does not list the "VMX" CPU feature, so it must be
explicitly enabled, by giving qemu one of the following options:
- -cpu host (emulated CPU has all features of the real CPU)
+ - cpu host (emulated CPU has all features of the real CPU)
- -cpu qemu64,+vmx (add just the vmx feature to a named CPU type)
+ - cpu qemu64,+vmx (add just the vmx feature to a named CPU type)
ABIs
@@ -75,6 +78,8 @@
VMCS12_REVISION (from vmx.c) should be changed if struct vmcs12 or its inner
struct shadow_vmcs is ever changed.
+::
+
typedef u64 natural_width;
struct __packed vmcs12 {
/* According to the Intel spec, a VMCS region must start with
@@ -220,21 +225,21 @@
-------
These patches were written by:
- Abel Gordon, abelg <at> il.ibm.com
- Nadav Har'El, nyh <at> il.ibm.com
- Orit Wasserman, oritw <at> il.ibm.com
- Ben-Ami Yassor, benami <at> il.ibm.com
- Muli Ben-Yehuda, muli <at> il.ibm.com
+ - Abel Gordon, abelg <at> il.ibm.com
+ - Nadav Har'El, nyh <at> il.ibm.com
+ - Orit Wasserman, oritw <at> il.ibm.com
+ - Ben-Ami Yassor, benami <at> il.ibm.com
+ - Muli Ben-Yehuda, muli <at> il.ibm.com
With contributions by:
- Anthony Liguori, aliguori <at> us.ibm.com
- Mike Day, mdday <at> us.ibm.com
- Michael Factor, factor <at> il.ibm.com
- Zvi Dubitzky, dubi <at> il.ibm.com
+ - Anthony Liguori, aliguori <at> us.ibm.com
+ - Mike Day, mdday <at> us.ibm.com
+ - Michael Factor, factor <at> il.ibm.com
+ - Zvi Dubitzky, dubi <at> il.ibm.com
And valuable reviews by:
- Avi Kivity, avi <at> redhat.com
- Gleb Natapov, gleb <at> redhat.com
- Marcelo Tosatti, mtosatti <at> redhat.com
- Kevin Tian, kevin.tian <at> intel.com
- and others.
+ - Avi Kivity, avi <at> redhat.com
+ - Gleb Natapov, gleb <at> redhat.com
+ - Marcelo Tosatti, mtosatti <at> redhat.com
+ - Kevin Tian, kevin.tian <at> intel.com
+ - and others.
diff --git a/Documentation/virt/kvm/ppc-pv.txt b/Documentation/virt/kvm/ppc-pv.rst
similarity index 90%
rename from Documentation/virt/kvm/ppc-pv.txt
rename to Documentation/virt/kvm/ppc-pv.rst
index e26115c..5fdb907 100644
--- a/Documentation/virt/kvm/ppc-pv.txt
+++ b/Documentation/virt/kvm/ppc-pv.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================================
The PPC KVM paravirtual interface
=================================
@@ -34,8 +37,9 @@
The parameters are as follows:
+ ======== ================ ================
Register IN OUT
-
+ ======== ================ ================
r0 - volatile
r3 1st parameter Return code
r4 2nd parameter 1st output value
@@ -47,6 +51,7 @@
r10 8th parameter 7th output value
r11 hypercall number 8th output value
r12 - volatile
+ ======== ================ ================
Hypercall definitions are shared in generic code, so the same hypercall numbers
apply for x86 and powerpc alike with the exception that each KVM hypercall
@@ -54,11 +59,13 @@
Return codes can be as follows:
+ ==== =========================
Code Meaning
-
+ ==== =========================
0 Success
12 Hypercall not implemented
<0 Error
+ ==== =========================
The magic page
==============
@@ -72,7 +79,7 @@
MMU is enabled. The second parameter indicates the address in real mode, if
applicable to the target. For now, we always map the page to -4096. This way we
can access it using absolute load and store functions. The following
-instruction reads the first field of the magic page:
+instruction reads the first field of the magic page::
ld rX, -4096(0)
@@ -93,8 +100,10 @@
The following enhancements to the magic page are currently available:
+ ============================ =======================================
KVM_MAGIC_FEAT_SR Maps SR registers r/w in the magic page
KVM_MAGIC_FEAT_MAS0_TO_SPRG7 Maps MASn, ESR, PIR and high SPRGs
+ ============================ =======================================
For enhanced features in the magic page, please check for the existence of the
feature before using them!
@@ -121,8 +130,8 @@
The following bits are safe to be set inside the guest:
- MSR_EE
- MSR_RI
+ - MSR_EE
+ - MSR_RI
If any other bit changes in the MSR, please still use mtmsr(d).
@@ -138,9 +147,9 @@
also act on the shared page. So calling privileged instructions still works as
before.
+======================= ================================
From To
-==== ==
-
+======================= ================================
mfmsr rX ld rX, magic_page->msr
mfsprg rX, 0 ld rX, magic_page->sprg0
mfsprg rX, 1 ld rX, magic_page->sprg1
@@ -173,7 +182,7 @@
[BookE only]
wrteei [0|1] b <special wrteei section>
-
+======================= ================================
Some instructions require more logic to determine what's going on than a load
or store instruction can deliver. To enable patching of those, we keep some
@@ -191,6 +200,7 @@
Hypercall ABIs in KVM on PowerPC
=================================
+
1) KVM hypercalls (ePAPR)
These are ePAPR compliant hypercall implementation (mentioned above). Even
diff --git a/Documentation/virt/kvm/review-checklist.txt b/Documentation/virt/kvm/review-checklist.rst
similarity index 95%
rename from Documentation/virt/kvm/review-checklist.txt
rename to Documentation/virt/kvm/review-checklist.rst
index 499af49..1f86a9d3 100644
--- a/Documentation/virt/kvm/review-checklist.txt
+++ b/Documentation/virt/kvm/review-checklist.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================
Review checklist for kvm patches
================================
diff --git a/Documentation/virt/kvm/s390-diag.txt b/Documentation/virt/kvm/s390-diag.rst
similarity index 89%
rename from Documentation/virt/kvm/s390-diag.txt
rename to Documentation/virt/kvm/s390-diag.rst
index 7c52e5f..eaac486 100644
--- a/Documentation/virt/kvm/s390-diag.txt
+++ b/Documentation/virt/kvm/s390-diag.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============================
The s390 DIAGNOSE call on KVM
=============================
@@ -16,12 +19,12 @@
all supported DIAGNOSE calls need to be handled by either KVM or its
userspace.
-All DIAGNOSE calls supported by KVM use the RS-a format:
+All DIAGNOSE calls supported by KVM use the RS-a format::
---------------------------------------
-| '83' | R1 | R3 | B2 | D2 |
---------------------------------------
-0 8 12 16 20 31
+ --------------------------------------
+ | '83' | R1 | R3 | B2 | D2 |
+ --------------------------------------
+ 0 8 12 16 20 31
The second-operand address (obtained by the base/displacement calculation)
is not used to address data. Instead, bits 48-63 of this address specify
diff --git a/Documentation/virt/kvm/timekeeping.txt b/Documentation/virt/kvm/timekeeping.rst
similarity index 85%
rename from Documentation/virt/kvm/timekeeping.txt
rename to Documentation/virt/kvm/timekeeping.rst
index 76808a1..21ae7ef 100644
--- a/Documentation/virt/kvm/timekeeping.txt
+++ b/Documentation/virt/kvm/timekeeping.rst
@@ -1,17 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
- Timekeeping Virtualization for X86-Based Architectures
+======================================================
+Timekeeping Virtualization for X86-Based Architectures
+======================================================
- Zachary Amsden <zamsden@redhat.com>
- Copyright (c) 2010, Red Hat. All rights reserved.
+:Author: Zachary Amsden <zamsden@redhat.com>
+:Copyright: (c) 2010, Red Hat. All rights reserved.
-1) Overview
-2) Timing Devices
-3) TSC Hardware
-4) Virtualization Problems
+.. Contents
-=========================================================================
+ 1) Overview
+ 2) Timing Devices
+ 3) TSC Hardware
+ 4) Virtualization Problems
-1) Overview
+1. Overview
+===========
One of the most complicated parts of the X86 platform, and specifically,
the virtualization of this platform is the plethora of timing devices available
@@ -27,15 +31,15 @@
timekeeping which may be difficult to find elsewhere, specifically,
information relevant to KVM and hardware-based virtualization.
-=========================================================================
-
-2) Timing Devices
+2. Timing Devices
+=================
First we discuss the basic hardware devices available. TSC and the related
KVM clock are special enough to warrant a full exposition and are described in
the following section.
-2.1) i8254 - PIT
+2.1. i8254 - PIT
+----------------
One of the first timer devices available is the programmable interrupt timer,
or PIT. The PIT has a fixed frequency 1.193182 MHz base clock and three
@@ -50,13 +54,13 @@
using single or multiple byte access to the I/O ports. There are 6 modes
available, but not all modes are available to all timers, as only timer 2
has a connected gate input, required for modes 1 and 5. The gate line is
-controlled by port 61h, bit 0, as illustrated in the following diagram.
+controlled by port 61h, bit 0, as illustrated in the following diagram::
- -------------- ----------------
-| | | |
-| 1.1932 MHz |---------->| CLOCK OUT | ---------> IRQ 0
-| Clock | | | |
- -------------- | +->| GATE TIMER 0 |
+ -------------- ----------------
+ | | | |
+ | 1.1932 MHz|---------->| CLOCK OUT | ---------> IRQ 0
+ | Clock | | | |
+ -------------- | +->| GATE TIMER 0 |
| ----------------
|
| ----------------
@@ -70,29 +74,33 @@
| | |
|------>| CLOCK OUT | ---------> Port 61h, bit 5
| | |
-Port 61h, bit 0 ---------->| GATE TIMER 2 | \_.---- ____
+ Port 61h, bit 0 -------->| GATE TIMER 2 | \_.---- ____
---------------- _| )--|LPF|---Speaker
/ *---- \___/
-Port 61h, bit 1 -----------------------------------/
+ Port 61h, bit 1 ---------------------------------/
The timer modes are now described.
-Mode 0: Single Timeout. This is a one-shot software timeout that counts down
+Mode 0: Single Timeout.
+ This is a one-shot software timeout that counts down
when the gate is high (always true for timers 0 and 1). When the count
reaches zero, the output goes high.
-Mode 1: Triggered One-shot. The output is initially set high. When the gate
+Mode 1: Triggered One-shot.
+ The output is initially set high. When the gate
line is set high, a countdown is initiated (which does not stop if the gate is
lowered), during which the output is set low. When the count reaches zero,
the output goes high.
-Mode 2: Rate Generator. The output is initially set high. When the countdown
+Mode 2: Rate Generator.
+ The output is initially set high. When the countdown
reaches 1, the output goes low for one count and then returns high. The value
is reloaded and the countdown automatically resumes. If the gate line goes
low, the count is halted. If the output is low when the gate is lowered, the
output automatically goes high (this only affects timer 2).
-Mode 3: Square Wave. This generates a high / low square wave. The count
+Mode 3: Square Wave.
+ This generates a high / low square wave. The count
determines the length of the pulse, which alternates between high and low
when zero is reached. The count only proceeds when gate is high and is
automatically reloaded on reaching zero. The count is decremented twice at
@@ -103,12 +111,14 @@
values are not observed when reading. This is the intended mode for timer 2,
which generates sine-like tones by low-pass filtering the square wave output.
-Mode 4: Software Strobe. After programming this mode and loading the counter,
+Mode 4: Software Strobe.
+ After programming this mode and loading the counter,
the output remains high until the counter reaches zero. Then the output
goes low for 1 clock cycle and returns high. The counter is not reloaded.
Counting only occurs when gate is high.
-Mode 5: Hardware Strobe. After programming and loading the counter, the
+Mode 5: Hardware Strobe.
+ After programming and loading the counter, the
output remains high. When the gate is raised, a countdown is initiated
(which does not stop if the gate is lowered). When the counter reaches zero,
the output goes low for 1 clock cycle and then returns high. The counter is
@@ -118,49 +128,49 @@
command port, 0x43 is used to set the counter and mode for each of the three
timers.
-PIT commands, issued to port 0x43, using the following bit encoding:
+PIT commands, issued to port 0x43, using the following bit encoding::
-Bit 7-4: Command (See table below)
-Bit 3-1: Mode (000 = Mode 0, 101 = Mode 5, 11X = undefined)
-Bit 0 : Binary (0) / BCD (1)
+ Bit 7-4: Command (See table below)
+ Bit 3-1: Mode (000 = Mode 0, 101 = Mode 5, 11X = undefined)
+ Bit 0 : Binary (0) / BCD (1)
-Command table:
+Command table::
-0000 - Latch Timer 0 count for port 0x40
+ 0000 - Latch Timer 0 count for port 0x40
sample and hold the count to be read in port 0x40;
additional commands ignored until counter is read;
mode bits ignored.
-0001 - Set Timer 0 LSB mode for port 0x40
+ 0001 - Set Timer 0 LSB mode for port 0x40
set timer to read LSB only and force MSB to zero;
mode bits set timer mode
-0010 - Set Timer 0 MSB mode for port 0x40
+ 0010 - Set Timer 0 MSB mode for port 0x40
set timer to read MSB only and force LSB to zero;
mode bits set timer mode
-0011 - Set Timer 0 16-bit mode for port 0x40
+ 0011 - Set Timer 0 16-bit mode for port 0x40
set timer to read / write LSB first, then MSB;
mode bits set timer mode
-0100 - Latch Timer 1 count for port 0x41 - as described above
-0101 - Set Timer 1 LSB mode for port 0x41 - as described above
-0110 - Set Timer 1 MSB mode for port 0x41 - as described above
-0111 - Set Timer 1 16-bit mode for port 0x41 - as described above
+ 0100 - Latch Timer 1 count for port 0x41 - as described above
+ 0101 - Set Timer 1 LSB mode for port 0x41 - as described above
+ 0110 - Set Timer 1 MSB mode for port 0x41 - as described above
+ 0111 - Set Timer 1 16-bit mode for port 0x41 - as described above
-1000 - Latch Timer 2 count for port 0x42 - as described above
-1001 - Set Timer 2 LSB mode for port 0x42 - as described above
-1010 - Set Timer 2 MSB mode for port 0x42 - as described above
-1011 - Set Timer 2 16-bit mode for port 0x42 as described above
+ 1000 - Latch Timer 2 count for port 0x42 - as described above
+ 1001 - Set Timer 2 LSB mode for port 0x42 - as described above
+ 1010 - Set Timer 2 MSB mode for port 0x42 - as described above
+ 1011 - Set Timer 2 16-bit mode for port 0x42 as described above
-1101 - General counter latch
+ 1101 - General counter latch
Latch combination of counters into corresponding ports
Bit 3 = Counter 2
Bit 2 = Counter 1
Bit 1 = Counter 0
Bit 0 = Unused
-1110 - Latch timer status
+ 1110 - Latch timer status
Latch combination of counter mode into corresponding ports
Bit 3 = Counter 2
Bit 2 = Counter 1
@@ -177,7 +187,8 @@
Bit 3-1 = Mode
Bit 0 = Binary (0) / BCD mode (1)
-2.2) RTC
+2.2. RTC
+--------
The second device which was available in the original PC was the MC146818 real
time clock. The original device is now obsolete, and usually emulated by the
@@ -201,21 +212,21 @@
The clock uses a 32.768kHz crystal, so bits 6-4 of register A should be
programmed to a 32kHz divider if the RTC is to count seconds.
-This is the RAM map originally used for the RTC/CMOS:
+This is the RAM map originally used for the RTC/CMOS::
-Location Size Description
-------------------------------------------
-00h byte Current second (BCD)
-01h byte Seconds alarm (BCD)
-02h byte Current minute (BCD)
-03h byte Minutes alarm (BCD)
-04h byte Current hour (BCD)
-05h byte Hours alarm (BCD)
-06h byte Current day of week (BCD)
-07h byte Current day of month (BCD)
-08h byte Current month (BCD)
-09h byte Current year (BCD)
-0Ah byte Register A
+ Location Size Description
+ ------------------------------------------
+ 00h byte Current second (BCD)
+ 01h byte Seconds alarm (BCD)
+ 02h byte Current minute (BCD)
+ 03h byte Minutes alarm (BCD)
+ 04h byte Current hour (BCD)
+ 05h byte Hours alarm (BCD)
+ 06h byte Current day of week (BCD)
+ 07h byte Current day of month (BCD)
+ 08h byte Current month (BCD)
+ 09h byte Current year (BCD)
+ 0Ah byte Register A
bit 7 = Update in progress
bit 6-4 = Divider for clock
000 = 4.194 MHz
@@ -234,7 +245,7 @@
1101 = 125 mS
1110 = 250 mS
1111 = 500 mS
-0Bh byte Register B
+ 0Bh byte Register B
bit 7 = Run (0) / Halt (1)
bit 6 = Periodic interrupt enable
bit 5 = Alarm interrupt enable
@@ -243,19 +254,20 @@
bit 2 = BCD calendar (0) / Binary (1)
bit 1 = 12-hour mode (0) / 24-hour mode (1)
bit 0 = 0 (DST off) / 1 (DST enabled)
-OCh byte Register C (read only)
+ OCh byte Register C (read only)
bit 7 = interrupt request flag (IRQF)
bit 6 = periodic interrupt flag (PF)
bit 5 = alarm interrupt flag (AF)
bit 4 = update interrupt flag (UF)
bit 3-0 = reserved
-ODh byte Register D (read only)
+ ODh byte Register D (read only)
bit 7 = RTC has power
bit 6-0 = reserved
-32h byte Current century BCD (*)
+ 32h byte Current century BCD (*)
(*) location vendor specific and now determined from ACPI global tables
-2.3) APIC
+2.3. APIC
+---------
On Pentium and later processors, an on-board timer is available to each CPU
as part of the Advanced Programmable Interrupt Controller. The APIC is
@@ -276,7 +288,8 @@
of one-shot or periodic operation, and is based on the bus clock divided down
by the programmable divider register.
-2.4) HPET
+2.4. HPET
+---------
HPET is quite complex, and was originally intended to replace the PIT / RTC
support of the X86 PC. It remains to be seen whether that will be the case, as
@@ -297,7 +310,8 @@
Detailed specification of the HPET is beyond the current scope of this
document, as it is also very well documented elsewhere.
-2.5) Offboard Timers
+2.5. Offboard Timers
+--------------------
Several cards, both proprietary (watchdog boards) and commonplace (e1000) have
timing chips built into the cards which may have registers which are accessible
@@ -307,9 +321,8 @@
timer device would require additional support to be virtualized properly and is
not considered important at this time as no known operating system does this.
-=========================================================================
-
-3) TSC Hardware
+3. TSC Hardware
+===============
The TSC or time stamp counter is relatively simple in theory; it counts
instruction cycles issued by the processor, which can be used as a measure of
@@ -340,7 +353,8 @@
promise to allow the TSC to additionally be scaled, but this hardware is not
yet widely available.
-3.1) TSC synchronization
+3.1. TSC synchronization
+------------------------
The TSC is a CPU-local clock in most implementations. This means, on SMP
platforms, the TSCs of different CPUs may start at different times depending
@@ -357,7 +371,8 @@
values are read from the same clock, which generally only is possible on single
socket systems or those with special hardware support.
-3.2) TSC and CPU hotplug
+3.2. TSC and CPU hotplug
+------------------------
As touched on already, CPUs which arrive later than the boot time of the system
may not have a TSC value that is synchronized with the rest of the system.
@@ -367,7 +382,8 @@
TSC is synchronized back to a state where TSC synchronization flaws, however
small, may be exposed to the OS and any virtualization environment.
-3.3) TSC and multi-socket / NUMA
+3.3. TSC and multi-socket / NUMA
+--------------------------------
Multi-socket systems, especially large multi-socket systems are likely to have
individual clocksources rather than a single, universally distributed clock.
@@ -385,7 +401,8 @@
It is recommended not to trust the TSCs to remain synchronized on NUMA or
multiple socket systems for these reasons.
-3.4) TSC and C-states
+3.4. TSC and C-states
+---------------------
C-states, or idling states of the processor, especially C1E and deeper sleep
states may be problematic for TSC as well. The TSC may stop advancing in such
@@ -396,7 +413,8 @@
The TSC in such a case may be corrected by catching it up to a known external
clocksource.
-3.5) TSC frequency change / P-states
+3.5. TSC frequency change / P-states
+------------------------------------
To make things slightly more interesting, some CPUs may change frequency. They
may or may not run the TSC at the same rate, and because the frequency change
@@ -416,14 +434,16 @@
than that of non-halted processors. AMD Turion processors are known to have
this problem.
-3.6) TSC and STPCLK / T-states
+3.6. TSC and STPCLK / T-states
+------------------------------
External signals given to the processor may also have the effect of stopping
the TSC. This is typically done for thermal emergency power control to prevent
an overheating condition, and typically, there is no way to detect that this
condition has happened.
-3.7) TSC virtualization - VMX
+3.7. TSC virtualization - VMX
+-----------------------------
VMX provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
instructions, which is enough for full virtualization of TSC in any manner. In
@@ -431,14 +451,16 @@
field specified in the VMCS. Special instructions must be used to read and
write the VMCS field.
-3.8) TSC virtualization - SVM
+3.8. TSC virtualization - SVM
+-----------------------------
SVM provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
instructions, which is enough for full virtualization of TSC in any manner. In
addition, SVM allows passing through the host TSC plus an additional offset
field specified in the SVM control block.
-3.9) TSC feature bits in Linux
+3.9. TSC feature bits in Linux
+------------------------------
In summary, there is no way to guarantee the TSC remains in perfect
synchronization unless it is explicitly guaranteed by the architecture. Even
@@ -448,13 +470,16 @@
The following feature bits are used by Linux to signal various TSC attributes,
but they can only be taken to be meaningful for UP or single node systems.
-X86_FEATURE_TSC : The TSC is available in hardware
-X86_FEATURE_RDTSCP : The RDTSCP instruction is available
-X86_FEATURE_CONSTANT_TSC : The TSC rate is unchanged with P-states
-X86_FEATURE_NONSTOP_TSC : The TSC does not stop in C-states
-X86_FEATURE_TSC_RELIABLE : TSC sync checks are skipped (VMware)
+========================= =======================================
+X86_FEATURE_TSC The TSC is available in hardware
+X86_FEATURE_RDTSCP The RDTSCP instruction is available
+X86_FEATURE_CONSTANT_TSC The TSC rate is unchanged with P-states
+X86_FEATURE_NONSTOP_TSC The TSC does not stop in C-states
+X86_FEATURE_TSC_RELIABLE TSC sync checks are skipped (VMware)
+========================= =======================================
-4) Virtualization Problems
+4. Virtualization Problems
+==========================
Timekeeping is especially problematic for virtualization because a number of
challenges arise. The most obvious problem is that time is now shared between
@@ -473,7 +498,8 @@
cause similar problems to virtualization makes it a good justification for
solving many of these problems on bare metal.
-4.1) Interrupt clocking
+4.1. Interrupt clocking
+-----------------------
One of the most immediate problems that occurs with legacy operating systems
is that the system timekeeping routines are often designed to keep track of
@@ -502,7 +528,8 @@
rate (ed: is it 18.2 Hz?) however that it has not yet been a problem in
practice.
-4.2) TSC sampling and serialization
+4.2. TSC sampling and serialization
+-----------------------------------
As the highest precision time source available, the cycle counter of the CPU
has aroused much interest from developers. As explained above, this timer has
@@ -524,7 +551,8 @@
the TSC as seen from other CPUs, even in an otherwise perfectly synchronized
system.
-4.3) Timespec aliasing
+4.3. Timespec aliasing
+----------------------
Additionally, this lack of serialization from the TSC poses another challenge
when using results of the TSC when measured against another time source. As
@@ -548,7 +576,8 @@
and any other values derived from TSC computation (such as TSC virtualization
itself).
-4.4) Migration
+4.4. Migration
+--------------
Migration of a virtual machine raises problems for timekeeping in two ways.
First, the migration itself may take time, during which interrupts cannot be
@@ -566,7 +595,8 @@
simply storing multipliers and offsets against the TSC for the guest to convert
back into nanosecond resolution values.
-4.5) Scheduling
+4.5. Scheduling
+---------------
Since scheduling may be based on precise timing and firing of interrupts, the
scheduling algorithms of an operating system may be adversely affected by
@@ -579,7 +609,8 @@
paravirtualized scheduler clock, which reveals the true amount of CPU time for
which a virtual machine has been running.
-4.6) Watchdogs
+4.6. Watchdogs
+--------------
Watchdog timers, such as the lock detector in Linux may fire accidentally when
running under hardware virtualization due to timer interrupts being delayed or
@@ -587,7 +618,8 @@
spurious and can be ignored, but in some circumstances it may be necessary to
disable such detection.
-4.7) Delays and precision timing
+4.7. Delays and precision timing
+--------------------------------
Precise timing and delays may not be possible in a virtualized system. This
can happen if the system is controlling physical hardware, or issues delays to
@@ -600,7 +632,8 @@
significant issue. In many cases these delays may be eliminated through
configuration or paravirtualization.
-4.8) Covert channels and leaks
+4.8. Covert channels and leaks
+------------------------------
In addition to the above problems, time information will inevitably leak to the
guest about the host in anything but a perfect implementation of virtualized
diff --git a/Documentation/virt/uml/UserModeLinux-HOWTO.txt b/Documentation/virt/uml/user_mode_linux.rst
similarity index 74%
rename from Documentation/virt/uml/UserModeLinux-HOWTO.txt
rename to Documentation/virt/uml/user_mode_linux.rst
index 87b80f5..de0f0b2 100644
--- a/Documentation/virt/uml/UserModeLinux-HOWTO.txt
+++ b/Documentation/virt/uml/user_mode_linux.rst
@@ -1,12 +1,17 @@
- User Mode Linux HOWTO
- User Mode Linux Core Team
- Mon Nov 18 14:16:16 EST 2002
+.. SPDX-License-Identifier: GPL-2.0
- This document describes the use and abuse of Jeff Dike's User Mode
- Linux: a port of the Linux kernel as a normal Intel Linux process.
- ______________________________________________________________________
+=====================
+User Mode Linux HOWTO
+=====================
- Table of Contents
+:Author: User Mode Linux Core Team
+:Last-updated: Sat Jan 25 16:07:55 CET 2020
+
+This document describes the use and abuse of Jeff Dike's User Mode
+Linux: a port of the Linux kernel as a normal Intel Linux process.
+
+
+.. Table of Contents
1. Introduction
@@ -132,19 +137,19 @@
15.5 Other contributions
- ______________________________________________________________________
-
- 1. Introduction
+1. Introduction
+================
Welcome to User Mode Linux. It's going to be fun.
- 1.1. How is User Mode Linux Different?
+1.1. How is User Mode Linux Different?
+---------------------------------------
Normally, the Linux Kernel talks straight to your hardware (video
card, keyboard, hard drives, etc), and any programs which run ask the
- kernel to operate the hardware, like so:
+ kernel to operate the hardware, like so::
@@ -160,10 +165,10 @@
The User Mode Linux Kernel is different; instead of talking to the
- hardware, it talks to a `real' Linux kernel (called the `host kernel'
+ hardware, it talks to a `real` Linux kernel (called the `host kernel`
from now on), like any other program. Programs can then run inside
User-Mode Linux as if they were running under a normal kernel, like
- so:
+ so::
@@ -181,7 +186,8 @@
- 1.2. Why Would I Want User Mode Linux?
+1.2. Why Would I Want User Mode Linux?
+---------------------------------------
1. If User Mode Linux crashes, your host kernel is still fine.
@@ -204,83 +210,41 @@
+.. _Compiling_the_kernel_and_modules:
-
- 2. Compiling the kernel and modules
+2. Compiling the kernel and modules
+====================================
- 2.1. Compiling the kernel
+2.1. Compiling the kernel
+--------------------------
Compiling the user mode kernel is just like compiling any other
- kernel. Let's go through the steps, using 2.4.0-prerelease (current
- as of this writing) as an example:
+ kernel.
- 1. Download the latest UML patch from
-
- the download page <http://user-mode-linux.sourceforge.net/
-
- In this example, the file is uml-patch-2.4.0-prerelease.bz2.
-
-
- 2. Download the matching kernel from your favourite kernel mirror,
+ 1. Download the latest kernel from your favourite kernel mirror,
such as:
- ftp://ftp.ca.kernel.org/pub/kernel/v2.4/linux-2.4.0-prerelease.tar.bz2
- <ftp://ftp.ca.kernel.org/pub/kernel/v2.4/linux-2.4.0-prerelease.tar.bz2>
- .
+ https://mirrors.edge.kernel.org/pub/linux/kernel/v5.x/linux-5.4.14.tar.xz
-
- 3. Make a directory and unpack the kernel into it.
-
-
+ 2. Make a directory and unpack the kernel into it::
host%
mkdir ~/uml
-
-
-
-
-
host%
cd ~/uml
-
-
-
-
-
host%
- tar -xzvf linux-2.4.0-prerelease.tar.bz2
+ tar xvf linux-5.4.14.tar.xz
-
-
-
-
- 4. Apply the patch using
-
-
-
- host%
- cd ~/uml/linux
-
-
-
- host%
- bzcat uml-patch-2.4.0-prerelease.bz2 | patch -p1
-
-
-
-
-
-
- 5. Run your favorite config; `make xconfig ARCH=um' is the most
- convenient. `make config ARCH=um' and 'make menuconfig ARCH=um'
+ 3. Run your favorite config; ``make xconfig ARCH=um`` is the most
+ convenient. ``make config ARCH=um`` and ``make menuconfig ARCH=um``
will work as well. The defaults will give you a useful kernel. If
you want to change something, go ahead, it probably won't hurt
anything.
@@ -288,44 +252,20 @@
Note: If the host is configured with a 2G/2G address space split
rather than the usual 3G/1G split, then the packaged UML binaries
- will not run. They will immediately segfault. See ``UML on 2G/2G
- hosts'' for the scoop on running UML on your system.
+ will not run. They will immediately segfault. See
+ :ref:`UML_on_2G/2G_hosts` for the scoop on running UML on your system.
- 6. Finish with `make linux ARCH=um': the result is a file called
- `linux' in the top directory of your source tree.
-
- Make sure that you don't build this kernel in /usr/src/linux. On some
- distributions, /usr/include/asm is a link into this pool. The user-
- mode build changes the other end of that link, and things that include
- <asm/anything.h> stop compiling.
-
- The sources are also available from cvs at the project's cvs page,
- which has directions on getting the sources. You can also browse the
- CVS pool from there.
-
- If you get the CVS sources, you will have to check them out into an
- empty directory. You will then have to copy each file into the
- corresponding directory in the appropriate kernel pool.
-
- If you don't have the latest kernel pool, you can get the
- corresponding user-mode sources with
+ 4. Finish with ``make linux ARCH=um``: the result is a file called
+ ``linux`` in the top directory of your source tree.
- host% cvs co -r v_2_3_x linux
-
-
-
-
- where 'x' is the version in your pool. Note that you will not get the
- bug fixes and enhancements that have gone into subsequent releases.
-
-
- 2.2. Compiling and installing kernel modules
+2.2. Compiling and installing kernel modules
+---------------------------------------------
UML modules are built in the same way as the native kernel (with the
- exception of the 'ARCH=um' that you always need for UML):
+ exception of the 'ARCH=um' that you always need for UML)::
host% make modules ARCH=um
@@ -337,12 +277,12 @@
the user-mode pool. Modules from the native kernel won't work.
You can install them by using ftp or something to copy them into the
- virtual machine and dropping them into /lib/modules/`uname -r`.
+ virtual machine and dropping them into ``/lib/modules/$(uname -r)``.
You can also get the kernel build process to install them as follows:
1. with the kernel not booted, mount the root filesystem in the top
- level of the kernel pool:
+ level of the kernel pool::
host% mount root_fs mnt -o loop
@@ -352,7 +292,7 @@
- 2. run
+ 2. run::
host%
@@ -363,7 +303,7 @@
- 3. unmount the filesystem
+ 3. unmount the filesystem::
host% umount mnt
@@ -381,27 +321,28 @@
as modules, especially filesystems and network protocols and filters,
so most symbols which need to be exported probably already are.
However, if you do find symbols that need exporting, let us
- <http://user-mode-linux.sourceforge.net/> know, and
+ know at http://user-mode-linux.sourceforge.net/, and
they'll be "taken care of".
- 2.3. Compiling and installing uml_utilities
+2.3. Compiling and installing uml_utilities
+--------------------------------------------
Many features of the UML kernel require a user-space helper program,
so a uml_utilities package is distributed separately from the kernel
patch which provides these helpers. Included within this is:
- o port-helper - Used by consoles which connect to xterms or ports
+ - port-helper - Used by consoles which connect to xterms or ports
- o tunctl - Configuration tool to create and delete tap devices
+ - tunctl - Configuration tool to create and delete tap devices
- o uml_net - Setuid binary for automatic tap device configuration
+ - uml_net - Setuid binary for automatic tap device configuration
- o uml_switch - User-space virtual switch required for daemon
+ - uml_switch - User-space virtual switch required for daemon
transport
- The uml_utilities tree is compiled with:
+ The uml_utilities tree is compiled with::
host#
@@ -423,38 +364,42 @@
- 3. Running UML and logging in
+3. Running UML and logging in
+==============================
- 3.1. Running UML
+3.1. Running UML
+-----------------
- It runs on 2.2.15 or later, and all 2.4 kernels.
+ It runs on 2.2.15 or later, and all kernel versions since 2.4.
Booting UML is straightforward. Simply run 'linux': it will try to
- mount the file `root_fs' in the current directory. You do not need to
- run it as root. If your root filesystem is not named `root_fs', then
- you need to put a `ubd0=root_fs_whatever' switch on the linux command
+ mount the file ``root_fs`` in the current directory. You do not need to
+ run it as root. If your root filesystem is not named ``root_fs``, then
+ you need to put a ``ubd0=root_fs_whatever`` switch on the linux command
line.
You will need a filesystem to boot UML from. There are a number
- available for download from here <http://user-mode-
- linux.sourceforge.net/> . There are also several tools
- <http://user-mode-linux.sourceforge.net/> which can be
+ available for download from http://user-mode-linux.sourceforge.net.
+ There are also several tools at
+ http://user-mode-linux.sourceforge.net/ which can be
used to generate UML-compatible filesystem images from media.
The kernel will boot up and present you with a login prompt.
- Note: If the host is configured with a 2G/2G address space split
+Note:
+ If the host is configured with a 2G/2G address space split
rather than the usual 3G/1G split, then the packaged UML binaries will
- not run. They will immediately segfault. See ``UML on 2G/2G hosts''
+ not run. They will immediately segfault. See :ref:`UML_on_2G/2G_hosts`
for the scoop on running UML on your system.
- 3.2. Logging in
+3.2. Logging in
+----------------
@@ -468,22 +413,22 @@
There are a couple of other ways to log in:
- o On a virtual console
+ - On a virtual console
Each virtual console that is configured (i.e. the device exists in
/dev and /etc/inittab runs a getty on it) will come up in its own
- xterm. If you get tired of the xterms, read ``Setting up serial
- lines and consoles'' to see how to attach the consoles to
- something else, like host ptys.
+ xterm. If you get tired of the xterms, read
+ :ref:`setting_up_serial_lines_and_consoles` to see how to attach
+ the consoles to something else, like host ptys.
- o Over the serial line
+ - Over the serial line
- In the boot output, find a line that looks like:
+ In the boot output, find a line that looks like::
@@ -493,7 +438,7 @@
Attach your favorite terminal program to the corresponding tty. I.e.
- for minicom, the command would be
+ for minicom, the command would be::
host% minicom -o -p /dev/ttyp1
@@ -503,37 +448,40 @@
- o Over the net
+ - Over the net
If the network is running, then you can telnet to the virtual
- machine and log in to it. See ``Setting up the network'' to learn
+ machine and log in to it. See :ref:`Setting_up_the_network` to learn
about setting up a virtual network.
When you're done using it, run halt, and the kernel will bring itself
down and the process will exit.
- 3.3. Examples
+3.3. Examples
+--------------
Here are some examples of UML in action:
- o A login session <http://user-mode-linux.sourceforge.net/login.html>
+ - A login session http://user-mode-linux.sourceforge.net/old/login.html
- o A virtual network <http://user-mode-linux.sourceforge.net/net.html>
+ - A virtual network http://user-mode-linux.sourceforge.net/old/net.html
+.. _UML_on_2G/2G_hosts:
-
- 4. UML on 2G/2G hosts
+4. UML on 2G/2G hosts
+======================
- 4.1. Introduction
+4.1. Introduction
+------------------
Most Linux machines are configured so that the kernel occupies the
@@ -546,7 +494,8 @@
- 4.2. The problem
+4.2. The problem
+-----------------
The prebuilt UML binaries on this site will not run on 2G/2G hosts
@@ -558,13 +507,14 @@
- 4.3. The solution
+4.3. The solution
+------------------
The fix for this is to rebuild UML from source after enabling
CONFIG_HOST_2G_2G (under 'General Setup'). This will cause UML to
load itself in the top .5G of that smaller process address space,
- where it will run fine. See ``Compiling the kernel and modules'' if
+ where it will run fine. See :ref:`Compiling_the_kernel_and_modules` if
you need help building UML from source.
@@ -573,10 +523,11 @@
+.. _setting_up_serial_lines_and_consoles:
-
- 5. Setting up serial lines and consoles
+5. Setting up serial lines and consoles
+========================================
It is possible to attach UML serial lines and consoles to many types
@@ -584,22 +535,23 @@
You can attach them to host ptys, ttys, file descriptors, and ports.
- This allows you to do things like
+ This allows you to do things like:
- o have a UML console appear on an unused host console,
+ - have a UML console appear on an unused host console,
- o hook two virtual machines together by having one attach to a pty
+ - hook two virtual machines together by having one attach to a pty
and having the other attach to the corresponding tty
- o make a virtual machine accessible from the net by attaching a
+ - make a virtual machine accessible from the net by attaching a
console to a port on the host.
- The general format of the command line option is device=channel.
+ The general format of the command line option is ``device=channel``.
- 5.1. Specifying the device
+5.1. Specifying the device
+---------------------------
Devices are specified with "con" or "ssl" (console or serial line,
respectively), optionally with a device number if you are talking
@@ -613,7 +565,7 @@
A specific device name will override a less general "con=" or "ssl=".
So, for example, you can assign a pty to each of the serial lines
- except for the first two like this:
+ except for the first two like this::
ssl=pty ssl0=tty:/dev/tty0 ssl1=tty:/dev/tty1
@@ -626,13 +578,14 @@
- 5.2. Specifying the channel
+5.2. Specifying the channel
+----------------------------
There are a number of different types of channels to attach a UML
device to, each with a different way of specifying exactly what to
attach to.
- o pseudo-terminals - device=pty pts terminals - device=pts
+ - pseudo-terminals - device=pty pts terminals - device=pts
This will cause UML to allocate a free host pseudo-terminal for the
@@ -640,23 +593,23 @@
log. You access it by attaching a terminal program to the
corresponding tty:
- o screen /dev/pts/n
+ - screen /dev/pts/n
- o screen /dev/ttyxx
+ - screen /dev/ttyxx
- o minicom -o -p /dev/ttyxx - minicom seems not able to handle pts
+ - minicom -o -p /dev/ttyxx - minicom seems not able to handle pts
devices
- o kermit - start it up, 'open' the device, then 'connect'
+ - kermit - start it up, 'open' the device, then 'connect'
- o terminals - device=tty:tty device file
+ - terminals - device=tty:tty device file
- This will make UML attach the device to the specified tty (i.e
+ This will make UML attach the device to the specified tty (i.e::
con1=tty:/dev/tty3
@@ -672,7 +625,7 @@
- o xterms - device=xterm
+ - xterms - device=xterm
UML will run an xterm and the device will be attached to it.
@@ -681,12 +634,12 @@
- o Port - device=port:port number
+ - Port - device=port:port number
This will attach the UML devices to the specified host port.
Attaching console 1 to the host's port 9000 would be done like
- this:
+ this::
con1=port:9000
@@ -694,7 +647,7 @@
- Attaching all the serial lines to that port would be done similarly:
+ Attaching all the serial lines to that port would be done similarly::
ssl=port:9000
@@ -702,8 +655,8 @@
- You access these devices by telnetting to that port. Each active tel-
- net session gets a different device. If there are more telnets to a
+ You access these devices by telnetting to that port. Each active
+ telnet session gets a different device. If there are more telnets to a
port than UML devices attached to it, then the extra telnet sessions
will block until an existing telnet detaches, or until another device
becomes active (i.e. by being activated in /etc/inittab).
@@ -725,13 +678,13 @@
- o already-existing file descriptors - device=file descriptor
+ - already-existing file descriptors - device=file descriptor
If you set up a file descriptor on the UML command line, you can
attach a UML device to it. This is most commonly used to put the
main console back on stdin and stdout after assigning all the other
- consoles to something else:
+ consoles to something else::
con0=fd:0,fd:1 con=pts
@@ -743,7 +696,7 @@
- o Nothing - device=null
+ - Nothing - device=null
This allows the device to be opened, in contrast to 'none', but
@@ -754,7 +707,7 @@
- o None - device=none
+ - None - device=none
This causes the device to disappear.
@@ -762,7 +715,7 @@
You can also specify different input and output channels for a device
- by putting a comma between them:
+ by putting a comma between them::
ssl3=tty:/dev/tty2,xterm
@@ -785,14 +738,15 @@
- 5.3. Examples
+5.3. Examples
+--------------
There are a number of interesting things you can do with this
capability.
First, this is how you get rid of those bleeding console xterms by
- attaching them to host ptys:
+ attaching them to host ptys::
con=pty con0=fd:0,fd:1
@@ -802,7 +756,7 @@
This will make a UML console take over an unused host virtual console,
so that when you switch to it, you will see the UML login prompt
- rather than the host login prompt:
+ rather than the host login prompt::
con1=tty:/dev/tty6
@@ -813,7 +767,7 @@
You can attach two virtual machines together with what amounts to a
serial line as follows:
- Run one UML with a serial line attached to a pty -
+ Run one UML with a serial line attached to a pty::
ssl1=pty
@@ -825,7 +779,7 @@
that it got /dev/ptyp1).
Boot the other UML with a serial line attached to the corresponding
- tty -
+ tty::
ssl1=tty:/dev/ttyp1
@@ -838,7 +792,10 @@
prompt of the other virtual machine.
- 6. Setting up the network
+.. _setting_up_the_network:
+
+6. Setting up the network
+==========================
@@ -858,19 +815,19 @@
There are currently five transport types available for a UML virtual
machine to exchange packets with other hosts:
- o ethertap
+ - ethertap
- o TUN/TAP
+ - TUN/TAP
- o Multicast
+ - Multicast
- o a switch daemon
+ - a switch daemon
- o slip
+ - slip
- o slirp
+ - slirp
- o pcap
+ - pcap
The TUN/TAP, ethertap, slip, and slirp transports allow a UML
instance to exchange packets with the host. They may be directed
@@ -893,28 +850,28 @@
With so many host transports, which one should you use? Here's when
you should use each one:
- o ethertap - if you want access to the host networking and it is
+ - ethertap - if you want access to the host networking and it is
running 2.2
- o TUN/TAP - if you want access to the host networking and it is
+ - TUN/TAP - if you want access to the host networking and it is
running 2.4. Also, the TUN/TAP transport is able to use a
preconfigured device, allowing it to avoid using the setuid uml_net
helper, which is a security advantage.
- o Multicast - if you want a purely virtual network and you don't want
+ - Multicast - if you want a purely virtual network and you don't want
to set up anything but the UML
- o a switch daemon - if you want a purely virtual network and you
+ - a switch daemon - if you want a purely virtual network and you
don't mind running the daemon in order to get somewhat better
performance
- o slip - there is no particular reason to run the slip backend unless
+ - slip - there is no particular reason to run the slip backend unless
ethertap and TUN/TAP are just not available for some reason
- o slirp - if you don't have root access on the host to setup
+ - slirp - if you don't have root access on the host to setup
networking, or if you don't want to allocate an IP to your UML
- o pcap - not much use for actual network connectivity, but great for
+ - pcap - not much use for actual network connectivity, but great for
monitoring traffic on the host
Ethertap is available on 2.4 and works fine. TUN/TAP is preferred
@@ -926,7 +883,8 @@
exploit the helper's root privileges.
- 6.1. General setup
+6.1. General setup
+-------------------
First, you must have the virtual network enabled in your UML. If are
running a prebuilt kernel from this site, everything is already
@@ -938,7 +896,7 @@
The next step is to provide a network device to the virtual machine.
This is done by describing it on the kernel command line.
- The general format is
+ The general format is::
eth <n> = <transport> , <transport args>
@@ -947,7 +905,7 @@
For example, a virtual ethernet device may be attached to a host
- ethertap device as follows:
+ ethertap device as follows::
eth0=ethertap,tap0,fe:fd:0:0:0:1,192.168.0.254
@@ -978,7 +936,7 @@
You can also add devices to a UML and remove them at runtime. See the
- ``The Management Console'' page for details.
+ :ref:`The_Management_Console` page for details.
The sections below describe this in more detail.
@@ -995,7 +953,8 @@
- 6.2. Userspace daemons
+6.2. Userspace daemons
+-----------------------
You will likely need the setuid helper, or the switch daemon, or both.
They are both installed with the RPM and deb, so if you've installed
@@ -1011,7 +970,8 @@
- 6.3. Specifying ethernet addresses
+6.3. Specifying ethernet addresses
+-----------------------------------
Below, you will see that the TUN/TAP, ethertap, and daemon interfaces
allow you to specify hardware addresses for the virtual ethernet
@@ -1023,21 +983,21 @@
sufficient to guarantee a unique hardware address for the device. A
couple of exceptions are:
- o Another set of virtual ethernet devices are on the same network and
+ - Another set of virtual ethernet devices are on the same network and
they are assigned hardware addresses using a different scheme which
may conflict with the UML IP address-based scheme
- o You aren't going to use the device for IP networking, so you don't
+ - You aren't going to use the device for IP networking, so you don't
assign the device an IP address
If you let the driver provide the hardware address, you should make
sure that the device IP address is known before the interface is
- brought up. So, inside UML, this will guarantee that:
+ brought up. So, inside UML, this will guarantee that::
- UML#
- ifconfig eth0 192.168.0.250 up
+ UML#
+ ifconfig eth0 192.168.0.250 up
@@ -1049,13 +1009,14 @@
- 6.4. UML interface setup
+6.4. UML interface setup
+-------------------------
Once the network devices have been described on the command line, you
should boot UML and log in.
- The first thing to do is bring the interface up:
+ The first thing to do is bring the interface up::
UML# ifconfig ethn ip-address up
@@ -1067,7 +1028,7 @@
To reach the rest of the world, you should set a default route to the
- host:
+ host::
UML# route add default gw host ip
@@ -1075,7 +1036,7 @@
- Again, with host ip of 192.168.0.4:
+ Again, with host ip of 192.168.0.4::
UML# route add default gw 192.168.0.4
@@ -1097,29 +1058,25 @@
Note: If you can't communicate with other hosts on your physical
ethernet, it's probably because of a network route that's
automatically set up. If you run 'route -n' and see a route that
- looks like this:
+ looks like this::
- Destination Gateway Genmask Flags Metric Ref Use Iface
- 192.168.0.0 0.0.0.0 255.255.255.0 U 0 0 0 eth0
+ Destination Gateway Genmask Flags Metric Ref Use Iface
+ 192.168.0.0 0.0.0.0 255.255.255.0 U 0 0 0 eth0
with a mask that's not 255.255.255.255, then replace it with a route
- to your host:
+ to your host::
UML#
route del -net 192.168.0.0 dev eth0 netmask 255.255.255.0
-
-
-
-
UML#
route add -host 192.168.0.4 dev eth0
@@ -1131,7 +1088,8 @@
- 6.5. Multicast
+6.5. Multicast
+---------------
The simplest way to set up a virtual network between multiple UMLs is
to use the mcast transport. This was written by Harald Welte and is
@@ -1142,7 +1100,7 @@
messages when you bring the device up inside UML.
- To use it, run two UMLs with
+ To use it, run two UMLs with::
eth0=mcast
@@ -1151,16 +1109,12 @@
on their command lines. Log in, configure the ethernet device in each
- machine with different IP addresses:
+ machine with different IP addresses::
UML1# ifconfig eth0 192.168.0.254
-
-
-
-
UML2# ifconfig eth0 192.168.0.253
@@ -1168,7 +1122,7 @@
and they should be able to talk to each other.
- The full set of command line options for this transport are
+ The full set of command line options for this transport are::
@@ -1177,16 +1131,11 @@
-
- Harald's original README is here <http://user-mode-linux.source-
- forge.net/> and explains these in detail, as well as
- some other issues.
-
There is also a related point-to-point only "ucast" transport.
This is useful when your network does not support multicast, and
all network connections are simple point to point links.
- The full set of command line options for this transport are
+ The full set of command line options for this transport are::
ethn=ucast,ethernet address,remote address,listen port,remote port
@@ -1194,7 +1143,8 @@
- 6.6. TUN/TAP with the uml_net helper
+6.6. TUN/TAP with the uml_net helper
+-------------------------------------
TUN/TAP is the preferred mechanism on 2.4 to exchange packets with the
host. The TUN/TAP backend has been in UML since 2.4.9-3um.
@@ -1216,7 +1166,7 @@
kernel or as the tun.o module.
The format of the command line switch to attach a device to a TUN/TAP
- device is
+ device is::
eth <n> =tuntap,,, <IP address>
@@ -1226,7 +1176,7 @@
For example, this argument will attach the UML's eth0 to the next
available tap device and assign an ethernet address to it based on its
- IP address
+ IP address::
eth0=tuntap,,,192.168.0.254
@@ -1247,10 +1197,10 @@
There are a couple potential problems with running the TUN/TAP
transport on a 2.4 host kernel
- o TUN/TAP seems not to work on 2.4.3 and earlier. Upgrade the host
+ - TUN/TAP seems not to work on 2.4.3 and earlier. Upgrade the host
kernel or use the ethertap transport.
- o With an upgraded kernel, TUN/TAP may fail with
+ - With an upgraded kernel, TUN/TAP may fail with::
File descriptor in bad state
@@ -1263,13 +1213,12 @@
make sure that /usr/src/linux points to the headers for the running
kernel.
- These were pointed out by Tim Robinson <timro at trkr dot net> in
- <http://www.geocrawler.com/> name="this uml-
- user post"> .
+ These were pointed out by Tim Robinson <timro at trkr dot net> in the past.
- 6.7. TUN/TAP with a preconfigured tap device
+6.7. TUN/TAP with a preconfigured tap device
+---------------------------------------------
If you prefer not to have UML use uml_net (which is somewhat
insecure), with UML 2.4.17-11, you can set up a TUN/TAP device
@@ -1277,8 +1226,8 @@
there is no need for root assistance. Setting up the device is done
as follows:
- o Create the device with tunctl (available from the UML utilities
- tarball)
+ - Create the device with tunctl (available from the UML utilities
+ tarball)::
@@ -1291,8 +1240,8 @@
where uid is the user id or username that UML will be run as. This
will tell you what device was created.
- o Configure the device IP (change IP addresses and device name to
- suit)
+ - Configure the device IP (change IP addresses and device name to
+ suit)::
@@ -1303,8 +1252,8 @@
- o Set up routing and arping if desired - this is my recipe, there are
- other ways of doing the same thing
+ - Set up routing and arping if desired - this is my recipe, there are
+ other ways of doing the same thing::
host#
@@ -1313,19 +1262,9 @@
host#
route add -host 192.168.0.253 dev tap0
-
-
-
-
-
host#
bash -c 'echo 1 > /proc/sys/net/ipv4/conf/tap0/proxy_arp'
-
-
-
-
-
host#
arp -Ds 192.168.0.253 eth0 pub
@@ -1338,76 +1277,43 @@
utility which reads the information from a config file and sets up
devices at boot time.
- o Rather than using up two IPs and ARPing for one of them, you can
+ - Rather than using up two IPs and ARPing for one of them, you can
also provide direct access to your LAN by the UML by using a
- bridge.
+ bridge::
host#
brctl addbr br0
-
-
-
-
host#
ifconfig eth0 0.0.0.0 promisc up
-
-
-
-
host#
ifconfig tap0 0.0.0.0 promisc up
-
-
-
-
host#
ifconfig br0 192.168.0.1 netmask 255.255.255.0 up
-
-
-
-
-
- host#
- brctl stp br0 off
-
-
-
-
+ host#
+ brctl stp br0 off
host#
brctl setfd br0 1
-
-
-
-
host#
brctl sethello br0 1
-
-
-
-
host#
brctl addif br0 eth0
-
-
-
-
host#
brctl addif br0 tap0
@@ -1417,12 +1323,12 @@
Note that 'br0' should be setup using ifconfig with the existing IP
address of eth0, as eth0 no longer has its own IP.
- o
+ -
Also, the /dev/net/tun device must be writable by the user running
UML in order for the UML to use the device that's been configured
- for it. The simplest thing to do is
+ for it. The simplest thing to do is::
host# chmod 666 /dev/net/tun
@@ -1438,14 +1344,14 @@
devices and chgrp /dev/net/tun to that group with mode 664 or 660.
- o Once the device is set up, run UML with 'eth0=tuntap,device name'
+ - Once the device is set up, run UML with 'eth0=tuntap,device name'
(i.e. 'eth0=tuntap,tap0') on the command line (or do it with the
mconsole config command).
- o Bring the eth device up in UML and you're in business.
+ - Bring the eth device up in UML and you're in business.
If you don't want that tap device any more, you can make it non-
- persistent with
+ persistent with::
host# tunctl -d tap device
@@ -1455,7 +1361,7 @@
Finally, tunctl has a -b (for brief mode) switch which causes it to
output only the name of the tap device it created. This makes it
- suitable for capture by a script:
+ suitable for capture by a script::
host# TAP=`tunctl -u 1000 -b`
@@ -1465,7 +1371,8 @@
- 6.8. Ethertap
+6.8. Ethertap
+--------------
Ethertap is the general mechanism on 2.2 for userspace processes to
exchange packets with the kernel.
@@ -1473,7 +1380,7 @@
To use this transport, you need to describe the virtual network device
- on the UML command line. The general format for this is
+ on the UML command line. The general format for this is::
eth <n> =ethertap, <device> , <ethernet address> , <tap IP address>
@@ -1481,7 +1388,7 @@
- So, the previous example
+ So, the previous example::
eth0=ethertap,tap0,fe:fd:0:0:0:1,192.168.0.254
@@ -1521,7 +1428,7 @@
If you want to set things up yourself, you need to make sure that the
appropriate /dev entry exists. If it doesn't, become root and create
- it as follows:
+ it as follows::
mknod /dev/tap <minor> c 36 <minor> + 16
@@ -1529,7 +1436,7 @@
- For example, this is how to create /dev/tap0:
+ For example, this is how to create /dev/tap0::
mknod /dev/tap0 c 36 0 + 16
@@ -1539,7 +1446,7 @@
You also need to make sure that the host kernel has ethertap support.
If ethertap is enabled as a module, you apparently need to insmod
- ethertap once for each ethertap device you want to enable. So,
+ ethertap once for each ethertap device you want to enable. So,::
host#
@@ -1549,7 +1456,7 @@
will give you the tap0 interface. To get the tap1 interface, you need
- to run
+ to run::
host#
@@ -1561,7 +1468,8 @@
- 6.9. The switch daemon
+6.9. The switch daemon
+-----------------------
Note: This is the daemon formerly known as uml_router, but which was
renamed so the network weenies of the world would stop growling at me.
@@ -1577,7 +1485,7 @@
sockets.
- If you want it to listen on a different pair of sockets, use
+ If you want it to listen on a different pair of sockets, use::
-unix control socket data socket
@@ -1586,7 +1494,7 @@
- If you want it to act as a hub rather than a switch, use
+ If you want it to act as a hub rather than a switch, use::
-hub
@@ -1596,7 +1504,7 @@
If you want the switch to be connected to host networking (allowing
- the umls to get access to the outside world through the host), use
+ the umls to get access to the outside world through the host), use::
-tap tap0
@@ -1610,7 +1518,7 @@
device than tap0, specify that instead of tap0.
- uml_switch can be backgrounded as follows
+ uml_switch can be backgrounded as follows::
host%
@@ -1623,7 +1531,7 @@
stdin for EOF. When it sees that, it exits.
- The general format of the kernel command line switch is
+ The general format of the kernel command line switch is::
@@ -1639,7 +1547,8 @@
how to communicate with the daemon. You should only specify them if
you told the daemon to use different sockets than the default. So, if
you ran the daemon with no arguments, running the UML on the same
- machine with
+ machine with::
+
eth0=daemon
@@ -1649,7 +1558,8 @@
- 6.10. Slip
+6.10. Slip
+-----------
Slip is another, less general, mechanism for a process to communicate
with the host networking. In contrast to the ethertap interface,
@@ -1658,7 +1568,7 @@
IP.
- The general format of the command line switch is
+ The general format of the command line switch is::
@@ -1681,7 +1591,8 @@
- 6.11. Slirp
+6.11. Slirp
+------------
slirp uses an external program, usually /usr/bin/slirp, to provide IP
only networking connectivity through the host. This is similar to IP
@@ -1691,7 +1602,7 @@
root access or setuid binaries on the host.
- The general format of the command line switch for slirp is:
+ The general format of the command line switch for slirp is::
@@ -1716,7 +1627,7 @@
The eth0 interface on UML should be set up with the IP 10.2.0.15,
although you can use anything as long as it is not used by a network
you will be connecting to. The default route on UML should be set to
- use
+ use::
UML#
@@ -1737,10 +1648,11 @@
- 6.12. pcap
+6.12. pcap
+-----------
The pcap transport is attached to a UML ethernet device on the command
- line or with uml_mconsole with the following syntax:
+ line or with uml_mconsole with the following syntax::
@@ -1762,7 +1674,7 @@
expression optimizer is used.
- Example:
+ Example::
@@ -1777,7 +1689,8 @@
- 6.13. Setting up the host yourself
+6.13. Setting up the host yourself
+-----------------------------------
If you don't specify an address for the host side of the ethertap or
slip device, UML won't do any setup on the host. So this is what is
@@ -1785,19 +1698,15 @@
192.168.0.251 and a UML-side IP of 192.168.0.250 - adjust to suit your
own network):
- o The device needs to be configured with its IP address. Tap devices
+ - The device needs to be configured with its IP address. Tap devices
are also configured with an mtu of 1484. Slip devices are
configured with a point-to-point address pointing at the UML ip
- address.
+ address::
host# ifconfig tap0 arp mtu 1484 192.168.0.251 up
-
-
-
-
host#
ifconfig sl0 192.168.0.251 pointopoint 192.168.0.250 up
@@ -1805,7 +1714,7 @@
- o If a tap device is being set up, a route is set to the UML IP.
+ - If a tap device is being set up, a route is set to the UML IP::
UML# route add -host 192.168.0.250 gw 192.168.0.251
@@ -1814,8 +1723,8 @@
- o To allow other hosts on your network to see the virtual machine,
- proxy arp is set up for it.
+ - To allow other hosts on your network to see the virtual machine,
+ proxy arp is set up for it::
host# arp -Ds 192.168.0.250 eth0 pub
@@ -1824,7 +1733,7 @@
- o Finally, the host is set up to route packets.
+ - Finally, the host is set up to route packets::
host# echo 1 > /proc/sys/net/ipv4/ip_forward
@@ -1838,12 +1747,14 @@
- 7. Sharing Filesystems between Virtual Machines
+7. Sharing Filesystems between Virtual Machines
+================================================
- 7.1. A warning
+7.1. A warning
+---------------
Don't attempt to share filesystems simply by booting two UMLs from the
same file. That's the same thing as booting two physical machines
@@ -1851,7 +1762,8 @@
- 7.2. Using layered block devices
+7.2. Using layered block devices
+---------------------------------
The way to share a filesystem between two virtual machines is to use
the copy-on-write (COW) layering capability of the ubd block driver.
@@ -1872,7 +1784,7 @@
To add a copy-on-write layer to an existing block device file, simply
- add the name of the COW file to the appropriate ubd switch:
+ add the name of the COW file to the appropriate ubd switch::
ubd0=root_fs_cow,root_fs_debian_22
@@ -1883,7 +1795,7 @@
where 'root_fs_cow' is the private COW file and 'root_fs_debian_22' is
the existing shared filesystem. The COW file need not exist. If it
doesn't, the driver will create and initialize it. Once the COW file
- has been initialized, it can be used on its own on the command line:
+ has been initialized, it can be used on its own on the command line::
ubd0=root_fs_cow
@@ -1896,14 +1808,16 @@
- 7.3. Note!
+7.3. Note!
+-----------
When checking the size of the COW file in order to see the gobs of
space that you're saving, make sure you use 'ls -ls' to see the actual
disk consumption rather than the length of the file. The COW file is
sparse, so the length will be very different from the disk usage.
Here is a 'ls -l' of a COW file and backing file from one boot and
- shutdown:
+ shutdown::
+
host% ls -l cow.debian debian2.2
-rw-r--r-- 1 jdike jdike 492504064 Aug 6 21:16 cow.debian
-rwxrw-rw- 1 jdike jdike 537919488 Aug 6 20:42 debian2.2
@@ -1911,7 +1825,7 @@
- Doesn't look like much saved space, does it? Well, here's 'ls -ls':
+ Doesn't look like much saved space, does it? Well, here's 'ls -ls'::
host% ls -ls cow.debian debian2.2
@@ -1926,7 +1840,8 @@
- 7.4. Another warning
+7.4. Another warning
+---------------------
Once a filesystem is being used as a readonly backing file for a COW
file, do not boot directly from it or modify it in any way. Doing so
@@ -1952,7 +1867,8 @@
- 7.5. uml_moo : Merging a COW file with its backing file
+7.5. uml_moo : Merging a COW file with its backing file
+--------------------------------------------------------
Depending on how you use UML and COW devices, it may be advisable to
merge the changes in the COW file into the backing file every once in
@@ -1961,7 +1877,7 @@
- The utility that does this is uml_moo. Its usage is
+ The utility that does this is uml_moo. Its usage is::
host% uml_moo COW file new backing file
@@ -1991,8 +1907,8 @@
uml_moo is installed with the UML deb and RPM. If you didn't install
UML from one of those packages, you can also get it from the UML
- utilities <http://user-mode-linux.sourceforge.net/
- utilities> tar file in tools/moo.
+ utilities http://user-mode-linux.sourceforge.net/utilities tar file
+ in tools/moo.
@@ -2001,7 +1917,8 @@
- 8. Creating filesystems
+8. Creating filesystems
+========================
You may want to create and mount new UML filesystems, either because
@@ -2015,13 +1932,14 @@
should be easy to translate to the filesystem of your choice.
- 8.1. Create the filesystem file
+8.1. Create the filesystem file
+================================
dd is your friend. All you need to do is tell dd to create an empty
file of the appropriate size. I usually make it sparse to save time
and to avoid allocating disk space until it's actually used. For
example, the following command will create a sparse 100 meg file full
- of zeroes.
+ of zeroes::
host%
@@ -2034,9 +1952,9 @@
8.2. Assign the file to a UML device
- Add an argument like the following to the UML command line:
+ Add an argument like the following to the UML command line::
- ubd4=new_filesystem
+ ubd4=new_filesystem
@@ -2053,7 +1971,7 @@
etc), then get them into UML by way of the net or hostfs.
- Make the new filesystem on the device assigned to the new file:
+ Make the new filesystem on the device assigned to the new file::
host# mkreiserfs /dev/ubd/4
@@ -2077,7 +1995,7 @@
- Now, mount it:
+ Now, mount it::
UML#
@@ -2096,7 +2014,8 @@
- 9. Host file access
+9. Host file access
+====================
If you want to access files on the host machine from inside UML, you
@@ -2112,10 +2031,11 @@
files contained in it just as you would on the host.
- 9.1. Using hostfs
+9.1. Using hostfs
+------------------
To begin with, make sure that hostfs is available inside the virtual
- machine with
+ machine with::
UML# cat /proc/filesystems
@@ -2127,7 +2047,7 @@
module and available inside the virtual machine, and insmod it.
- Now all you need to do is run mount:
+ Now all you need to do is run mount::
UML# mount none /mnt/host -t hostfs
@@ -2139,7 +2059,7 @@
If you don't want to mount the host root directory, then you can
- specify a subdirectory to mount with the -o switch to mount:
+ specify a subdirectory to mount with the -o switch to mount::
UML# mount none /mnt/home -t hostfs -o /home
@@ -2151,13 +2071,14 @@
- 9.2. hostfs as the root filesystem
+9.2. hostfs as the root filesystem
+-----------------------------------
It's possible to boot from a directory hierarchy on the host using
hostfs rather than using the standard filesystem in a file.
To start, you need that hierarchy. The easiest way is to loop mount
- an existing root_fs file:
+ an existing root_fs file::
host# mount root_fs uml_root_dir -o loop
@@ -2166,15 +2087,15 @@
You need to change the filesystem type of / in etc/fstab to be
- 'hostfs', so that line looks like this:
+ 'hostfs', so that line looks like this::
- /dev/ubd/0 / hostfs defaults 1 1
+ /dev/ubd/0 / hostfs defaults 1 1
Then you need to chown to yourself all the files in that directory
- that are owned by root. This worked for me:
+ that are owned by root. This worked for me::
host# find . -uid 0 -exec chown jdike {} \;
@@ -2183,7 +2104,7 @@
Next, make sure that your UML kernel has hostfs compiled in, not as a
- module. Then run UML with the boot device pointing at that directory:
+ module. Then run UML with the boot device pointing at that directory::
ubd0=/path/to/uml/root/directory
@@ -2194,41 +2115,35 @@
UML should then boot as it does normally.
- 9.3. Building hostfs
+9.3. Building hostfs
+---------------------
If you need to build hostfs because it's not in your kernel, you have
two choices:
- o Compiling hostfs into the kernel:
+ - Compiling hostfs into the kernel:
Reconfigure the kernel and set the 'Host filesystem' option under
- o Compiling hostfs as a module:
+ - Compiling hostfs as a module:
Reconfigure the kernel and set the 'Host filesystem' option under
be in arch/um/fs/hostfs/hostfs.o. Install that in
- /lib/modules/`uname -r`/fs in the virtual machine, boot it up, and
+ ``/lib/modules/$(uname -r)/fs`` in the virtual machine, boot it up, and::
UML# insmod hostfs
+.. _The_Management_Console:
-
-
-
-
-
-
-
-
-
- 10. The Management Console
+10. The Management Console
+===========================
@@ -2240,15 +2155,15 @@
There are a number of things you can do with the mconsole interface:
- o get the kernel version
+ - get the kernel version
- o add and remove devices
+ - add and remove devices
- o halt or reboot the machine
+ - halt or reboot the machine
- o Send SysRq commands
+ - Send SysRq commands
- o Pause and resume the UML
+ - Pause and resume the UML
You need the mconsole client (uml_mconsole) which is present in CVS
@@ -2257,7 +2172,7 @@
You also need CONFIG_MCONSOLE (under 'General Setup') enabled in UML.
- When you boot UML, you'll see a line like:
+ When you boot UML, you'll see a line like::
mconsole initialized on /home/jdike/.uml/umlNJ32yL/mconsole
@@ -2265,7 +2180,7 @@
- If you specify a unique machine id one the UML command line, i.e.
+ If you specify a unique machine id one the UML command line, i.e.::
umid=debian
@@ -2273,7 +2188,7 @@
- you'll see this
+ you'll see this::
mconsole initialized on /home/jdike/.uml/debian/mconsole
@@ -2282,7 +2197,7 @@
That file is the socket that uml_mconsole will use to communicate with
- UML. Run it with either the umid or the full path as its argument:
+ UML. Run it with either the umid or the full path as its argument::
host% uml_mconsole debian
@@ -2290,7 +2205,7 @@
- or
+ or::
host% uml_mconsole /home/jdike/.uml/debian/mconsole
@@ -2300,30 +2215,31 @@
You'll get a prompt, at which you can run one of these commands:
- o version
+ - version
- o halt
+ - halt
- o reboot
+ - reboot
- o config
+ - config
- o remove
+ - remove
- o sysrq
+ - sysrq
- o help
+ - help
- o cad
+ - cad
- o stop
+ - stop
- o go
+ - go
- 10.1. version
+10.1. version
+--------------
- This takes no arguments. It prints the UML version.
+ This takes no arguments. It prints the UML version::
(mconsole) version
@@ -2342,11 +2258,12 @@
- 10.2. halt and reboot
+10.2. halt and reboot
+----------------------
These take no arguments. They shut the machine down immediately, with
no syncing of disks and no clean shutdown of userspace. So, they are
- pretty close to crashing the machine.
+ pretty close to crashing the machine::
(mconsole) halt
@@ -2357,34 +2274,36 @@
- 10.3. config
+10.3. config
+-------------
"config" adds a new device to the virtual machine. Currently the ubd
and network drivers support this. It takes one argument, which is the
- device to add, with the same syntax as the kernel command line.
+ device to add, with the same syntax as the kernel command line::
- (mconsole)
- config ubd3=/home/jdike/incoming/roots/root_fs_debian22
+ (mconsole)
+ config ubd3=/home/jdike/incoming/roots/root_fs_debian22
- OK
- (mconsole) config eth1=mcast
- OK
+ OK
+ (mconsole) config eth1=mcast
+ OK
- 10.4. remove
+10.4. remove
+-------------
"remove" deletes a device from the system. Its argument is just the
name of the device to be removed. The device must be idle in whatever
sense the driver considers necessary. In the case of the ubd driver,
the removed block device must not be mounted, swapped on, or otherwise
- open, and in the case of the network driver, the device must be down.
+ open, and in the case of the network driver, the device must be down::
(mconsole) remove ubd3
@@ -2397,7 +2316,8 @@
- 10.5. sysrq
+10.5. sysrq
+------------
This takes one argument, which is a single letter. It calls the
generic kernel's SysRq driver, which does whatever is called for by
@@ -2407,19 +2327,21 @@
- 10.6. help
+10.6. help
+-----------
"help" returns a string listing the valid commands and what each one
does.
- 10.7. cad
+10.7. cad
+----------
This invokes the Ctl-Alt-Del action on init. What exactly this ends
up doing is up to /etc/inittab. Normally, it reboots the machine.
With UML, this is usually not desired, so if a halt would be better,
- then find the section of inittab that looks like this
+ then find the section of inittab that looks like this::
# What to do when CTRL-ALT-DEL is pressed.
@@ -2432,7 +2354,8 @@
- 10.8. stop
+10.8. stop
+-----------
This puts the UML in a loop reading mconsole requests until a 'go'
mconsole command is received. This is very useful for making backups
@@ -2448,7 +2371,8 @@
- 10.9. go
+10.9. go
+---------
This resumes a UML after being paused by a 'stop' command. Note that
when the UML has resumed, TCP connections may have timed out and if
@@ -2460,9 +2384,10 @@
+.. _Kernel_debugging:
-
- 11. Kernel debugging
+11. Kernel debugging
+=====================
Note: The interface that makes debugging, as described here, possible
@@ -2477,15 +2402,16 @@
In order to debug the kernel, you need build it from source. See
- ``Compiling the kernel and modules'' for information on doing that.
+ :ref:`Compiling_the_kernel_and_modules` for information on doing that.
Make sure that you enable CONFIG_DEBUGSYM and CONFIG_PT_PROXY during
- the config. These will compile the kernel with -g, and enable the
+ the config. These will compile the kernel with ``-g``, and enable the
ptrace proxy so that gdb works with UML, respectively.
- 11.1. Starting the kernel under gdb
+11.1. Starting the kernel under gdb
+------------------------------------
You can have the kernel running under the control of gdb from the
beginning by putting 'debug' on the command line. You will get an
@@ -2498,7 +2424,11 @@
There is a transcript of a debugging session here <debug-
session.html> , with breakpoints being set in the scheduler and in an
interrupt handler.
- 11.2. Examining sleeping processes
+
+
+11.2. Examining sleeping processes
+-----------------------------------
+
Not every bug is evident in the currently running process. Sometimes,
processes hang in the kernel when they shouldn't because they've
@@ -2516,7 +2446,7 @@
Now what you do is this:
- o detach from the current thread
+ - detach from the current thread::
(UML gdb) det
@@ -2525,7 +2455,7 @@
- o attach to the thread you are interested in
+ - attach to the thread you are interested in::
(UML gdb) att <host pid>
@@ -2534,7 +2464,7 @@
- o look at its stack and anything else of interest
+ - look at its stack and anything else of interest::
(UML gdb) bt
@@ -2545,18 +2475,14 @@
Note that you can't do anything at this point that requires that a
process execute, e.g. calling a function
- o when you're done looking at that process, reattach to the current
- thread and continue it
+ - when you're done looking at that process, reattach to the current
+ thread and continue it::
(UML gdb)
att 1
-
-
-
-
(UML gdb)
c
@@ -2569,12 +2495,13 @@
- 11.3. Running ddd on UML
+11.3. Running ddd on UML
+-------------------------
ddd works on UML, but requires a special kludge. The process goes
like this:
- o Start ddd
+ - Start ddd::
host% ddd linux
@@ -2583,14 +2510,14 @@
- o With ps, get the pid of the gdb that ddd started. You can ask the
+ - With ps, get the pid of the gdb that ddd started. You can ask the
gdb to tell you, but for some reason that confuses things and
causes a hang.
- o run UML with 'debug=parent gdb-pid=<pid>' added to the command line
+ - run UML with 'debug=parent gdb-pid=<pid>' added to the command line
- it will just sit there after you hit return
- o type 'att 1' to the ddd gdb and you will see something like
+ - type 'att 1' to the ddd gdb and you will see something like::
0xa013dc51 in __kill ()
@@ -2602,12 +2529,14 @@
- o At this point, type 'c', UML will boot up, and you can use ddd just
+ - At this point, type 'c', UML will boot up, and you can use ddd just
as you do on any other process.
- 11.4. Debugging modules
+11.4. Debugging modules
+------------------------
+
gdb has support for debugging code which is dynamically loaded into
the process. This support is what is needed to debug kernel modules
@@ -2629,7 +2558,8 @@
First, you must tell it where your modules are. There is a list in
- the script that looks like this:
+ the script that looks like this::
+
set MODULE_PATHS {
"fat" "/usr/src/uml/linux-2.4.18/fs/fat/fat.o"
"isofs" "/usr/src/uml/linux-2.4.18/fs/isofs/isofs.o"
@@ -2641,9 +2571,7 @@
You change that to list the names and paths of the modules that you
are going to debug. Then you run it from the toplevel directory of
- your UML pool and it basically tells you what to do:
-
-
+ your UML pool and it basically tells you what to do::
******** GDB pid is 21903 ********
@@ -2666,7 +2594,7 @@
After you run UML and it sits there doing nothing, you hit return at
- the 'att 1' and continue it:
+ the 'att 1' and continue it::
Attaching to program: /home/jdike/linux/2.4/um/./linux, process 1
@@ -2678,63 +2606,48 @@
At this point, you debug normally. When you insmod something, the
- expect magic will kick in and you'll see something like:
+ expect magic will kick in and you'll see something like::
+ *** Module hostfs loaded ***
+ Breakpoint 1, sys_init_module (name_user=0x805abb0 "hostfs",
+ mod_user=0x8070e00) at module.c:349
+ 349 char *name, *n_name, *name_tmp = NULL;
+ (UML gdb) finish
+ Run till exit from #0 sys_init_module (name_user=0x805abb0 "hostfs",
+ mod_user=0x8070e00) at module.c:349
+ 0xa00e2e23 in execute_syscall (r=0xa8140284) at syscall_kern.c:411
+ 411 else res = EXECUTE_SYSCALL(syscall, regs);
+ Value returned is $1 = 0
+ (UML gdb)
+ p/x (int)module_list + module_list->size_of_struct
+ $2 = 0xa9021054
+ (UML gdb) symbol-file ./linux
+ Load new symbol table from "./linux"? (y or n) y
+ Reading symbols from ./linux...
+ done.
+ (UML gdb)
+ add-symbol-file /home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o 0xa9021054
+ add symbol table from file "/home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o" at
+ .text_addr = 0xa9021054
+ (y or n) y
-
-
-
-
-
-
-
-
-
-
-
-
- *** Module hostfs loaded ***
- Breakpoint 1, sys_init_module (name_user=0x805abb0 "hostfs",
- mod_user=0x8070e00) at module.c:349
- 349 char *name, *n_name, *name_tmp = NULL;
- (UML gdb) finish
- Run till exit from #0 sys_init_module (name_user=0x805abb0 "hostfs",
- mod_user=0x8070e00) at module.c:349
- 0xa00e2e23 in execute_syscall (r=0xa8140284) at syscall_kern.c:411
- 411 else res = EXECUTE_SYSCALL(syscall, regs);
- Value returned is $1 = 0
- (UML gdb)
- p/x (int)module_list + module_list->size_of_struct
-
- $2 = 0xa9021054
- (UML gdb) symbol-file ./linux
- Load new symbol table from "./linux"? (y or n) y
- Reading symbols from ./linux...
- done.
- (UML gdb)
- add-symbol-file /home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o 0xa9021054
-
- add symbol table from file "/home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o" at
- .text_addr = 0xa9021054
- (y or n) y
-
- Reading symbols from /home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o...
- done.
- (UML gdb) p *module_list
- $1 = {size_of_struct = 84, next = 0xa0178720, name = 0xa9022de0 "hostfs",
- size = 9016, uc = {usecount = {counter = 0}, pad = 0}, flags = 1,
- nsyms = 57, ndeps = 0, syms = 0xa9023170, deps = 0x0, refs = 0x0,
- init = 0xa90221f0 <init_hostfs>, cleanup = 0xa902222c <exit_hostfs>,
- ex_table_start = 0x0, ex_table_end = 0x0, persist_start = 0x0,
- persist_end = 0x0, can_unload = 0, runsize = 0, kallsyms_start = 0x0,
- kallsyms_end = 0x0,
- archdata_start = 0x1b855 <Address 0x1b855 out of bounds>,
- archdata_end = 0xe5890000 <Address 0xe5890000 out of bounds>,
- kernel_data = 0xf689c35d <Address 0xf689c35d out of bounds>}
- >> Finished loading symbols for hostfs ...
+ Reading symbols from /home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o...
+ done.
+ (UML gdb) p *module_list
+ $1 = {size_of_struct = 84, next = 0xa0178720, name = 0xa9022de0 "hostfs",
+ size = 9016, uc = {usecount = {counter = 0}, pad = 0}, flags = 1,
+ nsyms = 57, ndeps = 0, syms = 0xa9023170, deps = 0x0, refs = 0x0,
+ init = 0xa90221f0 <init_hostfs>, cleanup = 0xa902222c <exit_hostfs>,
+ ex_table_start = 0x0, ex_table_end = 0x0, persist_start = 0x0,
+ persist_end = 0x0, can_unload = 0, runsize = 0, kallsyms_start = 0x0,
+ kallsyms_end = 0x0,
+ archdata_start = 0x1b855 <Address 0x1b855 out of bounds>,
+ archdata_end = 0xe5890000 <Address 0xe5890000 out of bounds>,
+ kernel_data = 0xf689c35d <Address 0xf689c35d out of bounds>}
+ >> Finished loading symbols for hostfs ...
@@ -2744,7 +2657,7 @@
Boot the kernel under the debugger and load the module with insmod or
- modprobe. With gdb, do:
+ modprobe. With gdb, do::
(UML gdb) p module_list
@@ -2758,12 +2671,12 @@
the name fields until find the module you want to debug. Take the
address of that structure, and add module.size_of_struct (which in
2.4.10 kernels is 96 (0x60)) to it. Gdb can make this hard addition
- for you :-):
+ for you :-)::
- (UML gdb)
- printf "%#x\n", (int)module_list module_list->size_of_struct
+ (UML gdb)
+ printf "%#x\n", (int)module_list module_list->size_of_struct
@@ -2771,7 +2684,7 @@
The offset from the module start occasionally changes (before 2.4.0,
it was module.size_of_struct + 4), so it's a good idea to check the
init and cleanup addresses once in a while, as describe below. Now
- do:
+ do::
(UML gdb)
@@ -2786,7 +2699,7 @@
If there's any doubt that you got the offset right, like breakpoints
appear not to work, or they're appearing in the wrong place, you can
check it by looking at the module structure. The init and cleanup
- fields should look like:
+ fields should look like::
init = 0x588066b0 <init_hostfs>, cleanup = 0x588066c0 <exit_hostfs>
@@ -2801,7 +2714,7 @@
When you want to load in a new version of the module, you need to get
gdb to forget about the old one. The only way I've found to do that
- is to tell gdb to forget about all symbols that it knows about:
+ is to tell gdb to forget about all symbols that it knows about::
(UML gdb) symbol-file
@@ -2809,7 +2722,7 @@
- Then reload the symbols from the kernel binary:
+ Then reload the symbols from the kernel binary::
(UML gdb) symbol-file /path/to/kernel
@@ -2823,17 +2736,19 @@
- 11.5. Attaching gdb to the kernel
+11.5. Attaching gdb to the kernel
+----------------------------------
If you don't have the kernel running under gdb, you can attach gdb to
it later by sending the tracing thread a SIGUSR1. The first line of
- the console output identifies its pid:
+ the console output identifies its pid::
+
tracing thread pid = 20093
- When you send it the signal:
+ When you send it the signal::
host% kill -USR1 20093
@@ -2845,7 +2760,7 @@
If you have the mconsole compiled into UML, then the mconsole client
- can be used to start gdb:
+ can be used to start gdb::
(mconsole) (mconsole) config gdb=xterm
@@ -2857,7 +2772,8 @@
- 11.6. Using alternate debuggers
+11.6. Using alternate debuggers
+--------------------------------
UML has support for attaching to an already running debugger rather
than starting gdb itself. This is present in CVS as of 17 Apr 2001.
@@ -2886,7 +2802,7 @@
An example of an alternate debugger is strace. You can strace the
actual kernel as follows:
- o Run the following in a shell
+ - Run the following in a shell::
host%
@@ -2894,13 +2810,13 @@
- o Run UML with 'debug' and 'gdb-pid=<pid>' with the pid printed out
+ - Run UML with 'debug' and 'gdb-pid=<pid>' with the pid printed out
by the previous command
- o Hit return in the shell, and UML will start running, and strace
+ - Hit return in the shell, and UML will start running, and strace
output will start accumulating in the output file.
- Note that this is different from running
+ Note that this is different from running::
host% strace ./linux
@@ -2917,95 +2833,57 @@
- 12. Kernel debugging examples
+12. Kernel debugging examples
+==============================
- 12.1. The case of the hung fsck
+12.1. The case of the hung fsck
+--------------------------------
When booting up the kernel, fsck failed, and dropped me into a shell
- to fix things up. I ran fsck -y, which hung:
+ to fix things up. I ran fsck -y, which hung::
+ Setting hostname uml [ OK ]
+ Checking root filesystem
+ /dev/fhd0 was not cleanly unmounted, check forced.
+ Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780.
+ /dev/fhd0: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.
+ (i.e., without -a or -p options)
+ [ FAILED ]
+ *** An error occurred during the file system check.
+ *** Dropping you to a shell; the system will reboot
+ *** when you leave the shell.
+ Give root password for maintenance
+ (or type Control-D for normal startup):
+ [root@uml /root]# fsck -y /dev/fhd0
+ fsck -y /dev/fhd0
+ Parallelizing fsck version 1.14 (9-Jan-1999)
+ e2fsck 1.14, 9-Jan-1999 for EXT2 FS 0.5b, 95/08/09
+ /dev/fhd0 contains a file system with errors, check forced.
+ Pass 1: Checking inodes, blocks, and sizes
+ Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780. Ignore error? yes
+ Inode 19780, i_blocks is 1548, should be 540. Fix? yes
+ Pass 2: Checking directory structure
+ Error reading block 49405 (Attempt to read block from filesystem resulted in short read). Ignore error? yes
+ Directory inode 11858, block 0, offset 0: directory corrupted
+ Salvage? yes
+ Missing '.' in directory inode 11858.
+ Fix? yes
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Setting hostname uml [ OK ]
- Checking root filesystem
- /dev/fhd0 was not cleanly unmounted, check forced.
- Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780.
-
- /dev/fhd0: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.
- (i.e., without -a or -p options)
- [ FAILED ]
-
- *** An error occurred during the file system check.
- *** Dropping you to a shell; the system will reboot
- *** when you leave the shell.
- Give root password for maintenance
- (or type Control-D for normal startup):
-
- [root@uml /root]# fsck -y /dev/fhd0
- fsck -y /dev/fhd0
- Parallelizing fsck version 1.14 (9-Jan-1999)
- e2fsck 1.14, 9-Jan-1999 for EXT2 FS 0.5b, 95/08/09
- /dev/fhd0 contains a file system with errors, check forced.
- Pass 1: Checking inodes, blocks, and sizes
- Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780. Ignore error? yes
-
- Inode 19780, i_blocks is 1548, should be 540. Fix? yes
-
- Pass 2: Checking directory structure
- Error reading block 49405 (Attempt to read block from filesystem resulted in short read). Ignore error? yes
-
- Directory inode 11858, block 0, offset 0: directory corrupted
- Salvage? yes
-
- Missing '.' in directory inode 11858.
- Fix? yes
-
- Missing '..' in directory inode 11858.
- Fix? yes
-
-
-
+ Missing '..' in directory inode 11858.
+ Fix? yes
The standard drill in this sort of situation is to fire up gdb on the
signal thread, which, in this case, was pid 1935. In another window,
- I run gdb and attach pid 1935.
-
-
+ I run gdb and attach pid 1935::
~/linux/2.3.26/um 1016: gdb linux
@@ -3022,11 +2900,7 @@
0x100756d9 in __wait4 ()
-
-
-
-
- Let's see what's currently running:
+ Let's see what's currently running::
@@ -3041,7 +2915,7 @@
reason and never woke up.
- Let's guess that the last process in the process list is fsck:
+ Let's guess that the last process in the process list is fsck::
@@ -3052,7 +2926,7 @@
- It is, so let's see what it thinks it's up to:
+ It is, so let's see what it thinks it's up to::
@@ -3068,8 +2942,6 @@
-
-
The interesting things here are the fact that its .thread.syscall.id
is __NR_write (see the big switch in arch/um/kernel/syscall_kern.c or
the defines in include/asm-um/arch/unistd.h), and that it never
@@ -3081,30 +2953,20 @@
The fact that it never returned from write means that its stack should
be fairly interesting. Its pid is 1980 (.thread.extern_pid). That
process is being ptraced by the signal thread, so it must be detached
- before gdb can attach it:
+ before gdb can attach it::
+ (gdb) call detach(1980)
-
-
-
-
-
-
- (gdb) call detach(1980)
-
- Program received signal SIGSEGV, Segmentation fault.
- <function called from gdb>
- The program being debugged stopped while in a function called from GDB.
- When the function (detach) is done executing, GDB will silently
- stop (instead of continuing to evaluate the expression containing
- the function call).
- (gdb) call detach(1980)
- $15 = 0
-
-
-
+ Program received signal SIGSEGV, Segmentation fault.
+ <function called from gdb>
+ The program being debugged stopped while in a function called from GDB.
+ When the function (detach) is done executing, GDB will silently
+ stop (instead of continuing to evaluate the expression containing
+ the function call).
+ (gdb) call detach(1980)
+ $15 = 0
The first detach segfaults for some reason, and the second one
@@ -3112,7 +2974,7 @@
Now I detach from the signal thread, attach to the fsck thread, and
- look at its stack:
+ look at its stack::
(gdb) det
@@ -3152,14 +3014,14 @@
- The interesting things here are :
+ The interesting things here are:
- o There are two segfaults on this stack (frames 9 and 14)
+ - There are two segfaults on this stack (frames 9 and 14)
- o The first faulting address (frame 11) is 0x50000800
+ - The first faulting address (frame 11) is 0x50000800::
- (gdb) p (void *)1342179328
- $16 = (void *) 0x50000800
+ (gdb) p (void *)1342179328
+ $16 = (void *) 0x50000800
@@ -3175,7 +3037,7 @@
However, the more immediate problem is that second segfault and I'm
going to concentrate on that. First, I want to see where the fault
- happened, so I have to go look at the sigcontent struct in frame 8:
+ happened, so I have to go look at the sigcontent struct in frame 8::
@@ -3211,7 +3073,7 @@
- That's not very useful, so I'll try a more manual method:
+ That's not very useful, so I'll try a more manual method::
(gdb) p *((struct sigcontext *) (&sig + 1))
@@ -3224,7 +3086,7 @@
- The ip is in handle_mm_fault:
+ The ip is in handle_mm_fault::
(gdb) p (void *)268480945
@@ -3236,7 +3098,7 @@
- Specifically, it's in pte_alloc:
+ Specifically, it's in pte_alloc::
(gdb) i line *$20
@@ -3249,7 +3111,7 @@
To find where in handle_mm_fault this is, I'll jump forward in the
- code until I see an address in that procedure:
+ code until I see an address in that procedure::
@@ -3286,21 +3148,21 @@
Something is apparently wrong with the page tables or vma_structs, so
- lets go back to frame 11 and have a look at them:
+ lets go back to frame 11 and have a look at them::
- #11 0x1006c0aa in segv (address=1342179328, is_write=2) at trap_kern.c:50
- 50 handle_mm_fault(current, vma, address, is_write);
- (gdb) call pgd_offset_proc(vma->vm_mm, address)
- $22 = (pgd_t *) 0x80a548c
+ #11 0x1006c0aa in segv (address=1342179328, is_write=2) at trap_kern.c:50
+ 50 handle_mm_fault(current, vma, address, is_write);
+ (gdb) call pgd_offset_proc(vma->vm_mm, address)
+ $22 = (pgd_t *) 0x80a548c
That's pretty bogus. Page tables aren't supposed to be in process
- text or data areas. Let's see what's in the vma:
+ text or data areas. Let's see what's in the vma::
(gdb) p *vma
@@ -3325,12 +3187,9 @@
-
-
This also pretty bogus. With all of the 0x80xxxxx and 0xaffffxxx
addresses, this is looking like a stack was plonked down on top of
- these structures. Maybe it's a stack overflow from the next page:
-
+ these structures. Maybe it's a stack overflow from the next page::
(gdb) p vma
@@ -3338,52 +3197,36 @@
-
-
That's towards the lower quarter of the page, so that would have to
- have been pretty heavy stack overflow:
+ have been pretty heavy stack overflow::
-
-
-
-
-
-
-
-
-
-
-
-
- (gdb) x/100x $25
- 0x507d2434: 0x507d2434 0x00000000 0x08048000 0x080a4f8c
- 0x507d2444: 0x00000000 0x080a79e0 0x080a8c94 0x080d1000
- 0x507d2454: 0xaffffdb0 0xaffffe63 0xaffffe7a 0xaffffe7a
- 0x507d2464: 0xafffffec 0x00000062 0x0000008a 0x00000000
- 0x507d2474: 0x00000000 0x00000000 0x00000000 0x00000000
- 0x507d2484: 0x00000000 0x00000000 0x00000000 0x00000000
- 0x507d2494: 0x00000000 0x00000000 0x507d2fe0 0x00000000
- 0x507d24a4: 0x00000000 0x00000000 0x00000000 0x00000000
- 0x507d24b4: 0x00000000 0x00000000 0x00000000 0x00000000
- 0x507d24c4: 0x00000000 0x00000000 0x00000000 0x00000000
- 0x507d24d4: 0x00000000 0x00000000 0x00000000 0x00000000
- 0x507d24e4: 0x00000000 0x00000000 0x00000000 0x00000000
- 0x507d24f4: 0x00000000 0x00000000 0x00000000 0x00000000
- 0x507d2504: 0x00000000 0x00000000 0x00000000 0x00000000
- 0x507d2514: 0x00000000 0x00000000 0x00000000 0x00000000
- 0x507d2524: 0x00000000 0x00000000 0x00000000 0x00000000
- 0x507d2534: 0x00000000 0x00000000 0x507d25dc 0x00000000
- 0x507d2544: 0x00000000 0x00000000 0x00000000 0x00000000
- 0x507d2554: 0x00000000 0x00000000 0x00000000 0x00000000
- 0x507d2564: 0x00000000 0x00000000 0x00000000 0x00000000
- 0x507d2574: 0x00000000 0x00000000 0x00000000 0x00000000
- 0x507d2584: 0x00000000 0x00000000 0x00000000 0x00000000
- 0x507d2594: 0x00000000 0x00000000 0x00000000 0x00000000
- 0x507d25a4: 0x00000000 0x00000000 0x00000000 0x00000000
- 0x507d25b4: 0x00000000 0x00000000 0x00000000 0x00000000
-
-
+ (gdb) x/100x $25
+ 0x507d2434: 0x507d2434 0x00000000 0x08048000 0x080a4f8c
+ 0x507d2444: 0x00000000 0x080a79e0 0x080a8c94 0x080d1000
+ 0x507d2454: 0xaffffdb0 0xaffffe63 0xaffffe7a 0xaffffe7a
+ 0x507d2464: 0xafffffec 0x00000062 0x0000008a 0x00000000
+ 0x507d2474: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2484: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2494: 0x00000000 0x00000000 0x507d2fe0 0x00000000
+ 0x507d24a4: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d24b4: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d24c4: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d24d4: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d24e4: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d24f4: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2504: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2514: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2524: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2534: 0x00000000 0x00000000 0x507d25dc 0x00000000
+ 0x507d2544: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2554: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2564: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2574: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2584: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2594: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d25a4: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d25b4: 0x00000000 0x00000000 0x00000000 0x00000000
@@ -3399,65 +3242,53 @@
on will be somewhat clearer.
- 12.2. Episode 2: The case of the hung fsck
+12.2. Episode 2: The case of the hung fsck
+-------------------------------------------
After setting a trap in the SEGV handler for accesses to the signal
thread's stack, I reran the kernel.
- fsck hung again, this time by hitting the trap:
+ fsck hung again, this time by hitting the trap::
+ Setting hostname uml [ OK ]
+ Checking root filesystem
+ /dev/fhd0 contains a file system with errors, check forced.
+ Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780.
+ /dev/fhd0: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.
+ (i.e., without -a or -p options)
+ [ FAILED ]
+ *** An error occurred during the file system check.
+ *** Dropping you to a shell; the system will reboot
+ *** when you leave the shell.
+ Give root password for maintenance
+ (or type Control-D for normal startup):
+ [root@uml /root]# fsck -y /dev/fhd0
+ fsck -y /dev/fhd0
+ Parallelizing fsck version 1.14 (9-Jan-1999)
+ e2fsck 1.14, 9-Jan-1999 for EXT2 FS 0.5b, 95/08/09
+ /dev/fhd0 contains a file system with errors, check forced.
+ Pass 1: Checking inodes, blocks, and sizes
+ Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780. Ignore error? yes
+ Pass 2: Checking directory structure
+ Error reading block 49405 (Attempt to read block from filesystem resulted in short read). Ignore error? yes
+ Directory inode 11858, block 0, offset 0: directory corrupted
+ Salvage? yes
+ Missing '.' in directory inode 11858.
+ Fix? yes
+ Missing '..' in directory inode 11858.
+ Fix? yes
-
-
-
-
-
- Setting hostname uml [ OK ]
- Checking root filesystem
- /dev/fhd0 contains a file system with errors, check forced.
- Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780.
-
- /dev/fhd0: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.
- (i.e., without -a or -p options)
- [ FAILED ]
-
- *** An error occurred during the file system check.
- *** Dropping you to a shell; the system will reboot
- *** when you leave the shell.
- Give root password for maintenance
- (or type Control-D for normal startup):
-
- [root@uml /root]# fsck -y /dev/fhd0
- fsck -y /dev/fhd0
- Parallelizing fsck version 1.14 (9-Jan-1999)
- e2fsck 1.14, 9-Jan-1999 for EXT2 FS 0.5b, 95/08/09
- /dev/fhd0 contains a file system with errors, check forced.
- Pass 1: Checking inodes, blocks, and sizes
- Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780. Ignore error? yes
-
- Pass 2: Checking directory structure
- Error reading block 49405 (Attempt to read block from filesystem resulted in short read). Ignore error? yes
-
- Directory inode 11858, block 0, offset 0: directory corrupted
- Salvage? yes
-
- Missing '.' in directory inode 11858.
- Fix? yes
-
- Missing '..' in directory inode 11858.
- Fix? yes
-
- Untested (4127) [100fe44c]: trap_kern.c line 31
+ Untested (4127) [100fe44c]: trap_kern.c line 31
@@ -3465,7 +3296,7 @@
I need to get the signal thread to detach from pid 4127 so that I can
attach to it with gdb. This is done by sending it a SIGUSR1, which is
- caught by the signal thread, which detaches the process:
+ caught by the signal thread, which detaches the process::
kill -USR1 4127
@@ -3474,31 +3305,20 @@
- Now I can run gdb on it:
+ Now I can run gdb on it::
-
-
-
-
-
-
-
-
-
-
-
- ~/linux/2.3.26/um 1034: gdb linux
- GNU gdb 4.17.0.11 with Linux support
- Copyright 1998 Free Software Foundation, Inc.
- GDB is free software, covered by the GNU General Public License, and you are
- welcome to change it and/or distribute copies of it under certain conditions.
- Type "show copying" to see the conditions.
- There is absolutely no warranty for GDB. Type "show warranty" for details.
- This GDB was configured as "i386-redhat-linux"...
- (gdb) att 4127
- Attaching to program `/home/dike/linux/2.3.26/um/linux', Pid 4127
- 0x10075891 in __libc_nanosleep ()
+ ~/linux/2.3.26/um 1034: gdb linux
+ GNU gdb 4.17.0.11 with Linux support
+ Copyright 1998 Free Software Foundation, Inc.
+ GDB is free software, covered by the GNU General Public License, and you are
+ welcome to change it and/or distribute copies of it under certain conditions.
+ Type "show copying" to see the conditions.
+ There is absolutely no warranty for GDB. Type "show warranty" for details.
+ This GDB was configured as "i386-redhat-linux"...
+ (gdb) att 4127
+ Attaching to program `/home/dike/linux/2.3.26/um/linux', Pid 4127
+ 0x10075891 in __libc_nanosleep ()
@@ -3506,7 +3326,7 @@
The backtrace shows that it was in a write and that the fault address
(address in frame 3) is 0x50000800, which is right in the middle of
- the signal thread's stack page:
+ the signal thread's stack page::
(gdb) bt
@@ -3540,58 +3360,48 @@
-
-
Going up the stack to the segv_handler frame and looking at where in
the code the access happened shows that it happened near line 110 of
- block_dev.c:
+ block_dev.c::
-
-
-
-
-
-
- (gdb) up
- #1 0x1007584d in __sleep (seconds=1000000)
- at ../sysdeps/unix/sysv/linux/sleep.c:78
- ../sysdeps/unix/sysv/linux/sleep.c:78: No such file or directory.
- (gdb)
- #2 0x1006ce9a in stop () at user_util.c:191
- 191 while(1) sleep(1000000);
- (gdb)
- #3 0x1006bf88 in segv (address=1342179328, is_write=2) at trap_kern.c:31
- 31 KERN_UNTESTED();
- (gdb)
- #4 0x1006c628 in segv_handler (sc=0x5006eaf8) at trap_user.c:174
- 174 segv(sc->cr2, sc->err & 2);
- (gdb) p *sc
- $1 = {gs = 0, __gsh = 0, fs = 0, __fsh = 0, es = 43, __esh = 0, ds = 43,
- __dsh = 0, edi = 1342179328, esi = 134973440, ebp = 1342631484,
- esp = 1342630864, ebx = 256, edx = 0, ecx = 256, eax = 1024, trapno = 14,
- err = 6, eip = 268550834, cs = 35, __csh = 0, eflags = 66070,
- esp_at_signal = 1342630864, ss = 43, __ssh = 0, fpstate = 0x0, oldmask = 0,
- cr2 = 1342179328}
- (gdb) p (void *)268550834
- $2 = (void *) 0x1001c2b2
- (gdb) i sym $2
- block_write + 1090 in section .text
- (gdb) i line *$2
- Line 209 of "/home/dike/linux/2.3.26/um/include/asm/arch/string.h"
- starts at address 0x1001c2a1 <block_write+1073>
- and ends at 0x1001c2bf <block_write+1103>.
- (gdb) i line *0x1001c2c0
- Line 110 of "block_dev.c" starts at address 0x1001c2bf <block_write+1103>
- and ends at 0x1001c2e3 <block_write+1139>.
-
-
+ (gdb) up
+ #1 0x1007584d in __sleep (seconds=1000000)
+ at ../sysdeps/unix/sysv/linux/sleep.c:78
+ ../sysdeps/unix/sysv/linux/sleep.c:78: No such file or directory.
+ (gdb)
+ #2 0x1006ce9a in stop () at user_util.c:191
+ 191 while(1) sleep(1000000);
+ (gdb)
+ #3 0x1006bf88 in segv (address=1342179328, is_write=2) at trap_kern.c:31
+ 31 KERN_UNTESTED();
+ (gdb)
+ #4 0x1006c628 in segv_handler (sc=0x5006eaf8) at trap_user.c:174
+ 174 segv(sc->cr2, sc->err & 2);
+ (gdb) p *sc
+ $1 = {gs = 0, __gsh = 0, fs = 0, __fsh = 0, es = 43, __esh = 0, ds = 43,
+ __dsh = 0, edi = 1342179328, esi = 134973440, ebp = 1342631484,
+ esp = 1342630864, ebx = 256, edx = 0, ecx = 256, eax = 1024, trapno = 14,
+ err = 6, eip = 268550834, cs = 35, __csh = 0, eflags = 66070,
+ esp_at_signal = 1342630864, ss = 43, __ssh = 0, fpstate = 0x0, oldmask = 0,
+ cr2 = 1342179328}
+ (gdb) p (void *)268550834
+ $2 = (void *) 0x1001c2b2
+ (gdb) i sym $2
+ block_write + 1090 in section .text
+ (gdb) i line *$2
+ Line 209 of "/home/dike/linux/2.3.26/um/include/asm/arch/string.h"
+ starts at address 0x1001c2a1 <block_write+1073>
+ and ends at 0x1001c2bf <block_write+1103>.
+ (gdb) i line *0x1001c2c0
+ Line 110 of "block_dev.c" starts at address 0x1001c2bf <block_write+1103>
+ and ends at 0x1001c2e3 <block_write+1139>.
Looking at the source shows that the fault happened during a call to
- copy_from_user to copy the data into the kernel:
+ copy_from_user to copy the data into the kernel::
107 count -= chars;
@@ -3601,10 +3411,8 @@
-
-
p is the pointer which must contain 0x50000800, since buf contains
- 0x80b8800 (frame 8 above). It is defined as:
+ 0x80b8800 (frame 8 above). It is defined as::
p = offset + bh->b_data;
@@ -3615,24 +3423,22 @@
I need to figure out what bh is, and it just so happens that bh is
passed as an argument to mark_buffer_uptodate and mark_buffer_dirty a
- few lines later, so I do a little disassembly:
+ few lines later, so I do a little disassembly::
-
-
- (gdb) disas 0x1001c2bf 0x1001c2e0
- Dump of assembler code from 0x1001c2bf to 0x1001c2d0:
- 0x1001c2bf <block_write+1103>: addl %eax,0xc(%ebp)
- 0x1001c2c2 <block_write+1106>: movl 0xfffffdd4(%ebp),%edx
- 0x1001c2c8 <block_write+1112>: btsl $0x0,0x18(%edx)
- 0x1001c2cd <block_write+1117>: btsl $0x1,0x18(%edx)
- 0x1001c2d2 <block_write+1122>: sbbl %ecx,%ecx
- 0x1001c2d4 <block_write+1124>: testl %ecx,%ecx
- 0x1001c2d6 <block_write+1126>: jne 0x1001c2e3 <block_write+1139>
- 0x1001c2d8 <block_write+1128>: pushl $0x0
- 0x1001c2da <block_write+1130>: pushl %edx
- 0x1001c2db <block_write+1131>: call 0x1001819c <__mark_buffer_dirty>
- End of assembler dump.
+ (gdb) disas 0x1001c2bf 0x1001c2e0
+ Dump of assembler code from 0x1001c2bf to 0x1001c2d0:
+ 0x1001c2bf <block_write+1103>: addl %eax,0xc(%ebp)
+ 0x1001c2c2 <block_write+1106>: movl 0xfffffdd4(%ebp),%edx
+ 0x1001c2c8 <block_write+1112>: btsl $0x0,0x18(%edx)
+ 0x1001c2cd <block_write+1117>: btsl $0x1,0x18(%edx)
+ 0x1001c2d2 <block_write+1122>: sbbl %ecx,%ecx
+ 0x1001c2d4 <block_write+1124>: testl %ecx,%ecx
+ 0x1001c2d6 <block_write+1126>: jne 0x1001c2e3 <block_write+1139>
+ 0x1001c2d8 <block_write+1128>: pushl $0x0
+ 0x1001c2da <block_write+1130>: pushl %edx
+ 0x1001c2db <block_write+1131>: call 0x1001819c <__mark_buffer_dirty>
+ End of assembler dump.
@@ -3640,7 +3446,7 @@
At that point, bh is in %edx (address 0x1001c2da), which is calculated
at 0x1001c2c2 as %ebp + 0xfffffdd4, so I figure exactly what that is,
- taking %ebp from the sigcontext_struct above:
+ taking %ebp from the sigcontext_struct above::
(gdb) p (void *)1342631484
@@ -3657,7 +3463,7 @@
Now, I look at the structure to see what's in it, and particularly,
- what its b_data field contains:
+ what its b_data field contains::
(gdb) p *((struct buffer_head *)0x50100200)
@@ -3682,18 +3488,18 @@
The b_page field is a pointer to the page_struct representing the
0x50000000 page. Looking at it shows the kernel's idea of the state
- of that page:
+ of that page::
- (gdb) p *$13.b_page
- $17 = {list = {next = 0x50004a5c, prev = 0x100c5174}, mapping = 0x0,
- index = 0, next_hash = 0x0, count = {counter = 1}, flags = 132, lru = {
- next = 0x50008460, prev = 0x50019350}, wait = {
- lock = <optimized out or zero length>, task_list = {next = 0x50004024,
- prev = 0x50004024}, __magic = 1342193708, __creator = 0},
- pprev_hash = 0x0, buffers = 0x501002c0, virtual = 1342177280,
- zone = 0x100c5160}
+ (gdb) p *$13.b_page
+ $17 = {list = {next = 0x50004a5c, prev = 0x100c5174}, mapping = 0x0,
+ index = 0, next_hash = 0x0, count = {counter = 1}, flags = 132, lru = {
+ next = 0x50008460, prev = 0x50019350}, wait = {
+ lock = <optimized out or zero length>, task_list = {next = 0x50004024,
+ prev = 0x50004024}, __magic = 1342193708, __creator = 0},
+ pprev_hash = 0x0, buffers = 0x501002c0, virtual = 1342177280,
+ zone = 0x100c5160}
@@ -3702,7 +3508,7 @@
Some sanity-checking: the virtual field shows the "virtual" address of
this page, which in this kernel is the same as its "physical" address,
and the page_struct itself should be mem_map[0], since it represents
- the first page of memory:
+ the first page of memory::
@@ -3719,7 +3525,7 @@
Now to check out the page_struct itself. In particular, the flags
- field shows whether the page is considered free or not:
+ field shows whether the page is considered free or not::
(gdb) p (void *)132
@@ -3739,7 +3545,7 @@
In my setup_arch procedure, I have the following code which looks just
- fine:
+ fine::
@@ -3762,7 +3568,7 @@
Stepping into init_bootmem, and looking at bootmem_map before looking
- at what it contains shows the following:
+ at what it contains shows the following::
@@ -3788,18 +3594,20 @@
- 13. What to do when UML doesn't work
+13. What to do when UML doesn't work
+=====================================
- 13.1. Strange compilation errors when you build from source
+13.1. Strange compilation errors when you build from source
+------------------------------------------------------------
As of test11, it is necessary to have "ARCH=um" in the environment or
on the make command line for all steps in building UML, including
clean, distclean, or mrproper, config, menuconfig, or xconfig, dep,
and linux. If you forget for any of them, the i386 build seems to
- contaminate the UML build. If this happens, start from scratch with
+ contaminate the UML build. If this happens, start from scratch with::
host%
@@ -3811,7 +3619,7 @@
and repeat the build process with ARCH=um on all the steps.
- See ``Compiling the kernel and modules'' for more details.
+ See :ref:`Compiling_the_kernel_and_modules` for more details.
Another cause of strange compilation errors is building UML in
@@ -3824,11 +3632,11 @@
- 13.3. A variety of panics and hangs with /tmp on a reiserfs filesys-
- tem
+13.3. A variety of panics and hangs with /tmp on a reiserfs filesystem
+-----------------------------------------------------------------------
I saw this on reiserfs 3.5.21 and it seems to be fixed in 3.5.27.
- Panics preceded by
+ Panics preceded by::
Detaching pid nnnn
@@ -3854,17 +3662,19 @@
- 13.5. UML doesn't work when /tmp is an NFS filesystem
+13.5. UML doesn't work when /tmp is an NFS filesystem
+------------------------------------------------------
This seems to be a similar situation with the ReiserFS problem above.
Some versions of NFS seems not to handle mmap correctly, which UML
depends on. The workaround is have /tmp be a non-NFS directory.
- 13.6. UML hangs on boot when compiled with gprof support
+13.6. UML hangs on boot when compiled with gprof support
+---------------------------------------------------------
If you build UML with gprof support and, early in the boot, it does
- this
+ this::
kernel BUG at page_alloc.c:100!
@@ -3878,10 +3688,11 @@
- 13.7. syslogd dies with a SIGTERM on startup
+13.7. syslogd dies with a SIGTERM on startup
+---------------------------------------------
The exact boot error depends on the distribution that you're booting,
- but Debian produces this:
+ but Debian produces this::
/etc/rc2.d/S10sysklogd: line 49: 93 Terminated
@@ -3891,23 +3702,21 @@
This is a syslogd bug. There's a race between a parent process
- installing a signal handler and its child sending the signal. See
- this uml-devel post <http://www.geocrawler.com/lists/3/Source-
- Forge/709/0/6612801> for the details.
+ installing a signal handler and its child sending the signal.
- 13.8. TUN/TAP networking doesn't work on a 2.4 host
+13.8. TUN/TAP networking doesn't work on a 2.4 host
+----------------------------------------------------
- There are a couple of problems which were
- <http://www.geocrawler.com/lists/3/SourceForge/597/0/> name="pointed
- out"> by Tim Robinson <timro at trkr dot net>
+ There are a couple of problems which were reported by
+ Tim Robinson <timro at trkr dot net>
- o It doesn't work on hosts running 2.4.7 (or thereabouts) or earlier.
+ - It doesn't work on hosts running 2.4.7 (or thereabouts) or earlier.
The fix is to upgrade to something more recent and then read the
next item.
- o If you see
+ - If you see::
File descriptor in bad state
@@ -3921,8 +3730,8 @@
- 13.9. You can network to the host but not to other machines on the
- net
+13.9. You can network to the host but not to other machines on the net
+=======================================================================
If you can connect to the host, and the host can connect to UML, but
you cannot connect to any other machines, then you may need to enable
@@ -3930,7 +3739,7 @@
using private IP addresses (192.168.x.x or 10.x.x.x) for host/UML
networking, rather than the public address space that your host is
connected to. UML does not enable IP Masquerading, so you will need
- to create a static rule to enable it:
+ to create a static rule to enable it::
host%
@@ -3944,11 +3753,11 @@
Documentation on IP Masquerading, and SNAT, can be found at
- www.netfilter.org <http://www.netfilter.org> .
+ http://www.netfilter.org.
If you can reach the local net, but not the outside Internet, then
- that is usually a routing problem. The UML needs a default route:
+ that is usually a routing problem. The UML needs a default route::
UML#
@@ -3972,7 +3781,8 @@
- 13.10. I have no root and I want to scream
+13.10. I have no root and I want to scream
+===========================================
Thanks to Birgit Wahlich for telling me about this strange one. It
turns out that there's a limit of six environment variables on the
@@ -3987,14 +3797,16 @@
- 13.11. UML build conflict between ptrace.h and ucontext.h
+13.11. UML build conflict between ptrace.h and ucontext.h
+==========================================================
On some older systems, /usr/include/asm/ptrace.h and
/usr/include/sys/ucontext.h define the same names. So, when they're
included together, the defines from one completely mess up the parsing
- of the other, producing errors like:
+ of the other, producing errors like::
+
/usr/include/sys/ucontext.h:47: parse error before
- `10'
+ `10`
@@ -4007,7 +3819,8 @@
- 13.12. The UML BogoMips is exactly half the host's BogoMips
+13.12. The UML BogoMips is exactly half the host's BogoMips
+------------------------------------------------------------
On i386 kernels, there are two ways of running the loop that is used
to calculate the BogoMips rating, using the TSC if it's there or using
@@ -4019,15 +3832,17 @@
- 13.13. When you run UML, it immediately segfaults
+13.13. When you run UML, it immediately segfaults
+--------------------------------------------------
If the host is configured with the 2G/2G address space split, that's
- why. See ``UML on 2G/2G hosts'' for the details on getting UML to
+ why. See ref:`UML_on_2G/2G_hosts` for the details on getting UML to
run on your host.
- 13.14. xterms appear, then immediately disappear
+13.14. xterms appear, then immediately disappear
+-------------------------------------------------
If you're running an up to date kernel with an old release of
uml_utilities, the port-helper program will not work properly, so
@@ -4039,7 +3854,8 @@
- 13.15. Any other panic, hang, or strange behavior
+13.15. Any other panic, hang, or strange behavior
+--------------------------------------------------
If you're seeing truly strange behavior, such as hangs or panics that
happen in random places, or you try running the debugger to see what's
@@ -4057,9 +3873,13 @@
it and that a fix is imminent.
- If you want to be super-helpful, read ``Diagnosing Problems'' and
+ If you want to be super-helpful, read :ref:`Diagnosing_Problems` and
follow the instructions contained therein.
- 14. Diagnosing Problems
+
+.. _Diagnosing_Problems:
+
+14. Diagnosing Problems
+========================
If you get UML to crash, hang, or otherwise misbehave, you should
@@ -4074,21 +3894,22 @@
For any diagnosis, you're going to need to build a debugging kernel.
The binaries from this site aren't debuggable. If you haven't done
- this before, read about ``Compiling the kernel and modules'' and
- ``Kernel debugging'' UML first.
+ this before, read about :ref:`Compiling_the_kernel_and_modules` and
+ :ref:`Kernel_debugging` UML first.
- 14.1. Case 1 : Normal kernel panics
+14.1. Case 1 : Normal kernel panics
+------------------------------------
The most common case is for a normal thread to panic. To debug this,
you will need to run it under the debugger (add 'debug' to the command
line). An xterm will start up with gdb running inside it. Continue
- it when it stops in start_kernel and make it crash. Now ^C gdb and
+ it when it stops in start_kernel and make it crash. Now ``^C gdb`` and
If the panic was a "Kernel mode fault", then there will be a segv
frame on the stack and I'm going to want some more information. The
- stack might look something like this:
+ stack might look something like this::
(UML gdb) backtrace
@@ -4107,7 +3928,7 @@
I'm going to want to see the symbol and line information for the value
- of ip in the segv frame. In this case, you would do the following:
+ of ip in the segv frame. In this case, you would do the following::
(UML gdb) i sym 268849158
@@ -4115,7 +3936,7 @@
- and
+ and::
(UML gdb) i line *268849158
@@ -4128,7 +3949,8 @@
to get that information from the faulting ip.
- 14.2. Case 2 : Tracing thread panics
+14.2. Case 2 : Tracing thread panics
+-------------------------------------
The less common and more painful case is when the tracing thread
panics. In this case, the kernel debugger will be useless because it
@@ -4136,7 +3958,7 @@
do is get a backtrace from the tracing thread. This is done by
figuring out what its pid is, firing up gdb, and attaching it to that
pid. You can figure out the tracing thread pid by looking at the
- first line of the console output, which will look like this:
+ first line of the console output, which will look like this::
tracing thread pid = 15851
@@ -4145,7 +3967,7 @@
or by running ps on the host and finding the line that looks like
- this:
+ this::
jdike 15851 4.5 0.4 132568 1104 pts/0 S 21:34 0:05 ./linux [(tracing thread)]
@@ -4164,7 +3986,7 @@
14.3. Case 3 : Tracing thread panics caused by other threads
However, there are cases where the misbehavior of another thread
- caused the problem. The most common panic of this type is:
+ caused the problem. The most common panic of this type is::
wait_for_stop failed to wait for <pid> to stop with <signal number>
@@ -4177,7 +3999,7 @@
debugger is defunct and without some fancy footwork, another gdb can't
attach to it. So, this is how the fancy footwork goes:
- In a shell:
+ In a shell::
host% kill -STOP pid
@@ -4185,7 +4007,7 @@
- Run gdb on the tracing thread as described in case 2 and do:
+ Run gdb on the tracing thread as described in case 2 and do::
(host gdb) call detach(pid)
@@ -4193,7 +4015,7 @@
If you get a segfault, do it again. It always works the second time.
- Detach from the tracing thread and attach to that other thread:
+ Detach from the tracing thread and attach to that other thread::
(host gdb) detach
@@ -4209,7 +4031,7 @@
If gdb hangs when attaching to that process, go back to a shell and
- do:
+ do::
host%
@@ -4218,7 +4040,7 @@
- And then get the backtrace:
+ And then get the backtrace::
(host gdb) backtrace
@@ -4227,13 +4049,14 @@
- 14.4. Case 4 : Hangs
+14.4. Case 4 : Hangs
+---------------------
Hangs seem to be fairly rare, but they sometimes happen. When a hang
happens, we need a backtrace from the offending process. Run the
kernel debugger as described in case 1 and get a backtrace. If the
current process is not the idle thread, then send in the backtrace.
- You can tell that it's the idle thread if the stack looks like this:
+ You can tell that it's the idle thread if the stack looks like this::
#0 0x100b1401 in __libc_nanosleep ()
@@ -4257,7 +4080,8 @@
- 15. Thanks
+15. Thanks
+===========
A number of people have helped this project in various ways, and this
@@ -4274,20 +4098,21 @@
bookkeeping lapses and I forget about contributions.
- 15.1. Code and Documentation
+15.1. Code and Documentation
+-----------------------------
Rusty Russell <rusty at linuxcare.com.au> -
- o wrote the HOWTO <http://user-mode-
- linux.sourceforge.net/UserModeLinux-HOWTO.html>
+ - wrote the HOWTO
+ http://user-mode-linux.sourceforge.net/old/UserModeLinux-HOWTO.html
- o prodded me into making this project official and putting it on
+ - prodded me into making this project official and putting it on
SourceForge
- o came up with the way cool UML logo <http://user-mode-
- linux.sourceforge.net/uml-small.png>
+ - came up with the way cool UML logo
+ http://user-mode-linux.sourceforge.net/uml-small.png
- o redid the config process
+ - redid the config process
Peter Moulder <reiter at netspace.net.au> - Fixed my config and build
@@ -4296,34 +4121,32 @@
Bill Stearns <wstearns at pobox.com> -
- o HOWTO updates
+ - HOWTO updates
- o lots of bug reports
+ - lots of bug reports
- o lots of testing
+ - lots of testing
- o dedicated a box (uml.ists.dartmouth.edu) to support UML development
+ - dedicated a box (uml.ists.dartmouth.edu) to support UML development
- o wrote the mkrootfs script, which allows bootable filesystems of
+ - wrote the mkrootfs script, which allows bootable filesystems of
RPM-based distributions to be cranked out
- o cranked out a large number of filesystems with said script
+ - cranked out a large number of filesystems with said script
Jim Leu <jleu at mindspring.com> - Wrote the virtual ethernet driver
and associated usermode tools
- Lars Brinkhoff <http://lars.nocrew.org/> - Contributed the ptrace
- proxy from his own project <http://a386.nocrew.org/> to allow easier
- kernel debugging
+ Lars Brinkhoff http://lars.nocrew.org/ - Contributed the ptrace
+ proxy from his own project to allow easier kernel debugging
Andrea Arcangeli <andrea at suse.de> - Redid some of the early boot
code so that it would work on machines with Large File Support
- Chris Emerson <http://www.chiark.greenend.org.uk/~cemerson/> - Did
- the first UML port to Linux/ppc
+ Chris Emerson - Did the first UML port to Linux/ppc
Harald Welte <laforge at gnumonks.org> - Wrote the multicast
@@ -4338,7 +4161,7 @@
wrote the iomem emulation support
- Henrik Nordstrom <http://hem.passagen.se/hno/> - Provided a variety
+ Henrik Nordstrom http://hem.passagen.se/hno/ - Provided a variety
of patches, fixes, and clues
@@ -4373,190 +4196,193 @@
submitted patches for the slip transport and lots of other things.
- David Coulson <http://davidcoulson.net> -
+ David Coulson http://davidcoulson.net -
- o Set up the usermodelinux.org <http://usermodelinux.org> site,
+ - Set up the http://usermodelinux.org site,
which is a great way of keeping the UML user community on top of
UML goings-on.
- o Site documentation and updates
+ - Site documentation and updates
- o Nifty little UML management daemon UMLd
- <http://uml.openconsultancy.com/umld/>
+ - Nifty little UML management daemon UMLd
- o Lots of testing and bug reports
+ - Lots of testing and bug reports
- 15.2. Flushing out bugs
+15.2. Flushing out bugs
+------------------------
- o Yuri Pudgorodsky
+ - Yuri Pudgorodsky
- o Gerald Britton
+ - Gerald Britton
- o Ian Wehrman
+ - Ian Wehrman
- o Gord Lamb
+ - Gord Lamb
- o Eugene Koontz
+ - Eugene Koontz
- o John H. Hartman
+ - John H. Hartman
- o Anders Karlsson
+ - Anders Karlsson
- o Daniel Phillips
+ - Daniel Phillips
- o John Fremlin
+ - John Fremlin
- o Rainer Burgstaller
+ - Rainer Burgstaller
- o James Stevenson
+ - James Stevenson
- o Matt Clay
+ - Matt Clay
- o Cliff Jefferies
+ - Cliff Jefferies
- o Geoff Hoff
+ - Geoff Hoff
- o Lennert Buytenhek
+ - Lennert Buytenhek
- o Al Viro
+ - Al Viro
- o Frank Klingenhoefer
+ - Frank Klingenhoefer
- o Livio Baldini Soares
+ - Livio Baldini Soares
- o Jon Burgess
+ - Jon Burgess
- o Petru Paler
+ - Petru Paler
- o Paul
+ - Paul
- o Chris Reahard
+ - Chris Reahard
- o Sverker Nilsson
+ - Sverker Nilsson
- o Gong Su
+ - Gong Su
- o johan verrept
+ - johan verrept
- o Bjorn Eriksson
+ - Bjorn Eriksson
- o Lorenzo Allegrucci
+ - Lorenzo Allegrucci
- o Muli Ben-Yehuda
+ - Muli Ben-Yehuda
- o David Mansfield
+ - David Mansfield
- o Howard Goff
+ - Howard Goff
- o Mike Anderson
+ - Mike Anderson
- o John Byrne
+ - John Byrne
- o Sapan J. Batia
+ - Sapan J. Batia
- o Iris Huang
+ - Iris Huang
- o Jan Hudec
+ - Jan Hudec
- o Voluspa
+ - Voluspa
- 15.3. Buglets and clean-ups
+15.3. Buglets and clean-ups
+----------------------------
- o Dave Zarzycki
+ - Dave Zarzycki
- o Adam Lazur
+ - Adam Lazur
- o Boria Feigin
+ - Boria Feigin
- o Brian J. Murrell
+ - Brian J. Murrell
- o JS
+ - JS
- o Roman Zippel
+ - Roman Zippel
- o Wil Cooley
+ - Wil Cooley
- o Ayelet Shemesh
+ - Ayelet Shemesh
- o Will Dyson
+ - Will Dyson
- o Sverker Nilsson
+ - Sverker Nilsson
- o dvorak
+ - dvorak
- o v.naga srinivas
+ - v.naga srinivas
- o Shlomi Fish
+ - Shlomi Fish
- o Roger Binns
+ - Roger Binns
- o johan verrept
+ - johan verrept
- o MrChuoi
+ - MrChuoi
- o Peter Cleve
+ - Peter Cleve
- o Vincent Guffens
+ - Vincent Guffens
- o Nathan Scott
+ - Nathan Scott
- o Patrick Caulfield
+ - Patrick Caulfield
- o jbearce
+ - jbearce
- o Catalin Marinas
+ - Catalin Marinas
- o Shane Spencer
+ - Shane Spencer
- o Zou Min
+ - Zou Min
- o Ryan Boder
+ - Ryan Boder
- o Lorenzo Colitti
+ - Lorenzo Colitti
- o Gwendal Grignou
+ - Gwendal Grignou
- o Andre' Breiler
+ - Andre' Breiler
- o Tsutomu Yasuda
+ - Tsutomu Yasuda
- 15.4. Case Studies
+15.4. Case Studies
+-------------------
- o Jon Wright
+ - Jon Wright
- o William McEwan
+ - William McEwan
- o Michael Richardson
+ - Michael Richardson
- 15.5. Other contributions
+15.5. Other contributions
+--------------------------
Bill Carr <Bill.Carr at compaq.com> made the Red Hat mkrootfs script
work with RH 6.2.
Michael Jennings <mikejen at hevanet.com> sent in some material which
- is now gracing the top of the index page <http://user-mode-
- linux.sourceforge.net/> of this site.
+ is now gracing the top of the index page
+ http://user-mode-linux.sourceforge.net/ of this site.
- SGI <http://www.sgi.com> (and more specifically Ralf Baechle <ralf at
- uni-koblenz.de> ) gave me an account on oss.sgi.com
- <http://www.oss.sgi.com> . The bandwidth there made it possible to
+ SGI (and more specifically Ralf Baechle <ralf at
+ uni-koblenz.de> ) gave me an account on oss.sgi.com.
+ The bandwidth there made it possible to
produce most of the filesystems available on the project download
page.
@@ -4573,17 +4399,5 @@
Chris Reahard built a specialized root filesystem for running a DNS
server jailed inside UML. It's available from the download
- <http://user-mode-linux.sourceforge.net/dl-sf.html> page in the Jail
+ http://user-mode-linux.sourceforge.net/old/dl-sf.html page in the Jail
Filesystems section.
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/Documentation/x86/exception-tables.rst b/Documentation/x86/exception-tables.rst
index ed6d4b0..81a3938 100644
--- a/Documentation/x86/exception-tables.rst
+++ b/Documentation/x86/exception-tables.rst
@@ -257,6 +257,9 @@
the original assembly code: > 3: movl $-14,%eax
and linked in vmlinux : > c0199ff5 <.fixup+10b5> movl $0xfffffff2,%eax
+If the fixup was able to handle the exception, control flow may be returned
+to the instruction after the one that triggered the fault, ie. local label 2b.
+
The assembly code::
> .section __ex_table,"a"
@@ -344,3 +347,14 @@
it as special.
More functions can easily be added.
+
+CONFIG_BUILDTIME_TABLE_SORT allows the __ex_table section to be sorted post
+link of the kernel image, via a host utility scripts/sorttable. It will set the
+symbol main_extable_sort_needed to 0, avoiding sorting the __ex_table section
+at boot time. With the exception table sorted, at runtime when an exception
+occurs we can quickly lookup the __ex_table entry via binary search.
+
+This is not just a boot time optimization, some architectures require this
+table to be sorted in order to handle exceptions relatively early in the boot
+process. For example, i386 makes use of this form of exception handling before
+paging support is even enabled!
diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index a8de2fb..265d9e9 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -19,7 +19,6 @@
tlb
mtrr
pat
- intel_mpx
intel-iommu
intel_txt
amd-memory-encryption
diff --git a/Documentation/x86/intel-iommu.rst b/Documentation/x86/intel-iommu.rst
index 9dae6b4..099f13d 100644
--- a/Documentation/x86/intel-iommu.rst
+++ b/Documentation/x86/intel-iommu.rst
@@ -95,9 +95,10 @@
When DMAR is enabled for use, you will notice..
PCI-DMA: Using DMAR IOMMU
+-------------------------
Fault reporting
----------------
+^^^^^^^^^^^^^^^
::
diff --git a/MAINTAINERS b/MAINTAINERS
index d66ac41..1b37fea 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -214,7 +214,7 @@
T: git git://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs.git
T: git git://github.com/martinetd/linux.git
S: Maintained
-F: Documentation/filesystems/9p.txt
+F: Documentation/filesystems/9p.rst
F: fs/9p/
F: net/9p/
F: include/net/9p/
@@ -584,7 +584,7 @@
M: David Sterba <dsterba@suse.com>
L: linux-fsdevel@vger.kernel.org
S: Odd Fixes
-F: Documentation/filesystems/affs.txt
+F: Documentation/filesystems/affs.rst
F: fs/affs/
AFS FILESYSTEM
@@ -593,7 +593,7 @@
S: Supported
F: fs/afs/
F: include/trace/events/afs.h
-F: Documentation/filesystems/afs.txt
+F: Documentation/filesystems/afs.rst
W: https://www.infradead.org/~dhowells/kafs/
AGPGART DRIVER
@@ -693,7 +693,7 @@
M: Yangtao Li <tiny.windzz@gmail.com>
L: linux-pm@vger.kernel.org
S: Maintained
-F: Documentation/devicetree/bindings/opp/sun50i-nvmem-cpufreq.txt
+F: Documentation/devicetree/bindings/opp/allwinner,sun50i-h6-operating-points.yaml
F: drivers/cpufreq/sun50i-cpufreq-nvmem.c
ALLWINNER CRYPTO DRIVERS
@@ -2796,11 +2796,11 @@
ATHEROS 71XX/9XXX GPIO DRIVER
M: Alban Bedel <albeu@free.fr>
+S: Maintained
W: https://github.com/AlbanBedel/linux
T: git git://github.com/AlbanBedel/linux
-S: Maintained
-F: drivers/gpio/gpio-ath79.c
F: Documentation/devicetree/bindings/gpio/gpio-ath79.txt
+F: drivers/gpio/gpio-ath79.c
ATHEROS 71XX/9XXX USB PHY DRIVER
M: Alban Bedel <albeu@free.fr>
@@ -2957,6 +2957,14 @@
F: Documentation/devicetree/bindings/sound/axentia,*
F: sound/soc/atmel/tse850-pcm5142.c
+AXI-FAN-CONTROL HARDWARE MONITOR DRIVER
+M: Nuno Sá <nuno.sa@analog.com>
+W: http://ez.analog.com/community/linux-device-drivers
+L: linux-hwmon@vger.kernel.org
+S: Supported
+F: drivers/hwmon/axi-fan-control.c
+F: Documentation/devicetree/bindings/hwmon/adi,axi-fan-control.yaml
+
AXXIA I2C CONTROLLER
M: Krzysztof Adamski <krzysztof.adamski@nokia.com>
L: linux-i2c@vger.kernel.org
@@ -3063,7 +3071,7 @@
M: Salah Triki <salah.triki@gmail.com>
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/luisbg/linux-befs.git
-F: Documentation/filesystems/befs.txt
+F: Documentation/filesystems/befs.rst
F: fs/befs/
BFQ I/O SCHEDULER
@@ -3077,7 +3085,7 @@
BFS FILE SYSTEM
M: "Tigran A. Aivazian" <aivazian.tigran@gmail.com>
S: Maintained
-F: Documentation/filesystems/bfs.txt
+F: Documentation/filesystems/bfs.rst
F: fs/bfs/
F: include/uapi/linux/bfs_fs.h
@@ -3422,8 +3430,8 @@
M: Gregory Fong <gregory.0xf0@gmail.com>
L: bcm-kernel-feedback-list@broadcom.com
S: Supported
-F: drivers/gpio/gpio-brcmstb.c
F: Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt
+F: drivers/gpio/gpio-brcmstb.c
BROADCOM BRCMSTB I2C DRIVER
M: Kamal Dasu <kdasu.kdev@gmail.com>
@@ -3481,8 +3489,8 @@
M: Ray Jui <rjui@broadcom.com>
L: bcm-kernel-feedback-list@broadcom.com
S: Supported
-F: drivers/gpio/gpio-bcm-kona.c
F: Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt
+F: drivers/gpio/gpio-bcm-kona.c
BROADCOM NETXTREME-E ROCE DRIVER
M: Selvin Xavier <selvin.xavier@broadcom.com>
@@ -3597,8 +3605,8 @@
BT8XXGPIO DRIVER
M: Michael Buesch <m@bues.ch>
-W: http://bu3sch.de/btgpio.php
S: Maintained
+W: http://bu3sch.de/btgpio.php
F: drivers/gpio/gpio-bt8xx.c
BTRFS FILE SYSTEM
@@ -3610,7 +3618,7 @@
Q: http://patchwork.kernel.org/project/linux-btrfs/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs.git
S: Maintained
-F: Documentation/filesystems/btrfs.txt
+F: Documentation/filesystems/btrfs.rst
F: fs/btrfs/
F: include/linux/btrfs*
F: include/uapi/linux/btrfs*
@@ -3649,6 +3657,7 @@
C-SKY ARCHITECTURE
M: Guo Ren <guoren@kernel.org>
+L: linux-csky@vger.kernel.org
T: git https://github.com/c-sky/csky-linux.git
S: Supported
F: arch/csky/
@@ -3906,10 +3915,10 @@
T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
T: git git://github.com/ceph/ceph-client.git
S: Supported
-F: Documentation/filesystems/ceph.txt
+F: Documentation/filesystems/ceph.rst
F: fs/ceph/
-CERTIFICATE HANDLING:
+CERTIFICATE HANDLING
M: David Howells <dhowells@redhat.com>
M: David Woodhouse <dwmw2@infradead.org>
L: keyrings@vger.kernel.org
@@ -3919,7 +3928,7 @@
F: scripts/sign-file.c
F: scripts/extract-cert.c
-CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM:
+CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM
L: devel@driverdev.osuosl.org
S: Obsolete
F: drivers/staging/wusbcore/
@@ -4016,7 +4025,7 @@
S: Maintained
R: Enric Balletbo i Serra <enric.balletbo@collabora.com>
R: Guenter Roeck <groeck@chromium.org>
-F: Documentation/devicetree/bindings/sound/google,cros-ec-codec.txt
+F: Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml
F: sound/soc/codecs/cros_ec_codec.*
CIRRUS LOGIC AUDIO CODEC DRIVERS
@@ -4072,7 +4081,6 @@
CISCO VIC ETHERNET NIC DRIVER
M: Christian Benvenuti <benve@cisco.com>
M: Govindarajulu Varadarajan <_govind@gmx.com>
-M: Parvi Kaustubhi <pkaustub@cisco.com>
S: Supported
F: drivers/net/ethernet/cisco/enic/
@@ -4423,7 +4431,7 @@
CRAMFS FILESYSTEM
M: Nicolas Pitre <nico@fluxnic.net>
S: Maintained
-F: Documentation/filesystems/cramfs.txt
+F: Documentation/filesystems/cramfs.rst
F: fs/cramfs/
CREATIVE SB0540
@@ -4474,7 +4482,7 @@
T: git git://linuxtv.org/media_tree.git
S: Maintained
F: drivers/media/platform/sunxi/sun6i-csi/
-F: Documentation/devicetree/bindings/media/sun6i-csi.txt
+F: Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml
CW1200 WLAN driver
M: Solomon Peachy <pizza@shaftnet.org>
@@ -4571,7 +4579,7 @@
F: include/uapi/rdma/cxgb4-abi.h
CXGB4VF ETHERNET DRIVER (CXGB4VF)
-M: Casey Leedom <leedom@chelsio.com>
+M: Vishal Kulkarni <vishal@gmail.com>
L: netdev@vger.kernel.org
W: http://www.chelsio.com
S: Supported
@@ -5201,7 +5209,7 @@
R: "Rafael J. Wysocki" <rafael@kernel.org>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
S: Supported
-F: Documentation/kobject.txt
+F: Documentation/core-api/kobject.rst
F: drivers/base/
F: fs/debugfs/
F: fs/sysfs/
@@ -5667,7 +5675,7 @@
T: git git://anongit.freedesktop.org/drm/drm-misc
S: Maintained
F: drivers/gpu/drm/stm
-F: Documentation/devicetree/bindings/display/st,stm32-ltdc.txt
+F: Documentation/devicetree/bindings/display/st,stm32-ltdc.yaml
DRM DRIVERS FOR TI LCDC
M: Jyri Sarha <jsarha@ti.com>
@@ -5932,13 +5940,13 @@
F: drivers/media/dvb-frontends/ec100*
ECRYPT FILE SYSTEM
-M: Tyler Hicks <tyhicks@canonical.com>
+M: Tyler Hicks <code@tyhicks.com>
L: ecryptfs@vger.kernel.org
W: http://ecryptfs.org
W: https://launchpad.net/ecryptfs
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tyhicks/ecryptfs.git
-S: Supported
-F: Documentation/filesystems/ecryptfs.txt
+S: Odd Fixes
+F: Documentation/filesystems/ecryptfs.rst
F: fs/ecryptfs/
EDAC-AMD64
@@ -5998,6 +6006,12 @@
F: drivers/edac/
F: include/linux/edac.h
+EDAC-DMC520
+M: Lei Wang <lewan@microsoft.com>
+L: linux-edac@vger.kernel.org
+S: Supported
+F: drivers/edac/dmc520_edac.c
+
EDAC-E752X
M: Mark Gross <mark.gross@intel.com>
L: linux-edac@vger.kernel.org
@@ -6197,7 +6211,6 @@
F: drivers/scsi/be2iscsi/
Emulex 10Gbps NIC BE2, BE3-R, Lancer, Skyhawk-R DRIVER (be2net)
-M: Sathya Perla <sathya.perla@broadcom.com>
M: Ajit Khaparde <ajit.khaparde@broadcom.com>
M: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
M: Somnath Kotur <somnath.kotur@broadcom.com>
@@ -6249,12 +6262,12 @@
F: include/video/s1d13xxxfb.h
EROFS FILE SYSTEM
-M: Gao Xiang <gaoxiang25@huawei.com>
+M: Gao Xiang <xiang@kernel.org>
M: Chao Yu <yuchao0@huawei.com>
L: linux-erofs@lists.ozlabs.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git
-F: Documentation/filesystems/erofs.txt
+F: Documentation/filesystems/erofs.rst
F: fs/erofs/
F: include/trace/events/erofs.h
@@ -6315,7 +6328,7 @@
M: Jan Kara <jack@suse.com>
L: linux-ext4@vger.kernel.org
S: Maintained
-F: Documentation/filesystems/ext2.txt
+F: Documentation/filesystems/ext2.rst
F: fs/ext2/
F: include/linux/ext2*
@@ -6389,7 +6402,7 @@
W: https://f2fs.wiki.kernel.org/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
S: Maintained
-F: Documentation/filesystems/f2fs.txt
+F: Documentation/filesystems/f2fs.rst
F: Documentation/ABI/testing/sysfs-fs-f2fs
F: fs/f2fs/
F: include/linux/f2fs_fs.h
@@ -6934,7 +6947,7 @@
F: scripts/gcc-plugins/
F: scripts/gcc-plugin.sh
F: scripts/Makefile.gcc-plugins
-F: Documentation/core-api/gcc-plugins.rst
+F: Documentation/kbuild/gcc-plugins.rst
GASKET DRIVER FRAMEWORK
M: Rob Springer <rspringer@google.com>
@@ -7047,7 +7060,7 @@
S: Supported
F: drivers/uio/uio_pci_generic.c
-GENERIC VDSO LIBRARY:
+GENERIC VDSO LIBRARY
M: Andy Lutomirski <luto@kernel.org>
M: Thomas Gleixner <tglx@linutronix.de>
M: Vincenzo Frascino <vincenzo.frascino@arm.com>
@@ -7143,18 +7156,18 @@
M: Linus Walleij <linus.walleij@linaro.org>
M: Bartosz Golaszewski <bgolaszewski@baylibre.com>
L: linux-gpio@vger.kernel.org
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
+F: Documentation/ABI/obsolete/sysfs-gpio
+F: Documentation/ABI/testing/gpio-cdev
+F: Documentation/admin-guide/gpio/
F: Documentation/devicetree/bindings/gpio/
F: Documentation/driver-api/gpio/
-F: Documentation/admin-guide/gpio/
-F: Documentation/ABI/testing/gpio-cdev
-F: Documentation/ABI/obsolete/sysfs-gpio
F: drivers/gpio/
+F: include/asm-generic/gpio.h
F: include/linux/gpio/
F: include/linux/gpio.h
F: include/linux/of_gpio.h
-F: include/asm-generic/gpio.h
F: include/uapi/linux/gpio.h
F: tools/gpio/
@@ -7431,13 +7444,13 @@
HFS FILESYSTEM
L: linux-fsdevel@vger.kernel.org
S: Orphan
-F: Documentation/filesystems/hfs.txt
+F: Documentation/filesystems/hfs.rst
F: fs/hfs/
HFSPLUS FILESYSTEM
L: linux-fsdevel@vger.kernel.org
S: Orphan
-F: Documentation/filesystems/hfsplus.txt
+F: Documentation/filesystems/hfsplus.rst
F: fs/hfsplus/
HGA FRAMEBUFFER DRIVER
@@ -7517,6 +7530,12 @@
F: net/802/hippi.c
F: drivers/net/hippi/
+HISILICON DMA DRIVER
+M: Zhou Wang <wangzhou1@hisilicon.com>
+L: dmaengine@vger.kernel.org
+S: Maintained
+F: drivers/dma/hisi_dma.c
+
HISILICON SECURITY ENGINE V2 DRIVER (SEC2)
M: Zaibo Xu <xuzaibo@huawei.com>
L: linux-crypto@vger.kernel.org
@@ -7574,7 +7593,8 @@
HISILICON ROCE DRIVER
M: Lijun Ou <oulijun@huawei.com>
-M: Wei Hu(Xavier) <xavier.huwei@huawei.com>
+M: Wei Hu(Xavier) <huwei87@hisilicon.com>
+M: Weihang Li <liweihang@huawei.com>
L: linux-rdma@vger.kernel.org
S: Maintained
F: drivers/infiniband/hw/hns/
@@ -7737,7 +7757,7 @@
M: "K. Y. Srinivasan" <kys@microsoft.com>
M: Haiyang Zhang <haiyangz@microsoft.com>
M: Stephen Hemminger <sthemmin@microsoft.com>
-M: Sasha Levin <sashal@kernel.org>
+M: Wei Liu <wei.liu@kernel.org>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git
L: linux-hyperv@vger.kernel.org
S: Supported
@@ -8055,8 +8075,8 @@
ICH LPC AND GPIO DRIVER
M: Peter Tyser <ptyser@xes-inc.com>
S: Maintained
-F: drivers/mfd/lpc_ich.c
F: drivers/gpio/gpio-ich.c
+F: drivers/mfd/lpc_ich.c
ICY I2C DRIVER
M: Max Staudt <max@enpas.org>
@@ -8308,7 +8328,7 @@
R: Amir Goldstein <amir73il@gmail.com>
L: linux-fsdevel@vger.kernel.org
S: Maintained
-F: Documentation/filesystems/inotify.txt
+F: Documentation/filesystems/inotify.rst
F: fs/notify/inotify/
F: include/linux/inotify.h
F: include/uapi/linux/inotify.h
@@ -8392,7 +8412,7 @@
M: Rodrigo Vivi <rodrigo.vivi@intel.com>
L: intel-gfx@lists.freedesktop.org
W: https://01.org/linuxgraphics/
-B: https://01.org/linuxgraphics/documentation/how-report-bugs
+B: https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs
C: irc://chat.freenode.net/intel-gfx
Q: http://patchwork.freedesktop.org/project/intel-gfx/
T: git git://anongit.freedesktop.org/drm-intel
@@ -8477,7 +8497,6 @@
S: Supported
F: drivers/dma/idxd/*
F: include/uapi/linux/idxd.h
-F: include/linux/idxd.h
INTEL IDLE DRIVER
M: Jacob Pan <jacob.jun.pan@linux.intel.com>
@@ -8569,15 +8588,15 @@
S: Supported
W: https://github.com/sudeepdutt/mic
W: http://software.intel.com/en-us/mic-developer
+F: Documentation/misc-devices/mic/
+F: drivers/dma/mic_x100_dma.c
+F: drivers/dma/mic_x100_dma.h
+F: drivers/misc/mic/
F: include/linux/mic_bus.h
F: include/linux/scif.h
F: include/uapi/linux/mic_common.h
F: include/uapi/linux/mic_ioctl.h
F: include/uapi/linux/scif_ioctl.h
-F: drivers/misc/mic/
-F: drivers/dma/mic_x100_dma.c
-F: drivers/dma/mic_x100_dma.h
-F: Documentation/mic/
INTEL PMC CORE DRIVER
M: Rajneesh Bhardwaj <rajneesh.bhardwaj@intel.com>
@@ -8684,7 +8703,7 @@
M: Luca Coelho <luciano.coelho@intel.com>
M: Intel Linux Wireless <linuxwifi@intel.com>
L: linux-wireless@vger.kernel.org
-W: http://intellinuxwireless.org
+W: https://wireless.wiki.kernel.org/en/users/drivers/iwlwifi
T: git git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi.git
S: Supported
F: drivers/net/wireless/intel/iwlwifi/
@@ -9275,10 +9294,10 @@
S: Supported
F: Documentation/security/keys/trusted-encrypted.rst
F: include/keys/trusted-type.h
-F: security/keys/trusted.c
-F: include/keys/trusted.h
+F: include/keys/trusted_tpm.h
+F: security/keys/trusted-keys/
-KEYS/KEYRINGS:
+KEYS/KEYRINGS
M: David Howells <dhowells@redhat.com>
M: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
L: keyrings@vger.kernel.org
@@ -10163,7 +10182,7 @@
M: Andreas Klinger <ak@it-klinger.de>
L: linux-iio@vger.kernel.org
S: Maintained
-F: Documentation/devicetree/bindings/iio/proximity/maxbotix,mb1232.txt
+F: Documentation/devicetree/bindings/iio/proximity/maxbotix,mb1232.yaml
F: drivers/iio/proximity/mb1232.c
MAXIM MAX77650 PMIC MFD DRIVER
@@ -10466,7 +10485,7 @@
L: linux-media@vger.kernel.org
T: git git://linuxtv.org/media_tree.git
S: Supported
-F: Documentation/devicetree/bindings/media/st,stm32-dcmi.txt
+F: Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml
F: drivers/media/platform/stm32/stm32-dcmi.c
MEDIA DRIVERS FOR NVIDIA TEGRA - VDE
@@ -11115,14 +11134,12 @@
F: drivers/usb/image/microtek.*
MIPS
-M: Ralf Baechle <ralf@linux-mips.org>
-M: Paul Burton <paulburton@kernel.org>
+M: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
L: linux-mips@vger.kernel.org
W: http://www.linux-mips.org/
-T: git git://git.linux-mips.org/pub/scm/ralf/linux.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mips/linux.git
-Q: http://patchwork.linux-mips.org/project/linux-mips/list/
-S: Supported
+Q: https://patchwork.kernel.org/project/linux-mips/list/
+S: Maintained
F: Documentation/devicetree/bindings/mips/
F: Documentation/mips/
F: arch/mips/
@@ -11485,7 +11502,7 @@
F: drivers/scsi/sun3_scsi.*
F: drivers/scsi/sun3_scsi_vme.c
-NCSI LIBRARY:
+NCSI LIBRARY
M: Samuel Mendoza-Jonas <sam@mendozajonas.com>
S: Maintained
F: net/ncsi/
@@ -11792,7 +11809,7 @@
W: https://nilfs.osdn.jp/
T: git git://github.com/konis/nilfs2.git
S: Supported
-F: Documentation/filesystems/nilfs2.txt
+F: Documentation/filesystems/nilfs2.rst
F: fs/nilfs2/
F: include/trace/events/nilfs2.h
F: include/uapi/linux/nilfs2_api.h
@@ -11901,7 +11918,7 @@
W: http://www.tuxera.com/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/aia21/ntfs.git
S: Supported
-F: Documentation/filesystems/ntfs.txt
+F: Documentation/filesystems/ntfs.rst
F: fs/ntfs/
NUBUS SUBSYSTEM
@@ -12247,7 +12264,7 @@
M: Bob Copeland <me@bobcopeland.com>
L: linux-karma-devel@lists.sourceforge.net
S: Maintained
-F: Documentation/filesystems/omfs.txt
+F: Documentation/filesystems/omfs.rst
F: fs/omfs/
OMNIKEY CARDMAN 4000 DRIVER
@@ -12496,8 +12513,8 @@
L: ocfs2-devel@oss.oracle.com (moderated for non-subscribers)
W: http://ocfs2.wiki.kernel.org
S: Supported
-F: Documentation/filesystems/ocfs2.txt
-F: Documentation/filesystems/dlmfs.txt
+F: Documentation/filesystems/ocfs2.rst
+F: Documentation/filesystems/dlmfs.rst
F: fs/ocfs2/
ORANGEFS FILESYSTEM
@@ -12507,7 +12524,7 @@
T: git git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux.git
S: Supported
F: fs/orangefs/
-F: Documentation/filesystems/orangefs.txt
+F: Documentation/filesystems/orangefs.rst
ORINOCO DRIVER
L: linux-wireless@vger.kernel.org
@@ -12741,7 +12758,7 @@
L: linux-pci@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/pci/cdns,*.txt
-F: drivers/pci/controller/pcie-cadence*
+F: drivers/pci/controller/cadence/
PCI DRIVER FOR FREESCALE LAYERSCAPE
M: Minghuan Lian <minghuan.Lian@nxp.com>
@@ -12954,7 +12971,6 @@
L: linux-pci@vger.kernel.org
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
-F: Documentation/devicetree/bindings/pci/pci-thunder-*
F: drivers/pci/controller/pci-thunder-*
PCIE DRIVER FOR HISILICON
@@ -13470,7 +13486,7 @@
F: fs/proc/
F: include/linux/proc_fs.h
F: tools/testing/selftests/proc/
-F: Documentation/filesystems/proc.txt
+F: Documentation/filesystems/proc.rst
PROC SYSCTL
M: Luis Chamberlain <mcgrof@kernel.org>
@@ -13513,7 +13529,7 @@
S: Maintained
F: drivers/block/ps3vram.c
-PSAMPLE PACKET SAMPLING SUPPORT:
+PSAMPLE PACKET SAMPLING SUPPORT
M: Yotam Gigi <yotam.gi@gmail.com>
S: Maintained
F: net/psample
@@ -14230,7 +14246,7 @@
F: include/linux/reset.h
F: include/linux/reset/
F: include/linux/reset-controller.h
-K: \b(?:devm_|of_)?reset_control(?:ler_[a-z]+|_[a-z_]+)?\b
+K: \b(?:devm_|of_)?reset_control(?:ler_[a-z]+|_[a-z_]+)?\b
RESTARTABLE SEQUENCES SUPPORT
M: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
@@ -14595,10 +14611,10 @@
F: include/media/drv-intf/saa7146*
SAFESETID SECURITY MODULE
-M: Micah Morton <mortonm@chromium.org>
-S: Supported
-F: security/safesetid/
-F: Documentation/admin-guide/LSM/SafeSetID.rst
+M: Micah Morton <mortonm@chromium.org>
+S: Supported
+F: security/safesetid/
+F: Documentation/admin-guide/LSM/SafeSetID.rst
SAMSUNG AUDIO (ASoC) DRIVERS
M: Krzysztof Kozlowski <krzk@kernel.org>
@@ -15432,11 +15448,9 @@
F: include/uapi/rdma/siw-abi.h
SOFT-ROCE DRIVER (rxe)
-M: Moni Shoua <monis@mellanox.com>
+M: Zhu Yanjun <yanjunz@mellanox.com>
L: linux-rdma@vger.kernel.org
S: Supported
-W: https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
-Q: http://patchwork.kernel.org/project/linux-rdma/list/
F: drivers/infiniband/sw/rxe/
F: include/uapi/rdma/rdma_user_rxe.h
@@ -15758,7 +15772,7 @@
W: http://squashfs.org.uk
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pkl/squashfs-next.git
S: Maintained
-F: Documentation/filesystems/squashfs.txt
+F: Documentation/filesystems/squashfs.rst
F: fs/squashfs/
SRM (Alpha) environment access
@@ -15943,7 +15957,7 @@
F: drivers/pwm/pwm-stm32*
F: include/linux/*/stm32-*tim*
F: Documentation/ABI/testing/*timer-stm32
-F: Documentation/devicetree/bindings/*/stm32-*timer*
+F: Documentation/devicetree/bindings/*/*stm32-*timer*
F: Documentation/devicetree/bindings/pwm/pwm-stm32*
STMMAC ETHERNET DRIVER
@@ -16095,20 +16109,22 @@
SYNOPSYS CREG GPIO DRIVER
M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
S: Maintained
-F: drivers/gpio/gpio-creg-snps.c
F: Documentation/devicetree/bindings/gpio/snps,creg-gpio.txt
+F: drivers/gpio/gpio-creg-snps.c
SYNOPSYS DESIGNWARE 8250 UART DRIVER
R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
S: Maintained
F: drivers/tty/serial/8250/8250_dw.c
+F: drivers/tty/serial/8250/8250_dwlib.*
+F: drivers/tty/serial/8250/8250_lpss.c
SYNOPSYS DESIGNWARE APB GPIO DRIVER
M: Hoan Tran <hoan@os.amperecomputing.com>
L: linux-gpio@vger.kernel.org
S: Maintained
-F: drivers/gpio/gpio-dwapb.c
F: Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt
+F: drivers/gpio/gpio-dwapb.c
SYNOPSYS DESIGNWARE AXI DMAC DRIVER
M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
@@ -16201,7 +16217,7 @@
SYSV FILESYSTEM
M: Christoph Hellwig <hch@infradead.org>
S: Maintained
-F: Documentation/filesystems/sysv-fs.txt
+F: Documentation/filesystems/sysv-fs.rst
F: fs/sysv/
F: include/linux/sysv_fs.h
@@ -16572,8 +16588,8 @@
M: Mika Westerberg <mika.westerberg@linux.intel.com>
M: Yehezkel Bernat <YehezkelShB@gmail.com>
L: linux-usb@vger.kernel.org
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
F: Documentation/admin-guide/thunderbolt.rst
F: drivers/thunderbolt/
F: include/linux/thunderbolt.h
@@ -17066,7 +17082,7 @@
T: git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git fixes
W: http://www.linux-mtd.infradead.org/doc/ubifs.html
S: Supported
-F: Documentation/filesystems/ubifs.txt
+F: Documentation/filesystems/ubifs.rst
F: fs/ubifs/
UCLINUX (M68KNOMMU AND COLDFIRE)
@@ -17085,7 +17101,7 @@
UDF FILESYSTEM
M: Jan Kara <jack@suse.com>
S: Maintained
-F: Documentation/filesystems/udf.txt
+F: Documentation/filesystems/udf.rst
F: fs/udf/
UDRAW TABLET
@@ -17100,7 +17116,7 @@
F: Documentation/admin-guide/ufs.rst
F: fs/ufs/
-UHID USERSPACE HID IO DRIVER:
+UHID USERSPACE HID IO DRIVER
M: David Herrmann <dh.herrmann@googlemail.com>
L: linux-input@vger.kernel.org
S: Maintained
@@ -17114,18 +17130,18 @@
F: drivers/usb/common/ulpi.c
F: include/linux/ulpi/
-ULTRA-WIDEBAND (UWB) SUBSYSTEM:
+ULTRA-WIDEBAND (UWB) SUBSYSTEM
L: devel@driverdev.osuosl.org
S: Obsolete
F: drivers/staging/uwb/
-UNICODE SUBSYSTEM:
+UNICODE SUBSYSTEM
M: Gabriel Krisman Bertazi <krisman@collabora.com>
L: linux-fsdevel@vger.kernel.org
S: Supported
F: fs/unicode/
-UNICORE32 ARCHITECTURE:
+UNICORE32 ARCHITECTURE
M: Guan Xuetao <gxt@pku.edu.cn>
W: http://mprc.pku.edu.cn/~guanxuetao/linux
S: Maintained
@@ -17412,11 +17428,14 @@
F: include/linux/usb.h
F: include/linux/usb/
-USB TYPEC PI3USB30532 MUX DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+USB TYPEC BUS FOR ALTERNATE MODES
+M: Heikki Krogerus <heikki.krogerus@linux.intel.com>
L: linux-usb@vger.kernel.org
S: Maintained
-F: drivers/usb/typec/mux/pi3usb30532.c
+F: Documentation/ABI/testing/sysfs-bus-typec
+F: Documentation/driver-api/usb/typec_bus.rst
+F: drivers/usb/typec/altmodes/
+F: include/linux/usb/typec_altmode.h
USB TYPEC CLASS
M: Heikki Krogerus <heikki.krogerus@linux.intel.com>
@@ -17427,14 +17446,11 @@
F: drivers/usb/typec/
F: include/linux/usb/typec.h
-USB TYPEC BUS FOR ALTERNATE MODES
-M: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+USB TYPEC PI3USB30532 MUX DRIVER
+M: Hans de Goede <hdegoede@redhat.com>
L: linux-usb@vger.kernel.org
S: Maintained
-F: Documentation/ABI/testing/sysfs-bus-typec
-F: Documentation/driver-api/usb/typec_bus.rst
-F: drivers/usb/typec/altmodes/
-F: include/linux/usb/typec_altmode.h
+F: drivers/usb/typec/mux/pi3usb30532.c
USB TYPEC PORT CONTROLLER DRIVERS
M: Guenter Roeck <linux@roeck-us.net>
@@ -17811,7 +17827,7 @@
F: include/uapi/linux/vbox*.h
F: drivers/virt/vboxguest/
-VIRTUAL BOX SHARED FOLDER VFS DRIVER:
+VIRTUAL BOX SHARED FOLDER VFS DRIVER
M: Hans de Goede <hdegoede@redhat.com>
L: linux-fsdevel@vger.kernel.org
S: Maintained
@@ -18434,8 +18450,8 @@
M: Semi Malinen <semi.malinen@ge.com>
L: linux-gpio@vger.kernel.org
S: Maintained
-F: drivers/gpio/gpio-xra1403.c
F: Documentation/devicetree/bindings/gpio/gpio-xra1403.txt
+F: drivers/gpio/gpio-xra1403.c
XTENSA XTFPGA PLATFORM SUPPORT
M: Max Filippov <jcmvbkbc@gmail.com>
@@ -18524,7 +18540,7 @@
T: git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs.git
S: Maintained
F: fs/zonefs/
-F: Documentation/filesystems/zonefs.txt
+F: Documentation/filesystems/zonefs.rst
ZPOOL COMPRESSED PAGE STORAGE API
M: Dan Streetman <ddstreet@ieee.org>
diff --git a/Makefile b/Makefile
index 84b7184..4d0711f 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 6
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION =
NAME = Kleptomaniac Octopus
# *DOCUMENTATION*
@@ -68,6 +68,7 @@
#
# If KBUILD_VERBOSE equals 0 then the above command will be hidden.
# If KBUILD_VERBOSE equals 1 then the above command is displayed.
+# If KBUILD_VERBOSE equals 2 then give the reason why each target is rebuilt.
#
# To put more focus on warnings, be less verbose as default
# Use 'make V=1' to see the full commands
@@ -1238,7 +1239,7 @@
%.dtb: include/config/kernel.release scripts_dtc
$(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@
-PHONY += dtbs dtbs_install dt_binding_check
+PHONY += dtbs dtbs_install dtbs_check
dtbs dtbs_check: include/config/kernel.release scripts_dtc
$(Q)$(MAKE) $(build)=$(dtstree)
@@ -1258,6 +1259,7 @@
scripts_dtc: scripts_basic
$(Q)$(MAKE) $(build)=scripts/dtc
+PHONY += dt_binding_check
dt_binding_check: scripts_dtc
$(Q)$(MAKE) $(build)=Documentation/devicetree/bindings
@@ -1802,7 +1804,7 @@
-include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd)
-endif # config-targets
+endif # config-build
endif # mixed-build
endif # need-sub-make
diff --git a/arch/Kconfig b/arch/Kconfig
index 98de654..17fe351 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -738,8 +738,9 @@
config HAVE_RELIABLE_STACKTRACE
bool
help
- Architecture has a save_stack_trace_tsk_reliable() function which
- only returns a stack trace if it can guarantee the trace is reliable.
+ Architecture has either save_stack_trace_tsk_reliable() or
+ arch_stack_walk_reliable() function which only returns a stack trace
+ if it can guarantee the trace is reliable.
config HAVE_ARCH_HASH
bool
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index ff2a393..7124ab8 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -154,7 +154,7 @@
help
Support for ARC HS38x Cores based on ARCv2 ISA
The notable features are:
- - SMP configurations of upto 4 core with coherency
+ - SMP configurations of up to 4 cores with coherency
- Optional L2 Cache and IO-Coherency
- Revised Interrupt Architecture (multiple priorites, reg banks,
auto stack switch, auto regfile save/restore)
@@ -192,7 +192,7 @@
help
In SMP configuration cores can be configured as Halt-on-reset
or they could all start at same time. For Halt-on-reset, non
- masters are parked until Master kicks them so they can start of
+ masters are parked until Master kicks them so they can start off
at designated entry point. For other case, all jump to common
entry point and spin wait for Master's signal.
diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig
index 07f26ed..f7a978d 100644
--- a/arch/arc/configs/nps_defconfig
+++ b/arch/arc/configs/nps_defconfig
@@ -21,8 +21,6 @@
CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARC_PLAT_EZNPS=y
CONFIG_SMP=y
CONFIG_NR_CPUS=4096
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index 5dd470b..bf39a00 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -20,8 +20,6 @@
CONFIG_KPROBES=y
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci"
# CONFIG_COMPACTION is not set
CONFIG_NET=y
diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig
index 3532e86..7121bd7 100644
--- a/arch/arc/configs/nsimosci_hs_defconfig
+++ b/arch/arc/configs/nsimosci_hs_defconfig
@@ -19,8 +19,6 @@
CONFIG_KPROBES=y
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ISA_ARCV2=y
CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs"
# CONFIG_COMPACTION is not set
diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig
index d90448b..f9863b2 100644
--- a/arch/arc/configs/nsimosci_hs_smp_defconfig
+++ b/arch/arc/configs/nsimosci_hs_smp_defconfig
@@ -14,8 +14,6 @@
CONFIG_KPROBES=y
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ISA_ARCV2=y
CONFIG_SMP=y
# CONFIG_ARC_TIMERS_64BIT is not set
diff --git a/arch/arc/include/asm/fpu.h b/arch/arc/include/asm/fpu.h
index 6434725..006bcf8 100644
--- a/arch/arc/include/asm/fpu.h
+++ b/arch/arc/include/asm/fpu.h
@@ -43,6 +43,8 @@ extern void fpu_init_task(struct pt_regs *regs);
#endif /* !CONFIG_ISA_ARCOMPACT */
+struct task_struct;
+
extern void fpu_save_restore(struct task_struct *p, struct task_struct *n);
#else /* !CONFIG_ARC_FPU_SAVE_RESTORE */
diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h
index d9ee43c6..fe19f1d 100644
--- a/arch/arc/include/asm/linkage.h
+++ b/arch/arc/include/asm/linkage.h
@@ -29,6 +29,8 @@
.endm
#define ASM_NL ` /* use '`' to mark new line in macro */
+#define __ALIGN .align 4
+#define __ALIGN_STR __stringify(__ALIGN)
/* annotation for data we want in DCCM - if enabled in .config */
.macro ARCFP_DATA nm
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index e1c6474..aa41af6 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -8,11 +8,11 @@
#include <linux/delay.h>
#include <linux/root_dev.h>
#include <linux/clk.h>
-#include <linux/clk-provider.h>
#include <linux/clocksource.h>
#include <linux/console.h>
#include <linux/module.h>
#include <linux/cpu.h>
+#include <linux/of_clk.h>
#include <linux/of_fdt.h>
#include <linux/of.h>
#include <linux/cache.h>
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index b79886a..d299950 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -104,8 +104,7 @@ static void show_faulting_vma(unsigned long address)
if (IS_ERR(nm))
nm = "?";
}
- pr_info(" @off 0x%lx in [%s]\n"
- " VMA: 0x%08lx to 0x%08lx\n",
+ pr_info(" @off 0x%lx in [%s] VMA: 0x%08lx to 0x%08lx\n",
vma->vm_start < TASK_UNMAPPED_BASE ?
address : address - vma->vm_start,
nm, vma->vm_start, vma->vm_end);
@@ -120,8 +119,6 @@ static void show_ecr_verbose(struct pt_regs *regs)
unsigned int vec, cause_code;
unsigned long address;
- pr_info("\n[ECR ]: 0x%08lx => ", regs->event);
-
/* For Data fault, this is data address not instruction addr */
address = current->thread.fault_address;
@@ -130,10 +127,10 @@ static void show_ecr_verbose(struct pt_regs *regs)
/* For DTLB Miss or ProtV, display the memory involved too */
if (vec == ECR_V_DTLB_MISS) {
- pr_cont("Invalid %s @ 0x%08lx by insn @ 0x%08lx\n",
+ pr_cont("Invalid %s @ 0x%08lx by insn @ %pS\n",
(cause_code == 0x01) ? "Read" :
((cause_code == 0x02) ? "Write" : "EX"),
- address, regs->ret);
+ address, (void *)regs->ret);
} else if (vec == ECR_V_ITLB_MISS) {
pr_cont("Insn could not be fetched\n");
} else if (vec == ECR_V_MACH_CHK) {
@@ -191,31 +188,31 @@ void show_regs(struct pt_regs *regs)
show_ecr_verbose(regs);
- pr_info("[EFA ]: 0x%08lx\n[BLINK ]: %pS\n[ERET ]: %pS\n",
- current->thread.fault_address,
- (void *)regs->blink, (void *)regs->ret);
-
if (user_mode(regs))
show_faulting_vma(regs->ret); /* faulting code, not data */
- pr_info("[STAT32]: 0x%08lx", regs->status32);
+ pr_info("ECR: 0x%08lx EFA: 0x%08lx ERET: 0x%08lx\n",
+ regs->event, current->thread.fault_address, regs->ret);
+
+ pr_info("STAT32: 0x%08lx", regs->status32);
#define STS_BIT(r, bit) r->status32 & STATUS_##bit##_MASK ? #bit" " : ""
#ifdef CONFIG_ISA_ARCOMPACT
- pr_cont(" : %2s%2s%2s%2s%2s%2s%2s\n",
+ pr_cont(" [%2s%2s%2s%2s%2s%2s%2s]",
(regs->status32 & STATUS_U_MASK) ? "U " : "K ",
STS_BIT(regs, DE), STS_BIT(regs, AE),
STS_BIT(regs, A2), STS_BIT(regs, A1),
STS_BIT(regs, E2), STS_BIT(regs, E1));
#else
- pr_cont(" : %2s%2s%2s%2s\n",
+ pr_cont(" [%2s%2s%2s%2s]",
STS_BIT(regs, IE),
(regs->status32 & STATUS_U_MASK) ? "U " : "K ",
STS_BIT(regs, DE), STS_BIT(regs, AE));
#endif
- pr_info("BTA: 0x%08lx\t SP: 0x%08lx\t FP: 0x%08lx\n",
- regs->bta, regs->sp, regs->fp);
+ pr_cont(" BTA: 0x%08lx\n", regs->bta);
+ pr_info("BLK: %pS\n SP: 0x%08lx FP: 0x%08lx\n",
+ (void *)regs->blink, regs->sp, regs->fp);
pr_info("LPS: 0x%08lx\tLPE: 0x%08lx\tLPC: 0x%08lx\n",
regs->lp_start, regs->lp_end, regs->lp_count);
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index db857d0..1fc32b6 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -307,13 +307,15 @@
ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y)
prepare: stack_protector_prepare
stack_protector_prepare: prepare0
- $(eval KBUILD_CFLAGS += \
+ $(eval SSP_PLUGIN_CFLAGS := \
-fplugin-arg-arm_ssp_per_task_plugin-tso=$(shell \
awk '{if ($$2 == "THREAD_SZ_ORDER") print $$3;}'\
include/generated/asm-offsets.h) \
-fplugin-arg-arm_ssp_per_task_plugin-offset=$(shell \
awk '{if ($$2 == "TI_STACK_CANARY") print $$3;}'\
include/generated/asm-offsets.h))
+ $(eval KBUILD_CFLAGS += $(SSP_PLUGIN_CFLAGS))
+ $(eval GCC_PLUGINS_CFLAGS += $(SSP_PLUGIN_CFLAGS))
endif
all: $(notdir $(KBUILD_IMAGE))
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index da599c3..9c11e74 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -101,7 +101,6 @@
$(libfdt) $(libfdt_hdrs) hyp-stub.S
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
-KBUILD_CFLAGS += $(DISABLE_ARM_SSP_PER_TASK_PLUGIN)
ifeq ($(CONFIG_FUNCTION_TRACER),y)
ORIG_CFLAGS := $(KBUILD_CFLAGS)
@@ -117,7 +116,8 @@
CFLAGS_fdt_rw.o := $(nossp-flags-y)
CFLAGS_fdt_wip.o := $(nossp-flags-y)
-ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin -I$(obj)
+ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin \
+ -I$(obj) $(DISABLE_ARM_SSP_PER_TASK_PLUGIN)
asflags-y := -DZIMAGE
# Supply kernel BSS size to the decompressor via a linker symbol.
diff --git a/arch/arm/boot/dts/am437x-idk-evm.dts b/arch/arm/boot/dts/am437x-idk-evm.dts
index f3ced6d..9f66f96 100644
--- a/arch/arm/boot/dts/am437x-idk-evm.dts
+++ b/arch/arm/boot/dts/am437x-idk-evm.dts
@@ -526,11 +526,11 @@ &cpu0_opp_table {
* Supply voltage supervisor on board will not allow opp50 so
* disable it and set opp100 as suspend OPP.
*/
- opp50@300000000 {
+ opp50-300000000 {
status = "disabled";
};
- opp100@600000000 {
+ opp100-600000000 {
opp-suspend;
};
};
diff --git a/arch/arm/boot/dts/bcm2711-rpi-4-b.dts b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts
index 1b5a835..efea891 100644
--- a/arch/arm/boot/dts/bcm2711-rpi-4-b.dts
+++ b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts
@@ -21,6 +21,7 @@ memory@0 {
aliases {
ethernet0 = &genet;
+ pcie0 = &pcie0;
};
leds {
@@ -31,6 +32,8 @@ act {
pwr {
label = "PWR";
gpios = <&expgpio 2 GPIO_ACTIVE_LOW>;
+ default-state = "keep";
+ linux,default-trigger = "default-on";
};
};
diff --git a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts
index b75af21..4c3f606 100644
--- a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts
+++ b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts
@@ -112,6 +112,7 @@ &hdmi {
&sdhci {
#address-cells = <1>;
#size-cells = <0>;
+ pinctrl-names = "default";
pinctrl-0 = <&emmc_gpio34 &gpclk2_gpio43>;
bus-width = <4>;
mmc-pwrseq = <&wifi_pwrseq>;
diff --git a/arch/arm/boot/dts/bcm2835-rpi.dtsi b/arch/arm/boot/dts/bcm2835-rpi.dtsi
index 394c8a7..fd2c766e 100644
--- a/arch/arm/boot/dts/bcm2835-rpi.dtsi
+++ b/arch/arm/boot/dts/bcm2835-rpi.dtsi
@@ -15,6 +15,7 @@ soc {
firmware: firmware {
compatible = "raspberrypi,bcm2835-firmware", "simple-bus";
mboxes = <&mailbox>;
+ dma-ranges;
};
power: power {
diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-a-plus.dts b/arch/arm/boot/dts/bcm2837-rpi-3-a-plus.dts
index 66ab35e..28be033 100644
--- a/arch/arm/boot/dts/bcm2837-rpi-3-a-plus.dts
+++ b/arch/arm/boot/dts/bcm2837-rpi-3-a-plus.dts
@@ -26,6 +26,8 @@ act {
pwr {
label = "PWR";
gpios = <&expgpio 2 GPIO_ACTIVE_LOW>;
+ default-state = "keep";
+ linux,default-trigger = "default-on";
};
};
};
diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts
index 74ed6d0..3734314 100644
--- a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts
+++ b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts
@@ -27,6 +27,8 @@ act {
pwr {
label = "PWR";
gpios = <&expgpio 2 GPIO_ACTIVE_LOW>;
+ default-state = "keep";
+ linux,default-trigger = "default-on";
};
};
diff --git a/arch/arm/boot/dts/dm8148-evm.dts b/arch/arm/boot/dts/dm8148-evm.dts
index 3931fb0..91d1018 100644
--- a/arch/arm/boot/dts/dm8148-evm.dts
+++ b/arch/arm/boot/dts/dm8148-evm.dts
@@ -24,12 +24,12 @@ vmmcsd_fixed: fixedregulator0 {
&cpsw_emac0 {
phy-handle = <ðphy0>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
};
&cpsw_emac1 {
phy-handle = <ðphy1>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
};
&davinci_mdio {
diff --git a/arch/arm/boot/dts/dm8148-t410.dts b/arch/arm/boot/dts/dm8148-t410.dts
index 9e43d5e..79ccdd4 100644
--- a/arch/arm/boot/dts/dm8148-t410.dts
+++ b/arch/arm/boot/dts/dm8148-t410.dts
@@ -33,12 +33,12 @@ vmmcsd_fixed: fixedregulator0 {
&cpsw_emac0 {
phy-handle = <ðphy0>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
};
&cpsw_emac1 {
phy-handle = <ðphy1>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
};
&davinci_mdio {
diff --git a/arch/arm/boot/dts/dra62x-j5eco-evm.dts b/arch/arm/boot/dts/dra62x-j5eco-evm.dts
index 861ab90..c16e183 100644
--- a/arch/arm/boot/dts/dra62x-j5eco-evm.dts
+++ b/arch/arm/boot/dts/dra62x-j5eco-evm.dts
@@ -24,12 +24,12 @@ vmmcsd_fixed: fixedregulator0 {
&cpsw_emac0 {
phy-handle = <ðphy0>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
};
&cpsw_emac1 {
phy-handle = <ðphy1>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
};
&davinci_mdio {
diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts
index de7f85e..af06a55 100644
--- a/arch/arm/boot/dts/dra7-evm.dts
+++ b/arch/arm/boot/dts/dra7-evm.dts
@@ -61,10 +61,10 @@ aic_dvdd: fixedregulator-aic_dvdd {
regulator-max-microvolt = <1800000>;
};
- evm_3v3: fixedregulator-evm3v3 {
+ vsys_3v3: fixedregulator-vsys3v3 {
/* Output of Cntlr A of TPS43351-Q1 on dra7-evm */
compatible = "regulator-fixed";
- regulator-name = "evm_3v3";
+ regulator-name = "vsys_3v3";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
vin-supply = <&evm_12v0>;
diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi
index fc41883..2119a78 100644
--- a/arch/arm/boot/dts/dra7-l4.dtsi
+++ b/arch/arm/boot/dts/dra7-l4.dtsi
@@ -3474,6 +3474,7 @@ timer13: timer@0 {
clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER13_CLKCTRL 24>;
clock-names = "fck";
interrupts = <GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>;
+ ti,timer-pwm;
};
};
@@ -3501,6 +3502,7 @@ timer14: timer@0 {
clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER14_CLKCTRL 24>;
clock-names = "fck";
interrupts = <GIC_SPI 340 IRQ_TYPE_LEVEL_HIGH>;
+ ti,timer-pwm;
};
};
@@ -3528,6 +3530,7 @@ timer15: timer@0 {
clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER15_CLKCTRL 24>;
clock-names = "fck";
interrupts = <GIC_SPI 341 IRQ_TYPE_LEVEL_HIGH>;
+ ti,timer-pwm;
};
};
@@ -3555,6 +3558,7 @@ timer16: timer@0 {
clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER16_CLKCTRL 24>;
clock-names = "fck";
interrupts = <GIC_SPI 342 IRQ_TYPE_LEVEL_HIGH>;
+ ti,timer-pwm;
};
};
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index d78b684..5f5ee16 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -148,6 +148,7 @@ ocp {
#address-cells = <1>;
#size-cells = <1>;
ranges = <0x0 0x0 0x0 0xc0000000>;
+ dma-ranges = <0x80000000 0x0 0x80000000 0x80000000>;
ti,hwmods = "l3_main_1", "l3_main_2";
reg = <0x0 0x44000000 0x0 0x1000000>,
<0x0 0x45000000 0x0 0x1000>;
@@ -184,6 +185,7 @@ pcie1_rc: pcie@51000000 {
device_type = "pci";
ranges = <0x81000000 0 0 0x03000 0 0x00010000
0x82000000 0 0x20013000 0x13000 0 0xffed000>;
+ dma-ranges = <0x02000000 0x0 0x00000000 0x00000000 0x1 0x00000000>;
bus-range = <0x00 0xff>;
#interrupt-cells = <1>;
num-lanes = <1>;
@@ -238,6 +240,7 @@ pcie2_rc: pcie@51800000 {
device_type = "pci";
ranges = <0x81000000 0 0 0x03000 0 0x00010000
0x82000000 0 0x30013000 0x13000 0 0xffed000>;
+ dma-ranges = <0x02000000 0x0 0x00000000 0x00000000 0x1 0x00000000>;
bus-range = <0x00 0xff>;
#interrupt-cells = <1>;
num-lanes = <1>;
diff --git a/arch/arm/boot/dts/dra76x.dtsi b/arch/arm/boot/dts/dra76x.dtsi
index 2f7539a..42b8a205 100644
--- a/arch/arm/boot/dts/dra76x.dtsi
+++ b/arch/arm/boot/dts/dra76x.dtsi
@@ -128,3 +128,8 @@ &rtctarget {
&usb4_tm {
status = "disabled";
};
+
+&mmc3 {
+ /* dra76x is not affected by i887 */
+ max-frequency = <96000000>;
+};
diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi
index 55cef4c..dc0a93b 100644
--- a/arch/arm/boot/dts/dra7xx-clocks.dtsi
+++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi
@@ -796,16 +796,6 @@ video2_div_clk: video2_div_clk {
clock-div = <1>;
};
- ipu1_gfclk_mux: ipu1_gfclk_mux@520 {
- #clock-cells = <0>;
- compatible = "ti,mux-clock";
- clocks = <&dpll_abe_m2x2_ck>, <&dpll_core_h22x2_ck>;
- ti,bit-shift = <24>;
- reg = <0x0520>;
- assigned-clocks = <&ipu1_gfclk_mux>;
- assigned-clock-parents = <&dpll_core_h22x2_ck>;
- };
-
dummy_ck: dummy_ck {
#clock-cells = <0>;
compatible = "fixed-clock";
@@ -1564,6 +1554,8 @@ ipu1_clkctrl: ipu1-clkctrl@20 {
compatible = "ti,clkctrl";
reg = <0x20 0x4>;
#clock-cells = <2>;
+ assigned-clocks = <&ipu1_clkctrl DRA7_IPU1_MMU_IPU1_CLKCTRL 24>;
+ assigned-clock-parents = <&dpll_core_h22x2_ck>;
};
ipu_clkctrl: ipu-clkctrl@50 {
diff --git a/arch/arm/boot/dts/exynos4412-galaxy-s3.dtsi b/arch/arm/boot/dts/exynos4412-galaxy-s3.dtsi
index 31719c0..44f9754 100644
--- a/arch/arm/boot/dts/exynos4412-galaxy-s3.dtsi
+++ b/arch/arm/boot/dts/exynos4412-galaxy-s3.dtsi
@@ -33,7 +33,7 @@ flash-led {
};
};
- lcd_vdd3_reg: voltage-regulator-6 {
+ lcd_vdd3_reg: voltage-regulator-7 {
compatible = "regulator-fixed";
regulator-name = "LCD_VDD_2.2V";
regulator-min-microvolt = <2200000>;
@@ -42,7 +42,7 @@ lcd_vdd3_reg: voltage-regulator-6 {
enable-active-high;
};
- ps_als_reg: voltage-regulator-7 {
+ ps_als_reg: voltage-regulator-8 {
compatible = "regulator-fixed";
regulator-name = "LED_A_3.0V";
regulator-min-microvolt = <3000000>;
diff --git a/arch/arm/boot/dts/exynos4412-n710x.dts b/arch/arm/boot/dts/exynos4412-n710x.dts
index 98cd128..4189e1f 100644
--- a/arch/arm/boot/dts/exynos4412-n710x.dts
+++ b/arch/arm/boot/dts/exynos4412-n710x.dts
@@ -13,7 +13,7 @@ memory@40000000 {
/* bootargs are passed in by bootloader */
- cam_vdda_reg: voltage-regulator-6 {
+ cam_vdda_reg: voltage-regulator-7 {
compatible = "regulator-fixed";
regulator-name = "CAM_SENSOR_CORE_1.2V";
regulator-min-microvolt = <1200000>;
diff --git a/arch/arm/boot/dts/imx6dl-colibri-eval-v3.dts b/arch/arm/boot/dts/imx6dl-colibri-eval-v3.dts
index cd07562..84fcc20 100644
--- a/arch/arm/boot/dts/imx6dl-colibri-eval-v3.dts
+++ b/arch/arm/boot/dts/imx6dl-colibri-eval-v3.dts
@@ -275,7 +275,7 @@ &weim {
/* SRAM on Colibri nEXT_CS0 */
sram@0,0 {
- compatible = "cypress,cy7c1019dv33-10zsxi, mtd-ram";
+ compatible = "cypress,cy7c1019dv33-10zsxi", "mtd-ram";
reg = <0 0 0x00010000>;
#address-cells = <1>;
#size-cells = <1>;
@@ -286,7 +286,7 @@ sram@0,0 {
/* SRAM on Colibri nEXT_CS1 */
sram@1,0 {
- compatible = "cypress,cy7c1019dv33-10zsxi, mtd-ram";
+ compatible = "cypress,cy7c1019dv33-10zsxi", "mtd-ram";
reg = <1 0 0x00010000>;
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi b/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi
index 978dc1c..77d8713 100644
--- a/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi
@@ -112,7 +112,7 @@ watchdog {
regulators {
vdd_arm: buck1 {
regulator-name = "vdd_arm";
- regulator-min-microvolt = <730000>;
+ regulator-min-microvolt = <925000>;
regulator-max-microvolt = <1380000>;
regulator-initial-mode = <DA9063_BUCK_MODE_SYNC>;
regulator-always-on;
@@ -120,7 +120,7 @@ vdd_arm: buck1 {
vdd_soc: buck2 {
regulator-name = "vdd_soc";
- regulator-min-microvolt = <730000>;
+ regulator-min-microvolt = <1150000>;
regulator-max-microvolt = <1380000>;
regulator-initial-mode = <DA9063_BUCK_MODE_SYNC>;
regulator-always-on;
@@ -192,7 +192,6 @@ &usdhc4 {
pinctrl-0 = <&pinctrl_usdhc4>;
bus-width = <8>;
non-removable;
- vmmc-supply = <&vdd_emmc_1p8>;
status = "disabled";
};
diff --git a/arch/arm/boot/dts/imx7-colibri.dtsi b/arch/arm/boot/dts/imx7-colibri.dtsi
index d05be3f..04717cf 100644
--- a/arch/arm/boot/dts/imx7-colibri.dtsi
+++ b/arch/arm/boot/dts/imx7-colibri.dtsi
@@ -336,7 +336,6 @@ &usdhc3 {
assigned-clock-rates = <400000000>;
bus-width = <8>;
fsl,tuning-step = <2>;
- max-frequency = <100000000>;
vmmc-supply = <®_module_3v3>;
vqmmc-supply = <®_DCDC3>;
non-removable;
diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi
index 92f6d0c..4c22828 100644
--- a/arch/arm/boot/dts/imx7d.dtsi
+++ b/arch/arm/boot/dts/imx7d.dtsi
@@ -44,7 +44,7 @@ opp-792000000 {
opp-hz = /bits/ 64 <792000000>;
opp-microvolt = <1000000>;
clock-latency-ns = <150000>;
- opp-supported-hw = <0xd>, <0xf>;
+ opp-supported-hw = <0xd>, <0x7>;
opp-suspend;
};
@@ -52,7 +52,7 @@ opp-996000000 {
opp-hz = /bits/ 64 <996000000>;
opp-microvolt = <1100000>;
clock-latency-ns = <150000>;
- opp-supported-hw = <0xc>, <0xf>;
+ opp-supported-hw = <0xc>, <0x7>;
opp-suspend;
};
@@ -60,7 +60,7 @@ opp-1200000000 {
opp-hz = /bits/ 64 <1200000000>;
opp-microvolt = <1225000>;
clock-latency-ns = <150000>;
- opp-supported-hw = <0x8>, <0xf>;
+ opp-supported-hw = <0x8>, <0x3>;
opp-suspend;
};
};
diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi
index 0855b1f..760a68c 100644
--- a/arch/arm/boot/dts/ls1021a.dtsi
+++ b/arch/arm/boot/dts/ls1021a.dtsi
@@ -747,7 +747,7 @@ dcu: dcu@2ce0000 {
};
mdio0: mdio@2d24000 {
- compatible = "fsl,etsec2-mdio";
+ compatible = "gianfar";
device_type = "mdio";
#address-cells = <1>;
#size-cells = <0>;
@@ -756,7 +756,7 @@ mdio0: mdio@2d24000 {
};
mdio1: mdio@2d64000 {
- compatible = "fsl,etsec2-mdio";
+ compatible = "gianfar";
device_type = "mdio";
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm/boot/dts/motorola-mapphone-common.dtsi b/arch/arm/boot/dts/motorola-mapphone-common.dtsi
index 8566550..9067e0e 100644
--- a/arch/arm/boot/dts/motorola-mapphone-common.dtsi
+++ b/arch/arm/boot/dts/motorola-mapphone-common.dtsi
@@ -182,6 +182,14 @@ vibrator {
pwm-names = "enable", "direction";
direction-duty-cycle-ns = <10000000>;
};
+
+ backlight: backlight {
+ compatible = "led-backlight";
+
+ leds = <&backlight_led>;
+ brightness-levels = <31 63 95 127 159 191 223 255>;
+ default-brightness-level = <6>;
+ };
};
&dss {
@@ -205,6 +213,8 @@ lcd0: display {
vddi-supply = <&lcd_regulator>;
reset-gpios = <&gpio4 5 GPIO_ACTIVE_HIGH>; /* gpio101 */
+ backlight = <&backlight>;
+
width-mm = <50>;
height-mm = <89>;
@@ -393,12 +403,11 @@ led-controller@38 {
ramp-up-us = <1024>;
ramp-down-us = <8193>;
- led@0 {
+ backlight_led: led@0 {
reg = <0>;
led-sources = <2>;
ti,led-mode = <0>;
label = ":backlight";
- linux,default-trigger = "backlight";
};
led@1 {
@@ -420,7 +429,7 @@ touchscreen@4a {
reset-gpios = <&gpio6 13 GPIO_ACTIVE_HIGH>; /* gpio173 */
/* gpio_183 with sys_nirq2 pad as wakeup */
- interrupts-extended = <&gpio6 23 IRQ_TYPE_EDGE_FALLING>,
+ interrupts-extended = <&gpio6 23 IRQ_TYPE_LEVEL_LOW>,
<&omap4_pmx_core 0x160>;
interrupt-names = "irq", "wakeup";
wakeup-source;
diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts
index c3c6d7d..4089d97 100644
--- a/arch/arm/boot/dts/omap3-n900.dts
+++ b/arch/arm/boot/dts/omap3-n900.dts
@@ -854,34 +854,46 @@ onenand@0,0 {
compatible = "ti,omap2-onenand";
reg = <0 0 0x20000>; /* CS0, offset 0, IO size 128K */
+ /*
+ * These timings are based on CONFIG_OMAP_GPMC_DEBUG=y reported
+ * bootloader set values when booted with v5.1
+ * (OneNAND Manufacturer: Samsung):
+ *
+ * cs0 GPMC_CS_CONFIG1: 0xfb001202
+ * cs0 GPMC_CS_CONFIG2: 0x00111100
+ * cs0 GPMC_CS_CONFIG3: 0x00020200
+ * cs0 GPMC_CS_CONFIG4: 0x11001102
+ * cs0 GPMC_CS_CONFIG5: 0x03101616
+ * cs0 GPMC_CS_CONFIG6: 0x90060000
+ */
gpmc,sync-read;
gpmc,sync-write;
gpmc,burst-length = <16>;
gpmc,burst-read;
gpmc,burst-wrap;
gpmc,burst-write;
- gpmc,device-width = <2>; /* GPMC_DEVWIDTH_16BIT */
- gpmc,mux-add-data = <2>; /* GPMC_MUX_AD */
+ gpmc,device-width = <2>;
+ gpmc,mux-add-data = <2>;
gpmc,cs-on-ns = <0>;
- gpmc,cs-rd-off-ns = <87>;
- gpmc,cs-wr-off-ns = <87>;
+ gpmc,cs-rd-off-ns = <102>;
+ gpmc,cs-wr-off-ns = <102>;
gpmc,adv-on-ns = <0>;
- gpmc,adv-rd-off-ns = <10>;
- gpmc,adv-wr-off-ns = <10>;
- gpmc,oe-on-ns = <15>;
- gpmc,oe-off-ns = <87>;
+ gpmc,adv-rd-off-ns = <12>;
+ gpmc,adv-wr-off-ns = <12>;
+ gpmc,oe-on-ns = <12>;
+ gpmc,oe-off-ns = <102>;
gpmc,we-on-ns = <0>;
- gpmc,we-off-ns = <87>;
- gpmc,rd-cycle-ns = <112>;
- gpmc,wr-cycle-ns = <112>;
- gpmc,access-ns = <81>;
- gpmc,page-burst-access-ns = <15>;
+ gpmc,we-off-ns = <102>;
+ gpmc,rd-cycle-ns = <132>;
+ gpmc,wr-cycle-ns = <132>;
+ gpmc,access-ns = <96>;
+ gpmc,page-burst-access-ns = <18>;
gpmc,bus-turnaround-ns = <0>;
gpmc,cycle2cycle-delay-ns = <0>;
gpmc,wait-monitoring-ns = <0>;
- gpmc,clk-activation-ns = <5>;
- gpmc,wr-data-mux-bus-ns = <30>;
- gpmc,wr-access-ns = <81>;
+ gpmc,clk-activation-ns = <6>;
+ gpmc,wr-data-mux-bus-ns = <36>;
+ gpmc,wr-access-ns = <96>;
gpmc,sync-clk-ps = <15000>;
/*
diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index d0ecf54..a7562d3 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -143,6 +143,7 @@ ocp {
#address-cells = <1>;
#size-cells = <1>;
ranges = <0 0 0 0xc0000000>;
+ dma-ranges = <0x80000000 0x0 0x80000000 0x80000000>;
ti,hwmods = "l3_main_1", "l3_main_2", "l3_main_3";
reg = <0 0x44000000 0 0x2000>,
<0 0x44800000 0 0x3000>,
diff --git a/arch/arm/boot/dts/ox810se.dtsi b/arch/arm/boot/dts/ox810se.dtsi
index 9f6c2b6..0755e58 100644
--- a/arch/arm/boot/dts/ox810se.dtsi
+++ b/arch/arm/boot/dts/ox810se.dtsi
@@ -323,8 +323,8 @@ intc: interrupt-controller@0 {
interrupt-controller;
reg = <0 0x200>;
#interrupt-cells = <1>;
- valid-mask = <0xFFFFFFFF>;
- clear-mask = <0>;
+ valid-mask = <0xffffffff>;
+ clear-mask = <0xffffffff>;
};
timer0: timer@200 {
diff --git a/arch/arm/boot/dts/ox820.dtsi b/arch/arm/boot/dts/ox820.dtsi
index c9b3277..90846a7 100644
--- a/arch/arm/boot/dts/ox820.dtsi
+++ b/arch/arm/boot/dts/ox820.dtsi
@@ -240,8 +240,8 @@ intc: interrupt-controller@0 {
reg = <0 0x200>;
interrupts = <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>;
#interrupt-cells = <1>;
- valid-mask = <0xFFFFFFFF>;
- clear-mask = <0>;
+ valid-mask = <0xffffffff>;
+ clear-mask = <0xffffffff>;
};
timer0: timer@200 {
diff --git a/arch/arm/boot/dts/r8a7779.dtsi b/arch/arm/boot/dts/r8a7779.dtsi
index beb9885..c0999e2 100644
--- a/arch/arm/boot/dts/r8a7779.dtsi
+++ b/arch/arm/boot/dts/r8a7779.dtsi
@@ -377,7 +377,7 @@ tmu2: timer@ffd82000 {
};
sata: sata@fc600000 {
- compatible = "renesas,sata-r8a7779", "renesas,rcar-sata";
+ compatible = "renesas,sata-r8a7779";
reg = <0xfc600000 0x200000>;
interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&mstp1_clks R8A7779_CLK_SATA>;
diff --git a/arch/arm/boot/dts/stih410-b2260.dts b/arch/arm/boot/dts/stih410-b2260.dts
index 4fbd8e9..e2bb597 100644
--- a/arch/arm/boot/dts/stih410-b2260.dts
+++ b/arch/arm/boot/dts/stih410-b2260.dts
@@ -178,9 +178,6 @@ ethernet0: dwmac@9630000 {
phy-mode = "rgmii";
pinctrl-0 = <&pinctrl_rgmii1 &pinctrl_rgmii1_mdio_1>;
- snps,phy-bus-name = "stmmac";
- snps,phy-bus-id = <0>;
- snps,phy-addr = <0>;
snps,reset-gpio = <&pio0 7 0>;
snps,reset-active-low;
snps,reset-delays-us = <0 10000 1000000>;
diff --git a/arch/arm/boot/dts/stihxxx-b2120.dtsi b/arch/arm/boot/dts/stihxxx-b2120.dtsi
index 60e1104..d051f08 100644
--- a/arch/arm/boot/dts/stihxxx-b2120.dtsi
+++ b/arch/arm/boot/dts/stihxxx-b2120.dtsi
@@ -46,7 +46,7 @@ simple-audio-card,dai-link@1 {
/* DAC */
format = "i2s";
mclk-fs = <256>;
- frame-inversion = <1>;
+ frame-inversion;
cpu {
sound-dai = <&sti_uni_player2>;
};
diff --git a/arch/arm/boot/dts/sun8i-a33.dtsi b/arch/arm/boot/dts/sun8i-a33.dtsi
index 1532a0e..a2c37ad 100644
--- a/arch/arm/boot/dts/sun8i-a33.dtsi
+++ b/arch/arm/boot/dts/sun8i-a33.dtsi
@@ -215,7 +215,7 @@ video-codec@1c0e000 {
};
crypto: crypto-engine@1c15000 {
- compatible = "allwinner,sun4i-a10-crypto";
+ compatible = "allwinner,sun8i-a33-crypto";
reg = <0x01c15000 0x1000>;
interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_BUS_SS>, <&ccu CLK_SS>;
diff --git a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
index 2fd31a0..e8b3669 100644
--- a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
+++ b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
@@ -374,8 +374,8 @@ ®_dldo2 {
};
®_dldo3 {
- regulator-min-microvolt = <2800000>;
- regulator-max-microvolt = <2800000>;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
regulator-name = "vdd-csi";
};
@@ -498,7 +498,8 @@ &usb_otg {
};
&usbphy {
- usb0_id_det-gpios = <&pio 7 11 GPIO_ACTIVE_HIGH>; /* PH11 */
+ usb0_id_det-gpios = <&pio 7 11 (GPIO_ACTIVE_HIGH | GPIO_PULL_UP)>; /* PH11 */
+ usb0_vbus_power-supply = <&usb_power_supply>;
usb0_vbus-supply = <®_drivevbus>;
usb1_vbus-supply = <®_vmain>;
usb2_vbus-supply = <®_vmain>;
diff --git a/arch/arm/boot/dts/sun8i-a83t.dtsi b/arch/arm/boot/dts/sun8i-a83t.dtsi
index 74ac7ee..e7b9bef 100644
--- a/arch/arm/boot/dts/sun8i-a83t.dtsi
+++ b/arch/arm/boot/dts/sun8i-a83t.dtsi
@@ -1006,10 +1006,10 @@ emac: ethernet@1c30000 {
reg = <0x01c30000 0x104>;
interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "macirq";
- resets = <&ccu CLK_BUS_EMAC>;
- reset-names = "stmmaceth";
- clocks = <&ccu RST_BUS_EMAC>;
+ clocks = <&ccu CLK_BUS_EMAC>;
clock-names = "stmmaceth";
+ resets = <&ccu RST_BUS_EMAC>;
+ reset-names = "stmmaceth";
status = "disabled";
mdio: mdio {
diff --git a/arch/arm/boot/dts/sun8i-r40.dtsi b/arch/arm/boot/dts/sun8i-r40.dtsi
index 8f09a24..a9d5d6d 100644
--- a/arch/arm/boot/dts/sun8i-r40.dtsi
+++ b/arch/arm/boot/dts/sun8i-r40.dtsi
@@ -181,6 +181,32 @@ nmi_intc: interrupt-controller@1c00030 {
interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>;
};
+ spi0: spi@1c05000 {
+ compatible = "allwinner,sun8i-r40-spi",
+ "allwinner,sun8i-h3-spi";
+ reg = <0x01c05000 0x1000>;
+ interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_SPI0>, <&ccu CLK_SPI0>;
+ clock-names = "ahb", "mod";
+ resets = <&ccu RST_BUS_SPI0>;
+ status = "disabled";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ spi1: spi@1c06000 {
+ compatible = "allwinner,sun8i-r40-spi",
+ "allwinner,sun8i-h3-spi";
+ reg = <0x01c06000 0x1000>;
+ interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_SPI1>, <&ccu CLK_SPI1>;
+ clock-names = "ahb", "mod";
+ resets = <&ccu RST_BUS_SPI1>;
+ status = "disabled";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
csi0: csi@1c09000 {
compatible = "allwinner,sun8i-r40-csi0",
"allwinner,sun7i-a20-csi0";
@@ -290,6 +316,29 @@ crypto: crypto@1c15000 {
resets = <&ccu RST_BUS_CE>;
};
+ spi2: spi@1c17000 {
+ compatible = "allwinner,sun8i-r40-spi",
+ "allwinner,sun8i-h3-spi";
+ reg = <0x01c17000 0x1000>;
+ interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_SPI2>, <&ccu CLK_SPI2>;
+ clock-names = "ahb", "mod";
+ resets = <&ccu RST_BUS_SPI2>;
+ status = "disabled";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ ahci: sata@1c18000 {
+ compatible = "allwinner,sun8i-r40-ahci";
+ reg = <0x01c18000 0x1000>;
+ interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_SATA>, <&ccu CLK_SATA>;
+ resets = <&ccu RST_BUS_SATA>;
+ reset-names = "ahci";
+ status = "disabled";
+ };
+
ehci1: usb@1c19000 {
compatible = "allwinner,sun8i-r40-ehci", "generic-ehci";
reg = <0x01c19000 0x100>;
@@ -336,6 +385,19 @@ ohci2: usb@1c1c400 {
status = "disabled";
};
+ spi3: spi@1c1f000 {
+ compatible = "allwinner,sun8i-r40-spi",
+ "allwinner,sun8i-h3-spi";
+ reg = <0x01c1f000 0x1000>;
+ interrupts = <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_SPI3>, <&ccu CLK_SPI3>;
+ clock-names = "ahb", "mod";
+ resets = <&ccu RST_BUS_SPI3>;
+ status = "disabled";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
ccu: clock@1c20000 {
compatible = "allwinner,sun8i-r40-ccu";
reg = <0x01c20000 0x400>;
@@ -653,69 +715,6 @@ i2c4: i2c@1c2c000 {
#size-cells = <0>;
};
- spi0: spi@1c05000 {
- compatible = "allwinner,sun8i-r40-spi",
- "allwinner,sun8i-h3-spi";
- reg = <0x01c05000 0x1000>;
- interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&ccu CLK_BUS_SPI0>, <&ccu CLK_SPI0>;
- clock-names = "ahb", "mod";
- resets = <&ccu RST_BUS_SPI0>;
- status = "disabled";
- #address-cells = <1>;
- #size-cells = <0>;
- };
-
- spi1: spi@1c06000 {
- compatible = "allwinner,sun8i-r40-spi",
- "allwinner,sun8i-h3-spi";
- reg = <0x01c06000 0x1000>;
- interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&ccu CLK_BUS_SPI1>, <&ccu CLK_SPI1>;
- clock-names = "ahb", "mod";
- resets = <&ccu RST_BUS_SPI1>;
- status = "disabled";
- #address-cells = <1>;
- #size-cells = <0>;
- };
-
- spi2: spi@1c07000 {
- compatible = "allwinner,sun8i-r40-spi",
- "allwinner,sun8i-h3-spi";
- reg = <0x01c07000 0x1000>;
- interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&ccu CLK_BUS_SPI2>, <&ccu CLK_SPI2>;
- clock-names = "ahb", "mod";
- resets = <&ccu RST_BUS_SPI2>;
- status = "disabled";
- #address-cells = <1>;
- #size-cells = <0>;
- };
-
- spi3: spi@1c0f000 {
- compatible = "allwinner,sun8i-r40-spi",
- "allwinner,sun8i-h3-spi";
- reg = <0x01c0f000 0x1000>;
- interrupts = <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&ccu CLK_BUS_SPI3>, <&ccu CLK_SPI3>;
- clock-names = "ahb", "mod";
- resets = <&ccu RST_BUS_SPI3>;
- status = "disabled";
- #address-cells = <1>;
- #size-cells = <0>;
- };
-
- ahci: sata@1c18000 {
- compatible = "allwinner,sun8i-r40-ahci";
- reg = <0x01c18000 0x1000>;
- interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&ccu CLK_BUS_SATA>, <&ccu CLK_SATA>;
- resets = <&ccu RST_BUS_SATA>;
- reset-names = "ahci";
- status = "disabled";
-
- };
-
gmac: ethernet@1c50000 {
compatible = "allwinner,sun8i-r40-gmac";
syscon = <&ccu>;
diff --git a/arch/arm/configs/am200epdkit_defconfig b/arch/arm/configs/am200epdkit_defconfig
index 622436f..f56ac39 100644
--- a/arch/arm/configs/am200epdkit_defconfig
+++ b/arch/arm/configs/am200epdkit_defconfig
@@ -11,8 +11,6 @@
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y
CONFIG_ARCH_GUMSTIX=y
CONFIG_PCCARD=y
diff --git a/arch/arm/configs/axm55xx_defconfig b/arch/arm/configs/axm55xx_defconfig
index f53634a..6ea7daf 100644
--- a/arch/arm/configs/axm55xx_defconfig
+++ b/arch/arm/configs/axm55xx_defconfig
@@ -25,7 +25,6 @@
CONFIG_PROFILING=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
-# CONFIG_IOSCHED_DEADLINE is not set
CONFIG_ARCH_AXXIA=y
CONFIG_GPIO_PCA953X=y
CONFIG_ARM_LPAE=y
diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig
index 519ff58..0afcae9 100644
--- a/arch/arm/configs/bcm2835_defconfig
+++ b/arch/arm/configs/bcm2835_defconfig
@@ -178,6 +178,7 @@
CONFIG_STACK_TRACER=y
CONFIG_FUNCTION_PROFILER=y
CONFIG_TEST_KSTRTOX=y
+CONFIG_DEBUG_FS=y
CONFIG_KGDB=y
CONFIG_KGDB_KDB=y
CONFIG_STRICT_DEVMEM=y
diff --git a/arch/arm/configs/clps711x_defconfig b/arch/arm/configs/clps711x_defconfig
index c255dab..63a153f 100644
--- a/arch/arm/configs/clps711x_defconfig
+++ b/arch/arm/configs/clps711x_defconfig
@@ -7,7 +7,6 @@
CONFIG_SLOB=y
CONFIG_JUMP_LABEL=y
CONFIG_PARTITION_ADVANCED=y
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_CLPS711X=y
CONFIG_ARCH_AUTCPU12=y
CONFIG_ARCH_CDB89712=y
diff --git a/arch/arm/configs/cns3420vb_defconfig b/arch/arm/configs/cns3420vb_defconfig
index 89df0a5..66a80b4 100644
--- a/arch/arm/configs/cns3420vb_defconfig
+++ b/arch/arm/configs/cns3420vb_defconfig
@@ -17,7 +17,7 @@
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y
# CONFIG_BLK_DEV_BSG is not set
-CONFIG_IOSCHED_CFQ=m
+CONFIG_IOSCHED_BFQ=m
CONFIG_ARCH_MULTI_V6=y
#CONFIG_ARCH_MULTI_V7 is not set
CONFIG_ARCH_CNS3XXX=y
diff --git a/arch/arm/configs/colibri_pxa300_defconfig b/arch/arm/configs/colibri_pxa300_defconfig
index 446134c..0dae3b1 100644
--- a/arch/arm/configs/colibri_pxa300_defconfig
+++ b/arch/arm/configs/colibri_pxa300_defconfig
@@ -43,7 +43,6 @@
CONFIG_USB_MON=y
CONFIG_USB_STORAGE=y
CONFIG_MMC=y
-# CONFIG_MMC_BLOCK_BOUNCE is not set
CONFIG_MMC_PXA=y
CONFIG_EXT3_FS=y
CONFIG_NFS_FS=y
diff --git a/arch/arm/configs/collie_defconfig b/arch/arm/configs/collie_defconfig
index e6df11e..36384fd 100644
--- a/arch/arm/configs/collie_defconfig
+++ b/arch/arm/configs/collie_defconfig
@@ -7,8 +7,6 @@
# CONFIG_BASE_FULL is not set
# CONFIG_EPOLL is not set
CONFIG_SLOB=y
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_SA1100=y
CONFIG_SA1100_COLLIE=y
CONFIG_PCCARD=y
diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig
index 231f897..b5ba8d7 100644
--- a/arch/arm/configs/davinci_all_defconfig
+++ b/arch/arm/configs/davinci_all_defconfig
@@ -15,8 +15,6 @@
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_PARTITION_ADVANCED=y
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_MULTIPLATFORM=y
CONFIG_ARCH_MULTI_V7=n
CONFIG_ARCH_MULTI_V5=y
diff --git a/arch/arm/configs/efm32_defconfig b/arch/arm/configs/efm32_defconfig
index 10ea925..46213f0 100644
--- a/arch/arm/configs/efm32_defconfig
+++ b/arch/arm/configs/efm32_defconfig
@@ -12,8 +12,6 @@
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_SLUB_DEBUG is not set
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
# CONFIG_MMU is not set
CONFIG_ARM_SINGLE_ARMV7M=y
CONFIG_ARCH_EFM32=y
diff --git a/arch/arm/configs/ep93xx_defconfig b/arch/arm/configs/ep93xx_defconfig
index ef2d2a8..cd16fb6 100644
--- a/arch/arm/configs/ep93xx_defconfig
+++ b/arch/arm/configs/ep93xx_defconfig
@@ -11,7 +11,6 @@
CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_EP93XX=y
CONFIG_CRUNCH=y
CONFIG_MACH_ADSSPHERE=y
diff --git a/arch/arm/configs/eseries_pxa_defconfig b/arch/arm/configs/eseries_pxa_defconfig
index 56452fa..046f4dc 100644
--- a/arch/arm/configs/eseries_pxa_defconfig
+++ b/arch/arm/configs/eseries_pxa_defconfig
@@ -9,8 +9,6 @@
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y
CONFIG_ARCH_PXA_ESERIES=y
# CONFIG_ARM_THUMB is not set
diff --git a/arch/arm/configs/ezx_defconfig b/arch/arm/configs/ezx_defconfig
index 4e28771..bd7b7f9 100644
--- a/arch/arm/configs/ezx_defconfig
+++ b/arch/arm/configs/ezx_defconfig
@@ -14,7 +14,6 @@
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y
CONFIG_PXA_EZX=y
CONFIG_NO_HZ=y
diff --git a/arch/arm/configs/h3600_defconfig b/arch/arm/configs/h3600_defconfig
index 4d91e41..c02b3e4 100644
--- a/arch/arm/configs/h3600_defconfig
+++ b/arch/arm/configs/h3600_defconfig
@@ -5,8 +5,6 @@
CONFIG_BLK_DEV_INITRD=y
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_SA1100=y
CONFIG_SA1100_H3600=y
CONFIG_PCCARD=y
diff --git a/arch/arm/configs/h5000_defconfig b/arch/arm/configs/h5000_defconfig
index 3946c60..f5a338f 100644
--- a/arch/arm/configs/h5000_defconfig
+++ b/arch/arm/configs/h5000_defconfig
@@ -10,7 +10,6 @@
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y
CONFIG_MACH_H5000=y
CONFIG_AEABI=y
diff --git a/arch/arm/configs/imote2_defconfig b/arch/arm/configs/imote2_defconfig
index 770469f..05c5515 100644
--- a/arch/arm/configs/imote2_defconfig
+++ b/arch/arm/configs/imote2_defconfig
@@ -13,7 +13,6 @@
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y
CONFIG_MACH_INTELMOTE2=y
CONFIG_NO_HZ=y
diff --git a/arch/arm/configs/imx_v4_v5_defconfig b/arch/arm/configs/imx_v4_v5_defconfig
index 2b2d617..3df90fc 100644
--- a/arch/arm/configs/imx_v4_v5_defconfig
+++ b/arch/arm/configs/imx_v4_v5_defconfig
@@ -32,8 +32,6 @@
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/arm/configs/lpc18xx_defconfig b/arch/arm/configs/lpc18xx_defconfig
index e518168..be882ea 100644
--- a/arch/arm/configs/lpc18xx_defconfig
+++ b/arch/arm/configs/lpc18xx_defconfig
@@ -1,4 +1,3 @@
-CONFIG_CROSS_COMPILE="arm-linux-gnueabihf-"
CONFIG_HIGH_RES_TIMERS=y
CONFIG_PREEMPT=y
CONFIG_BLK_DEV_INITRD=y
@@ -28,10 +27,7 @@
CONFIG_ZBOOT_ROM_TEXT=0x0
CONFIG_ZBOOT_ROM_BSS=0x0
CONFIG_ARM_APPENDED_DTB=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_BINFMT_FLAT=y
CONFIG_BINFMT_ZFLAT=y
CONFIG_BINFMT_SHARED_FLAT=y
diff --git a/arch/arm/configs/magician_defconfig b/arch/arm/configs/magician_defconfig
index e6486c9..d2e684f 100644
--- a/arch/arm/configs/magician_defconfig
+++ b/arch/arm/configs/magician_defconfig
@@ -9,8 +9,6 @@
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y
CONFIG_MACH_H4700=y
CONFIG_MACH_MAGICIAN=y
diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig
index 45d2719..6834e97 100644
--- a/arch/arm/configs/moxart_defconfig
+++ b/arch/arm/configs/moxart_defconfig
@@ -15,7 +15,6 @@
# CONFIG_SLUB_DEBUG is not set
# CONFIG_COMPAT_BRK is not set
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
CONFIG_ARCH_MULTI_V4=y
# CONFIG_ARCH_MULTI_V7 is not set
CONFIG_ARCH_MOXART=y
diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig
index 2773899..a9c6f32 100644
--- a/arch/arm/configs/mxs_defconfig
+++ b/arch/arm/configs/mxs_defconfig
@@ -25,8 +25,6 @@
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_BLK_DEV_INTEGRITY=y
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig
index 0c43c58..3b6e745 100644
--- a/arch/arm/configs/omap1_defconfig
+++ b/arch/arm/configs/omap1_defconfig
@@ -18,8 +18,6 @@
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_OMAP=y
CONFIG_ARCH_OMAP1=y
CONFIG_OMAP_RESET_CLOCKS=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index c32c338..847f987 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -375,6 +375,7 @@
CONFIG_BACKLIGHT_PWM=m
CONFIG_BACKLIGHT_PANDORA=m
CONFIG_BACKLIGHT_GPIO=m
+CONFIG_BACKLIGHT_LED=m
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
CONFIG_LOGO=y
diff --git a/arch/arm/configs/palmz72_defconfig b/arch/arm/configs/palmz72_defconfig
index 4a3fd82..b47c8ab 100644
--- a/arch/arm/configs/palmz72_defconfig
+++ b/arch/arm/configs/palmz72_defconfig
@@ -7,8 +7,6 @@
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y
CONFIG_ARCH_PXA_PALM=y
# CONFIG_MACH_PALMTX is not set
diff --git a/arch/arm/configs/pcm027_defconfig b/arch/arm/configs/pcm027_defconfig
index a8c5322..e97a158 100644
--- a/arch/arm/configs/pcm027_defconfig
+++ b/arch/arm/configs/pcm027_defconfig
@@ -13,8 +13,6 @@
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y
CONFIG_MACH_PCM027=y
CONFIG_MACH_PCM990_BASEBOARD=y
diff --git a/arch/arm/configs/pleb_defconfig b/arch/arm/configs/pleb_defconfig
index f0541b0..2170148 100644
--- a/arch/arm/configs/pleb_defconfig
+++ b/arch/arm/configs/pleb_defconfig
@@ -6,8 +6,6 @@
# CONFIG_HOTPLUG is not set
# CONFIG_SHMEM is not set
CONFIG_MODULES=y
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_SA1100=y
CONFIG_SA1100_PLEB=y
CONFIG_ZBOOT_ROM_TEXT=0x0
diff --git a/arch/arm/configs/realview_defconfig b/arch/arm/configs/realview_defconfig
index 8a056cc..70e2c74 100644
--- a/arch/arm/configs/realview_defconfig
+++ b/arch/arm/configs/realview_defconfig
@@ -8,7 +8,6 @@
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_MULTI_V6=y
CONFIG_ARCH_REALVIEW=y
CONFIG_MACH_REALVIEW_EB=y
diff --git a/arch/arm/configs/sama5_defconfig b/arch/arm/configs/sama5_defconfig
index 27f6135..bab7861 100644
--- a/arch/arm/configs/sama5_defconfig
+++ b/arch/arm/configs/sama5_defconfig
@@ -14,8 +14,6 @@
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_AT91=y
CONFIG_SOC_SAMA5D2=y
CONFIG_SOC_SAMA5D3=y
@@ -182,7 +180,6 @@
CONFIG_USB_ATMEL_USBA=y
CONFIG_USB_G_SERIAL=y
CONFIG_MMC=y
-# CONFIG_MMC_BLOCK_BOUNCE is not set
CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_SDHCI_OF_AT91=y
diff --git a/arch/arm/configs/socfpga_defconfig b/arch/arm/configs/socfpga_defconfig
index fe2e1e8..e73c97b 100644
--- a/arch/arm/configs/socfpga_defconfig
+++ b/arch/arm/configs/socfpga_defconfig
@@ -157,6 +157,7 @@
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO=y
CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_FS=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_FUNCTION_TRACER=y
diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig
index 152321d..551db32 100644
--- a/arch/arm/configs/stm32_defconfig
+++ b/arch/arm/configs/stm32_defconfig
@@ -14,8 +14,6 @@
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_SLUB_DEBUG is not set
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
# CONFIG_MMU is not set
CONFIG_ARCH_STM32=y
CONFIG_CPU_V7M_NUM_IRQ=240
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index 3f5d727..e9fb573 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -85,6 +85,7 @@
CONFIG_AXP20X_POWER=y
CONFIG_THERMAL=y
CONFIG_CPU_THERMAL=y
+CONFIG_SUN8I_THERMAL=y
CONFIG_WATCHDOG=y
CONFIG_SUNXI_WATCHDOG=y
CONFIG_MFD_AC100=y
diff --git a/arch/arm/configs/u300_defconfig b/arch/arm/configs/u300_defconfig
index 8223397..543f073 100644
--- a/arch/arm/configs/u300_defconfig
+++ b/arch/arm/configs/u300_defconfig
@@ -11,7 +11,6 @@
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
-# CONFIG_IOSCHED_CFQ is not set
# CONFIG_ARCH_MULTI_V7 is not set
CONFIG_ARCH_U300=y
CONFIG_MACH_U300_SPIDUMMY=y
@@ -46,7 +45,6 @@
CONFIG_BACKLIGHT_CLASS_DEVICE=y
# CONFIG_USB_SUPPORT is not set
CONFIG_MMC=y
-# CONFIG_MMC_BLOCK_BOUNCE is not set
CONFIG_MMC_ARMMMCI=y
CONFIG_RTC_CLASS=y
# CONFIG_RTC_HCTOSYS is not set
diff --git a/arch/arm/configs/vexpress_defconfig b/arch/arm/configs/vexpress_defconfig
index 2575355..c01baf7 100644
--- a/arch/arm/configs/vexpress_defconfig
+++ b/arch/arm/configs/vexpress_defconfig
@@ -15,8 +15,6 @@
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_VEXPRESS=y
CONFIG_ARCH_VEXPRESS_DCSCB=y
CONFIG_ARCH_VEXPRESS_TC2_PM=y
diff --git a/arch/arm/configs/viper_defconfig b/arch/arm/configs/viper_defconfig
index 2ff1616..989599c 100644
--- a/arch/arm/configs/viper_defconfig
+++ b/arch/arm/configs/viper_defconfig
@@ -9,7 +9,6 @@
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y
CONFIG_ARCH_VIPER=y
CONFIG_IWMMXT=y
diff --git a/arch/arm/configs/zeus_defconfig b/arch/arm/configs/zeus_defconfig
index aa3023c..d3b98c4 100644
--- a/arch/arm/configs/zeus_defconfig
+++ b/arch/arm/configs/zeus_defconfig
@@ -4,7 +4,6 @@
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y
CONFIG_MACH_ARCOM_ZEUS=y
CONFIG_PCCARD=m
@@ -137,7 +136,6 @@
CONFIG_USB_G_SERIAL=m
CONFIG_USB_G_PRINTER=m
CONFIG_MMC=y
-# CONFIG_MMC_BLOCK_BOUNCE is not set
CONFIG_MMC_PXA=y
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=m
diff --git a/arch/arm/configs/zx_defconfig b/arch/arm/configs/zx_defconfig
index 4d2ef78..a046a49 100644
--- a/arch/arm/configs/zx_defconfig
+++ b/arch/arm/configs/zx_defconfig
@@ -16,7 +16,6 @@
CONFIG_PERF_EVENTS=y
CONFIG_SLAB=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_ZX=y
CONFIG_SOC_ZX296702=y
# CONFIG_SWP_EMULATE is not set
diff --git a/arch/arm/include/asm/floppy.h b/arch/arm/include/asm/floppy.h
index f4fe4d0..79fa327 100644
--- a/arch/arm/include/asm/floppy.h
+++ b/arch/arm/include/asm/floppy.h
@@ -8,16 +8,18 @@
*/
#ifndef __ASM_ARM_FLOPPY_H
#define __ASM_ARM_FLOPPY_H
-#if 0
-#include <mach/floppy.h>
-#endif
-#define fd_outb(val,port) \
- do { \
- if ((port) == (u32)FD_DOR) \
- fd_setdor((val)); \
- else \
- outb((val),(port)); \
+#define fd_outb(val,port) \
+ do { \
+ int new_val = (val); \
+ if (((port) & 7) == FD_DOR) { \
+ if (new_val & 0xf0) \
+ new_val = (new_val & 0x0c) | \
+ floppy_selects[new_val & 3]; \
+ else \
+ new_val &= 0x0c; \
+ } \
+ outb(new_val, (port)); \
} while(0)
#define fd_inb(port) inb((port))
@@ -53,69 +55,7 @@ static inline int fd_dma_setup(void *data, unsigned int length,
* to a non-zero track, and then restoring it to track 0. If an error occurs,
* then there is no floppy drive present. [to be put back in again]
*/
-static unsigned char floppy_selects[2][4] =
-{
- { 0x10, 0x21, 0x23, 0x33 },
- { 0x10, 0x21, 0x23, 0x33 }
-};
-
-#define fd_setdor(dor) \
-do { \
- int new_dor = (dor); \
- if (new_dor & 0xf0) \
- new_dor = (new_dor & 0x0c) | floppy_selects[fdc][new_dor & 3]; \
- else \
- new_dor &= 0x0c; \
- outb(new_dor, FD_DOR); \
-} while (0)
-
-/*
- * Someday, we'll automatically detect which drives are present...
- */
-static inline void fd_scandrives (void)
-{
-#if 0
- int floppy, drive_count;
-
- fd_disable_irq();
- raw_cmd = &default_raw_cmd;
- raw_cmd->flags = FD_RAW_SPIN | FD_RAW_NEED_SEEK;
- raw_cmd->track = 0;
- raw_cmd->rate = ?;
- drive_count = 0;
- for (floppy = 0; floppy < 4; floppy ++) {
- current_drive = drive_count;
- /*
- * Turn on floppy motor
- */
- if (start_motor(redo_fd_request))
- continue;
- /*
- * Set up FDC
- */
- fdc_specify();
- /*
- * Tell FDC to recalibrate
- */
- output_byte(FD_RECALIBRATE);
- LAST_OUT(UNIT(floppy));
- /* wait for command to complete */
- if (!successful) {
- int i;
- for (i = drive_count; i < 3; i--)
- floppy_selects[fdc][i] = floppy_selects[fdc][i + 1];
- floppy_selects[fdc][3] = 0;
- floppy -= 1;
- } else
- drive_count++;
- }
-#else
- floppy_selects[0][0] = 0x10;
- floppy_selects[0][1] = 0x21;
- floppy_selects[0][2] = 0x23;
- floppy_selects[0][3] = 0x33;
-#endif
-}
+static unsigned char floppy_selects[4] = { 0x10, 0x21, 0x23, 0x33 };
#define FDC1 (0x3f0)
@@ -135,9 +75,7 @@ static inline void fd_scandrives (void)
*/
static void driveswap(int *ints, int dummy, int dummy2)
{
- floppy_selects[0][0] ^= floppy_selects[0][1];
- floppy_selects[0][1] ^= floppy_selects[0][0];
- floppy_selects[0][0] ^= floppy_selects[0][1];
+ swap(floppy_selects[0], floppy_selects[1]);
}
#define EXTRA_FLOPPY_PARAMS ,{ "driveswap", &driveswap, NULL, 0, 0 }
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index c3314b2..a827b4d 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -392,9 +392,6 @@ static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {}
static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {}
static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arm_vhe_guest_enter(void) {}
-static inline void kvm_arm_vhe_guest_exit(void) {}
-
#define KVM_BP_HARDEN_UNKNOWN -1
#define KVM_BP_HARDEN_WA_NEEDED 0
#define KVM_BP_HARDEN_NOT_REQUIRED 1
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 2a5ff69..10499d4 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -78,13 +78,10 @@ static int ftrace_modify_code(unsigned long pc, unsigned long old,
{
unsigned long replaced;
- if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) {
+ if (IS_ENABLED(CONFIG_THUMB2_KERNEL))
old = __opcode_to_mem_thumb32(old);
- new = __opcode_to_mem_thumb32(new);
- } else {
+ else
old = __opcode_to_mem_arm(old);
- new = __opcode_to_mem_arm(new);
- }
if (validate) {
if (probe_kernel_read(&replaced, (void *)pc, MCOUNT_INSN_SIZE))
diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c
index d0a05a3..e9e828b 100644
--- a/arch/arm/kernel/patch.c
+++ b/arch/arm/kernel/patch.c
@@ -16,10 +16,10 @@ struct patch {
unsigned int insn;
};
+#ifdef CONFIG_MMU
static DEFINE_RAW_SPINLOCK(patch_lock);
static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
- __acquires(&patch_lock)
{
unsigned int uintaddr = (uintptr_t) addr;
bool module = !core_kernel_text(uintaddr);
@@ -34,8 +34,6 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
if (flags)
raw_spin_lock_irqsave(&patch_lock, *flags);
- else
- __acquire(&patch_lock);
set_fixmap(fixmap, page_to_phys(page));
@@ -43,15 +41,19 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
}
static void __kprobes patch_unmap(int fixmap, unsigned long *flags)
- __releases(&patch_lock)
{
clear_fixmap(fixmap);
if (flags)
raw_spin_unlock_irqrestore(&patch_lock, *flags);
- else
- __release(&patch_lock);
}
+#else
+static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
+{
+ return addr;
+}
+static void __kprobes patch_unmap(int fixmap, unsigned long *flags) { }
+#endif
void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
{
@@ -64,8 +66,6 @@ void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
if (remap)
waddr = patch_map(addr, FIX_TEXT_POKE0, &flags);
- else
- __acquire(&patch_lock);
if (thumb2 && __opcode_is_thumb16(insn)) {
*(u16 *)waddr = __opcode_to_mem_thumb16(insn);
@@ -102,8 +102,7 @@ void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
if (waddr != addr) {
flush_kernel_vmap_range(waddr, twopage ? size / 2 : size);
patch_unmap(FIX_TEXT_POKE0, &flags);
- } else
- __release(&patch_lock);
+ }
flush_icache_range((uintptr_t)(addr),
(uintptr_t)(addr) + size);
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
index c89ac1b..e0330a2 100644
--- a/arch/arm/kernel/vdso.c
+++ b/arch/arm/kernel/vdso.c
@@ -95,6 +95,8 @@ static bool __init cntvct_functional(void)
*/
np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer");
if (!np)
+ np = of_find_compatible_node(NULL, NULL, "arm,armv8-timer");
+ if (!np)
goto out_put;
if (of_property_read_bool(np, "arm,cpu-registers-not-fw-configured"))
diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S
index 95b2e1c..f8016e3 100644
--- a/arch/arm/lib/copy_from_user.S
+++ b/arch/arm/lib/copy_from_user.S
@@ -118,7 +118,7 @@
ENDPROC(arm_copy_from_user)
- .pushsection .fixup,"ax"
+ .pushsection .text.fixup,"ax"
.align 0
copy_abort_preamble
ldmfd sp!, {r1, r2, r3}
diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile
index 35ff620..03506ce 100644
--- a/arch/arm/mach-imx/Makefile
+++ b/arch/arm/mach-imx/Makefile
@@ -91,6 +91,8 @@
obj-$(CONFIG_SOC_IMX6) += suspend-imx6.o
obj-$(CONFIG_SOC_IMX53) += suspend-imx53.o
endif
+AFLAGS_resume-imx6.o :=-Wa,-march=armv7-a
+obj-$(CONFIG_SOC_IMX6) += resume-imx6.o
obj-$(CONFIG_SOC_IMX6) += pm-imx6.o
obj-$(CONFIG_SOC_IMX1) += mach-imx1.o
diff --git a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h
index 912aece..5aa5796 100644
--- a/arch/arm/mach-imx/common.h
+++ b/arch/arm/mach-imx/common.h
@@ -109,17 +109,17 @@ void imx_cpu_die(unsigned int cpu);
int imx_cpu_kill(unsigned int cpu);
#ifdef CONFIG_SUSPEND
-void v7_cpu_resume(void);
void imx53_suspend(void __iomem *ocram_vbase);
extern const u32 imx53_suspend_sz;
void imx6_suspend(void __iomem *ocram_vbase);
#else
-static inline void v7_cpu_resume(void) {}
static inline void imx53_suspend(void __iomem *ocram_vbase) {}
static const u32 imx53_suspend_sz;
static inline void imx6_suspend(void __iomem *ocram_vbase) {}
#endif
+void v7_cpu_resume(void);
+
void imx6_pm_ccm_init(const char *ccm_compat);
void imx6q_pm_init(void);
void imx6dl_pm_init(void);
diff --git a/arch/arm/mach-imx/resume-imx6.S b/arch/arm/mach-imx/resume-imx6.S
new file mode 100644
index 0000000..5bd1ba7
--- /dev/null
+++ b/arch/arm/mach-imx/resume-imx6.S
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2014 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/hardware/cache-l2x0.h>
+#include "hardware.h"
+
+/*
+ * The following code must assume it is running from physical address
+ * where absolute virtual addresses to the data section have to be
+ * turned into relative ones.
+ */
+
+ENTRY(v7_cpu_resume)
+ bl v7_invalidate_l1
+#ifdef CONFIG_CACHE_L2X0
+ bl l2c310_early_resume
+#endif
+ b cpu_resume
+ENDPROC(v7_cpu_resume)
diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S
index 062391f..1eabf2d 100644
--- a/arch/arm/mach-imx/suspend-imx6.S
+++ b/arch/arm/mach-imx/suspend-imx6.S
@@ -327,17 +327,3 @@
ret lr
ENDPROC(imx6_suspend)
-
-/*
- * The following code must assume it is running from physical address
- * where absolute virtual addresses to the data section have to be
- * turned into relative ones.
- */
-
-ENTRY(v7_cpu_resume)
- bl v7_invalidate_l1
-#ifdef CONFIG_CACHE_L2X0
- bl l2c310_early_resume
-#endif
- b cpu_resume
-ENDPROC(v7_cpu_resume)
diff --git a/arch/arm/mach-meson/Kconfig b/arch/arm/mach-meson/Kconfig
index 01f0f4b..75034fe 100644
--- a/arch/arm/mach-meson/Kconfig
+++ b/arch/arm/mach-meson/Kconfig
@@ -9,7 +9,6 @@
select CACHE_L2X0
select PINCTRL
select PINCTRL_MESON
- select COMMON_CLK
select HAVE_ARM_SCU if SMP
select HAVE_ARM_TWD if SMP
diff --git a/arch/arm/mach-npcm/Kconfig b/arch/arm/mach-npcm/Kconfig
index 880bc2a..7f7002d 100644
--- a/arch/arm/mach-npcm/Kconfig
+++ b/arch/arm/mach-npcm/Kconfig
@@ -11,7 +11,7 @@
depends on ARCH_MULTI_V7
select PINCTRL_NPCM7XX
select NPCM7XX_TIMER
- select ARCH_REQUIRE_GPIOLIB
+ select GPIOLIB
select CACHE_L2X0
select ARM_GIC
select HAVE_ARM_TWD if SMP
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index e1135b9..5017a3b 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -16,7 +16,7 @@
clock-common = clock.o
secure-common = omap-smc.o omap-secure.o
-obj-$(CONFIG_ARCH_OMAP2) += $(omap-2-3-common) $(hwmod-common) $(secure-common)
+obj-$(CONFIG_ARCH_OMAP2) += $(omap-2-3-common) $(hwmod-common)
obj-$(CONFIG_ARCH_OMAP3) += $(omap-2-3-common) $(hwmod-common) $(secure-common)
obj-$(CONFIG_ARCH_OMAP4) += $(hwmod-common) $(secure-common)
obj-$(CONFIG_SOC_AM33XX) += $(hwmod-common) $(secure-common)
diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
index f280472..27608d1 100644
--- a/arch/arm/mach-omap2/io.c
+++ b/arch/arm/mach-omap2/io.c
@@ -431,7 +431,6 @@ void __init omap2420_init_early(void)
omap_hwmod_init_postsetup();
omap_clk_soc_init = omap2420_dt_clk_init;
rate_table = omap2420_rate_table;
- omap_secure_init();
}
void __init omap2420_init_late(void)
@@ -456,7 +455,6 @@ void __init omap2430_init_early(void)
omap_hwmod_init_postsetup();
omap_clk_soc_init = omap2430_dt_clk_init;
rate_table = omap2430_rate_table;
- omap_secure_init();
}
void __init omap2430_init_late(void)
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts
index f82f25c..d5dc128 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts
@@ -327,7 +327,7 @@ &sd_emmc_a {
#size-cells = <0>;
bus-width = <4>;
- max-frequency = <50000000>;
+ max-frequency = <60000000>;
non-removable;
disable-wp;
diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts
index a8bb3fa..cb1b48f 100644
--- a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts
@@ -593,6 +593,7 @@ bluetooth {
compatible = "brcm,bcm43438-bt";
interrupt-parent = <&gpio_intc>;
interrupts = <95 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "host-wakeup";
shutdown-gpios = <&gpio GPIOX_17 GPIO_ACTIVE_HIGH>;
max-speed = <2000000>;
clocks = <&wifi32k>;
diff --git a/arch/arm64/boot/dts/arm/fvp-base-revc.dts b/arch/arm64/boot/dts/arm/fvp-base-revc.dts
index 62ab0d5..335fff7 100644
--- a/arch/arm64/boot/dts/arm/fvp-base-revc.dts
+++ b/arch/arm64/boot/dts/arm/fvp-base-revc.dts
@@ -161,10 +161,10 @@ pci: pci@40000000 {
bus-range = <0x0 0x1>;
reg = <0x0 0x40000000 0x0 0x10000000>;
ranges = <0x2000000 0x0 0x50000000 0x0 0x50000000 0x0 0x10000000>;
- interrupt-map = <0 0 0 1 &gic GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>,
- <0 0 0 2 &gic GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>,
- <0 0 0 3 &gic GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>,
- <0 0 0 4 &gic GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-map = <0 0 0 1 &gic 0 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 0 2 &gic 0 0 GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 0 3 &gic 0 0 GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 0 4 &gic 0 0 GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>;
interrupt-map-mask = <0x0 0x0 0x0 0x7>;
msi-map = <0x0 &its 0x0 0x10000>;
iommu-map = <0x0 &smmu 0x0 0x10000>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index 0bf375e..55b71bb 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -53,7 +53,7 @@ idle-states {
* PSCI node is not added default, U-boot will add missing
* parts if it determines to use PSCI.
*/
- entry-method = "arm,psci";
+ entry-method = "psci";
CPU_PW20: cpu-pw20 {
compatible = "arm,idle-state";
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi
index 6082ae0..d237162 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi
@@ -20,6 +20,8 @@ &soc {
};
&fman0 {
+ fsl,erratum-a050385;
+
/* these aliases provide the FMan ports mapping */
enet0: ethernet@e0000 {
};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts
index 4223a23..dde50c8 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts
@@ -119,12 +119,12 @@ ethernet@e2000 {
ethernet@e4000 {
phy-handle = <&rgmii_phy1>;
- phy-connection-type = "rgmii-txid";
+ phy-connection-type = "rgmii-id";
};
ethernet@e6000 {
phy-handle = <&rgmii_phy2>;
- phy-connection-type = "rgmii-txid";
+ phy-connection-type = "rgmii-id";
};
ethernet@e8000 {
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts
index dbc23d6..d53ccc5 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts
@@ -131,12 +131,12 @@ &usb1 {
&fman0 {
ethernet@e4000 {
phy-handle = <&rgmii_phy1>;
- phy-connection-type = "rgmii";
+ phy-connection-type = "rgmii-id";
};
ethernet@e6000 {
phy-handle = <&rgmii_phy2>;
- phy-connection-type = "rgmii";
+ phy-connection-type = "rgmii-id";
};
ethernet@e8000 {
diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts
index d3d26cc..13460a3 100644
--- a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts
+++ b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts
@@ -52,11 +52,6 @@ ethphy0: ethernet-phy@0 {
compatible = "ethernet-phy-ieee802.3-c22";
reg = <0>;
};
-
- ethphy1: ethernet-phy@1 {
- compatible = "ethernet-phy-ieee802.3-c22";
- reg = <1>;
- };
};
};
diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi
index e1d357e..d8c44d3 100644
--- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi
+++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi
@@ -102,7 +102,7 @@ base_fpga_region {
};
gmac0: ethernet@ff800000 {
- compatible = "altr,socfpga-stmmac", "snps,dwmac-3.74a", "snps,dwmac";
+ compatible = "altr,socfpga-stmmac-a10-s10", "snps,dwmac-3.74a", "snps,dwmac";
reg = <0xff800000 0x2000>;
interrupts = <0 90 4>;
interrupt-names = "macirq";
@@ -118,7 +118,7 @@ gmac0: ethernet@ff800000 {
};
gmac1: ethernet@ff802000 {
- compatible = "altr,socfpga-stmmac", "snps,dwmac-3.74a", "snps,dwmac";
+ compatible = "altr,socfpga-stmmac-a10-s10", "snps,dwmac-3.74a", "snps,dwmac";
reg = <0xff802000 0x2000>;
interrupts = <0 91 4>;
interrupt-names = "macirq";
@@ -134,7 +134,7 @@ gmac1: ethernet@ff802000 {
};
gmac2: ethernet@ff804000 {
- compatible = "altr,socfpga-stmmac", "snps,dwmac-3.74a", "snps,dwmac";
+ compatible = "altr,socfpga-stmmac-a10-s10", "snps,dwmac-3.74a", "snps,dwmac";
reg = <0xff804000 0x2000>;
interrupts = <0 92 4>;
interrupt-names = "macirq";
diff --git a/arch/arm64/boot/dts/sprd/sc9863a.dtsi b/arch/arm64/boot/dts/sprd/sc9863a.dtsi
index cd80756..2c590ca 100644
--- a/arch/arm64/boot/dts/sprd/sc9863a.dtsi
+++ b/arch/arm64/boot/dts/sprd/sc9863a.dtsi
@@ -108,7 +108,7 @@ CPU7: cpu@700 {
};
idle-states {
- entry-method = "arm,psci";
+ entry-method = "psci";
CORE_PD: core-pd {
compatible = "arm,idle-state";
entry-latency-us = <4000>;
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 0f21288..4db223d 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -452,6 +452,7 @@
CONFIG_CPU_THERMAL=y
CONFIG_THERMAL_EMULATION=y
CONFIG_QORIQ_THERMAL=m
+CONFIG_SUN8I_THERMAL=y
CONFIG_ROCKCHIP_THERMAL=m
CONFIG_RCAR_THERMAL=y
CONFIG_RCAR_GEN3_THERMAL=y
@@ -547,6 +548,7 @@
CONFIG_ROCKCHIP_INNO_HDMI=y
CONFIG_DRM_RCAR_DU=m
CONFIG_DRM_SUN4I=m
+CONFIG_DRM_SUN6I_DSI=m
CONFIG_DRM_SUN8I_DW_HDMI=m
CONFIG_DRM_SUN8I_MIXER=m
CONFIG_DRM_MSM=m
@@ -681,7 +683,7 @@
CONFIG_RTC_DRV_IMX_SC=m
CONFIG_RTC_DRV_XGENE=y
CONFIG_DMADEVICES=y
-CONFIG_DMA_BCM2835=m
+CONFIG_DMA_BCM2835=y
CONFIG_DMA_SUN6I=m
CONFIG_FSL_EDMA=y
CONFIG_IMX_SDMA=y
@@ -771,7 +773,7 @@
CONFIG_ARCH_R8A774B1=y
CONFIG_ARCH_R8A774C0=y
CONFIG_ARCH_R8A7795=y
-CONFIG_ARCH_R8A7796=y
+CONFIG_ARCH_R8A77960=y
CONFIG_ARCH_R8A77961=y
CONFIG_ARCH_R8A77965=y
CONFIG_ARCH_R8A77970=y
diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
index c1f9660..37ca3e8 100644
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -55,10 +55,10 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
break;
}
chacha_4block_xor_neon(state, dst, src, nrounds, l);
- bytes -= CHACHA_BLOCK_SIZE * 5;
- src += CHACHA_BLOCK_SIZE * 5;
- dst += CHACHA_BLOCK_SIZE * 5;
- state[12] += 5;
+ bytes -= l;
+ src += l;
+ dst += l;
+ state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
}
}
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 324e7d5..5e5dc05 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -221,7 +221,7 @@ alternative_endif
.macro user_alt, label, oldinstr, newinstr, cond
9999: alternative_insn "\oldinstr", "\newinstr", \cond
- _ASM_EXTABLE 9999b, \label
+ _asm_extable 9999b, \label
.endm
/*
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 25fec4b..a358e97 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -32,7 +32,7 @@ static inline void gic_write_eoir(u32 irq)
isb();
}
-static inline void gic_write_dir(u32 irq)
+static __always_inline void gic_write_dir(u32 irq)
{
write_sysreg_s(irq, SYS_ICC_DIR_EL1);
isb();
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 806e9dc..a4d1b5f 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -69,7 +69,7 @@ static inline int icache_is_aliasing(void)
return test_bit(ICACHEF_ALIASING, &__icache_flags);
}
-static inline int icache_is_vpipt(void)
+static __always_inline int icache_is_vpipt(void)
{
return test_bit(ICACHEF_VPIPT, &__icache_flags);
}
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 665c78e..e6cca3d 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -145,7 +145,7 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *);
-static inline void __flush_icache_all(void)
+static __always_inline void __flush_icache_all(void)
{
if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
return;
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 92ef953..2a746b9 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -435,13 +435,13 @@ cpuid_feature_extract_signed_field(u64 features, int field)
return cpuid_feature_extract_signed_field_width(features, field, 4);
}
-static inline unsigned int __attribute_const__
+static __always_inline unsigned int __attribute_const__
cpuid_feature_extract_unsigned_field_width(u64 features, int field, int width)
{
return (u64)(features << (64 - width - field)) >> (64 - width);
}
-static inline unsigned int __attribute_const__
+static __always_inline unsigned int __attribute_const__
cpuid_feature_extract_unsigned_field(u64 features, int field)
{
return cpuid_feature_extract_unsigned_field_width(features, field, 4);
@@ -564,7 +564,7 @@ static inline bool system_supports_mixed_endian(void)
return val == 0x1;
}
-static inline bool system_supports_fpsimd(void)
+static __always_inline bool system_supports_fpsimd(void)
{
return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD);
}
@@ -575,13 +575,13 @@ static inline bool system_uses_ttbr0_pan(void)
!cpus_have_const_cap(ARM64_HAS_PAN);
}
-static inline bool system_supports_sve(void)
+static __always_inline bool system_supports_sve(void)
{
return IS_ENABLED(CONFIG_ARM64_SVE) &&
cpus_have_const_cap(ARM64_SVE);
}
-static inline bool system_supports_cnp(void)
+static __always_inline bool system_supports_cnp(void)
{
return IS_ENABLED(CONFIG_ARM64_CNP) &&
cpus_have_const_cap(ARM64_HAS_CNP);
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index b87c6e2..7a6e81ca 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -33,7 +33,6 @@ static inline u32 disr_to_esr(u64 disr)
asmlinkage void enter_from_user_mode(void);
void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
-void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
void do_undefinstr(struct pt_regs *regs);
asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr);
void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
@@ -47,7 +46,4 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr);
void do_cp15instr(unsigned int esr, struct pt_regs *regs);
void do_el0_svc(struct pt_regs *regs);
void do_el0_svc_compat(struct pt_regs *regs);
-void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr,
- struct pt_regs *regs);
-
#endif /* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 4e531f5..6facd13 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -34,7 +34,7 @@ static inline void __raw_writew(u16 val, volatile void __iomem *addr)
}
#define __raw_writel __raw_writel
-static inline void __raw_writel(u32 val, volatile void __iomem *addr)
+static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr)
{
asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
}
@@ -69,7 +69,7 @@ static inline u16 __raw_readw(const volatile void __iomem *addr)
}
#define __raw_readl __raw_readl
-static inline u32 __raw_readl(const volatile void __iomem *addr)
+static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
{
u32 val;
asm volatile(ALTERNATIVE("ldr %w0, [%1]",
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 688c634..f658dda12 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -36,7 +36,7 @@ void kvm_inject_undef32(struct kvm_vcpu *vcpu);
void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr);
-static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
+static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
return !(vcpu->arch.hcr_el2 & HCR_RW);
}
@@ -127,7 +127,7 @@ static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr)
vcpu->arch.vsesr_el2 = vsesr;
}
-static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
+static __always_inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
{
return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
}
@@ -153,17 +153,17 @@ static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long
*__vcpu_elr_el1(vcpu) = v;
}
-static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
+static __always_inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
{
return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate;
}
-static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
+static __always_inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
{
return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT);
}
-static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
{
if (vcpu_mode_is_32bit(vcpu))
return kvm_condition_valid32(vcpu);
@@ -181,13 +181,13 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
* coming from a read of ESR_EL2. Otherwise, it may give the wrong result on
* AArch32 with banked registers.
*/
-static inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu,
+static __always_inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu,
u8 reg_num)
{
return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs.regs[reg_num];
}
-static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
+static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
unsigned long val)
{
if (reg_num != 31)
@@ -264,12 +264,12 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
return mode != PSR_MODE_EL0t;
}
-static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
+static __always_inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
{
return vcpu->arch.fault.esr_el2;
}
-static inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
+static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
{
u32 esr = kvm_vcpu_get_hsr(vcpu);
@@ -279,12 +279,12 @@ static inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
return -1;
}
-static inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu)
+static __always_inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu)
{
return vcpu->arch.fault.far_el2;
}
-static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
+static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
{
return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
}
@@ -299,7 +299,7 @@ static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu)
return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK;
}
-static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
{
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV);
}
@@ -319,17 +319,17 @@ static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu)
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF);
}
-static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
+static __always_inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
{
return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
}
-static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
{
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW);
}
-static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
{
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR) ||
kvm_vcpu_dabt_iss1tw(vcpu); /* AF/DBM update */
@@ -340,18 +340,18 @@ static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu)
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM);
}
-static inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
+static __always_inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
{
return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
}
/* This one is not specific to Data Abort */
-static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu)
{
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_IL);
}
-static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu)
+static __always_inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu)
{
return ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu));
}
@@ -361,17 +361,17 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
return kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_IABT_LOW;
}
-static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
+static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
{
return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC;
}
-static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
+static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
{
return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE;
}
-static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
{
switch (kvm_vcpu_trap_get_fault(vcpu)) {
case FSC_SEA:
@@ -390,7 +390,7 @@ static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
}
}
-static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
+static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
{
u32 esr = kvm_vcpu_get_hsr(vcpu);
return ESR_ELx_SYS64_ISS_RT(esr);
@@ -504,7 +504,7 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
return data; /* Leave LE untouched */
}
-static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
+static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
{
if (vcpu_mode_is_32bit(vcpu))
kvm_skip_instr32(vcpu, is_wide_instr);
@@ -519,7 +519,7 @@ static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
* Skip an instruction which has been emulated at hyp while most guest sysregs
* are live.
*/
-static inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu)
+static __always_inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu)
{
*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d87aa60..57fd46a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -626,38 +626,6 @@ static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
static inline void kvm_clr_pmu_events(u32 clr) {}
#endif
-static inline void kvm_arm_vhe_guest_enter(void)
-{
- local_daif_mask();
-
- /*
- * Having IRQs masked via PMR when entering the guest means the GIC
- * will not signal the CPU of interrupts of lower priority, and the
- * only way to get out will be via guest exceptions.
- * Naturally, we want to avoid this.
- *
- * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a
- * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU.
- */
- pmr_sync();
-}
-
-static inline void kvm_arm_vhe_guest_exit(void)
-{
- /*
- * local_daif_restore() takes care to properly restore PSTATE.DAIF
- * and the GIC PMR if the host is using IRQ priorities.
- */
- local_daif_restore(DAIF_PROCCTX_NOIRQ);
-
- /*
- * When we exit from the guest we change a number of CPU configuration
- * parameters, such as traps. Make sure these changes take effect
- * before running the host or additional guests.
- */
- isb();
-}
-
#define KVM_BP_HARDEN_UNKNOWN -1
#define KVM_BP_HARDEN_WA_NEEDED 0
#define KVM_BP_HARDEN_NOT_REQUIRED 1
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index a3a6a2b..fe57f60 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -47,6 +47,13 @@
#define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1)
#define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1)
+/*
+ * Without an __arch_swab32(), we fall back to ___constant_swab32(), but the
+ * static inline can allow the compiler to out-of-line this. KVM always wants
+ * the macro version as its always inlined.
+ */
+#define __kvm_swab32(x) ___constant_swab32(x)
+
int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 53d846f..7857628 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -93,7 +93,7 @@ void kvm_update_va_mask(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
void kvm_compute_layout(void);
-static inline unsigned long __kern_hyp_va(unsigned long v)
+static __always_inline unsigned long __kern_hyp_va(unsigned long v)
{
asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"
"ror %0, %0, #1\n"
@@ -473,6 +473,7 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
extern void *__kvm_bp_vect_base;
extern int __kvm_harden_el2_vector_slot;
+/* This is only called on a VHE system */
static inline void *kvm_get_hyp_vector(void)
{
struct bp_hardening_data *data = arm64_get_bp_hardening_data();
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index d429f77..5d10051 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -6,7 +6,7 @@
#ifdef CONFIG_ARM64_LSE_ATOMICS
-#define __LSE_PREAMBLE ".arch armv8-a+lse\n"
+#define __LSE_PREAMBLE ".arch_extension lse\n"
#include <linux/compiler_types.h>
#include <linux/export.h>
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index a4f9ca5..4d94676e 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -213,7 +213,7 @@ static inline unsigned long kaslr_offset(void)
((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55))
#define untagged_addr(addr) ({ \
- u64 __addr = (__force u64)addr; \
+ u64 __addr = (__force u64)(addr); \
__addr &= __untagged_addr(__addr); \
(__force __typeof__(addr))__addr; \
})
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index e4d8624..d79ce6d 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -29,11 +29,9 @@ typedef struct {
*/
#define ASID(mm) ((mm)->context.id.counter & 0xffff)
-extern bool arm64_use_ng_mappings;
-
static inline bool arm64_kernel_unmapped_at_el0(void)
{
- return arm64_use_ng_mappings;
+ return cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0);
}
typedef void (*bp_hardening_cb_t)(void);
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 6f87839..1305e28 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -23,11 +23,13 @@
#include <asm/pgtable-types.h>
+extern bool arm64_use_ng_mappings;
+
#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
-#define PTE_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? PTE_NG : 0)
-#define PMD_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? PMD_SECT_NG : 0)
+#define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0)
+#define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0)
#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG)
#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG)
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index 102404d..9083d69 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -18,6 +18,10 @@
* See:
* https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-ass.net
*/
-#define vcpu_is_preempted(cpu) false
+#define vcpu_is_preempted vcpu_is_preempted
+static inline bool vcpu_is_preempted(int cpu)
+{
+ return false;
+}
#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 1dd22da..803039d 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -25,8 +25,8 @@
#define __NR_compat_gettimeofday 78
#define __NR_compat_sigreturn 119
#define __NR_compat_rt_sigreturn 173
-#define __NR_compat_clock_getres 247
#define __NR_compat_clock_gettime 263
+#define __NR_compat_clock_getres 264
#define __NR_compat_clock_gettime64 403
#define __NR_compat_clock_getres_time64 406
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 0958ed6..61fd267 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -83,7 +83,7 @@ static inline bool is_kernel_in_hyp_mode(void)
return read_sysreg(CurrentEL) == CurrentEL_EL2;
}
-static inline bool has_vhe(void)
+static __always_inline bool has_vhe(void)
{
if (cpus_have_const_cap(ARM64_HAS_VIRT_HOST_EXTN))
return true;
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 53b8a4e..91a8310 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -11,6 +11,7 @@
#include <linux/sched.h>
#include <linux/types.h>
+#include <asm/archrandom.h>
#include <asm/cacheflush.h>
#include <asm/fixmap.h>
#include <asm/kernel-pgtable.h>
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index bbb0f0c..0062605 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -466,6 +466,13 @@ static void ssbs_thread_switch(struct task_struct *next)
if (unlikely(next->flags & PF_KTHREAD))
return;
+ /*
+ * If all CPUs implement the SSBS extension, then we just need to
+ * context-switch the PSTATE field.
+ */
+ if (cpu_have_feature(cpu_feature(SSBS)))
+ return;
+
/* If the mitigation is enabled, then we leave SSBS clear. */
if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) ||
test_tsk_thread_flag(next, TIF_SSBD))
@@ -608,8 +615,6 @@ long get_tagged_addr_ctrl(void)
* only prevents the tagged address ABI enabling via prctl() and does not
* disable it for tasks that already opted in to the relaxed ABI.
*/
-static int zero;
-static int one = 1;
static struct ctl_table tagged_addr_sysctl_table[] = {
{
@@ -618,8 +623,8 @@ static struct ctl_table tagged_addr_sysctl_table[] = {
.data = &tagged_addr_disabled,
.maxlen = sizeof(int),
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{ }
};
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index d4ed9a1..5407bf5 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -958,11 +958,22 @@ void tick_broadcast(const struct cpumask *mask)
}
#endif
+/*
+ * The number of CPUs online, not counting this CPU (which may not be
+ * fully online and so not counted in num_online_cpus()).
+ */
+static inline unsigned int num_other_online_cpus(void)
+{
+ unsigned int this_cpu_online = cpu_online(smp_processor_id());
+
+ return num_online_cpus() - this_cpu_online;
+}
+
void smp_send_stop(void)
{
unsigned long timeout;
- if (num_online_cpus() > 1) {
+ if (num_other_online_cpus()) {
cpumask_t mask;
cpumask_copy(&mask, cpu_online_mask);
@@ -975,10 +986,10 @@ void smp_send_stop(void)
/* Wait up to one second for other CPUs to stop */
timeout = USEC_PER_SEC;
- while (num_online_cpus() > 1 && timeout--)
+ while (num_other_online_cpus() && timeout--)
udelay(1);
- if (num_online_cpus() > 1)
+ if (num_other_online_cpus())
pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
cpumask_pr_args(cpu_online_mask));
@@ -1001,7 +1012,11 @@ void crash_smp_send_stop(void)
cpus_stopped = 1;
- if (num_online_cpus() == 1) {
+ /*
+ * If this cpu is the only one alive at this point in time, online or
+ * not, there are no stop messages to be sent around, so just back out.
+ */
+ if (num_other_online_cpus() == 0) {
sdei_mask_local_cpu();
return;
}
@@ -1009,7 +1024,7 @@ void crash_smp_send_stop(void)
cpumask_copy(&mask, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &mask);
- atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+ atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());
pr_crit("SMP: stopping secondary CPUs\n");
smp_cross_call(&mask, IPI_CPU_CRASH_STOP);
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 73f06d4..eebbc8d 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -23,7 +23,7 @@
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/clocksource.h>
-#include <linux/clk-provider.h>
+#include <linux/of_clk.h>
#include <linux/acpi.h>
#include <clocksource/arm_arch_timer.h>
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index dfe8dd1..925086b 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -625,7 +625,7 @@ static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
}
/* Switch to the guest for VHE systems running in EL2 */
-int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
+static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
@@ -678,7 +678,42 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
return exit_code;
}
-NOKPROBE_SYMBOL(kvm_vcpu_run_vhe);
+NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe);
+
+int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ local_daif_mask();
+
+ /*
+ * Having IRQs masked via PMR when entering the guest means the GIC
+ * will not signal the CPU of interrupts of lower priority, and the
+ * only way to get out will be via guest exceptions.
+ * Naturally, we want to avoid this.
+ *
+ * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a
+ * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU.
+ */
+ pmr_sync();
+
+ ret = __kvm_vcpu_run_vhe(vcpu);
+
+ /*
+ * local_daif_restore() takes care to properly restore PSTATE.DAIF
+ * and the GIC PMR if the host is using IRQ priorities.
+ */
+ local_daif_restore(DAIF_PROCCTX_NOIRQ);
+
+ /*
+ * When we exit from the guest we change a number of CPU configuration
+ * parameters, such as traps. Make sure these changes take effect
+ * before running the host or additional guests.
+ */
+ isb();
+
+ return ret;
+}
/* Switch to the guest for legacy non-VHE systems */
int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
index 29ee1fe..4f3a087 100644
--- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
+++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
@@ -69,14 +69,14 @@ int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
u32 data = vcpu_get_reg(vcpu, rd);
if (__is_be(vcpu)) {
/* guest pre-swabbed data, undo this for writel() */
- data = swab32(data);
+ data = __kvm_swab32(data);
}
writel_relaxed(data, addr);
} else {
u32 data = readl_relaxed(addr);
if (__is_be(vcpu)) {
/* guest expects swabbed data */
- data = swab32(data);
+ data = __kvm_swab32(data);
}
vcpu_set_reg(vcpu, rd, data);
}
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 8ef73e8..d89bb22 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -260,14 +260,26 @@ asmlinkage void post_ttbr_update_workaround(void)
CONFIG_CAVIUM_ERRATUM_27456));
}
-static int asids_init(void)
+static int asids_update_limit(void)
{
- asid_bits = get_cpu_asid_bits();
+ unsigned long num_available_asids = NUM_USER_ASIDS;
+
+ if (arm64_kernel_unmapped_at_el0())
+ num_available_asids /= 2;
/*
* Expect allocation after rollover to fail if we don't have at least
* one more ASID than CPUs. ASID #0 is reserved for init_mm.
*/
- WARN_ON(NUM_USER_ASIDS - 1 <= num_possible_cpus());
+ WARN_ON(num_available_asids - 1 <= num_possible_cpus());
+ pr_info("ASID allocator initialised with %lu entries\n",
+ num_available_asids);
+ return 0;
+}
+arch_initcall(asids_update_limit);
+
+static int asids_init(void)
+{
+ asid_bits = get_cpu_asid_bits();
atomic64_set(&asid_generation, ASID_FIRST_VERSION);
asid_map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS), sizeof(*asid_map),
GFP_KERNEL);
@@ -282,8 +294,6 @@ static int asids_init(void)
*/
if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0))
set_kpti_asid_bits();
-
- pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);
return 0;
}
early_initcall(asids_init);
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index da09c88..047427f 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -9,7 +9,6 @@
select ARCH_USE_QUEUED_RWLOCKS if NR_CPUS>2
select COMMON_CLK
select CLKSRC_MMIO
- select CLKSRC_OF
select CSKY_MPINTC if CPU_CK860
select CSKY_MP_TIMER if CPU_CK860
select CSKY_APB_INTC
@@ -37,6 +36,7 @@
select GX6605S_TIMER if CPU_CK610
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_COPY_THREAD_TLS
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
@@ -47,8 +47,8 @@
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
- select HAVE_DMA_API_DEBUG
select HAVE_DMA_CONTIGUOUS
+ select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select MAY_HAVE_SPARSE_IRQ
select MODULES_USE_ELF_RELA if MODULES
@@ -59,6 +59,11 @@
select TIMER_OF
select USB_ARCH_HAS_EHCI
select USB_ARCH_HAS_OHCI
+ select GENERIC_PCI_IOMAP
+ select HAVE_PCI
+ select PCI_DOMAINS_GENERIC if PCI
+ select PCI_SYSCALL if PCI
+ select PCI_MSI if PCI
config CPU_HAS_CACHEV2
bool
@@ -75,7 +80,7 @@
config CPU_HAS_LDSTEX
bool
help
- For SMP, CPU needs "ldex&stex" instrcutions to atomic operations.
+ For SMP, CPU needs "ldex&stex" instructions for atomic operations.
config CPU_NEED_TLBSYNC
bool
@@ -188,6 +193,40 @@
bool "stop"
endchoice
+menuconfig HAVE_TCM
+ bool "Tightly-Coupled/Sram Memory"
+ select GENERIC_ALLOCATOR
+ help
+ The implementation are not only used by TCM (Tightly-Coupled Meory)
+ but also used by sram on SOC bus. It follow existed linux tcm
+ software interface, so that old tcm application codes could be
+ re-used directly.
+
+if HAVE_TCM
+config ITCM_RAM_BASE
+ hex "ITCM ram base"
+ default 0xffffffff
+
+config ITCM_NR_PAGES
+ int "Page count of ITCM size: NR*4KB"
+ range 1 256
+ default 32
+
+config HAVE_DTCM
+ bool "DTCM Support"
+
+config DTCM_RAM_BASE
+ hex "DTCM ram base"
+ depends on HAVE_DTCM
+ default 0xffffffff
+
+config DTCM_NR_PAGES
+ int "Page count of DTCM size: NR*4KB"
+ depends on HAVE_DTCM
+ range 1 256
+ default 32
+endif
+
config CPU_HAS_VDSP
bool "CPU has VDSP coprocessor"
depends on CPU_HAS_FPU && CPU_HAS_FPUV2
@@ -196,6 +235,10 @@
bool "CPU has FPU coprocessor"
depends on CPU_CK807 || CPU_CK810 || CPU_CK860
+config CPU_HAS_ICACHE_INS
+ bool "CPU has Icache invalidate instructions"
+ depends on CPU_HAS_CACHEV2
+
config CPU_HAS_TEE
bool "CPU has Trusted Execution Environment"
depends on CPU_CK810
@@ -235,4 +278,6 @@
Say N if you want to disable CPU hotplug.
endmenu
+source "arch/csky/Kconfig.platforms"
+
source "kernel/Kconfig.hz"
diff --git a/arch/csky/Kconfig.platforms b/arch/csky/Kconfig.platforms
new file mode 100644
index 0000000..639e17f
--- /dev/null
+++ b/arch/csky/Kconfig.platforms
@@ -0,0 +1,9 @@
+menu "Platform drivers selection"
+
+config ARCH_CSKY_DW_APB_ICTL
+ bool "Select dw-apb interrupt controller"
+ select DW_APB_ICTL
+ default y
+ help
+ This enables support for snps dw-apb-ictl
+endmenu
diff --git a/arch/csky/abiv1/inc/abi/cacheflush.h b/arch/csky/abiv1/inc/abi/cacheflush.h
index 79ef9e8..d3e0420 100644
--- a/arch/csky/abiv1/inc/abi/cacheflush.h
+++ b/arch/csky/abiv1/inc/abi/cacheflush.h
@@ -48,9 +48,8 @@ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, u
#define flush_icache_page(vma, page) do {} while (0);
#define flush_icache_range(start, end) cache_wbinv_range(start, end)
-
-#define flush_icache_user_range(vma,page,addr,len) \
- flush_dcache_page(page)
+#define flush_icache_mm_range(mm, start, end) cache_wbinv_range(start, end)
+#define flush_icache_deferred(mm) do {} while (0);
#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
do { \
diff --git a/arch/csky/abiv1/inc/abi/entry.h b/arch/csky/abiv1/inc/abi/entry.h
index 7ab78bd..f35a9f3 100644
--- a/arch/csky/abiv1/inc/abi/entry.h
+++ b/arch/csky/abiv1/inc/abi/entry.h
@@ -16,14 +16,16 @@
#define LSAVE_A4 40
#define LSAVE_A5 44
+#define usp ss1
+
.macro USPTOKSP
- mtcr sp, ss1
+ mtcr sp, usp
mfcr sp, ss0
.endm
.macro KSPTOUSP
mtcr sp, ss0
- mfcr sp, ss1
+ mfcr sp, usp
.endm
.macro SAVE_ALL epc_inc
@@ -45,7 +47,13 @@
add lr, r13
stw lr, (sp, 8)
+ mov lr, sp
+ addi lr, 32
+ addi lr, 32
+ addi lr, 16
+ bt 2f
mfcr lr, ss1
+2:
stw lr, (sp, 16)
stw a0, (sp, 20)
@@ -79,9 +87,10 @@
ldw a0, (sp, 12)
mtcr a0, epsr
btsti a0, 31
+ bt 1f
ldw a0, (sp, 16)
mtcr a0, ss1
-
+1:
ldw a0, (sp, 24)
ldw a1, (sp, 28)
ldw a2, (sp, 32)
@@ -102,9 +111,9 @@
addi sp, 32
addi sp, 8
- bt 1f
+ bt 2f
KSPTOUSP
-1:
+2:
rte
.endm
diff --git a/arch/csky/abiv2/cacheflush.c b/arch/csky/abiv2/cacheflush.c
index 5bb887b..790f1eb 100644
--- a/arch/csky/abiv2/cacheflush.c
+++ b/arch/csky/abiv2/cacheflush.c
@@ -6,46 +6,80 @@
#include <linux/mm.h>
#include <asm/cache.h>
-void flush_icache_page(struct vm_area_struct *vma, struct page *page)
-{
- unsigned long start;
-
- start = (unsigned long) kmap_atomic(page);
-
- cache_wbinv_range(start, start + PAGE_SIZE);
-
- kunmap_atomic((void *)start);
-}
-
-void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
- unsigned long vaddr, int len)
-{
- unsigned long kaddr;
-
- kaddr = (unsigned long) kmap_atomic(page) + (vaddr & ~PAGE_MASK);
-
- cache_wbinv_range(kaddr, kaddr + len);
-
- kunmap_atomic((void *)kaddr);
-}
-
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
pte_t *pte)
{
- unsigned long addr, pfn;
+ unsigned long addr;
struct page *page;
- pfn = pte_pfn(*pte);
- if (unlikely(!pfn_valid(pfn)))
+ page = pfn_to_page(pte_pfn(*pte));
+ if (page == ZERO_PAGE(0))
return;
- page = pfn_to_page(pfn);
- if (page == ZERO_PAGE(0))
+ if (test_and_set_bit(PG_dcache_clean, &page->flags))
return;
addr = (unsigned long) kmap_atomic(page);
- cache_wbinv_range(addr, addr + PAGE_SIZE);
+ dcache_wb_range(addr, addr + PAGE_SIZE);
+
+ if (vma->vm_flags & VM_EXEC)
+ icache_inv_range(addr, addr + PAGE_SIZE);
kunmap_atomic((void *) addr);
}
+
+void flush_icache_deferred(struct mm_struct *mm)
+{
+ unsigned int cpu = smp_processor_id();
+ cpumask_t *mask = &mm->context.icache_stale_mask;
+
+ if (cpumask_test_cpu(cpu, mask)) {
+ cpumask_clear_cpu(cpu, mask);
+ /*
+ * Ensure the remote hart's writes are visible to this hart.
+ * This pairs with a barrier in flush_icache_mm.
+ */
+ smp_mb();
+ local_icache_inv_all(NULL);
+ }
+}
+
+void flush_icache_mm_range(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ unsigned int cpu;
+ cpumask_t others, *mask;
+
+ preempt_disable();
+
+#ifdef CONFIG_CPU_HAS_ICACHE_INS
+ if (mm == current->mm) {
+ icache_inv_range(start, end);
+ preempt_enable();
+ return;
+ }
+#endif
+
+ /* Mark every hart's icache as needing a flush for this MM. */
+ mask = &mm->context.icache_stale_mask;
+ cpumask_setall(mask);
+
+ /* Flush this hart's I$ now, and mark it as flushed. */
+ cpu = smp_processor_id();
+ cpumask_clear_cpu(cpu, mask);
+ local_icache_inv_all(NULL);
+
+ /*
+ * Flush the I$ of other harts concurrently executing, and mark them as
+ * flushed.
+ */
+ cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
+
+ if (mm != current->active_mm || !cpumask_empty(&others)) {
+ on_each_cpu_mask(&others, local_icache_inv_all, NULL, 1);
+ cpumask_clear(mask);
+ }
+
+ preempt_enable();
+}
diff --git a/arch/csky/abiv2/inc/abi/cacheflush.h b/arch/csky/abiv2/inc/abi/cacheflush.h
index b8db5e0..a565e00 100644
--- a/arch/csky/abiv2/inc/abi/cacheflush.h
+++ b/arch/csky/abiv2/inc/abi/cacheflush.h
@@ -13,24 +13,27 @@
#define flush_cache_all() do { } while (0)
#define flush_cache_mm(mm) do { } while (0)
#define flush_cache_dup_mm(mm) do { } while (0)
-
-#define flush_cache_range(vma, start, end) \
- do { \
- if (vma->vm_flags & VM_EXEC) \
- icache_inv_all(); \
- } while (0)
-
+#define flush_cache_range(vma, start, end) do { } while (0)
#define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-#define flush_dcache_page(page) do { } while (0)
+
+#define PG_dcache_clean PG_arch_1
+
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
+static inline void flush_dcache_page(struct page *page)
+{
+ if (test_bit(PG_dcache_clean, &page->flags))
+ clear_bit(PG_dcache_clean, &page->flags);
+}
+
#define flush_dcache_mmap_lock(mapping) do { } while (0)
#define flush_dcache_mmap_unlock(mapping) do { } while (0)
+#define flush_icache_page(vma, page) do { } while (0)
#define flush_icache_range(start, end) cache_wbinv_range(start, end)
-void flush_icache_page(struct vm_area_struct *vma, struct page *page);
-void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
- unsigned long vaddr, int len);
+void flush_icache_mm_range(struct mm_struct *mm,
+ unsigned long start, unsigned long end);
+void flush_icache_deferred(struct mm_struct *mm);
#define flush_cache_vmap(start, end) do { } while (0)
#define flush_cache_vunmap(start, end) do { } while (0)
@@ -38,7 +41,13 @@ void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
do { \
memcpy(dst, src, len); \
- cache_wbinv_range((unsigned long)dst, (unsigned long)dst + len); \
+ if (vma->vm_flags & VM_EXEC) { \
+ dcache_wb_range((unsigned long)dst, \
+ (unsigned long)dst + len); \
+ flush_icache_mm_range(current->mm, \
+ (unsigned long)dst, \
+ (unsigned long)dst + len); \
+ } \
} while (0)
#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
memcpy(dst, src, len)
diff --git a/arch/csky/abiv2/inc/abi/entry.h b/arch/csky/abiv2/inc/abi/entry.h
index 9897a16b..94a7a58 100644
--- a/arch/csky/abiv2/inc/abi/entry.h
+++ b/arch/csky/abiv2/inc/abi/entry.h
@@ -31,7 +31,13 @@
mfcr lr, epsr
stw lr, (sp, 12)
+ btsti lr, 31
+ bf 1f
+ addi lr, sp, 152
+ br 2f
+1:
mfcr lr, usp
+2:
stw lr, (sp, 16)
stw a0, (sp, 20)
@@ -64,8 +70,10 @@
mtcr a0, epc
ldw a0, (sp, 12)
mtcr a0, epsr
+ btsti a0, 31
ldw a0, (sp, 16)
mtcr a0, usp
+ mtcr a0, ss0
#ifdef CONFIG_CPU_HAS_HILO
ldw a0, (sp, 140)
@@ -86,6 +94,9 @@
addi sp, 40
ldm r16-r30, (sp)
addi sp, 72
+ bf 1f
+ mfcr sp, ss0
+1:
rte
.endm
diff --git a/arch/csky/configs/defconfig b/arch/csky/configs/defconfig
index 7ef4289..af722e4 100644
--- a/arch/csky/configs/defconfig
+++ b/arch/csky/configs/defconfig
@@ -10,9 +10,6 @@
CONFIG_BSD_PROCESS_ACCT_V3=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
-CONFIG_DEFAULT_DEADLINE=y
-CONFIG_CPU_CK807=y
-CONFIG_CPU_HAS_FPU=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -27,10 +24,7 @@
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
-CONFIG_TTY_PRINTK=y
# CONFIG_VGA_CONSOLE is not set
-CONFIG_CSKY_MPTIMER=y
-CONFIG_GX6605S_TIMER=y
CONFIG_PM_DEVFREQ=y
CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y
CONFIG_DEVFREQ_GOV_PERFORMANCE=y
@@ -56,6 +50,4 @@
CONFIG_ROMFS_FS=y
CONFIG_NFS_FS=y
CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/csky/include/asm/Kbuild b/arch/csky/include/asm/Kbuild
index bc15a26..4130e3e 100644
--- a/arch/csky/include/asm/Kbuild
+++ b/arch/csky/include/asm/Kbuild
@@ -28,7 +28,6 @@
generic-y += mm-arch-hooks.h
generic-y += mmiowb.h
generic-y += module.h
-generic-y += pci.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += qrwlock.h
diff --git a/arch/csky/include/asm/cache.h b/arch/csky/include/asm/cache.h
index 1d5fc2f..4b5c09bf 100644
--- a/arch/csky/include/asm/cache.h
+++ b/arch/csky/include/asm/cache.h
@@ -16,6 +16,7 @@ void dcache_wb_line(unsigned long start);
void icache_inv_range(unsigned long start, unsigned long end);
void icache_inv_all(void);
+void local_icache_inv_all(void *priv);
void dcache_wb_range(unsigned long start, unsigned long end);
void dcache_wbinv_all(void);
diff --git a/arch/csky/include/asm/cacheflush.h b/arch/csky/include/asm/cacheflush.h
index a96da67..f0b8f25 100644
--- a/arch/csky/include/asm/cacheflush.h
+++ b/arch/csky/include/asm/cacheflush.h
@@ -4,6 +4,7 @@
#ifndef __ASM_CSKY_CACHEFLUSH_H
#define __ASM_CSKY_CACHEFLUSH_H
+#include <linux/mm.h>
#include <abi/cacheflush.h>
#endif /* __ASM_CSKY_CACHEFLUSH_H */
diff --git a/arch/csky/include/asm/fixmap.h b/arch/csky/include/asm/fixmap.h
index 380ff0a..81f9477 100644
--- a/arch/csky/include/asm/fixmap.h
+++ b/arch/csky/include/asm/fixmap.h
@@ -5,12 +5,16 @@
#define __ASM_CSKY_FIXMAP_H
#include <asm/page.h>
+#include <asm/memory.h>
#ifdef CONFIG_HIGHMEM
#include <linux/threads.h>
#include <asm/kmap_types.h>
#endif
enum fixed_addresses {
+#ifdef CONFIG_HAVE_TCM
+ FIX_TCM = TCM_NR_PAGES,
+#endif
#ifdef CONFIG_HIGHMEM
FIX_KMAP_BEGIN,
FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1,
@@ -18,10 +22,13 @@ enum fixed_addresses {
__end_of_fixed_addresses
};
-#define FIXADDR_TOP 0xffffc000
#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
#include <asm-generic/fixmap.h>
+extern void fixrange_init(unsigned long start, unsigned long end,
+ pgd_t *pgd_base);
+extern void __init fixaddr_init(void);
+
#endif /* __ASM_CSKY_FIXMAP_H */
diff --git a/arch/csky/include/asm/memory.h b/arch/csky/include/asm/memory.h
new file mode 100644
index 0000000..a65c675
--- /dev/null
+++ b/arch/csky/include/asm/memory.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_CSKY_MEMORY_H
+#define __ASM_CSKY_MEMORY_H
+
+#include <linux/compiler.h>
+#include <linux/const.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+
+#define FIXADDR_TOP _AC(0xffffc000, UL)
+#define PKMAP_BASE _AC(0xff800000, UL)
+#define VMALLOC_START _AC(0xc0008000, UL)
+#define VMALLOC_END (PKMAP_BASE - (PAGE_SIZE * 2))
+
+#ifdef CONFIG_HAVE_TCM
+#ifdef CONFIG_HAVE_DTCM
+#define TCM_NR_PAGES (CONFIG_ITCM_NR_PAGES + CONFIG_DTCM_NR_PAGES)
+#else
+#define TCM_NR_PAGES (CONFIG_ITCM_NR_PAGES)
+#endif
+#define FIXADDR_TCM _AC(FIXADDR_TOP - (TCM_NR_PAGES * PAGE_SIZE), UL)
+#endif
+
+#endif
diff --git a/arch/csky/include/asm/mmu.h b/arch/csky/include/asm/mmu.h
index b382a14..26fbb1d 100644
--- a/arch/csky/include/asm/mmu.h
+++ b/arch/csky/include/asm/mmu.h
@@ -7,6 +7,7 @@
typedef struct {
atomic64_t asid;
void *vdso;
+ cpumask_t icache_stale_mask;
} mm_context_t;
#endif /* __ASM_CSKY_MMU_H */
diff --git a/arch/csky/include/asm/mmu_context.h b/arch/csky/include/asm/mmu_context.h
index 0285b0a..abdf1f1 100644
--- a/arch/csky/include/asm/mmu_context.h
+++ b/arch/csky/include/asm/mmu_context.h
@@ -43,5 +43,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
TLBMISS_HANDLER_SETUP_PGD(next->pgd);
write_mmu_entryhi(next->context.asid.counter);
+
+ flush_icache_deferred(next);
}
#endif /* __ASM_CSKY_MMU_CONTEXT_H */
diff --git a/arch/csky/include/asm/pci.h b/arch/csky/include/asm/pci.h
new file mode 100644
index 0000000..ebc765b
--- /dev/null
+++ b/arch/csky/include/asm/pci.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ASM_CSKY_PCI_H
+#define __ASM_CSKY_PCI_H
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/io.h>
+
+#define PCIBIOS_MIN_IO 0
+#define PCIBIOS_MIN_MEM 0
+
+/* C-SKY shim does not initialize PCI bus */
+#define pcibios_assign_all_busses() 1
+
+extern int isa_dma_bridge_buggy;
+
+#ifdef CONFIG_PCI
+static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
+{
+ /* no legacy IRQ on csky */
+ return -ENODEV;
+}
+
+static inline int pci_proc_domain(struct pci_bus *bus)
+{
+ /* always show the domain in /proc */
+ return 1;
+}
+#endif /* CONFIG_PCI */
+
+#endif /* __ASM_CSKY_PCI_H */
diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h
index 4b2a41e..9b7764c 100644
--- a/arch/csky/include/asm/pgtable.h
+++ b/arch/csky/include/asm/pgtable.h
@@ -5,6 +5,7 @@
#define __ASM_CSKY_PGTABLE_H
#include <asm/fixmap.h>
+#include <asm/memory.h>
#include <asm/addrspace.h>
#include <abi/pgtable-bits.h>
#include <asm-generic/pgtable-nopmd.h>
@@ -16,11 +17,6 @@
#define USER_PTRS_PER_PGD (0x80000000UL/PGDIR_SIZE)
#define FIRST_USER_ADDRESS 0UL
-#define PKMAP_BASE (0xff800000)
-
-#define VMALLOC_START (0xc0008000)
-#define VMALLOC_END (PKMAP_BASE - 2*PAGE_SIZE)
-
/*
* C-SKY is two-level paging structure:
*/
diff --git a/arch/csky/include/asm/stackprotector.h b/arch/csky/include/asm/stackprotector.h
new file mode 100644
index 0000000..d7cd4e5
--- /dev/null
+++ b/arch/csky/include/asm/stackprotector.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_STACKPROTECTOR_H
+#define _ASM_STACKPROTECTOR_H 1
+
+#include <linux/random.h>
+#include <linux/version.h>
+
+extern unsigned long __stack_chk_guard;
+
+/*
+ * Initialize the stackprotector canary value.
+ *
+ * NOTE: this must only be called from functions that never return,
+ * and it must always be inlined.
+ */
+static __always_inline void boot_init_stack_canary(void)
+{
+ unsigned long canary;
+
+ /* Try to get a semi random initial value. */
+ get_random_bytes(&canary, sizeof(canary));
+ canary ^= LINUX_VERSION_CODE;
+ canary &= CANARY_MASK;
+
+ current->stack_canary = canary;
+ __stack_chk_guard = current->stack_canary;
+}
+
+#endif /* __ASM_SH_STACKPROTECTOR_H */
diff --git a/arch/csky/include/asm/tcm.h b/arch/csky/include/asm/tcm.h
new file mode 100644
index 0000000..2b135ce
--- /dev/null
+++ b/arch/csky/include/asm/tcm.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_CSKY_TCM_H
+#define __ASM_CSKY_TCM_H
+
+#ifndef CONFIG_HAVE_TCM
+#error "You should not be including tcm.h unless you have a TCM!"
+#endif
+
+#include <linux/compiler.h>
+
+/* Tag variables with this */
+#define __tcmdata __section(.tcm.data)
+/* Tag constants with this */
+#define __tcmconst __section(.tcm.rodata)
+/* Tag functions inside TCM called from outside TCM with this */
+#define __tcmfunc __section(.tcm.text) noinline
+/* Tag function inside TCM called from inside TCM with this */
+#define __tcmlocalfunc __section(.tcm.text)
+
+void *tcm_alloc(size_t len);
+void tcm_free(void *addr, size_t len);
+
+#endif
diff --git a/arch/csky/include/uapi/asm/unistd.h b/arch/csky/include/uapi/asm/unistd.h
index 211c983..ba40189 100644
--- a/arch/csky/include/uapi/asm/unistd.h
+++ b/arch/csky/include/uapi/asm/unistd.h
@@ -1,7 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
+#define __ARCH_WANT_STAT64
+#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_SYS_CLONE3
#define __ARCH_WANT_SET_GET_RLIMIT
#define __ARCH_WANT_TIME32_SYSCALLS
#include <asm-generic/unistd.h>
diff --git a/arch/csky/kernel/atomic.S b/arch/csky/kernel/atomic.S
index 5b84f11..3821ef9 100644
--- a/arch/csky/kernel/atomic.S
+++ b/arch/csky/kernel/atomic.S
@@ -17,10 +17,12 @@
mfcr a3, epc
addi a3, TRAP0_SIZE
- subi sp, 8
+ subi sp, 16
stw a3, (sp, 0)
mfcr a3, epsr
stw a3, (sp, 4)
+ mfcr a3, usp
+ stw a3, (sp, 8)
psrset ee
#ifdef CONFIG_CPU_HAS_LDSTEX
@@ -47,7 +49,9 @@
mtcr a3, epc
ldw a3, (sp, 4)
mtcr a3, epsr
- addi sp, 8
+ ldw a3, (sp, 8)
+ mtcr a3, usp
+ addi sp, 16
KSPTOUSP
rte
END(csky_cmpxchg)
diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c
index f320d92..f7b231c 100644
--- a/arch/csky/kernel/process.c
+++ b/arch/csky/kernel/process.c
@@ -16,6 +16,12 @@
struct cpuinfo_csky cpu_data[NR_CPUS];
+#ifdef CONFIG_STACKPROTECTOR
+#include <linux/stackprotector.h>
+unsigned long __stack_chk_guard __read_mostly;
+EXPORT_SYMBOL(__stack_chk_guard);
+#endif
+
asmlinkage void ret_from_fork(void);
asmlinkage void ret_from_kernel_thread(void);
@@ -34,10 +40,11 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
return sw->r15;
}
-int copy_thread(unsigned long clone_flags,
+int copy_thread_tls(unsigned long clone_flags,
unsigned long usp,
unsigned long kthread_arg,
- struct task_struct *p)
+ struct task_struct *p,
+ unsigned long tls)
{
struct switch_stack *childstack;
struct pt_regs *childregs = task_pt_regs(p);
@@ -64,7 +71,7 @@ int copy_thread(unsigned long clone_flags,
childregs->usp = usp;
if (clone_flags & CLONE_SETTLS)
task_thread_info(p)->tp_value = childregs->tls
- = childregs->regs[0];
+ = tls;
childregs->a0 = 0;
childstack->r15 = (unsigned long) ret_from_fork;
diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c
index 52eaf31..3821e55 100644
--- a/arch/csky/kernel/setup.c
+++ b/arch/csky/kernel/setup.c
@@ -47,9 +47,6 @@ static void __init csky_memblock_init(void)
signed long size;
memblock_reserve(__pa(_stext), _end - _stext);
-#ifdef CONFIG_BLK_DEV_INITRD
- memblock_reserve(__pa(initrd_start), initrd_end - initrd_start);
-#endif
early_init_fdt_reserve_self();
early_init_fdt_scan_reserved_mem();
@@ -133,6 +130,8 @@ void __init setup_arch(char **cmdline_p)
sparse_init();
+ fixaddr_init();
+
#ifdef CONFIG_HIGHMEM
kmap_init();
#endif
diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c
index b753d38..0bb0954 100644
--- a/arch/csky/kernel/smp.c
+++ b/arch/csky/kernel/smp.c
@@ -120,7 +120,7 @@ void __init setup_smp_ipi(void)
int rc;
if (ipi_irq == 0)
- panic("%s IRQ mapping failed\n", __func__);
+ return;
rc = request_percpu_irq(ipi_irq, handle_ipi, "IPI Interrupt",
&ipi_dummy_dev);
diff --git a/arch/csky/kernel/time.c b/arch/csky/kernel/time.c
index b5fc944..52379d8 100644
--- a/arch/csky/kernel/time.c
+++ b/arch/csky/kernel/time.c
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
-#include <linux/clk-provider.h>
#include <linux/clocksource.h>
+#include <linux/of_clk.h>
void __init time_init(void)
{
diff --git a/arch/csky/kernel/vmlinux.lds.S b/arch/csky/kernel/vmlinux.lds.S
index 2ff37be..f05b413 100644
--- a/arch/csky/kernel/vmlinux.lds.S
+++ b/arch/csky/kernel/vmlinux.lds.S
@@ -2,6 +2,7 @@
#include <asm/vmlinux.lds.h>
#include <asm/page.h>
+#include <asm/memory.h>
OUTPUT_ARCH(csky)
ENTRY(_start)
@@ -53,6 +54,54 @@
RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
_edata = .;
+#ifdef CONFIG_HAVE_TCM
+ .tcm_start : {
+ . = ALIGN(PAGE_SIZE);
+ __tcm_start = .;
+ }
+
+ .text_data_tcm FIXADDR_TCM : AT(__tcm_start)
+ {
+ . = ALIGN(4);
+ __stcm_text_data = .;
+ *(.tcm.text)
+ *(.tcm.rodata)
+#ifndef CONFIG_HAVE_DTCM
+ *(.tcm.data)
+#endif
+ . = ALIGN(4);
+ __etcm_text_data = .;
+ }
+
+ . = ADDR(.tcm_start) + SIZEOF(.tcm_start) + SIZEOF(.text_data_tcm);
+
+#ifdef CONFIG_HAVE_DTCM
+ #define ITCM_SIZE CONFIG_ITCM_NR_PAGES * PAGE_SIZE
+
+ .dtcm_start : {
+ __dtcm_start = .;
+ }
+
+ .data_tcm FIXADDR_TCM + ITCM_SIZE : AT(__dtcm_start)
+ {
+ . = ALIGN(4);
+ __stcm_data = .;
+ *(.tcm.data)
+ . = ALIGN(4);
+ __etcm_data = .;
+ }
+
+ . = ADDR(.dtcm_start) + SIZEOF(.data_tcm);
+
+ .tcm_end : AT(ADDR(.dtcm_start) + SIZEOF(.data_tcm)) {
+#else
+ .tcm_end : AT(ADDR(.tcm_start) + SIZEOF(.text_data_tcm)) {
+#endif
+ . = ALIGN(PAGE_SIZE);
+ __tcm_end = .;
+ }
+#endif
+
EXCEPTION_TABLE(L1_CACHE_BYTES)
BSS_SECTION(L1_CACHE_BYTES, PAGE_SIZE, L1_CACHE_BYTES)
VBR_BASE
diff --git a/arch/csky/mm/Makefile b/arch/csky/mm/Makefile
index c94ef64..6e7696e 100644
--- a/arch/csky/mm/Makefile
+++ b/arch/csky/mm/Makefile
@@ -1,8 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
ifeq ($(CONFIG_CPU_HAS_CACHEV2),y)
obj-y += cachev2.o
+CFLAGS_REMOVE_cachev2.o = $(CC_FLAGS_FTRACE)
else
obj-y += cachev1.o
+CFLAGS_REMOVE_cachev1.o = $(CC_FLAGS_FTRACE)
endif
obj-y += dma-mapping.o
@@ -14,3 +16,4 @@
obj-y += tlb.o
obj-y += asid.o
obj-y += context.o
+obj-$(CONFIG_HAVE_TCM) += tcm.o
diff --git a/arch/csky/mm/cachev1.c b/arch/csky/mm/cachev1.c
index 494ec91..5a5a980 100644
--- a/arch/csky/mm/cachev1.c
+++ b/arch/csky/mm/cachev1.c
@@ -94,6 +94,11 @@ void icache_inv_all(void)
cache_op_all(INS_CACHE|CACHE_INV, 0);
}
+void local_icache_inv_all(void *priv)
+{
+ cache_op_all(INS_CACHE|CACHE_INV, 0);
+}
+
void dcache_wb_range(unsigned long start, unsigned long end)
{
cache_op_range(start, end, DATA_CACHE|CACHE_CLR, 0);
diff --git a/arch/csky/mm/cachev2.c b/arch/csky/mm/cachev2.c
index b61be65..bc419f80 100644
--- a/arch/csky/mm/cachev2.c
+++ b/arch/csky/mm/cachev2.c
@@ -3,15 +3,25 @@
#include <linux/spinlock.h>
#include <linux/smp.h>
+#include <linux/mm.h>
#include <asm/cache.h>
#include <asm/barrier.h>
-inline void dcache_wb_line(unsigned long start)
+#define INS_CACHE (1 << 0)
+#define CACHE_INV (1 << 4)
+
+void local_icache_inv_all(void *priv)
{
- asm volatile("dcache.cval1 %0\n"::"r"(start):"memory");
+ mtcr("cr17", INS_CACHE|CACHE_INV);
sync_is();
}
+void icache_inv_all(void)
+{
+ on_each_cpu(local_icache_inv_all, NULL, 1);
+}
+
+#ifdef CONFIG_CPU_HAS_ICACHE_INS
void icache_inv_range(unsigned long start, unsigned long end)
{
unsigned long i = start & ~(L1_CACHE_BYTES - 1);
@@ -20,10 +30,16 @@ void icache_inv_range(unsigned long start, unsigned long end)
asm volatile("icache.iva %0\n"::"r"(i):"memory");
sync_is();
}
-
-void icache_inv_all(void)
+#else
+void icache_inv_range(unsigned long start, unsigned long end)
{
- asm volatile("icache.ialls\n":::"memory");
+ icache_inv_all();
+}
+#endif
+
+inline void dcache_wb_line(unsigned long start)
+{
+ asm volatile("dcache.cval1 %0\n"::"r"(start):"memory");
sync_is();
}
@@ -36,27 +52,10 @@ void dcache_wb_range(unsigned long start, unsigned long end)
sync_is();
}
-void dcache_inv_range(unsigned long start, unsigned long end)
-{
- unsigned long i = start & ~(L1_CACHE_BYTES - 1);
-
- for (; i < end; i += L1_CACHE_BYTES)
- asm volatile("dcache.civa %0\n"::"r"(i):"memory");
- sync_is();
-}
-
void cache_wbinv_range(unsigned long start, unsigned long end)
{
- unsigned long i = start & ~(L1_CACHE_BYTES - 1);
-
- for (; i < end; i += L1_CACHE_BYTES)
- asm volatile("dcache.cval1 %0\n"::"r"(i):"memory");
- sync_is();
-
- i = start & ~(L1_CACHE_BYTES - 1);
- for (; i < end; i += L1_CACHE_BYTES)
- asm volatile("icache.iva %0\n"::"r"(i):"memory");
- sync_is();
+ dcache_wb_range(start, end);
+ icache_inv_range(start, end);
}
EXPORT_SYMBOL(cache_wbinv_range);
diff --git a/arch/csky/mm/highmem.c b/arch/csky/mm/highmem.c
index 3317b774..8131291 100644
--- a/arch/csky/mm/highmem.c
+++ b/arch/csky/mm/highmem.c
@@ -117,85 +117,29 @@ struct page *kmap_atomic_to_page(void *ptr)
return pte_page(*pte);
}
-static void __init fixrange_init(unsigned long start, unsigned long end,
- pgd_t *pgd_base)
-{
-#ifdef CONFIG_HIGHMEM
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- int i, j, k;
- unsigned long vaddr;
-
- vaddr = start;
- i = __pgd_offset(vaddr);
- j = __pud_offset(vaddr);
- k = __pmd_offset(vaddr);
- pgd = pgd_base + i;
-
- for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
- pud = (pud_t *)pgd;
- for ( ; (j < PTRS_PER_PUD) && (vaddr != end); pud++, j++) {
- pmd = (pmd_t *)pud;
- for (; (k < PTRS_PER_PMD) && (vaddr != end); pmd++, k++) {
- if (pmd_none(*pmd)) {
- pte = (pte_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
- if (!pte)
- panic("%s: Failed to allocate %lu bytes align=%lx\n",
- __func__, PAGE_SIZE,
- PAGE_SIZE);
-
- set_pmd(pmd, __pmd(__pa(pte)));
- BUG_ON(pte != pte_offset_kernel(pmd, 0));
- }
- vaddr += PMD_SIZE;
- }
- k = 0;
- }
- j = 0;
- }
-#endif
-}
-
-void __init fixaddr_kmap_pages_init(void)
+static void __init kmap_pages_init(void)
{
unsigned long vaddr;
- pgd_t *pgd_base;
-#ifdef CONFIG_HIGHMEM
pgd_t *pgd;
pmd_t *pmd;
pud_t *pud;
pte_t *pte;
-#endif
- pgd_base = swapper_pg_dir;
- /*
- * Fixed mappings:
- */
- vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
- fixrange_init(vaddr, 0, pgd_base);
-
-#ifdef CONFIG_HIGHMEM
- /*
- * Permanent kmaps:
- */
vaddr = PKMAP_BASE;
- fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
+ fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, swapper_pg_dir);
pgd = swapper_pg_dir + __pgd_offset(vaddr);
pud = (pud_t *)pgd;
pmd = pmd_offset(pud, vaddr);
pte = pte_offset_kernel(pmd, vaddr);
pkmap_page_table = pte;
-#endif
}
void __init kmap_init(void)
{
unsigned long vaddr;
- fixaddr_kmap_pages_init();
+ kmap_pages_init();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN);
diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c
index d4c2292..cb64d86 100644
--- a/arch/csky/mm/init.c
+++ b/arch/csky/mm/init.c
@@ -19,6 +19,7 @@
#include <linux/swap.h>
#include <linux/proc_fs.h>
#include <linux/pfn.h>
+#include <linux/initrd.h>
#include <asm/setup.h>
#include <asm/cachectl.h>
@@ -31,10 +32,50 @@
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss;
+EXPORT_SYMBOL(invalid_pte_table);
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
__page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
+#ifdef CONFIG_BLK_DEV_INITRD
+static void __init setup_initrd(void)
+{
+ unsigned long size;
+
+ if (initrd_start >= initrd_end) {
+ pr_err("initrd not found or empty");
+ goto disable;
+ }
+
+ if (__pa(initrd_end) > PFN_PHYS(max_low_pfn)) {
+ pr_err("initrd extends beyond end of memory");
+ goto disable;
+ }
+
+ size = initrd_end - initrd_start;
+
+ if (memblock_is_region_reserved(__pa(initrd_start), size)) {
+ pr_err("INITRD: 0x%08lx+0x%08lx overlaps in-use memory region",
+ __pa(initrd_start), size);
+ goto disable;
+ }
+
+ memblock_reserve(__pa(initrd_start), size);
+
+ pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n",
+ (void *)(initrd_start), size);
+
+ initrd_below_start_ok = 1;
+
+ return;
+
+disable:
+ initrd_start = initrd_end = 0;
+
+ pr_err(" - disabling initrd\n");
+}
+#endif
+
void __init mem_init(void)
{
#ifdef CONFIG_HIGHMEM
@@ -46,6 +87,10 @@ void __init mem_init(void)
#endif
high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
+#ifdef CONFIG_BLK_DEV_INITRD
+ setup_initrd();
+#endif
+
memblock_free_all();
#ifdef CONFIG_HIGHMEM
@@ -101,3 +146,50 @@ void __init pre_mmu_init(void)
/* Setup page mask to 4k */
write_mmu_pagemask(0);
}
+
+void __init fixrange_init(unsigned long start, unsigned long end,
+ pgd_t *pgd_base)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ int i, j, k;
+ unsigned long vaddr;
+
+ vaddr = start;
+ i = __pgd_offset(vaddr);
+ j = __pud_offset(vaddr);
+ k = __pmd_offset(vaddr);
+ pgd = pgd_base + i;
+
+ for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
+ pud = (pud_t *)pgd;
+ for ( ; (j < PTRS_PER_PUD) && (vaddr != end); pud++, j++) {
+ pmd = (pmd_t *)pud;
+ for (; (k < PTRS_PER_PMD) && (vaddr != end); pmd++, k++) {
+ if (pmd_none(*pmd)) {
+ pte = (pte_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
+ if (!pte)
+ panic("%s: Failed to allocate %lu bytes align=%lx\n",
+ __func__, PAGE_SIZE,
+ PAGE_SIZE);
+
+ set_pmd(pmd, __pmd(__pa(pte)));
+ BUG_ON(pte != pte_offset_kernel(pmd, 0));
+ }
+ vaddr += PMD_SIZE;
+ }
+ k = 0;
+ }
+ j = 0;
+ }
+}
+
+void __init fixaddr_init(void)
+{
+ unsigned long vaddr;
+
+ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+ fixrange_init(vaddr, vaddr + PMD_SIZE, swapper_pg_dir);
+}
diff --git a/arch/csky/mm/syscache.c b/arch/csky/mm/syscache.c
index c4645e4..ffade2f 100644
--- a/arch/csky/mm/syscache.c
+++ b/arch/csky/mm/syscache.c
@@ -3,7 +3,7 @@
#include <linux/syscalls.h>
#include <asm/page.h>
-#include <asm/cache.h>
+#include <asm/cacheflush.h>
#include <asm/cachectl.h>
SYSCALL_DEFINE3(cacheflush,
@@ -13,17 +13,14 @@ SYSCALL_DEFINE3(cacheflush,
{
switch (cache) {
case ICACHE:
- icache_inv_range((unsigned long)addr,
- (unsigned long)addr + bytes);
- break;
+ case BCACHE:
+ flush_icache_mm_range(current->mm,
+ (unsigned long)addr,
+ (unsigned long)addr + bytes);
case DCACHE:
dcache_wb_range((unsigned long)addr,
(unsigned long)addr + bytes);
break;
- case BCACHE:
- cache_wbinv_range((unsigned long)addr,
- (unsigned long)addr + bytes);
- break;
default:
return -EINVAL;
}
diff --git a/arch/csky/mm/tcm.c b/arch/csky/mm/tcm.c
new file mode 100644
index 0000000..ddeb363
--- /dev/null
+++ b/arch/csky/mm/tcm.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/highmem.h>
+#include <linux/genalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/fixmap.h>
+
+#if (CONFIG_ITCM_RAM_BASE == 0xffffffff)
+#error "You should define ITCM_RAM_BASE"
+#endif
+
+#ifdef CONFIG_HAVE_DTCM
+#if (CONFIG_DTCM_RAM_BASE == 0xffffffff)
+#error "You should define DTCM_RAM_BASE"
+#endif
+
+#if (CONFIG_DTCM_RAM_BASE == CONFIG_ITCM_RAM_BASE)
+#error "You should define correct DTCM_RAM_BASE"
+#endif
+#endif
+
+extern char __tcm_start, __tcm_end, __dtcm_start;
+
+static struct gen_pool *tcm_pool;
+
+static void __init tcm_mapping_init(void)
+{
+ pte_t *tcm_pte;
+ unsigned long vaddr, paddr;
+ int i;
+
+ paddr = CONFIG_ITCM_RAM_BASE;
+
+ if (pfn_valid(PFN_DOWN(CONFIG_ITCM_RAM_BASE)))
+ goto panic;
+
+#ifndef CONFIG_HAVE_DTCM
+ for (i = 0; i < TCM_NR_PAGES; i++) {
+#else
+ for (i = 0; i < CONFIG_ITCM_NR_PAGES; i++) {
+#endif
+ vaddr = __fix_to_virt(FIX_TCM - i);
+
+ tcm_pte =
+ pte_offset_kernel((pmd_t *)pgd_offset_k(vaddr), vaddr);
+
+ set_pte(tcm_pte, pfn_pte(__phys_to_pfn(paddr), PAGE_KERNEL));
+
+ flush_tlb_one(vaddr);
+
+ paddr = paddr + PAGE_SIZE;
+ }
+
+#ifdef CONFIG_HAVE_DTCM
+ if (pfn_valid(PFN_DOWN(CONFIG_DTCM_RAM_BASE)))
+ goto panic;
+
+ paddr = CONFIG_DTCM_RAM_BASE;
+
+ for (i = 0; i < CONFIG_DTCM_NR_PAGES; i++) {
+ vaddr = __fix_to_virt(FIX_TCM - CONFIG_ITCM_NR_PAGES - i);
+
+ tcm_pte =
+ pte_offset_kernel((pmd_t *) pgd_offset_k(vaddr), vaddr);
+
+ set_pte(tcm_pte, pfn_pte(__phys_to_pfn(paddr), PAGE_KERNEL));
+
+ flush_tlb_one(vaddr);
+
+ paddr = paddr + PAGE_SIZE;
+ }
+#endif
+
+#ifndef CONFIG_HAVE_DTCM
+ memcpy((void *)__fix_to_virt(FIX_TCM),
+ &__tcm_start, &__tcm_end - &__tcm_start);
+
+ pr_info("%s: mapping tcm va:0x%08lx to pa:0x%08x\n",
+ __func__, __fix_to_virt(FIX_TCM), CONFIG_ITCM_RAM_BASE);
+
+ pr_info("%s: __tcm_start va:0x%08lx size:%d\n",
+ __func__, (unsigned long)&__tcm_start, &__tcm_end - &__tcm_start);
+#else
+ memcpy((void *)__fix_to_virt(FIX_TCM),
+ &__tcm_start, &__dtcm_start - &__tcm_start);
+
+ pr_info("%s: mapping itcm va:0x%08lx to pa:0x%08x\n",
+ __func__, __fix_to_virt(FIX_TCM), CONFIG_ITCM_RAM_BASE);
+
+ pr_info("%s: __itcm_start va:0x%08lx size:%d\n",
+ __func__, (unsigned long)&__tcm_start, &__dtcm_start - &__tcm_start);
+
+ memcpy((void *)__fix_to_virt(FIX_TCM - CONFIG_ITCM_NR_PAGES),
+ &__dtcm_start, &__tcm_end - &__dtcm_start);
+
+ pr_info("%s: mapping dtcm va:0x%08lx to pa:0x%08x\n",
+ __func__, __fix_to_virt(FIX_TCM - CONFIG_ITCM_NR_PAGES),
+ CONFIG_DTCM_RAM_BASE);
+
+ pr_info("%s: __dtcm_start va:0x%08lx size:%d\n",
+ __func__, (unsigned long)&__dtcm_start, &__tcm_end - &__dtcm_start);
+
+#endif
+ return;
+panic:
+ panic("TCM init error");
+}
+
+void *tcm_alloc(size_t len)
+{
+ unsigned long vaddr;
+
+ if (!tcm_pool)
+ return NULL;
+
+ vaddr = gen_pool_alloc(tcm_pool, len);
+ if (!vaddr)
+ return NULL;
+
+ return (void *) vaddr;
+}
+EXPORT_SYMBOL(tcm_alloc);
+
+void tcm_free(void *addr, size_t len)
+{
+ gen_pool_free(tcm_pool, (unsigned long) addr, len);
+}
+EXPORT_SYMBOL(tcm_free);
+
+static int __init tcm_setup_pool(void)
+{
+#ifndef CONFIG_HAVE_DTCM
+ u32 pool_size = (u32) (TCM_NR_PAGES * PAGE_SIZE)
+ - (u32) (&__tcm_end - &__tcm_start);
+
+ u32 tcm_pool_start = __fix_to_virt(FIX_TCM)
+ + (u32) (&__tcm_end - &__tcm_start);
+#else
+ u32 pool_size = (u32) (CONFIG_DTCM_NR_PAGES * PAGE_SIZE)
+ - (u32) (&__tcm_end - &__dtcm_start);
+
+ u32 tcm_pool_start = __fix_to_virt(FIX_TCM - CONFIG_ITCM_NR_PAGES)
+ + (u32) (&__tcm_end - &__dtcm_start);
+#endif
+ int ret;
+
+ tcm_pool = gen_pool_create(2, -1);
+
+ ret = gen_pool_add(tcm_pool, tcm_pool_start, pool_size, -1);
+ if (ret) {
+ pr_err("%s: gen_pool add failed!\n", __func__);
+ return ret;
+ }
+
+ pr_info("%s: Added %d bytes @ 0x%08x to memory pool\n",
+ __func__, pool_size, tcm_pool_start);
+
+ return 0;
+}
+
+static int __init tcm_init(void)
+{
+ tcm_mapping_init();
+
+ tcm_setup_pool();
+
+ return 0;
+}
+arch_initcall(tcm_init);
diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index 40712e4..c3a6304 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -118,12 +118,11 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
dev->bsize = bsize;
dev->bshift = ffs(bsize) - 10;
- dev->queue = blk_alloc_queue(GFP_KERNEL);
+ dev->queue = blk_alloc_queue(nfhd_make_request, NUMA_NO_NODE);
if (dev->queue == NULL)
goto free_dev;
dev->queue->queuedata = dev;
- blk_queue_make_request(dev->queue, nfhd_make_request);
blk_queue_logical_block_size(dev->queue, bsize);
dev->disk = alloc_disk(16);
diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts
index 37b9316..c340f94 100644
--- a/arch/mips/boot/dts/ingenic/ci20.dts
+++ b/arch/mips/boot/dts/ingenic/ci20.dts
@@ -4,6 +4,8 @@
#include "jz4780.dtsi"
#include <dt-bindings/clock/ingenic,tcu.h>
#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/regulator/active-semi,8865-regulator.h>
/ {
compatible = "img,ci20", "ingenic,jz4780";
@@ -163,63 +165,71 @@ act8600: act8600@5a {
regulators {
vddcore: SUDCDC1 {
- regulator-name = "VDDCORE";
+ regulator-name = "DCDC_REG1";
regulator-min-microvolt = <1100000>;
regulator-max-microvolt = <1100000>;
regulator-always-on;
};
vddmem: SUDCDC2 {
- regulator-name = "VDDMEM";
+ regulator-name = "DCDC_REG2";
regulator-min-microvolt = <1500000>;
regulator-max-microvolt = <1500000>;
regulator-always-on;
};
vcc_33: SUDCDC3 {
- regulator-name = "VCC33";
+ regulator-name = "DCDC_REG3";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
regulator-always-on;
};
vcc_50: SUDCDC4 {
- regulator-name = "VCC50";
+ regulator-name = "SUDCDC_REG4";
regulator-min-microvolt = <5000000>;
regulator-max-microvolt = <5000000>;
regulator-always-on;
};
vcc_25: LDO_REG5 {
- regulator-name = "VCC25";
+ regulator-name = "LDO_REG5";
regulator-min-microvolt = <2500000>;
regulator-max-microvolt = <2500000>;
regulator-always-on;
};
wifi_io: LDO_REG6 {
- regulator-name = "WIFIIO";
+ regulator-name = "LDO_REG6";
regulator-min-microvolt = <2500000>;
regulator-max-microvolt = <2500000>;
regulator-always-on;
};
vcc_28: LDO_REG7 {
- regulator-name = "VCC28";
+ regulator-name = "LDO_REG7";
regulator-min-microvolt = <2800000>;
regulator-max-microvolt = <2800000>;
regulator-always-on;
};
vcc_15: LDO_REG8 {
- regulator-name = "VCC15";
+ regulator-name = "LDO_REG8";
regulator-min-microvolt = <1500000>;
regulator-max-microvolt = <1500000>;
regulator-always-on;
};
- vcc_18: LDO_REG9 {
- regulator-name = "VCC18";
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
+ vrtc_18: LDO_REG9 {
+ regulator-name = "LDO_REG9";
+ /* Despite the datasheet stating 3.3V
+ * for REG9 and the driver expecting that,
+ * REG9 outputs 1.8V.
+ * Likely the CI20 uses a proprietary
+ * factory programmed chip variant.
+ * Since this is a simple on/off LDO the
+ * exact values do not matter.
+ */
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
regulator-always-on;
};
vcc_11: LDO_REG10 {
- regulator-name = "VCC11";
- regulator-min-microvolt = <1100000>;
- regulator-max-microvolt = <1100000>;
+ regulator-name = "LDO_REG10";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
regulator-always-on;
};
};
@@ -261,7 +271,9 @@ &i2c4 {
rtc@51 {
compatible = "nxp,pcf8563";
reg = <0x51>;
- interrupts = <110>;
+
+ interrupt-parent = <&gpf>;
+ interrupts = <30 IRQ_TYPE_LEVEL_LOW>;
};
};
diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi
index 5accda2..a3301ba 100644
--- a/arch/mips/boot/dts/ingenic/jz4740.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <dt-bindings/clock/jz4740-cgu.h>
+#include <dt-bindings/clock/ingenic,tcu.h>
/ {
#address-cells = <1>;
@@ -45,14 +46,6 @@ cgu: jz4740-cgu@10000000 {
#clock-cells = <1>;
};
- watchdog: watchdog@10002000 {
- compatible = "ingenic,jz4740-watchdog";
- reg = <0x10002000 0x10>;
-
- clocks = <&cgu JZ4740_CLK_RTC>;
- clock-names = "rtc";
- };
-
tcu: timer@10002000 {
compatible = "ingenic,jz4740-tcu", "simple-mfd";
reg = <0x10002000 0x1000>;
@@ -73,6 +66,14 @@ &cgu JZ4740_CLK_PCLK
interrupt-parent = <&intc>;
interrupts = <23 22 21>;
+
+ watchdog: watchdog@0 {
+ compatible = "ingenic,jz4740-watchdog";
+ reg = <0x0 0xc>;
+
+ clocks = <&tcu TCU_CLK_WDT>;
+ clock-names = "wdt";
+ };
};
rtc_dev: rtc@10003000 {
diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi
index f928329..bb89653 100644
--- a/arch/mips/boot/dts/ingenic/jz4780.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <dt-bindings/clock/jz4780-cgu.h>
+#include <dt-bindings/clock/ingenic,tcu.h>
#include <dt-bindings/dma/jz4780-dma.h>
/ {
@@ -67,6 +68,14 @@ &cgu JZ4780_CLK_EXCLK
interrupt-parent = <&intc>;
interrupts = <27 26 25>;
+
+ watchdog: watchdog@0 {
+ compatible = "ingenic,jz4780-watchdog";
+ reg = <0x0 0xc>;
+
+ clocks = <&tcu TCU_CLK_WDT>;
+ clock-names = "wdt";
+ };
};
rtc_dev: rtc@10003000 {
@@ -348,14 +357,6 @@ i2c4: i2c@10054000 {
status = "disabled";
};
- watchdog: watchdog@10002000 {
- compatible = "ingenic,jz4780-watchdog";
- reg = <0x10002000 0x10>;
-
- clocks = <&cgu JZ4780_CLK_RTCLK>;
- clock-names = "rtc";
- };
-
nemc: nemc@13410000 {
compatible = "ingenic,jz4780-nemc";
reg = <0x13410000 0x10000>;
diff --git a/arch/mips/boot/dts/ingenic/x1000.dtsi b/arch/mips/boot/dts/ingenic/x1000.dtsi
index 4994c69..147f7d5 100644
--- a/arch/mips/boot/dts/ingenic/x1000.dtsi
+++ b/arch/mips/boot/dts/ingenic/x1000.dtsi
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <dt-bindings/clock/ingenic,tcu.h>
#include <dt-bindings/clock/x1000-cgu.h>
#include <dt-bindings/dma/x1000-dma.h>
@@ -72,7 +73,7 @@ wdt: watchdog@0 {
compatible = "ingenic,x1000-watchdog", "ingenic,jz4780-watchdog";
reg = <0x0 0x10>;
- clocks = <&cgu X1000_CLK_RTCLK>;
+ clocks = <&tcu TCU_CLK_WDT>;
clock-names = "wdt";
};
};
@@ -158,7 +159,6 @@ gpd: gpio@3 {
i2c0: i2c-controller@10050000 {
compatible = "ingenic,x1000-i2c";
reg = <0x10050000 0x1000>;
-
#address-cells = <1>;
#size-cells = <0>;
@@ -173,7 +173,6 @@ i2c0: i2c-controller@10050000 {
i2c1: i2c-controller@10051000 {
compatible = "ingenic,x1000-i2c";
reg = <0x10051000 0x1000>;
-
#address-cells = <1>;
#size-cells = <0>;
@@ -188,7 +187,6 @@ i2c1: i2c-controller@10051000 {
i2c2: i2c-controller@10052000 {
compatible = "ingenic,x1000-i2c";
reg = <0x10052000 0x1000>;
-
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/mips/include/asm/sync.h b/arch/mips/include/asm/sync.h
index 7c6a109..aabd097 100644
--- a/arch/mips/include/asm/sync.h
+++ b/arch/mips/include/asm/sync.h
@@ -155,9 +155,11 @@
* effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use
* optimized memory barrier primitives."). Here we specify that the affected
* sync instructions should be emitted twice.
+ * Note that this expression is evaluated by the assembler (not the compiler),
+ * and that the assembler evaluates '==' as 0 or -1, not 0 or 1.
*/
#ifdef CONFIG_CPU_CAVIUM_OCTEON
-# define __SYNC_rpt(type) (1 + (type == __SYNC_wmb))
+# define __SYNC_rpt(type) (1 - (type == __SYNC_wmb))
#else
# define __SYNC_rpt(type) 1
#endif
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 1ac2752..a7b469d 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -605,7 +605,8 @@ static void __init bootcmdline_init(char **cmdline_p)
* If we're configured to take boot arguments from DT, look for those
* now.
*/
- if (IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB))
+ if (IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB) ||
+ IS_ENABLED(CONFIG_MIPS_CMDLINE_DTB_EXTEND))
of_scan_flat_dt(bootcmdline_scan_chosen, &dt_bootargs);
#endif
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index 6176b9a..d0d832a 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -134,7 +134,7 @@ void release_vpe(struct vpe *v)
{
list_del(&v->list);
if (v->load_addr)
- release_progmem(v);
+ release_progmem(v->load_addr);
kfree(v);
}
diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile
index aa89a41..d7fe840 100644
--- a/arch/mips/vdso/Makefile
+++ b/arch/mips/vdso/Makefile
@@ -33,6 +33,7 @@
cflags-vdso := $(ccflags-vdso) \
$(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \
-O3 -g -fPIC -fno-strict-aliasing -fno-common -fno-builtin -G 0 \
+ -mrelax-pic-calls $(call cc-option, -mexplicit-relocs) \
-fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \
$(call cc-option, -fno-asynchronous-unwind-tables) \
$(call cc-option, -fno-stack-protector)
@@ -51,6 +52,8 @@
CFLAGS_REMOVE_vgettimeofday.o = -pg
+DISABLE_VDSO := n
+
#
# For the pre-R6 code in arch/mips/vdso/vdso.h for locating
# the base address of VDSO, the linker will emit a R_MIPS_PC32
@@ -64,11 +67,24 @@
ifndef CONFIG_CPU_MIPSR6
ifeq ($(call ld-ifversion, -lt, 225000000, y),y)
$(warning MIPS VDSO requires binutils >= 2.25)
- obj-vdso-y := $(filter-out vgettimeofday.o, $(obj-vdso-y))
- ccflags-vdso += -DDISABLE_MIPS_VDSO
+ DISABLE_VDSO := y
endif
endif
+#
+# GCC (at least up to version 9.2) appears to emit function calls that make use
+# of the GOT when targeting microMIPS, which we can't use in the VDSO due to
+# the lack of relocations. As such, we disable the VDSO for microMIPS builds.
+#
+ifdef CONFIG_CPU_MICROMIPS
+ DISABLE_VDSO := y
+endif
+
+ifeq ($(DISABLE_VDSO),y)
+ obj-vdso-y := $(filter-out vgettimeofday.o, $(obj-vdso-y))
+ ccflags-vdso += -DDISABLE_MIPS_VDSO
+endif
+
# VDSO linker flags.
VDSO_LDFLAGS := \
-Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 \
@@ -81,12 +97,18 @@
UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
+# Check that we don't have PIC 'jalr t9' calls left
+quiet_cmd_vdso_mips_check = VDSOCHK $@
+ cmd_vdso_mips_check = if $(OBJDUMP) --disassemble $@ | egrep -h "jalr.*t9" > /dev/null; \
+ then (echo >&2 "$@: PIC 'jalr t9' calls are not supported"; \
+ rm -f $@; /bin/false); fi
+
#
# Shared build commands.
#
quiet_cmd_vdsold_and_vdso_check = LD $@
- cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check)
+ cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check); $(cmd_vdso_mips_check)
quiet_cmd_vdsold = VDSO $@
cmd_vdsold = $(CC) $(c_flags) $(VDSO_LDFLAGS) \
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 71034b5..3801a2e 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -79,6 +79,11 @@
config STACK_GROWSUP
def_bool y
+config ARCH_DEFCONFIG
+ string
+ default "arch/parisc/configs/generic-32bit_defconfig" if !64BIT
+ default "arch/parisc/configs/generic-64bit_defconfig" if 64BIT
+
config GENERIC_LOCKBREAK
bool
default y
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index dca8f2d..628cd8b 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -34,6 +34,13 @@
LD_BFD := elf32-hppa-linux
endif
+# select defconfig based on actual architecture
+ifeq ($(shell uname -m),parisc64)
+ KBUILD_DEFCONFIG := generic-64bit_defconfig
+else
+ KBUILD_DEFCONFIG := generic-32bit_defconfig
+endif
+
export LD_BFD
ifneq ($(SUBARCH),$(UTS_MACHINE))
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 8633208..080a0bf 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -295,8 +295,13 @@ static inline bool pfn_valid(unsigned long pfn)
/*
* Some number of bits at the level of the page table that points to
* a hugepte are used to encode the size. This masks those bits.
+ * On 8xx, HW assistance requires 4k alignment for the hugepte.
*/
+#ifdef CONFIG_PPC_8xx
+#define HUGEPD_SHIFT_MASK 0xfff
+#else
#define HUGEPD_SHIFT_MASK 0x3f
+#endif
#ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 8387698..eedcbfb 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -168,6 +168,10 @@ struct thread_struct {
unsigned long srr1;
unsigned long dar;
unsigned long dsisr;
+#ifdef CONFIG_PPC_BOOK3S_32
+ unsigned long r0, r3, r4, r5, r6, r8, r9, r11;
+ unsigned long lr, ctr;
+#endif
#endif
/* Debug Registers */
struct debug_reg debug;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index c25e562..fcf24a3 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -132,6 +132,18 @@ int main(void)
OFFSET(SRR1, thread_struct, srr1);
OFFSET(DAR, thread_struct, dar);
OFFSET(DSISR, thread_struct, dsisr);
+#ifdef CONFIG_PPC_BOOK3S_32
+ OFFSET(THR0, thread_struct, r0);
+ OFFSET(THR3, thread_struct, r3);
+ OFFSET(THR4, thread_struct, r4);
+ OFFSET(THR5, thread_struct, r5);
+ OFFSET(THR6, thread_struct, r6);
+ OFFSET(THR8, thread_struct, r8);
+ OFFSET(THR9, thread_struct, r9);
+ OFFSET(THR11, thread_struct, r11);
+ OFFSET(THLR, thread_struct, lr);
+ OFFSET(THCTR, thread_struct, ctr);
+#endif
#endif
#ifdef CONFIG_SPE
OFFSET(THREAD_EVR0, thread_struct, evr[0]);
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index e745abc..245be4f 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -2193,11 +2193,13 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
* oprofile_cpu_type already has a value, then we are
* possibly overriding a real PVR with a logical one,
* and, in that case, keep the current value for
- * oprofile_cpu_type.
+ * oprofile_cpu_type. Futhermore, let's ensure that the
+ * fix for the PMAO bug is enabled on compatibility mode.
*/
if (old.oprofile_cpu_type != NULL) {
t->oprofile_cpu_type = old.oprofile_cpu_type;
t->oprofile_type = old.oprofile_type;
+ t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG;
}
}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index a1eaffe..7b048ce 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -1184,6 +1184,17 @@ void eeh_handle_special_event(void)
eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
eeh_handle_normal_event(pe);
} else {
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+
+ /* Notify all devices to be down */
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
+ eeh_set_channel_state(pe, pci_channel_io_perm_failure);
+ eeh_pe_report(
+ "error_detected(permanent failure)", pe,
+ eeh_report_failure, NULL);
+
pci_lock_rescan_remove();
list_for_each_entry(hose, &hose_list, list_node) {
phb_pe = eeh_phb_pe_get(hose);
@@ -1192,16 +1203,6 @@ void eeh_handle_special_event(void)
(phb_pe->state & EEH_PE_RECOVERING))
continue;
- eeh_for_each_pe(pe, tmp_pe)
- eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
- edev->mode &= ~EEH_DEV_NO_HANDLER;
-
- /* Notify all devices to be down */
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
- eeh_set_channel_state(pe, pci_channel_io_perm_failure);
- eeh_pe_report(
- "error_detected(permanent failure)", pe,
- eeh_report_failure, NULL);
bus = eeh_pe_bus_get(phb_pe);
if (!bus) {
pr_err("%s: Cannot find PCI bus for "
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 0713daa..16af0d8 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -783,7 +783,7 @@
1: lis r3,exc_exit_restart_end@ha
addi r3,r3,exc_exit_restart_end@l
cmplw r12,r3
-#if CONFIG_PPC_BOOK3S_601
+#ifdef CONFIG_PPC_BOOK3S_601
bge 2b
#else
bge 3f
@@ -791,7 +791,7 @@
lis r4,exc_exit_restart@ha
addi r4,r4,exc_exit_restart@l
cmplw r12,r4
-#if CONFIG_PPC_BOOK3S_601
+#ifdef CONFIG_PPC_BOOK3S_601
blt 2b
#else
blt 3f
@@ -1354,12 +1354,17 @@
mtspr SPRN_SRR0,r8
mtspr SPRN_SRR1,r9
RFI
-1: tophys(r9,r1)
+1: tophys_novmstack r9, r1
+#ifdef CONFIG_VMAP_STACK
+ li r0, MSR_KERNEL & ~MSR_IR /* can take DTLB miss */
+ mtmsr r0
+ isync
+#endif
lwz r8,INT_FRAME_SIZE+4(r9) /* get return address */
lwz r9,8(r9) /* original msr value */
addi r1,r1,INT_FRAME_SIZE
li r0,0
- tophys(r7, r2)
+ tophys_novmstack r7, r2
stw r0, THREAD + RTAS_SP(r7)
mtspr SPRN_SRR0,r8
mtspr SPRN_SRR1,r9
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 0493fca..97c8879 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -290,17 +290,55 @@
7: EXCEPTION_PROLOG_2
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_CHRP
- bne cr1,1f
+#ifdef CONFIG_VMAP_STACK
+ mfspr r4, SPRN_SPRG_THREAD
+ tovirt(r4, r4)
+ lwz r4, RTAS_SP(r4)
+ cmpwi cr1, r4, 0
#endif
- EXC_XFER_STD(0x200, machine_check_exception)
-#ifdef CONFIG_PPC_CHRP
-1: b machine_check_in_rtas
+ beq cr1, machine_check_tramp
+ b machine_check_in_rtas
+#else
+ b machine_check_tramp
#endif
/* Data access exception. */
. = 0x300
DO_KVM 0x300
DataAccess:
+#ifdef CONFIG_VMAP_STACK
+ mtspr SPRN_SPRG_SCRATCH0,r10
+ mfspr r10, SPRN_SPRG_THREAD
+BEGIN_MMU_FTR_SECTION
+ stw r11, THR11(r10)
+ mfspr r10, SPRN_DSISR
+ mfcr r11
+#ifdef CONFIG_PPC_KUAP
+ andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h
+#else
+ andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h
+#endif
+ mfspr r10, SPRN_SPRG_THREAD
+ beq hash_page_dsi
+.Lhash_page_dsi_cont:
+ mtcr r11
+ lwz r11, THR11(r10)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+ mtspr SPRN_SPRG_SCRATCH1,r11
+ mfspr r11, SPRN_DAR
+ stw r11, DAR(r10)
+ mfspr r11, SPRN_DSISR
+ stw r11, DSISR(r10)
+ mfspr r11, SPRN_SRR0
+ stw r11, SRR0(r10)
+ mfspr r11, SPRN_SRR1 /* check whether user or kernel */
+ stw r11, SRR1(r10)
+ mfcr r10
+ andi. r11, r11, MSR_PR
+
+ EXCEPTION_PROLOG_1
+ b handle_page_fault_tramp_1
+#else /* CONFIG_VMAP_STACK */
EXCEPTION_PROLOG handle_dar_dsisr=1
get_and_save_dar_dsisr_on_stack r4, r5, r11
BEGIN_MMU_FTR_SECTION
@@ -316,11 +354,32 @@
FTR_SECTION_ELSE
b handle_page_fault_tramp_2
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
+#endif /* CONFIG_VMAP_STACK */
/* Instruction access exception. */
. = 0x400
DO_KVM 0x400
InstructionAccess:
+#ifdef CONFIG_VMAP_STACK
+ mtspr SPRN_SPRG_SCRATCH0,r10
+ mtspr SPRN_SPRG_SCRATCH1,r11
+ mfspr r10, SPRN_SPRG_THREAD
+ mfspr r11, SPRN_SRR0
+ stw r11, SRR0(r10)
+ mfspr r11, SPRN_SRR1 /* check whether user or kernel */
+ stw r11, SRR1(r10)
+ mfcr r10
+BEGIN_MMU_FTR_SECTION
+ andis. r11, r11, SRR1_ISI_NOPT@h /* no pte found? */
+ bne hash_page_isi
+.Lhash_page_isi_cont:
+ mfspr r11, SPRN_SRR1 /* check whether user or kernel */
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+ andi. r11, r11, MSR_PR
+
+ EXCEPTION_PROLOG_1
+ EXCEPTION_PROLOG_2
+#else /* CONFIG_VMAP_STACK */
EXCEPTION_PROLOG
andis. r0,r9,SRR1_ISI_NOPT@h /* no pte found? */
beq 1f /* if so, try to put a PTE */
@@ -329,6 +388,7 @@
BEGIN_MMU_FTR_SECTION
bl hash_page
END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+#endif /* CONFIG_VMAP_STACK */
1: mr r4,r12
andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
stw r4, _DAR(r11)
@@ -344,7 +404,7 @@
EXCEPTION_PROLOG handle_dar_dsisr=1
save_dar_dsisr_on_stack r4, r5, r11
addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_STD(0x600, alignment_exception)
+ b alignment_exception_tramp
/* Program check exception */
EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
@@ -645,15 +705,100 @@
. = 0x3000
+machine_check_tramp:
+ EXC_XFER_STD(0x200, machine_check_exception)
+
+alignment_exception_tramp:
+ EXC_XFER_STD(0x600, alignment_exception)
+
handle_page_fault_tramp_1:
+#ifdef CONFIG_VMAP_STACK
+ EXCEPTION_PROLOG_2 handle_dar_dsisr=1
+#endif
lwz r4, _DAR(r11)
lwz r5, _DSISR(r11)
/* fall through */
handle_page_fault_tramp_2:
EXC_XFER_LITE(0x300, handle_page_fault)
+#ifdef CONFIG_VMAP_STACK
+.macro save_regs_thread thread
+ stw r0, THR0(\thread)
+ stw r3, THR3(\thread)
+ stw r4, THR4(\thread)
+ stw r5, THR5(\thread)
+ stw r6, THR6(\thread)
+ stw r8, THR8(\thread)
+ stw r9, THR9(\thread)
+ mflr r0
+ stw r0, THLR(\thread)
+ mfctr r0
+ stw r0, THCTR(\thread)
+.endm
+
+.macro restore_regs_thread thread
+ lwz r0, THLR(\thread)
+ mtlr r0
+ lwz r0, THCTR(\thread)
+ mtctr r0
+ lwz r0, THR0(\thread)
+ lwz r3, THR3(\thread)
+ lwz r4, THR4(\thread)
+ lwz r5, THR5(\thread)
+ lwz r6, THR6(\thread)
+ lwz r8, THR8(\thread)
+ lwz r9, THR9(\thread)
+.endm
+
+hash_page_dsi:
+ save_regs_thread r10
+ mfdsisr r3
+ mfdar r4
+ mfsrr0 r5
+ mfsrr1 r9
+ rlwinm r3, r3, 32 - 15, _PAGE_RW /* DSISR_STORE -> _PAGE_RW */
+ bl hash_page
+ mfspr r10, SPRN_SPRG_THREAD
+ restore_regs_thread r10
+ b .Lhash_page_dsi_cont
+
+hash_page_isi:
+ mr r11, r10
+ mfspr r10, SPRN_SPRG_THREAD
+ save_regs_thread r10
+ li r3, 0
+ lwz r4, SRR0(r10)
+ lwz r9, SRR1(r10)
+ bl hash_page
+ mfspr r10, SPRN_SPRG_THREAD
+ restore_regs_thread r10
+ mr r10, r11
+ b .Lhash_page_isi_cont
+
+ .globl fast_hash_page_return
+fast_hash_page_return:
+ andis. r10, r9, SRR1_ISI_NOPT@h /* Set on ISI, cleared on DSI */
+ mfspr r10, SPRN_SPRG_THREAD
+ restore_regs_thread r10
+ bne 1f
+
+ /* DSI */
+ mtcr r11
+ lwz r11, THR11(r10)
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ SYNC
+ RFI
+
+1: /* ISI */
+ mtcr r11
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ SYNC
+ RFI
+
stack_overflow:
vmap_stack_overflow_exception
+#endif
AltiVecUnavailable:
EXCEPTION_PROLOG
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index a6a5fbb..9db162f 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -64,11 +64,25 @@
.endm
.macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0
+#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S)
+BEGIN_MMU_FTR_SECTION
+ mtcr r10
+FTR_SECTION_ELSE
+ stw r10, _CCR(r11)
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
+#else
stw r10,_CCR(r11) /* save registers */
+#endif
+ mfspr r10, SPRN_SPRG_SCRATCH0
stw r12,GPR12(r11)
stw r9,GPR9(r11)
- mfspr r10,SPRN_SPRG_SCRATCH0
stw r10,GPR10(r11)
+#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S)
+BEGIN_MMU_FTR_SECTION
+ mfcr r10
+ stw r10, _CCR(r11)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+#endif
mfspr r12,SPRN_SPRG_SCRATCH1
stw r12,GPR11(r11)
mflr r10
@@ -83,6 +97,11 @@
stw r10, _DSISR(r11)
.endif
lwz r9, SRR1(r12)
+#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S)
+BEGIN_MMU_FTR_SECTION
+ andi. r10, r9, MSR_PR
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+#endif
lwz r12, SRR0(r12)
#else
mfspr r12,SPRN_SRR0
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 9922306..073a651 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -256,7 +256,7 @@
* set. All other Linux PTE bits control the behavior
* of the MMU.
*/
- rlwimi r10, r10, 0, 0x0f00 /* Clear bits 20-23 */
+ rlwinm r10, r10, 0, ~0x0f00 /* Clear bits 20-23 */
rlwimi r10, r10, 4, 0x0400 /* Copy _PAGE_EXEC into bit 21 */
ori r10, r10, RPN_PATTERN | 0x200 /* Set 22 and 24-27 */
mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 2462cd7..d085432 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -331,11 +331,13 @@ int hw_breakpoint_handler(struct die_args *args)
}
info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
- if (!dar_within_range(regs->dar, info))
- info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
-
- if (!IS_ENABLED(CONFIG_PPC_8xx) && !stepping_handler(regs, bp, info))
- goto out;
+ if (IS_ENABLED(CONFIG_PPC_8xx)) {
+ if (!dar_within_range(regs->dar, info))
+ info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ } else {
+ if (!stepping_handler(regs, bp, info))
+ goto out;
+ }
/*
* As a policy, the callback is invoked in a 'trigger-after-execute'
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
index 0ffdd18..433d97b 100644
--- a/arch/powerpc/kernel/idle_6xx.S
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -166,7 +166,11 @@
mfspr r9,SPRN_HID0
andis. r9,r9,HID0_NAP@h
beq 1f
+#ifdef CONFIG_VMAP_STACK
+ addis r9, r11, nap_save_msscr0@ha
+#else
addis r9,r11,(nap_save_msscr0-KERNELBASE)@ha
+#endif
lwz r9,nap_save_msscr0@l(r9)
mtspr SPRN_MSSCR0, r9
sync
@@ -174,7 +178,11 @@
1:
END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR)
BEGIN_FTR_SECTION
+#ifdef CONFIG_VMAP_STACK
+ addis r9, r11, nap_save_hid1@ha
+#else
addis r9,r11,(nap_save_hid1-KERNELBASE)@ha
+#endif
lwz r9,nap_save_hid1@l(r9)
mtspr SPRN_HID1, r9
END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX)
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index e6c30ce..d215f95 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -200,14 +200,27 @@ unsigned long get_tm_stackpointer(struct task_struct *tsk)
* normal/non-checkpointed stack pointer.
*/
+ unsigned long ret = tsk->thread.regs->gpr[1];
+
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BUG_ON(tsk != current);
if (MSR_TM_ACTIVE(tsk->thread.regs->msr)) {
+ preempt_disable();
tm_reclaim_current(TM_CAUSE_SIGNAL);
if (MSR_TM_TRANSACTIONAL(tsk->thread.regs->msr))
- return tsk->thread.ckpt_regs.gpr[1];
+ ret = tsk->thread.ckpt_regs.gpr[1];
+
+ /*
+ * If we treclaim, we must clear the current thread's TM bits
+ * before re-enabling preemption. Otherwise we might be
+ * preempted and have the live MSR[TS] changed behind our back
+ * (tm_recheckpoint_new_task() would recheckpoint). Besides, we
+ * enter the signal handler in non-transactional state.
+ */
+ tsk->thread.regs->msr &= ~MSR_TS_MASK;
+ preempt_enable();
}
#endif
- return tsk->thread.regs->gpr[1];
+ return ret;
}
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 98600b2..1b090a7 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -489,19 +489,11 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
*/
static int save_tm_user_regs(struct pt_regs *regs,
struct mcontext __user *frame,
- struct mcontext __user *tm_frame, int sigret)
+ struct mcontext __user *tm_frame, int sigret,
+ unsigned long msr)
{
- unsigned long msr = regs->msr;
-
WARN_ON(tm_suspend_disabled);
- /* Remove TM bits from thread's MSR. The MSR in the sigcontext
- * just indicates to userland that we were doing a transaction, but we
- * don't want to return in transactional state. This also ensures
- * that flush_fp_to_thread won't set TIF_RESTORE_TM again.
- */
- regs->msr &= ~MSR_TS_MASK;
-
/* Save both sets of general registers */
if (save_general_regs(¤t->thread.ckpt_regs, frame)
|| save_general_regs(regs, tm_frame))
@@ -912,6 +904,10 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
int sigret;
unsigned long tramp;
struct pt_regs *regs = tsk->thread.regs;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /* Save the thread's msr before get_tm_stackpointer() changes it */
+ unsigned long msr = regs->msr;
+#endif
BUG_ON(tsk != current);
@@ -944,13 +940,13 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
tm_frame = &rt_sf->uc_transact.uc_mcontext;
- if (MSR_TM_ACTIVE(regs->msr)) {
+ if (MSR_TM_ACTIVE(msr)) {
if (__put_user((unsigned long)&rt_sf->uc_transact,
&rt_sf->uc.uc_link) ||
__put_user((unsigned long)tm_frame,
&rt_sf->uc_transact.uc_regs))
goto badframe;
- if (save_tm_user_regs(regs, frame, tm_frame, sigret))
+ if (save_tm_user_regs(regs, frame, tm_frame, sigret, msr))
goto badframe;
}
else
@@ -1369,6 +1365,10 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
int sigret;
unsigned long tramp;
struct pt_regs *regs = tsk->thread.regs;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /* Save the thread's msr before get_tm_stackpointer() changes it */
+ unsigned long msr = regs->msr;
+#endif
BUG_ON(tsk != current);
@@ -1402,9 +1402,9 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
tm_mctx = &frame->mctx_transact;
- if (MSR_TM_ACTIVE(regs->msr)) {
+ if (MSR_TM_ACTIVE(msr)) {
if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact,
- sigret))
+ sigret, msr))
goto badframe;
}
else
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 1175155..84ed2e7 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -192,7 +192,8 @@ static long setup_sigcontext(struct sigcontext __user *sc,
static long setup_tm_sigcontexts(struct sigcontext __user *sc,
struct sigcontext __user *tm_sc,
struct task_struct *tsk,
- int signr, sigset_t *set, unsigned long handler)
+ int signr, sigset_t *set, unsigned long handler,
+ unsigned long msr)
{
/* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
* process never used altivec yet (MSR_VEC is zero in pt_regs of
@@ -207,12 +208,11 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc);
#endif
struct pt_regs *regs = tsk->thread.regs;
- unsigned long msr = tsk->thread.regs->msr;
long err = 0;
BUG_ON(tsk != current);
- BUG_ON(!MSR_TM_ACTIVE(regs->msr));
+ BUG_ON(!MSR_TM_ACTIVE(msr));
WARN_ON(tm_suspend_disabled);
@@ -222,13 +222,6 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
*/
msr |= tsk->thread.ckpt_regs.msr & (MSR_FP | MSR_VEC | MSR_VSX);
- /* Remove TM bits from thread's MSR. The MSR in the sigcontext
- * just indicates to userland that we were doing a transaction, but we
- * don't want to return in transactional state. This also ensures
- * that flush_fp_to_thread won't set TIF_RESTORE_TM again.
- */
- regs->msr &= ~MSR_TS_MASK;
-
#ifdef CONFIG_ALTIVEC
err |= __put_user(v_regs, &sc->v_regs);
err |= __put_user(tm_v_regs, &tm_sc->v_regs);
@@ -824,6 +817,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
unsigned long newsp = 0;
long err = 0;
struct pt_regs *regs = tsk->thread.regs;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /* Save the thread's msr before get_tm_stackpointer() changes it */
+ unsigned long msr = regs->msr;
+#endif
BUG_ON(tsk != current);
@@ -841,7 +838,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
err |= __put_user(0, &frame->uc.uc_flags);
err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]);
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- if (MSR_TM_ACTIVE(regs->msr)) {
+ if (MSR_TM_ACTIVE(msr)) {
/* The ucontext_t passed to userland points to the second
* ucontext_t (for transactional state) with its uc_link ptr.
*/
@@ -849,7 +846,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
&frame->uc_transact.uc_mcontext,
tsk, ksig->sig, NULL,
- (unsigned long)ksig->ka.sa.sa_handler);
+ (unsigned long)ksig->ka.sa.sa_handler,
+ msr);
} else
#endif
{
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index b4c89a1..a32d478 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -303,6 +303,12 @@
*(.branch_lt)
}
+#ifdef CONFIG_DEBUG_INFO_BTF
+ .BTF : AT(ADDR(.BTF) - LOAD_OFFSET) {
+ *(.BTF)
+ }
+#endif
+
.opd : AT(ADDR(.opd) - LOAD_OFFSET) {
__start_opd = .;
KEEP(*(.opd))
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 729a0f1..db3a873 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1817,6 +1817,7 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+ kvmppc_mmu_destroy_pr(vcpu);
free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
kfree(vcpu->arch.shadow_vcpu);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 1af96fb5..302e9dc 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -759,7 +759,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
return 0;
out_vcpu_uninit:
- kvmppc_mmu_destroy(vcpu);
kvmppc_subarch_vcpu_uninit(vcpu);
return err;
}
@@ -792,7 +791,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvmppc_core_vcpu_free(vcpu);
- kvmppc_mmu_destroy(vcpu);
kvmppc_subarch_vcpu_uninit(vcpu);
}
diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S
index c11b0a0..2015c4f 100644
--- a/arch/powerpc/mm/book3s32/hash_low.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -25,12 +25,6 @@
#include <asm/feature-fixups.h>
#include <asm/code-patching-asm.h>
-#ifdef CONFIG_VMAP_STACK
-#define ADDR_OFFSET 0
-#else
-#define ADDR_OFFSET PAGE_OFFSET
-#endif
-
#ifdef CONFIG_SMP
.section .bss
.align 2
@@ -53,8 +47,8 @@
.text
_GLOBAL(hash_page)
#ifdef CONFIG_SMP
- lis r8, (mmu_hash_lock - ADDR_OFFSET)@h
- ori r8, r8, (mmu_hash_lock - ADDR_OFFSET)@l
+ lis r8, (mmu_hash_lock - PAGE_OFFSET)@h
+ ori r8, r8, (mmu_hash_lock - PAGE_OFFSET)@l
lis r0,0x0fff
b 10f
11: lwz r6,0(r8)
@@ -72,12 +66,9 @@
cmplw 0,r4,r0
ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */
mfspr r5, SPRN_SPRG_PGDIR /* phys page-table root */
-#ifdef CONFIG_VMAP_STACK
- tovirt(r5, r5)
-#endif
blt+ 112f /* assume user more likely */
- lis r5, (swapper_pg_dir - ADDR_OFFSET)@ha /* if kernel address, use */
- addi r5 ,r5 ,(swapper_pg_dir - ADDR_OFFSET)@l /* kernel page table */
+ lis r5, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ addi r5 ,r5 ,(swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */
112:
#ifndef CONFIG_PTE_64BIT
@@ -89,9 +80,6 @@
lwzx r8,r8,r5 /* Get L1 entry */
rlwinm. r8,r8,0,0,20 /* extract pt base address */
#endif
-#ifdef CONFIG_VMAP_STACK
- tovirt(r8, r8)
-#endif
#ifdef CONFIG_SMP
beq- hash_page_out /* return if no mapping */
#else
@@ -143,30 +131,36 @@
bne- retry /* retry if someone got there first */
mfsrin r3,r4 /* get segment reg for segment */
+#ifndef CONFIG_VMAP_STACK
mfctr r0
stw r0,_CTR(r11)
+#endif
bl create_hpte /* add the hash table entry */
#ifdef CONFIG_SMP
eieio
- lis r8, (mmu_hash_lock - ADDR_OFFSET)@ha
+ lis r8, (mmu_hash_lock - PAGE_OFFSET)@ha
li r0,0
- stw r0, (mmu_hash_lock - ADDR_OFFSET)@l(r8)
+ stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8)
#endif
+#ifdef CONFIG_VMAP_STACK
+ b fast_hash_page_return
+#else
/* Return from the exception */
lwz r5,_CTR(r11)
mtctr r5
lwz r0,GPR0(r11)
lwz r8,GPR8(r11)
b fast_exception_return
+#endif
#ifdef CONFIG_SMP
hash_page_out:
eieio
- lis r8, (mmu_hash_lock - ADDR_OFFSET)@ha
+ lis r8, (mmu_hash_lock - PAGE_OFFSET)@ha
li r0,0
- stw r0, (mmu_hash_lock - ADDR_OFFSET)@l(r8)
+ stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8)
blr
#endif /* CONFIG_SMP */
@@ -341,7 +335,7 @@
patch_site 1f, patch__hash_page_A1
patch_site 2f, patch__hash_page_A2
/* Get the address of the primary PTE group in the hash table (r3) */
-0: lis r0, (Hash_base - ADDR_OFFSET)@h /* base address of hash table */
+0: lis r0, (Hash_base - PAGE_OFFSET)@h /* base address of hash table */
1: rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
2: rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
xor r3,r3,r0 /* make primary hash */
@@ -355,10 +349,10 @@
beq+ 10f /* no PTE: go look for an empty slot */
tlbie r4
- lis r4, (htab_hash_searches - ADDR_OFFSET)@ha
- lwz r6, (htab_hash_searches - ADDR_OFFSET)@l(r4)
+ lis r4, (htab_hash_searches - PAGE_OFFSET)@ha
+ lwz r6, (htab_hash_searches - PAGE_OFFSET)@l(r4)
addi r6,r6,1 /* count how many searches we do */
- stw r6, (htab_hash_searches - ADDR_OFFSET)@l(r4)
+ stw r6, (htab_hash_searches - PAGE_OFFSET)@l(r4)
/* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */
mtctr r0
@@ -390,10 +384,10 @@
beq+ found_empty
/* update counter of times that the primary PTEG is full */
- lis r4, (primary_pteg_full - ADDR_OFFSET)@ha
- lwz r6, (primary_pteg_full - ADDR_OFFSET)@l(r4)
+ lis r4, (primary_pteg_full - PAGE_OFFSET)@ha
+ lwz r6, (primary_pteg_full - PAGE_OFFSET)@l(r4)
addi r6,r6,1
- stw r6, (primary_pteg_full - ADDR_OFFSET)@l(r4)
+ stw r6, (primary_pteg_full - PAGE_OFFSET)@l(r4)
patch_site 0f, patch__hash_page_C
/* Search the secondary PTEG for an empty slot */
@@ -427,8 +421,8 @@
* lockup here but that shouldn't happen
*/
-1: lis r4, (next_slot - ADDR_OFFSET)@ha /* get next evict slot */
- lwz r6, (next_slot - ADDR_OFFSET)@l(r4)
+1: lis r4, (next_slot - PAGE_OFFSET)@ha /* get next evict slot */
+ lwz r6, (next_slot - PAGE_OFFSET)@l(r4)
addi r6,r6,HPTE_SIZE /* search for candidate */
andi. r6,r6,7*HPTE_SIZE
stw r6,next_slot@l(r4)
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 0a1c65a..f888cbb 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -413,7 +413,7 @@ void __init MMU_init_hw(void)
void __init MMU_init_hw_patch(void)
{
unsigned int hmask = Hash_mask >> (16 - LG_HPTEG_SIZE);
- unsigned int hash;
+ unsigned int hash = (unsigned int)Hash - PAGE_OFFSET;
if (ppc_md.progress)
ppc_md.progress("hash:patch", 0x345);
@@ -425,11 +425,6 @@ void __init MMU_init_hw_patch(void)
/*
* Patch up the instructions in hashtable.S:create_hpte
*/
- if (IS_ENABLED(CONFIG_VMAP_STACK))
- hash = (unsigned int)Hash;
- else
- hash = (unsigned int)Hash - PAGE_OFFSET;
-
modify_instruction_site(&patch__hash_page_A0, 0xffff, hash >> 16);
modify_instruction_site(&patch__hash_page_A1, 0x7c0, hash_mb << 6);
modify_instruction_site(&patch__hash_page_A2, 0x7c0, hash_mb2 << 6);
@@ -439,8 +434,7 @@ void __init MMU_init_hw_patch(void)
/*
* Patch up the instructions in hashtable.S:flush_hash_page
*/
- modify_instruction_site(&patch__flush_hash_A0, 0xffff,
- ((unsigned int)Hash - PAGE_OFFSET) >> 16);
+ modify_instruction_site(&patch__flush_hash_A0, 0xffff, hash >> 16);
modify_instruction_site(&patch__flush_hash_A1, 0x7c0, hash_mb << 6);
modify_instruction_site(&patch__flush_hash_A2, 0x7c0, hash_mb2 << 6);
modify_instruction_site(&patch__flush_hash_B, 0xffff, hmask);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 73d4873f..33b3461 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -53,20 +53,24 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
if (pshift >= pdshift) {
cachep = PGT_CACHE(PTE_T_ORDER);
num_hugepd = 1 << (pshift - pdshift);
+ new = NULL;
} else if (IS_ENABLED(CONFIG_PPC_8xx)) {
- cachep = PGT_CACHE(PTE_INDEX_SIZE);
+ cachep = NULL;
num_hugepd = 1;
+ new = pte_alloc_one(mm);
} else {
cachep = PGT_CACHE(pdshift - pshift);
num_hugepd = 1;
+ new = NULL;
}
- if (!cachep) {
+ if (!cachep && !new) {
WARN_ONCE(1, "No page table cache created for hugetlb tables");
return -ENOMEM;
}
- new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));
+ if (cachep)
+ new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));
BUG_ON(pshift > HUGEPD_SHIFT_MASK);
BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
@@ -97,7 +101,10 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
if (i < num_hugepd) {
for (i = i - 1 ; i >= 0; i--, hpdp--)
*hpdp = __hugepd(0);
- kmem_cache_free(cachep, new);
+ if (cachep)
+ kmem_cache_free(cachep, new);
+ else
+ pte_free(mm, new);
} else {
kmemleak_ignore(new);
}
@@ -324,8 +331,7 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
if (shift >= pdshift)
hugepd_free(tlb, hugepte);
else if (IS_ENABLED(CONFIG_PPC_8xx))
- pgtable_free_tlb(tlb, hugepte,
- get_hugepd_cache_index(PTE_INDEX_SIZE));
+ pgtable_free_tlb(tlb, hugepte, 0);
else
pgtable_free_tlb(tlb, hugepte,
get_hugepd_cache_index(pdshift - shift));
@@ -639,12 +645,13 @@ static int __init hugetlbpage_init(void)
* if we have pdshift and shift value same, we don't
* use pgt cache for hugepd.
*/
- if (pdshift > shift && IS_ENABLED(CONFIG_PPC_8xx))
- pgtable_cache_add(PTE_INDEX_SIZE);
- else if (pdshift > shift)
- pgtable_cache_add(pdshift - shift);
- else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || IS_ENABLED(CONFIG_PPC_8xx))
+ if (pdshift > shift) {
+ if (!IS_ENABLED(CONFIG_PPC_8xx))
+ pgtable_cache_add(pdshift - shift);
+ } else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) ||
+ IS_ENABLED(CONFIG_PPC_8xx)) {
pgtable_cache_add(PTE_T_ORDER);
+ }
configured = true;
}
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index 16dd95b..d2bed3f 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -120,12 +120,6 @@ static void __init kasan_unmap_early_shadow_vmalloc(void)
unsigned long k_cur;
phys_addr_t pa = __pa(kasan_early_shadow_page);
- if (!early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
- int ret = kasan_init_shadow_page_tables(k_start, k_end);
-
- if (ret)
- panic("kasan: kasan_init_shadow_page_tables() failed");
- }
for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur);
pte_t *ptep = pte_offset_kernel(pmd, k_cur);
@@ -143,7 +137,8 @@ void __init kasan_mmu_init(void)
int ret;
struct memblock_region *reg;
- if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ||
+ IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
if (ret)
@@ -185,8 +180,7 @@ u8 __initdata early_hash[256 << 10] __aligned(256 << 10) = {0};
static void __init kasan_early_hash_table(void)
{
- unsigned int hash = IS_ENABLED(CONFIG_VMAP_STACK) ? (unsigned int)early_hash :
- __pa(early_hash);
+ unsigned int hash = __pa(early_hash);
modify_instruction_site(&patch__hash_page_A0, 0xffff, hash >> 16);
modify_instruction_site(&patch__flush_hash_A0, 0xffff, hash >> 16);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index ef7b111..1c07d5a 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -373,7 +373,9 @@ static inline bool flush_coherent_icache(unsigned long addr)
*/
if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
mb(); /* sync */
+ allow_read_from_user((const void __user *)addr, L1_CACHE_BYTES);
icbi((void *)addr);
+ prevent_read_from_user((const void __user *)addr, L1_CACHE_BYTES);
mb(); /* sync */
isync();
return true;
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index e8c84d26..0ec9640 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -3435,6 +3435,11 @@ getstring(char *s, int size)
int c;
c = skipbl();
+ if (c == '\n') {
+ *s = 0;
+ return;
+ }
+
do {
if( size > 1 ){
*s++ = c;
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 73f029e..cd5db57 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -50,7 +50,6 @@
select PCI_DOMAINS_GENERIC if PCI
select PCI_MSI if PCI
select RISCV_TIMER
- select UACCESS_MEMCPY if !MMU
select GENERIC_IRQ_MULTI_HANDLER
select GENERIC_ARCH_TOPOLOGY if SMP
select ARCH_HAS_PTE_SPECIAL
@@ -121,6 +120,7 @@
config ARCH_SPARSEMEM_ENABLE
def_bool y
+ depends on MMU
select SPARSEMEM_VMEMMAP_ENABLE
config ARCH_SELECT_MEMORY_MODEL
diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
index d325b67..a131174a 100644
--- a/arch/riscv/Kconfig.socs
+++ b/arch/riscv/Kconfig.socs
@@ -10,4 +10,14 @@
help
This enables support for SiFive SoC platform hardware.
+config SOC_VIRT
+ bool "QEMU Virt Machine"
+ select POWER_RESET_SYSCON
+ select POWER_RESET_SYSCON_POWEROFF
+ select GOLDFISH
+ select RTC_DRV_GOLDFISH
+ select SIFIVE_PLIC
+ help
+ This enables support for QEMU Virt Machine.
+
endmenu
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index b9009a2..259cb53 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -13,8 +13,10 @@
ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
LDFLAGS_vmlinux := --no-relax
endif
-KBUILD_AFLAGS_MODULE += -fPIC
-KBUILD_CFLAGS_MODULE += -fPIC
+
+ifeq ($(CONFIG_64BIT)$(CONFIG_CMODEL_MEDLOW),yy)
+KBUILD_CFLAGS_MODULE += -mcmodel=medany
+endif
export BITS
ifeq ($(CONFIG_ARCH_RV64I),y)
diff --git a/arch/riscv/boot/.gitignore b/arch/riscv/boot/.gitignore
index 8dab0bb..8a45a37 100644
--- a/arch/riscv/boot/.gitignore
+++ b/arch/riscv/boot/.gitignore
@@ -1,2 +1,4 @@
Image
Image.gz
+loader
+loader.lds
diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
index 609198c..4a2729f 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
@@ -2,6 +2,7 @@
/* Copyright (c) 2018-2019 SiFive, Inc */
#include "fu540-c000.dtsi"
+#include <dt-bindings/gpio/gpio.h>
/* Clock frequency (in Hz) of the PCB crystal for rtcclk */
#define RTCCLK_FREQ 1000000
@@ -41,6 +42,10 @@ rtcclk: rtcclk {
clock-frequency = <RTCCLK_FREQ>;
clock-output-names = "rtcclk";
};
+ gpio-restart {
+ compatible = "gpio-restart";
+ gpios = <&gpio 10 GPIO_ACTIVE_LOW>;
+ };
};
&uart0 {
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index e2ff95c..2557c53 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -15,6 +15,7 @@
CONFIG_EXPERT=y
CONFIG_BPF_SYSCALL=y
CONFIG_SOC_SIFIVE=y
+CONFIG_SOC_VIRT=y
CONFIG_SMP=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
@@ -63,6 +64,7 @@
CONFIG_SPI=y
CONFIG_SPI_SIFIVE=y
# CONFIG_PTP_1588_CLOCK is not set
+CONFIG_POWER_RESET=y
CONFIG_DRM=y
CONFIG_DRM_RADEON=y
CONFIG_DRM_VIRTIO_GPU=y
@@ -78,6 +80,7 @@
CONFIG_USB_UAS=y
CONFIG_MMC=y
CONFIG_MMC_SPI=y
+CONFIG_RTC_CLASS=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_BALLOON=y
CONFIG_VIRTIO_INPUT=y
@@ -102,13 +105,13 @@
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_SCHED_STACK_END_CHECK=y
CONFIG_DEBUG_VM=y
CONFIG_DEBUG_VM_PGFLAGS=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEBUG_PER_CPU_MAPS=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_WQ_WATCHDOG=y
-CONFIG_SCHED_STACK_END_CHECK=y
CONFIG_DEBUG_TIMEKEEPING=y
CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_DEBUG_SPINLOCK=y
diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
index eb51940..0292879 100644
--- a/arch/riscv/configs/rv32_defconfig
+++ b/arch/riscv/configs/rv32_defconfig
@@ -14,6 +14,7 @@
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
CONFIG_BPF_SYSCALL=y
+CONFIG_SOC_VIRT=y
CONFIG_ARCH_RV32I=y
CONFIG_SMP=y
CONFIG_MODULES=y
@@ -61,6 +62,7 @@
CONFIG_HW_RANDOM=y
CONFIG_HW_RANDOM_VIRTIO=y
# CONFIG_PTP_1588_CLOCK is not set
+CONFIG_POWER_RESET=y
CONFIG_DRM=y
CONFIG_DRM_RADEON=y
CONFIG_DRM_VIRTIO_GPU=y
@@ -74,13 +76,13 @@
CONFIG_USB_OHCI_HCD_PLATFORM=y
CONFIG_USB_STORAGE=y
CONFIG_USB_UAS=y
+CONFIG_RTC_CLASS=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_BALLOON=y
CONFIG_VIRTIO_INPUT=y
CONFIG_VIRTIO_MMIO=y
CONFIG_RPMSG_CHAR=y
CONFIG_RPMSG_VIRTIO=y
-CONFIG_SIFIVE_PLIC=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_AUTOFS4_FS=y
@@ -99,13 +101,13 @@
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_SCHED_STACK_END_CHECK=y
CONFIG_DEBUG_VM=y
CONFIG_DEBUG_VM_PGFLAGS=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEBUG_PER_CPU_MAPS=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_WQ_WATCHDOG=y
-CONFIG_SCHED_STACK_END_CHECK=y
CONFIG_DEBUG_TIMEKEEPING=y
CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_DEBUG_SPINLOCK=y
diff --git a/arch/riscv/include/asm/clint.h b/arch/riscv/include/asm/clint.h
index 6eaa2ee..a279b17 100644
--- a/arch/riscv/include/asm/clint.h
+++ b/arch/riscv/include/asm/clint.h
@@ -15,12 +15,12 @@ static inline void clint_send_ipi_single(unsigned long hartid)
writel(1, clint_ipi_base + hartid);
}
-static inline void clint_send_ipi_mask(const struct cpumask *hartid_mask)
+static inline void clint_send_ipi_mask(const struct cpumask *mask)
{
- int hartid;
+ int cpu;
- for_each_cpu(hartid, hartid_mask)
- clint_send_ipi_single(hartid);
+ for_each_cpu(cpu, mask)
+ clint_send_ipi_single(cpuid_to_hartid_map(cpu));
}
static inline void clint_clear_ipi(unsigned long hartid)
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 435b655..8e18d2c 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -72,6 +72,16 @@
#define EXC_LOAD_PAGE_FAULT 13
#define EXC_STORE_PAGE_FAULT 15
+/* PMP configuration */
+#define PMP_R 0x01
+#define PMP_W 0x02
+#define PMP_X 0x04
+#define PMP_A 0x18
+#define PMP_A_TOR 0x08
+#define PMP_A_NA4 0x10
+#define PMP_A_NAPOT 0x18
+#define PMP_L 0x80
+
/* symbolic CSR names: */
#define CSR_CYCLE 0xc00
#define CSR_TIME 0xc01
@@ -100,6 +110,8 @@
#define CSR_MCAUSE 0x342
#define CSR_MTVAL 0x343
#define CSR_MIP 0x344
+#define CSR_PMPCFG0 0x3a0
+#define CSR_PMPADDR0 0x3b0
#define CSR_MHARTID 0xf14
#ifdef CONFIG_RISCV_M_MODE
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index e430415..393f201 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -19,6 +19,47 @@
#include <asm/tlbflush.h>
#include <linux/mm_types.h>
+#ifdef CONFIG_MMU
+
+#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
+#define VMALLOC_END (PAGE_OFFSET - 1)
+#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
+
+#define BPF_JIT_REGION_SIZE (SZ_128M)
+#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
+#define BPF_JIT_REGION_END (VMALLOC_END)
+
+/*
+ * Roughly size the vmemmap space to be large enough to fit enough
+ * struct pages to map half the virtual address space. Then
+ * position vmemmap directly below the VMALLOC region.
+ */
+#define VMEMMAP_SHIFT \
+ (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
+#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
+#define VMEMMAP_END (VMALLOC_START - 1)
+#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE)
+
+/*
+ * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
+ * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
+ */
+#define vmemmap ((struct page *)VMEMMAP_START)
+
+#define PCI_IO_SIZE SZ_16M
+#define PCI_IO_END VMEMMAP_START
+#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
+
+#define FIXADDR_TOP PCI_IO_START
+#ifdef CONFIG_64BIT
+#define FIXADDR_SIZE PMD_SIZE
+#else
+#define FIXADDR_SIZE PGDIR_SIZE
+#endif
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+
+#endif
+
#ifdef CONFIG_64BIT
#include <asm/pgtable-64.h>
#else
@@ -90,31 +131,6 @@ extern pgd_t swapper_pg_dir[];
#define __S110 PAGE_SHARED_EXEC
#define __S111 PAGE_SHARED_EXEC
-#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
-#define VMALLOC_END (PAGE_OFFSET - 1)
-#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
-
-#define BPF_JIT_REGION_SIZE (SZ_128M)
-#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
-#define BPF_JIT_REGION_END (VMALLOC_END)
-
-/*
- * Roughly size the vmemmap space to be large enough to fit enough
- * struct pages to map half the virtual address space. Then
- * position vmemmap directly below the VMALLOC region.
- */
-#define VMEMMAP_SHIFT \
- (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
-#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
-#define VMEMMAP_END (VMALLOC_START - 1)
-#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE)
-
-/*
- * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
- * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
- */
-#define vmemmap ((struct page *)VMEMMAP_START)
-
static inline int pmd_present(pmd_t pmd)
{
return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE));
@@ -432,18 +448,6 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-#define PCI_IO_SIZE SZ_16M
-#define PCI_IO_END VMEMMAP_START
-#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
-
-#define FIXADDR_TOP PCI_IO_START
-#ifdef CONFIG_64BIT
-#define FIXADDR_SIZE PMD_SIZE
-#else
-#define FIXADDR_SIZE PGDIR_SIZE
-#endif
-#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
-
/*
* Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
* Note that PGDIR_SIZE must evenly divide TASK_SIZE.
diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
index 42347d0..49350c8b 100644
--- a/arch/riscv/include/asm/syscall.h
+++ b/arch/riscv/include/asm/syscall.h
@@ -28,13 +28,6 @@ static inline int syscall_get_nr(struct task_struct *task,
return regs->a7;
}
-static inline void syscall_set_nr(struct task_struct *task,
- struct pt_regs *regs,
- int sysno)
-{
- regs->a7 = sysno;
-}
-
static inline void syscall_rollback(struct task_struct *task,
struct pt_regs *regs)
{
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index f462a18..8ce9d60 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -11,6 +11,24 @@
/*
* User space memory access functions
*/
+
+extern unsigned long __must_check __asm_copy_to_user(void __user *to,
+ const void *from, unsigned long n);
+extern unsigned long __must_check __asm_copy_from_user(void *to,
+ const void __user *from, unsigned long n);
+
+static inline unsigned long
+raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ return __asm_copy_from_user(to, from, n);
+}
+
+static inline unsigned long
+raw_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ return __asm_copy_to_user(to, from, n);
+}
+
#ifdef CONFIG_MMU
#include <linux/errno.h>
#include <linux/compiler.h>
@@ -367,24 +385,6 @@ do { \
-EFAULT; \
})
-
-extern unsigned long __must_check __asm_copy_to_user(void __user *to,
- const void *from, unsigned long n);
-extern unsigned long __must_check __asm_copy_from_user(void *to,
- const void __user *from, unsigned long n);
-
-static inline unsigned long
-raw_copy_from_user(void *to, const void __user *from, unsigned long n)
-{
- return __asm_copy_from_user(to, from, n);
-}
-
-static inline unsigned long
-raw_copy_to_user(void __user *to, const void *from, unsigned long n)
-{
- return __asm_copy_to_user(to, from, n);
-}
-
extern long strncpy_from_user(char *dest, const char __user *src, long count);
extern long __must_check strlen_user(const char __user *str);
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index bad4d85..208702d 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -229,19 +229,12 @@
li t0, __NR_syscalls
la s0, sys_ni_syscall
/*
- * The tracer can change syscall number to valid/invalid value.
- * We use syscall_set_nr helper in syscall_trace_enter thus we
- * cannot trust the current value in a7 and have to reload from
- * the current task pt_regs.
- */
- REG_L a7, PT_A7(sp)
- /*
* Syscall number held in a7.
* If syscall number is above allowed value, redirect to ni_syscall.
*/
bge a7, t0, 1f
/*
- * Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1.
+ * Check if syscall is rejected by tracer, i.e., a7 == -1.
* If yes, we pretend it was executed.
*/
li t1, -1
@@ -334,6 +327,7 @@
handle_syscall_trace_enter:
move a0, sp
call do_syscall_trace_enter
+ move t0, a0
REG_L a0, PT_A0(sp)
REG_L a1, PT_A1(sp)
REG_L a2, PT_A2(sp)
@@ -342,6 +336,7 @@
REG_L a5, PT_A5(sp)
REG_L a6, PT_A6(sp)
REG_L a7, PT_A7(sp)
+ bnez t0, ret_from_syscall_rejected
j check_syscall_nr
handle_syscall_trace_exit:
move a0, sp
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 271860f..85f2073 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -58,6 +58,12 @@
/* Reset all registers except ra, a0, a1 */
call reset_regs
+ /* Setup a PMP to permit access to all of memory. */
+ li a0, -1
+ csrw CSR_PMPADDR0, a0
+ li a0, (PMP_A_NAPOT | PMP_R | PMP_W | PMP_X)
+ csrw CSR_PMPCFG0, a0
+
/*
* The hartid in a0 is expected later on, and we have no firmware
* to hand it to us.
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index b740185..8bbe5db 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -8,6 +8,10 @@
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/moduleloader.h>
+#include <linux/vmalloc.h>
+#include <linux/sizes.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v)
{
@@ -386,3 +390,15 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
return 0;
}
+
+#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
+#define VMALLOC_MODULE_START \
+ max(PFN_ALIGN((unsigned long)&_end - SZ_2G), VMALLOC_START)
+void *module_alloc(unsigned long size)
+{
+ return __vmalloc_node_range(size, 1, VMALLOC_MODULE_START,
+ VMALLOC_END, GFP_KERNEL,
+ PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+ __builtin_return_address(0));
+}
+#endif
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 4074642..444dc7b 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -148,21 +148,19 @@ long arch_ptrace(struct task_struct *child, long request,
* Allows PTRACE_SYSCALL to work. These are called from entry.S in
* {handle,ret_from}_syscall.
*/
-__visible void do_syscall_trace_enter(struct pt_regs *regs)
+__visible int do_syscall_trace_enter(struct pt_regs *regs)
{
if (test_thread_flag(TIF_SYSCALL_TRACE))
if (tracehook_report_syscall_entry(regs))
- syscall_set_nr(current, regs, -1);
+ return -1;
/*
* Do the secure computing after ptrace; failures should be fast.
* If this fails we might have return value in a0 from seccomp
* (via SECCOMP_RET_ERRNO/TRACE).
*/
- if (secure_computing() == -1) {
- syscall_set_nr(current, regs, -1);
- return;
- }
+ if (secure_computing() == -1)
+ return -1;
#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
@@ -170,6 +168,7 @@ __visible void do_syscall_trace_enter(struct pt_regs *regs)
#endif
audit_syscall_entry(regs->a7, regs->a0, regs->a1, regs->a2, regs->a3);
+ return 0;
}
__visible void do_syscall_trace_exit(struct pt_regs *regs)
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index eb878ab..e0a6293 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -96,7 +96,7 @@ static void send_ipi_mask(const struct cpumask *mask, enum ipi_message_type op)
if (IS_ENABLED(CONFIG_RISCV_SBI))
sbi_send_ipi(cpumask_bits(&hartid_mask));
else
- clint_send_ipi_mask(&hartid_mask);
+ clint_send_ipi_mask(mask);
}
static void send_ipi_single(int cpu, enum ipi_message_type op)
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index f4cad51..ffb3d94 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -156,6 +156,6 @@ void __init trap_init(void)
csr_write(CSR_SCRATCH, 0);
/* Set the exception vector address */
csr_write(CSR_TVEC, &handle_exception);
- /* Enable all interrupts */
- csr_write(CSR_IE, -1);
+ /* Enable interrupts */
+ csr_write(CSR_IE, IE_SIE | IE_EIE);
}
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 47e7a82..0d0db80 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -2,5 +2,5 @@
lib-y += delay.o
lib-y += memcpy.o
lib-y += memset.o
-lib-$(CONFIG_MMU) += uaccess.o
+lib-y += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 965a8cf4..fab8559 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -131,7 +131,7 @@ void __init setup_bootmem(void)
for_each_memblock(memory, reg) {
phys_addr_t end = reg->base + reg->size;
- if (reg->base <= vmlinux_end && vmlinux_end <= end) {
+ if (reg->base <= vmlinux_start && vmlinux_end <= end) {
mem_size = min(reg->size, (phys_addr_t)-PAGE_OFFSET);
/*
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index f0cc860..ec0ca90 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -19,18 +19,20 @@ asmlinkage void __init kasan_early_init(void)
for (i = 0; i < PTRS_PER_PTE; ++i)
set_pte(kasan_early_shadow_pte + i,
mk_pte(virt_to_page(kasan_early_shadow_page),
- PAGE_KERNEL));
+ PAGE_KERNEL));
for (i = 0; i < PTRS_PER_PMD; ++i)
set_pmd(kasan_early_shadow_pmd + i,
- pfn_pmd(PFN_DOWN(__pa((uintptr_t)kasan_early_shadow_pte)),
- __pgprot(_PAGE_TABLE)));
+ pfn_pmd(PFN_DOWN
+ (__pa((uintptr_t) kasan_early_shadow_pte)),
+ __pgprot(_PAGE_TABLE)));
for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
i += PGDIR_SIZE, ++pgd)
set_pgd(pgd,
- pfn_pgd(PFN_DOWN(__pa(((uintptr_t)kasan_early_shadow_pmd))),
- __pgprot(_PAGE_TABLE)));
+ pfn_pgd(PFN_DOWN
+ (__pa(((uintptr_t) kasan_early_shadow_pmd))),
+ __pgprot(_PAGE_TABLE)));
/* init for swapper_pg_dir */
pgd = pgd_offset_k(KASAN_SHADOW_START);
@@ -38,37 +40,43 @@ asmlinkage void __init kasan_early_init(void)
for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
i += PGDIR_SIZE, ++pgd)
set_pgd(pgd,
- pfn_pgd(PFN_DOWN(__pa(((uintptr_t)kasan_early_shadow_pmd))),
- __pgprot(_PAGE_TABLE)));
+ pfn_pgd(PFN_DOWN
+ (__pa(((uintptr_t) kasan_early_shadow_pmd))),
+ __pgprot(_PAGE_TABLE)));
flush_tlb_all();
}
static void __init populate(void *start, void *end)
{
- unsigned long i;
+ unsigned long i, offset;
unsigned long vaddr = (unsigned long)start & PAGE_MASK;
unsigned long vend = PAGE_ALIGN((unsigned long)end);
unsigned long n_pages = (vend - vaddr) / PAGE_SIZE;
+ unsigned long n_ptes =
+ ((n_pages + PTRS_PER_PTE) & -PTRS_PER_PTE) / PTRS_PER_PTE;
unsigned long n_pmds =
- (n_pages % PTRS_PER_PTE) ? n_pages / PTRS_PER_PTE + 1 :
- n_pages / PTRS_PER_PTE;
+ ((n_ptes + PTRS_PER_PMD) & -PTRS_PER_PMD) / PTRS_PER_PMD;
+
+ pte_t *pte =
+ memblock_alloc(n_ptes * PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE);
+ pmd_t *pmd =
+ memblock_alloc(n_pmds * PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
pgd_t *pgd = pgd_offset_k(vaddr);
- pmd_t *pmd = memblock_alloc(n_pmds * sizeof(pmd_t), PAGE_SIZE);
- pte_t *pte = memblock_alloc(n_pages * sizeof(pte_t), PAGE_SIZE);
for (i = 0; i < n_pages; i++) {
phys_addr_t phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
-
- set_pte(pte + i, pfn_pte(PHYS_PFN(phys), PAGE_KERNEL));
+ set_pte(&pte[i], pfn_pte(PHYS_PFN(phys), PAGE_KERNEL));
}
- for (i = 0; i < n_pmds; ++pgd, i += PTRS_PER_PMD)
- set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(((uintptr_t)(pmd + i)))),
+ for (i = 0, offset = 0; i < n_ptes; i++, offset += PTRS_PER_PTE)
+ set_pmd(&pmd[i],
+ pfn_pmd(PFN_DOWN(__pa(&pte[offset])),
__pgprot(_PAGE_TABLE)));
- for (i = 0; i < n_pages; ++pmd, i += PTRS_PER_PTE)
- set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa((uintptr_t)(pte + i))),
+ for (i = 0, offset = 0; i < n_pmds; i++, offset += PTRS_PER_PMD)
+ set_pgd(&pgd[i],
+ pfn_pgd(PFN_DOWN(__pa(&pmd[offset])),
__pgprot(_PAGE_TABLE)));
flush_tlb_all();
@@ -81,7 +89,8 @@ void __init kasan_init(void)
unsigned long i;
kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
- (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
+ (void *)kasan_mem_to_shadow((void *)
+ VMALLOC_END));
for_each_memblock(memory, reg) {
void *start = (void *)__va(reg->base);
@@ -90,14 +99,14 @@ void __init kasan_init(void)
if (start >= end)
break;
- populate(kasan_mem_to_shadow(start),
- kasan_mem_to_shadow(end));
+ populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end));
};
for (i = 0; i < PTRS_PER_PTE; i++)
set_pte(&kasan_early_shadow_pte[i],
mk_pte(virt_to_page(kasan_early_shadow_page),
- __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_ACCESSED)));
+ __pgprot(_PAGE_PRESENT | _PAGE_READ |
+ _PAGE_ACCESSED)));
memset(kasan_early_shadow_page, 0, PAGE_SIZE);
init_task.kasan_depth = 0;
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index e0e3a465..8dfa2cf 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -146,7 +146,7 @@
#KBUILD_IMAGE is necessary for packaging targets like rpm-pkg, deb-pkg...
KBUILD_IMAGE := $(boot)/bzImage
-install: vmlinux
+install:
$(Q)$(MAKE) $(build)=$(boot) $@
bzImage: vmlinux
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index e2c47d3..0ff9261 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -70,7 +70,7 @@
$(obj)/startup.a: $(OBJECTS) FORCE
$(call if_changed,ar)
-install: $(CONFIGURE) $(obj)/bzImage
+install:
sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
System.map "$(INSTALL_PATH)"
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index 5d12352..5591243 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -75,7 +75,7 @@ static unsigned long get_random(unsigned long limit)
*(unsigned long *) prng.parm_block ^= seed;
for (i = 0; i < 16; i++) {
cpacf_kmc(CPACF_KMC_PRNG, prng.parm_block,
- (char *) entropy, (char *) entropy,
+ (u8 *) entropy, (u8 *) entropy,
sizeof(entropy));
memcpy(prng.parm_block, entropy, sizeof(entropy));
}
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index ed007f4..3f50115 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -15,7 +15,8 @@ void uv_query_info(void)
if (!test_facility(158))
return;
- if (uv_call(0, (uint64_t)&uvcb))
+ /* rc==0x100 means that there is additional data we do not process */
+ if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100)
return;
if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) &&
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 2e60c80..0c86ba1 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -53,6 +53,7 @@
CONFIG_CRASH_DUMP=y
CONFIG_HIBERNATION=y
CONFIG_PM_DEBUG=y
+CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
@@ -474,7 +475,6 @@
# CONFIG_NET_VENDOR_EMULEX is not set
# CONFIG_NET_VENDOR_EZCHIP is not set
# CONFIG_NET_VENDOR_GOOGLE is not set
-# CONFIG_NET_VENDOR_HP is not set
# CONFIG_NET_VENDOR_HUAWEI is not set
# CONFIG_NET_VENDOR_INTEL is not set
# CONFIG_NET_VENDOR_MARVELL is not set
@@ -684,7 +684,6 @@
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_XXHASH=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
@@ -748,7 +747,6 @@
CONFIG_GDB_SCRIPTS=y
CONFIG_FRAME_WARN=1024
CONFIG_HEADERS_INSTALL=y
-CONFIG_HEADERS_CHECK=y
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_PAGEALLOC=y
@@ -772,9 +770,9 @@
CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
CONFIG_DEBUG_PER_CPU_MAPS=y
CONFIG_DEBUG_SHIRQ=y
+CONFIG_PANIC_ON_OOPS=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_WQ_WATCHDOG=y
-CONFIG_PANIC_ON_OOPS=y
CONFIG_DEBUG_TIMEKEEPING=y
CONFIG_PROVE_LOCKING=y
CONFIG_LOCK_STAT=y
@@ -783,9 +781,20 @@
CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_DEBUG_CREDENTIALS=y
CONFIG_RCU_TORTURE_TEST=m
CONFIG_RCU_CPU_STALL_TIMEOUT=300
+CONFIG_LATENCYTOP=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_STACK_TRACER=y
+CONFIG_IRQSOFF_TRACER=y
+CONFIG_PREEMPT_TRACER=y
+CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_HIST_TRIGGERS=y
+CONFIG_S390_PTDUMP=y
CONFIG_NOTIFIER_ERROR_INJECTION=m
CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m
CONFIG_FAULT_INJECTION=y
@@ -796,15 +805,6 @@
CONFIG_FAIL_FUTEX=y
CONFIG_FAULT_INJECTION_DEBUG_FS=y
CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
-CONFIG_LATENCYTOP=y
-CONFIG_IRQSOFF_TRACER=y
-CONFIG_PREEMPT_TRACER=y
-CONFIG_SCHED_TRACER=y
-CONFIG_FTRACE_SYSCALLS=y
-CONFIG_STACK_TRACER=y
-CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_FUNCTION_PROFILER=y
-CONFIG_HIST_TRIGGERS=y
CONFIG_LKDTM=m
CONFIG_TEST_LIST_SORT=y
CONFIG_TEST_SORT=y
@@ -814,5 +814,3 @@
CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
CONFIG_TEST_BPF=m
-CONFIG_BUG_ON_DATA_CORRUPTION=y
-CONFIG_S390_PTDUMP=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 25f7998..6b27d86 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -53,6 +53,7 @@
CONFIG_CRASH_DUMP=y
CONFIG_HIBERNATION=y
CONFIG_PM_DEBUG=y
+CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
@@ -470,7 +471,6 @@
# CONFIG_NET_VENDOR_EMULEX is not set
# CONFIG_NET_VENDOR_EZCHIP is not set
# CONFIG_NET_VENDOR_GOOGLE is not set
-# CONFIG_NET_VENDOR_HP is not set
# CONFIG_NET_VENDOR_HUAWEI is not set
# CONFIG_NET_VENDOR_INTEL is not set
# CONFIG_NET_VENDOR_MARVELL is not set
@@ -677,7 +677,6 @@
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_XXHASH=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
@@ -739,18 +738,18 @@
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_PANIC_ON_OOPS=y
+CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_RCU_TORTURE_TEST=m
CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_LATENCYTOP=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_STACK_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
-CONFIG_STACK_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_FUNCTION_PROFILER=y
CONFIG_HIST_TRIGGERS=y
+CONFIG_S390_PTDUMP=y
CONFIG_LKDTM=m
CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
CONFIG_TEST_BPF=m
-CONFIG_BUG_ON_DATA_CORRUPTION=y
-CONFIG_S390_PTDUMP=y
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 85e944f..1019efd 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -42,7 +42,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end);
static inline void storage_key_init_range(unsigned long start, unsigned long end)
{
- if (PAGE_DEFAULT_KEY)
+ if (PAGE_DEFAULT_KEY != 0)
__storage_key_init_range(start, end);
}
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 137a392..6d7c3b7 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -752,6 +752,12 @@ static inline int pmd_write(pmd_t pmd)
return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0;
}
+#define pud_write pud_write
+static inline int pud_write(pud_t pud)
+{
+ return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0;
+}
+
static inline int pmd_dirty(pmd_t pmd)
{
return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 361ef5e..aadb3d0 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -84,7 +84,6 @@ void s390_update_cpu_mhz(void);
void cpu_detect_mhz_feature(void);
extern const struct seq_operations cpuinfo_op;
-extern int sysctl_ieee_emulation_warnings;
extern void execve_tail(void);
extern void __bpon(void);
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 71e3f01..1e3517b 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -201,7 +201,7 @@ struct slib {
* @scount: SBAL count
* @sflags: whole SBAL flags
* @length: length
- * @addr: address
+ * @addr: absolute data address
*/
struct qdio_buffer_element {
u8 eflags;
@@ -211,7 +211,7 @@ struct qdio_buffer_element {
u8 scount;
u8 sflags;
u32 length;
- void *addr;
+ u64 addr;
} __attribute__ ((packed, aligned(16)));
/**
@@ -227,7 +227,7 @@ struct qdio_buffer {
* @sbal: absolute SBAL address
*/
struct sl_element {
- unsigned long sbal;
+ u64 sbal;
} __attribute__ ((packed));
/**
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 670f14a..6bf3a45 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -155,7 +155,7 @@ static inline void get_tod_clock_ext(char *clk)
static inline unsigned long long get_tod_clock(void)
{
- unsigned char clk[STORE_CLOCK_EXT_SIZE];
+ char clk[STORE_CLOCK_EXT_SIZE];
get_tod_clock_ext(clk);
return *((unsigned long long *)&clk[1]);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d7ff30e..c2e6d4b 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -3268,7 +3268,10 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
/* Initial reset is a superset of the normal reset */
kvm_arch_vcpu_ioctl_normal_reset(vcpu);
- /* this equals initial cpu reset in pop, but we don't switch to ESA */
+ /*
+ * This equals initial cpu reset in pop, but we don't switch to ESA.
+ * We do not only reset the internal data, but also ...
+ */
vcpu->arch.sie_block->gpsw.mask = 0;
vcpu->arch.sie_block->gpsw.addr = 0;
kvm_s390_set_prefix(vcpu, 0);
@@ -3278,6 +3281,19 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
+
+ /* ... the data in sync regs */
+ memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
+ vcpu->run->s.regs.ckc = 0;
+ vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
+ vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
+ vcpu->run->psw_addr = 0;
+ vcpu->run->psw_mask = 0;
+ vcpu->run->s.regs.todpr = 0;
+ vcpu->run->s.regs.cputm = 0;
+ vcpu->run->s.regs.ckc = 0;
+ vcpu->run->s.regs.pp = 0;
+ vcpu->run->s.regs.gbea = 1;
vcpu->run->s.regs.fpc = 0;
vcpu->arch.sie_block->gbea = 1;
vcpu->arch.sie_block->pp = 0;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index bc61ea1..60716d1 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -424,7 +424,7 @@ static void zpci_map_resources(struct pci_dev *pdev)
if (zpci_use_mio(zdev))
pdev->resource[i].start =
- (resource_size_t __force) zdev->bars[i].mio_wb;
+ (resource_size_t __force) zdev->bars[i].mio_wt;
else
pdev->resource[i].start = (resource_size_t __force)
pci_iomap_range_fh(pdev, i, 0, 0);
@@ -531,7 +531,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev,
flags |= IORESOURCE_MEM_64;
if (zpci_use_mio(zdev))
- addr = (unsigned long) zdev->bars[i].mio_wb;
+ addr = (unsigned long) zdev->bars[i].mio_wt;
else
addr = ZPCI_ADDR(entry);
size = 1UL << zdev->bars[i].size;
diff --git a/arch/unicore32/include/asm/io.h b/arch/unicore32/include/asm/io.h
index 3ca74e1..bd4e7c3 100644
--- a/arch/unicore32/include/asm/io.h
+++ b/arch/unicore32/include/asm/io.h
@@ -27,7 +27,7 @@ extern void __uc32_iounmap(volatile void __iomem *addr);
* ioremap and friends.
*
* ioremap takes a PCI memory address, as specified in
- * Documentation/io-mapping.txt.
+ * Documentation/driver-api/io-mapping.rst.
*
*/
#define ioremap(cookie, size) __uc32_ioremap(cookie, size)
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 94df086..513a555 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -194,9 +194,10 @@
avx512_instr :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1)
sha1_ni_instr :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA1_NI=1)
sha256_ni_instr :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA256_NI=1)
+adx_instr := $(call as-instr,adox %r10$(comma)%r10,-DCONFIG_AS_ADX=1)
-KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr)
-KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr)
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr)
KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
diff --git a/arch/x86/boot/compressed/kaslr_64.c b/arch/x86/boot/compressed/kaslr_64.c
index 748456c..9557c5a 100644
--- a/arch/x86/boot/compressed/kaslr_64.c
+++ b/arch/x86/boot/compressed/kaslr_64.c
@@ -29,9 +29,6 @@
#define __PAGE_OFFSET __PAGE_OFFSET_BASE
#include "../../mm/ident_map.c"
-/* Used by pgtable.h asm code to force instruction serialization. */
-unsigned long __force_order;
-
/* Used to track our page table allocation area. */
struct alloc_pgt_data {
unsigned char *pgt_buf;
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index b69e00b..8c2e9ea 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -11,6 +11,7 @@
avx512_supported :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,yes,no)
sha1_ni_supported :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,yes,no)
sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no)
+adx_supported := $(call as-instr,adox %r10$(comma)%r10,yes,no)
obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
@@ -39,7 +40,11 @@
obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
-obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
+
+# These modules require the assembler to support ADX.
+ifeq ($(adx_supported),yes)
+ obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
+endif
# These modules require assembler to support AVX.
ifeq ($(avx_supported),yes)
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 1f22b6b..39eb276 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -250,6 +250,7 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287,
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index a6ea07f..4d867a7 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -190,15 +190,12 @@ static int amd_uncore_event_init(struct perf_event *event)
/*
* NB and Last level cache counters (MSRs) are shared across all cores
- * that share the same NB / Last level cache. Interrupts can be directed
- * to a single target core, however, event counts generated by processes
- * running on other cores cannot be masked out. So we do not support
- * sampling and per-thread events.
+ * that share the same NB / Last level cache. On family 16h and below,
+ * Interrupts can be directed to a single target core, however, event
+ * counts generated by processes running on other cores cannot be masked
+ * out. So we do not support sampling and per-thread events via
+ * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
*/
- if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
- return -EINVAL;
-
- /* and we do not enable counter overflow interrupts */
hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
hwc->idx = -1;
@@ -306,7 +303,7 @@ static struct pmu amd_nb_pmu = {
.start = amd_uncore_start,
.stop = amd_uncore_stop,
.read = amd_uncore_read,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
};
static struct pmu amd_llc_pmu = {
@@ -317,7 +314,7 @@ static struct pmu amd_llc_pmu = {
.start = amd_uncore_start,
.stop = amd_uncore_stop,
.read = amd_uncore_read,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
};
static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 3be51aa..dff6623 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4765,6 +4765,7 @@ __init int intel_pmu_init(void)
break;
case INTEL_FAM6_ATOM_TREMONT_D:
+ case INTEL_FAM6_ATOM_TREMONT:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index e1daf41..4814c96 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -40,17 +40,18 @@
* Model specific counters:
* MSR_CORE_C1_RES: CORE C1 Residency Counter
* perf code: 0x00
- * Available model: SLM,AMT,GLM,CNL
+ * Available model: SLM,AMT,GLM,CNL,TNT
* Scope: Core (each processor core has a MSR)
* MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
* perf code: 0x01
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM,
- * CNL,KBL,CML
+ * CNL,KBL,CML,TNT
* Scope: Core
* MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
- * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL
+ * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL,
+ * TNT
* Scope: Core
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
* perf code: 0x03
@@ -60,17 +61,18 @@
* MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
* perf code: 0x00
* Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
- * KBL,CML,ICL,TGL
+ * KBL,CML,ICL,TGL,TNT
* Scope: Package (physical package)
* MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
* perf code: 0x01
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
- * GLM,CNL,KBL,CML,ICL,TGL
+ * GLM,CNL,KBL,CML,ICL,TGL,TNT
* Scope: Package (physical package)
* MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
* perf code: 0x02
- * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
- * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL
+ * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
+ * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL,
+ * TNT
* Scope: Package (physical package)
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
* perf code: 0x03
@@ -87,7 +89,8 @@
* Scope: Package (physical package)
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
* perf code: 0x06
- * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL
+ * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
+ * TNT
* Scope: Package (physical package)
*
*/
@@ -640,8 +643,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_D, glm_cstates),
-
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT_D, glm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT, glm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, icl_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, icl_cstates),
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 4b94ae4..dc43cc1 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1714,6 +1714,8 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
old = ((s64)(prev_raw_count << shift) >> shift);
local64_add(new - old + count * period, &event->count);
+ local64_set(&hwc->period_left, -new);
+
perf_event_update_userpage(event);
return 0;
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 6f86650..a949f6f 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -75,8 +75,9 @@ static bool test_intel(int idx, void *data)
case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_ATOM_GOLDMONT_D:
-
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+ case INTEL_FAM6_ATOM_TREMONT_D:
+ case INTEL_FAM6_ATOM_TREMONT:
case INTEL_FAM6_XEON_PHI_KNL:
case INTEL_FAM6_XEON_PHI_KNM:
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f3327cb..4b263ff 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -299,6 +299,7 @@
#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */
+#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */
#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h
index 02c6ef8..07344d8 100644
--- a/arch/x86/include/asm/io_bitmap.h
+++ b/arch/x86/include/asm/io_bitmap.h
@@ -19,7 +19,14 @@ struct task_struct;
void io_bitmap_share(struct task_struct *tsk);
void io_bitmap_exit(void);
-void tss_update_io_bitmap(void);
+void native_tss_update_io_bitmap(void);
+
+#ifdef CONFIG_PARAVIRT_XXL
+#include <asm/paravirt.h>
+#else
+#define tss_update_io_bitmap native_tss_update_io_bitmap
+#endif
+
#else
static inline void io_bitmap_share(struct task_struct *tsk) { }
static inline void io_bitmap_exit(void) { }
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 03946eb..c06e8353 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -292,6 +292,14 @@ enum x86emul_mode {
#define X86EMUL_SMM_MASK (1 << 6)
#define X86EMUL_SMM_INSIDE_NMI_MASK (1 << 7)
+/*
+ * fastop functions are declared as taking a never-defined fastop parameter,
+ * so they can't be called from C directly.
+ */
+struct fastop;
+
+typedef void (*fastop_t)(struct fastop *);
+
struct x86_emulate_ctxt {
const struct x86_emulate_ops *ops;
@@ -324,7 +332,10 @@ struct x86_emulate_ctxt {
struct operand src;
struct operand src2;
struct operand dst;
- int (*execute)(struct x86_emulate_ctxt *ctxt);
+ union {
+ int (*execute)(struct x86_emulate_ctxt *ctxt);
+ fastop_t fop;
+ };
int (*check_perm)(struct x86_emulate_ctxt *ctxt);
/*
* The following six fields are cleared together,
@@ -349,7 +360,6 @@ struct x86_emulate_ctxt {
u64 d;
unsigned long _eip;
struct operand memop;
- /* Fields above regs are cleared together. */
unsigned long _regs[NR_VCPU_REGS];
struct operand *memopp;
struct fetch_cache fetch;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4dffbc1..98959e8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -781,9 +781,19 @@ struct kvm_vcpu_arch {
u64 msr_kvm_poll_control;
/*
- * Indicate whether the access faults on its page table in guest
- * which is set when fix page fault and used to detect unhandeable
- * instruction.
+ * Indicates the guest is trying to write a gfn that contains one or
+ * more of the PTEs used to translate the write itself, i.e. the access
+ * is changing its own translation in the guest page tables. KVM exits
+ * to userspace if emulation of the faulting instruction fails and this
+ * flag is set, as KVM cannot make forward progress.
+ *
+ * If emulation fails for a write to guest page tables, KVM unprotects
+ * (zaps) the shadow page for the target gfn and resumes the guest to
+ * retry the non-emulatable instruction (on hardware). Unprotecting the
+ * gfn doesn't allow forward progress for a self-changing access because
+ * doing so also zaps the translation for the gfn, i.e. retrying the
+ * instruction will hit a !PRESENT fault, which results in a new shadow
+ * page and sends KVM back to square one.
*/
bool write_fault_to_shadow_pgtable;
@@ -1112,6 +1122,7 @@ struct kvm_x86_ops {
int (*handle_exit)(struct kvm_vcpu *vcpu,
enum exit_fastpath_completion exit_fastpath);
int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+ void (*update_emulated_instruction)(struct kvm_vcpu *vcpu);
void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
void (*patch_hypercall)(struct kvm_vcpu *vcpu,
@@ -1136,7 +1147,7 @@ struct kvm_x86_ops {
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
- void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
+ int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 4359b95..9d5b099 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -102,7 +102,7 @@
#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
-#define MCE_LOG_LEN 32
+#define MCE_LOG_MIN_LEN 32U
#define MCE_LOG_SIGNATURE "MACHINECHECK"
/* AMD Scalable MCA */
@@ -135,11 +135,11 @@
*/
struct mce_log_buffer {
char signature[12]; /* "MACHINECHECK" */
- unsigned len; /* = MCE_LOG_LEN */
+ unsigned len; /* = elements in .mce_entry[] */
unsigned next;
unsigned flags;
unsigned recordlen; /* length of struct mce */
- struct mce entry[MCE_LOG_LEN];
+ struct mce entry[];
};
enum mce_notifier_prios {
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ebe1685..d5e517d 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -512,6 +512,8 @@
#define MSR_K7_HWCR 0xc0010015
#define MSR_K7_HWCR_SMMLOCK_BIT 0
#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
+#define MSR_K7_HWCR_IRPERF_EN_BIT 30
+#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 86e7317..694d8da 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -295,6 +295,13 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g);
}
+#ifdef CONFIG_X86_IOPL_IOPERM
+static inline void tss_update_io_bitmap(void)
+{
+ PVOP_VCALL0(cpu.update_io_bitmap);
+}
+#endif
+
static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 8481296..732f62e 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -140,6 +140,10 @@ struct pv_cpu_ops {
void (*load_sp0)(unsigned long sp0);
+#ifdef CONFIG_X86_IOPL_IOPERM
+ void (*update_io_bitmap)(void);
+#endif
+
void (*wbinvd)(void);
/* cpuid emulation, mostly so that caps bits can be disabled */
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 2a85287..8521af3 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -72,7 +72,7 @@
#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC)
#define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA)
#define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING)
-#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE 0x04000000
+#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE VMCS_CONTROL_BIT(USR_WAIT_PAUSE)
#define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING)
#define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING)
diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h
index a50e4a0d..9915990 100644
--- a/arch/x86/include/asm/vmxfeatures.h
+++ b/arch/x86/include/asm/vmxfeatures.h
@@ -81,6 +81,7 @@
#define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */
#define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* "" Processor Trace logs GPAs */
#define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* Scale hardware TSC when read in guest */
+#define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */
#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */
#endif /* _ASM_X86_VMXFEATURES_H */
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 503d3f4..3f3f780 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -390,6 +390,7 @@ struct kvm_sync_regs {
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
#define KVM_STATE_NESTED_EVMCS 0x00000004
+#define KVM_STATE_NESTED_MTF_PENDING 0x00000008
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 2c5676b..48293d1 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -838,13 +838,15 @@ static void free_moved_vector(struct apic_chip_data *apicd)
bool managed = apicd->is_managed;
/*
- * This should never happen. Managed interrupts are not
- * migrated except on CPU down, which does not involve the
- * cleanup vector. But try to keep the accounting correct
- * nevertheless.
+ * Managed interrupts are usually not migrated away
+ * from an online CPU, but CPU isolation 'managed_irq'
+ * can make that happen.
+ * 1) Activation does not take the isolation into account
+ * to keep the code simple
+ * 2) Migration away from an isolated CPU can happen when
+ * a non-isolated CPU which is in the calculated
+ * affinity mask comes online.
*/
- WARN_ON_ONCE(managed);
-
trace_vector_free_moved(apicd->irq, cpu, vector, managed);
irq_matrix_free(vector_matrix, cpu, vector, managed);
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index ac83a0f..aef06c3 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -28,6 +28,7 @@
static const int amd_erratum_383[];
static const int amd_erratum_400[];
+static const int amd_erratum_1054[];
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
/*
@@ -393,6 +394,35 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
}
+static void amd_detect_ppin(struct cpuinfo_x86 *c)
+{
+ unsigned long long val;
+
+ if (!cpu_has(c, X86_FEATURE_AMD_PPIN))
+ return;
+
+ /* When PPIN is defined in CPUID, still need to check PPIN_CTL MSR */
+ if (rdmsrl_safe(MSR_AMD_PPIN_CTL, &val))
+ goto clear_ppin;
+
+ /* PPIN is locked in disabled mode, clear feature bit */
+ if ((val & 3UL) == 1UL)
+ goto clear_ppin;
+
+ /* If PPIN is disabled, try to enable it */
+ if (!(val & 2UL)) {
+ wrmsrl_safe(MSR_AMD_PPIN_CTL, val | 2UL);
+ rdmsrl_safe(MSR_AMD_PPIN_CTL, &val);
+ }
+
+ /* If PPIN_EN bit is 1, return from here; otherwise fall through */
+ if (val & 2UL)
+ return;
+
+clear_ppin:
+ clear_cpu_cap(c, X86_FEATURE_AMD_PPIN);
+}
+
u16 amd_get_nb_id(int cpu)
{
return per_cpu(cpu_llc_id, cpu);
@@ -940,6 +970,7 @@ static void init_amd(struct cpuinfo_x86 *c)
amd_detect_cmp(c);
amd_get_topology(c);
srat_detect_node(c);
+ amd_detect_ppin(c);
init_amd_cacheinfo(c);
@@ -972,6 +1003,15 @@ static void init_amd(struct cpuinfo_x86 *c)
/* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */
if (!cpu_has(c, X86_FEATURE_XENPV))
set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+
+ /*
+ * Turn on the Instructions Retired free counter on machines not
+ * susceptible to erratum #1054 "Instructions Retired Performance
+ * Counter May Be Inaccurate".
+ */
+ if (cpu_has(c, X86_FEATURE_IRPERF) &&
+ !cpu_has_amd_erratum(c, amd_erratum_1054))
+ msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT);
}
#ifdef CONFIG_X86_32
@@ -1099,6 +1139,10 @@ static const int amd_erratum_400[] =
static const int amd_erratum_383[] =
AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
+/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
+static const int amd_erratum_1054[] =
+ AMD_OSVW_ERRATUM(0, AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
+
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
{
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 52c9bfb..4cdb123 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -445,7 +445,7 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c)
* cpuid bit to be set. We need to ensure that we
* update that bit in this CPU's "cpu_info".
*/
- get_cpu_cap(c);
+ set_cpu_cap(c, X86_FEATURE_OSPKE);
}
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index b3a50d9..52de616 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -1163,9 +1163,12 @@ static const struct sysfs_ops threshold_ops = {
.store = store,
};
+static void threshold_block_release(struct kobject *kobj);
+
static struct kobj_type threshold_ktype = {
.sysfs_ops = &threshold_ops,
.default_attrs = default_attrs,
+ .release = threshold_block_release,
};
static const char *get_name(unsigned int bank, struct threshold_block *b)
@@ -1198,8 +1201,9 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
return buf_mcatype;
}
-static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
- unsigned int block, u32 address)
+static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb,
+ unsigned int bank, unsigned int block,
+ u32 address)
{
struct threshold_block *b = NULL;
u32 low, high;
@@ -1243,16 +1247,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
INIT_LIST_HEAD(&b->miscj);
- if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
- list_add(&b->miscj,
- &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
- } else {
- per_cpu(threshold_banks, cpu)[bank]->blocks = b;
- }
+ if (tb->blocks)
+ list_add(&b->miscj, &tb->blocks->miscj);
+ else
+ tb->blocks = b;
- err = kobject_init_and_add(&b->kobj, &threshold_ktype,
- per_cpu(threshold_banks, cpu)[bank]->kobj,
- get_name(bank, b));
+ err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b));
if (err)
goto out_free;
recurse:
@@ -1260,7 +1260,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
if (!address)
return 0;
- err = allocate_threshold_blocks(cpu, bank, block, address);
+ err = allocate_threshold_blocks(cpu, tb, bank, block, address);
if (err)
goto out_free;
@@ -1345,8 +1345,6 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
goto out_free;
}
- per_cpu(threshold_banks, cpu)[bank] = b;
-
if (is_shared_bank(bank)) {
refcount_set(&b->cpus, 1);
@@ -1357,9 +1355,13 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
}
}
- err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank));
- if (!err)
- goto out;
+ err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank));
+ if (err)
+ goto out_free;
+
+ per_cpu(threshold_banks, cpu)[bank] = b;
+
+ return 0;
out_free:
kfree(b);
@@ -1368,8 +1370,12 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
return err;
}
-static void deallocate_threshold_block(unsigned int cpu,
- unsigned int bank)
+static void threshold_block_release(struct kobject *kobj)
+{
+ kfree(to_block(kobj));
+}
+
+static void deallocate_threshold_block(unsigned int cpu, unsigned int bank)
{
struct threshold_block *pos = NULL;
struct threshold_block *tmp = NULL;
@@ -1379,13 +1385,11 @@ static void deallocate_threshold_block(unsigned int cpu,
return;
list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
- kobject_put(&pos->kobj);
list_del(&pos->miscj);
- kfree(pos);
+ kobject_put(&pos->kobj);
}
- kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
- per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
+ kobject_put(&head->blocks->kobj);
}
static void __threshold_remove_blocks(struct threshold_bank *b)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 2c4f949..dd06fce 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -142,6 +142,8 @@ void mce_setup(struct mce *m)
if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
rdmsrl(MSR_PPIN, m->ppin);
+ else if (this_cpu_has(X86_FEATURE_AMD_PPIN))
+ rdmsrl(MSR_AMD_PPIN, m->ppin);
m->microcode = boot_cpu_data.microcode;
}
@@ -1877,6 +1879,8 @@ bool filter_mce(struct mce *m)
{
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
return amd_filter_mce(m);
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ return intel_filter_mce(m);
return false;
}
diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c
index 7c8958d..d089567 100644
--- a/arch/x86/kernel/cpu/mce/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c
@@ -29,11 +29,7 @@ static char *mce_helper_argv[2] = { mce_helper, NULL };
* separate MCEs from kernel messages to avoid bogus bug reports.
*/
-static struct mce_log_buffer mcelog = {
- .signature = MCE_LOG_SIGNATURE,
- .len = MCE_LOG_LEN,
- .recordlen = sizeof(struct mce),
-};
+static struct mce_log_buffer *mcelog;
static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
@@ -45,21 +41,21 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
mutex_lock(&mce_chrdev_read_mutex);
- entry = mcelog.next;
+ entry = mcelog->next;
/*
* When the buffer fills up discard new entries. Assume that the
* earlier errors are the more interesting ones:
*/
- if (entry >= MCE_LOG_LEN) {
- set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
+ if (entry >= mcelog->len) {
+ set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog->flags);
goto unlock;
}
- mcelog.next = entry + 1;
+ mcelog->next = entry + 1;
- memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
- mcelog.entry[entry].finished = 1;
+ memcpy(mcelog->entry + entry, mce, sizeof(struct mce));
+ mcelog->entry[entry].finished = 1;
/* wake processes polling /dev/mcelog */
wake_up_interruptible(&mce_chrdev_wait);
@@ -214,21 +210,21 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
/* Only supports full reads right now */
err = -EINVAL;
- if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
+ if (*off != 0 || usize < mcelog->len * sizeof(struct mce))
goto out;
- next = mcelog.next;
+ next = mcelog->next;
err = 0;
for (i = 0; i < next; i++) {
- struct mce *m = &mcelog.entry[i];
+ struct mce *m = &mcelog->entry[i];
err |= copy_to_user(buf, m, sizeof(*m));
buf += sizeof(*m);
}
- memset(mcelog.entry, 0, next * sizeof(struct mce));
- mcelog.next = 0;
+ memset(mcelog->entry, 0, next * sizeof(struct mce));
+ mcelog->next = 0;
if (err)
err = -EFAULT;
@@ -242,7 +238,7 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait)
{
poll_wait(file, &mce_chrdev_wait, wait);
- if (READ_ONCE(mcelog.next))
+ if (READ_ONCE(mcelog->next))
return EPOLLIN | EPOLLRDNORM;
if (!mce_apei_read_done && apei_check_mce())
return EPOLLIN | EPOLLRDNORM;
@@ -261,13 +257,13 @@ static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
case MCE_GET_RECORD_LEN:
return put_user(sizeof(struct mce), p);
case MCE_GET_LOG_LEN:
- return put_user(MCE_LOG_LEN, p);
+ return put_user(mcelog->len, p);
case MCE_GETCLEAR_FLAGS: {
unsigned flags;
do {
- flags = mcelog.flags;
- } while (cmpxchg(&mcelog.flags, flags, 0) != flags);
+ flags = mcelog->flags;
+ } while (cmpxchg(&mcelog->flags, flags, 0) != flags);
return put_user(flags, p);
}
@@ -339,8 +335,18 @@ static struct miscdevice mce_chrdev_device = {
static __init int dev_mcelog_init_device(void)
{
+ int mce_log_len;
int err;
+ mce_log_len = max(MCE_LOG_MIN_LEN, num_online_cpus());
+ mcelog = kzalloc(sizeof(*mcelog) + mce_log_len * sizeof(struct mce), GFP_KERNEL);
+ if (!mcelog)
+ return -ENOMEM;
+
+ strncpy(mcelog->signature, MCE_LOG_SIGNATURE, sizeof(mcelog->signature));
+ mcelog->len = mce_log_len;
+ mcelog->recordlen = sizeof(struct mce);
+
/* register character device /dev/mcelog */
err = misc_register(&mce_chrdev_device);
if (err) {
@@ -350,6 +356,7 @@ static __init int dev_mcelog_init_device(void)
else
pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);
+ kfree(mcelog);
return err;
}
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index 5627b10..d8f9230 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -493,17 +493,18 @@ static void intel_ppin_init(struct cpuinfo_x86 *c)
return;
if ((val & 3UL) == 1UL) {
- /* PPIN available but disabled: */
+ /* PPIN locked in disabled mode */
return;
}
- /* If PPIN is disabled, but not locked, try to enable: */
- if (!(val & 3UL)) {
+ /* If PPIN is disabled, try to enable */
+ if (!(val & 2UL)) {
wrmsrl_safe(MSR_PPIN_CTL, val | 2UL);
rdmsrl_safe(MSR_PPIN_CTL, &val);
}
- if ((val & 3UL) == 2UL)
+ /* Is the enable bit set? */
+ if (val & 2UL)
set_cpu_cap(c, X86_FEATURE_INTEL_PPIN);
}
}
@@ -520,3 +521,20 @@ void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
intel_clear_lmce();
}
+
+bool intel_filter_mce(struct mce *m)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ /* MCE errata HSD131, HSM142, HSW131, BDM48, and HSM142 */
+ if ((c->x86 == 6) &&
+ ((c->x86_model == INTEL_FAM6_HASWELL) ||
+ (c->x86_model == INTEL_FAM6_HASWELL_L) ||
+ (c->x86_model == INTEL_FAM6_BROADWELL) ||
+ (c->x86_model == INTEL_FAM6_HASWELL_G)) &&
+ (m->bank == 0) &&
+ ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
+ return true;
+
+ return false;
+}
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index b785c0d..97db184 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -48,6 +48,7 @@ void cmci_disable_bank(int bank);
void intel_init_cmci(void);
void intel_init_lmce(void);
void intel_clear_lmce(void);
+bool intel_filter_mce(struct mce *m);
#else
# define cmci_intel_adjust_timer mce_adjust_timer_default
static inline bool mce_intel_cmci_poll(void) { return false; }
@@ -56,6 +57,7 @@ static inline void cmci_disable_bank(int bank) { }
static inline void intel_init_cmci(void) { }
static inline void intel_init_lmce(void) { }
static inline void intel_clear_lmce(void) { }
+static inline bool intel_filter_mce(struct mce *m) { return false; };
#endif
void mce_timer_kick(unsigned long interval);
diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c
index 58b4ee3c..f36dc07 100644
--- a/arch/x86/kernel/cpu/mce/therm_throt.c
+++ b/arch/x86/kernel/cpu/mce/therm_throt.c
@@ -486,9 +486,14 @@ static int thermal_throttle_offline(unsigned int cpu)
{
struct thermal_state *state = &per_cpu(thermal_state, cpu);
struct device *dev = get_cpu_device(cpu);
+ u32 l;
- cancel_delayed_work(&state->package_throttle.therm_work);
- cancel_delayed_work(&state->core_throttle.therm_work);
+ /* Mask the thermal vector before draining evtl. pending work */
+ l = apic_read(APIC_LVTTHMR);
+ apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);
+
+ cancel_delayed_work_sync(&state->package_throttle.therm_work);
+ cancel_delayed_work_sync(&state->core_throttle.therm_work);
state->package_throttle.rate_control_active = false;
state->core_throttle.rate_control_active = false;
diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c
index 4d4f5d9..2305490 100644
--- a/arch/x86/kernel/ima_arch.c
+++ b/arch/x86/kernel/ima_arch.c
@@ -10,8 +10,6 @@ extern struct boot_params boot_params;
static enum efi_secureboot_mode get_sb_mode(void)
{
- efi_char16_t efi_SecureBoot_name[] = L"SecureBoot";
- efi_char16_t efi_SetupMode_name[] = L"SecureBoot";
efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID;
efi_status_t status;
unsigned long size;
@@ -25,7 +23,7 @@ static enum efi_secureboot_mode get_sb_mode(void)
}
/* Get variable contents into buffer */
- status = efi.get_variable(efi_SecureBoot_name, &efi_variable_guid,
+ status = efi.get_variable(L"SecureBoot", &efi_variable_guid,
NULL, &size, &secboot);
if (status == EFI_NOT_FOUND) {
pr_info("ima: secureboot mode disabled\n");
@@ -38,7 +36,7 @@ static enum efi_secureboot_mode get_sb_mode(void)
}
size = sizeof(setupmode);
- status = efi.get_variable(efi_SetupMode_name, &efi_variable_guid,
+ status = efi.get_variable(L"SetupMode", &efi_variable_guid,
NULL, &size, &setupmode);
if (status != EFI_SUCCESS) /* ignore unknown SetupMode */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index d817f25..6efe041 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -425,7 +425,29 @@ static void __init sev_map_percpu_data(void)
}
}
+static bool pv_tlb_flush_supported(void)
+{
+ return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+ !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
+ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
+}
+
+static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
+
#ifdef CONFIG_SMP
+
+static bool pv_ipi_supported(void)
+{
+ return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI);
+}
+
+static bool pv_sched_yield_supported(void)
+{
+ return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
+ !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
+ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
+}
+
#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
static void __send_ipi_mask(const struct cpumask *mask, int vector)
@@ -490,12 +512,12 @@ static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
{
unsigned int this_cpu = smp_processor_id();
- struct cpumask new_mask;
+ struct cpumask *new_mask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
const struct cpumask *local_mask;
- cpumask_copy(&new_mask, mask);
- cpumask_clear_cpu(this_cpu, &new_mask);
- local_mask = &new_mask;
+ cpumask_copy(new_mask, mask);
+ cpumask_clear_cpu(this_cpu, new_mask);
+ local_mask = new_mask;
__send_ipi_mask(local_mask, vector);
}
@@ -575,7 +597,6 @@ static void __init kvm_apf_trap_init(void)
update_intr_gate(X86_TRAP_PF, async_page_fault);
}
-static DEFINE_PER_CPU(cpumask_var_t, __pv_tlb_mask);
static void kvm_flush_tlb_others(const struct cpumask *cpumask,
const struct flush_tlb_info *info)
@@ -583,7 +604,7 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
u8 state;
int cpu;
struct kvm_steal_time *src;
- struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_tlb_mask);
+ struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
cpumask_copy(flushmask, cpumask);
/*
@@ -619,11 +640,10 @@ static void __init kvm_guest_init(void)
pv_ops.time.steal_clock = kvm_steal_clock;
}
- if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
- !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
- kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
+ if (pv_tlb_flush_supported()) {
pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
pv_ops.mmu.tlb_remove_table = tlb_remove_table;
+ pr_info("KVM setup pv remote TLB flush\n");
}
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
@@ -632,9 +652,7 @@ static void __init kvm_guest_init(void)
#ifdef CONFIG_SMP
smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
- if (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
- !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
- kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
+ if (pv_sched_yield_supported()) {
smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
pr_info("KVM setup pv sched yield\n");
}
@@ -700,7 +718,7 @@ static uint32_t __init kvm_detect(void)
static void __init kvm_apic_init(void)
{
#if defined(CONFIG_SMP)
- if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI))
+ if (pv_ipi_supported())
kvm_setup_pv_ipi();
#endif
}
@@ -732,26 +750,31 @@ static __init int activate_jump_labels(void)
}
arch_initcall(activate_jump_labels);
-static __init int kvm_setup_pv_tlb_flush(void)
+static __init int kvm_alloc_cpumask(void)
{
int cpu;
+ bool alloc = false;
if (!kvm_para_available() || nopv)
return 0;
- if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
- !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
- kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
+ if (pv_tlb_flush_supported())
+ alloc = true;
+
+#if defined(CONFIG_SMP)
+ if (pv_ipi_supported())
+ alloc = true;
+#endif
+
+ if (alloc)
for_each_possible_cpu(cpu) {
- zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
+ zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
GFP_KERNEL, cpu_to_node(cpu));
}
- pr_info("KVM setup pv remote TLB flush\n");
- }
return 0;
}
-arch_initcall(kvm_setup_pv_tlb_flush);
+arch_initcall(kvm_alloc_cpumask);
#ifdef CONFIG_PARAVIRT_SPINLOCKS
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 789f5e4..c131ba4 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -30,6 +30,7 @@
#include <asm/timer.h>
#include <asm/special_insns.h>
#include <asm/tlb.h>
+#include <asm/io_bitmap.h>
/*
* nop stub, which must not clobber anything *including the stack* to
@@ -341,6 +342,10 @@ struct paravirt_patch_template pv_ops = {
.cpu.iret = native_iret,
.cpu.swapgs = native_swapgs,
+#ifdef CONFIG_X86_IOPL_IOPERM
+ .cpu.update_io_bitmap = native_tss_update_io_bitmap,
+#endif
+
.cpu.start_context_switch = paravirt_nop,
.cpu.end_context_switch = paravirt_nop,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 839b524..3053c85 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -374,7 +374,7 @@ static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
/**
* tss_update_io_bitmap - Update I/O bitmap before exiting to usermode
*/
-void tss_update_io_bitmap(void)
+void native_tss_update_io_bitmap(void)
{
struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
struct thread_struct *t = ¤t->thread;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 991019d..9fea075 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -59,6 +59,19 @@
If unsure, say N.
+config KVM_WERROR
+ bool "Compile KVM with -Werror"
+ # KASAN may cause the build to fail due to larger frames
+ default y if X86_64 && !KASAN
+ # We use the dependency on !COMPILE_TEST to not be enabled
+ # blindly in allmodconfig or allyesconfig configurations
+ depends on (X86_64 && !KASAN) || !COMPILE_TEST
+ depends on EXPERT
+ help
+ Add -Werror to the build flags for KVM.
+
+ If in doubt, say "N".
+
config KVM_INTEL
tristate "KVM for Intel (and compatible) processors support"
depends on KVM && IA32_FEAT_CTL
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index b19ef42..e553f0f 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
ccflags-y += -Iarch/x86/kvm
+ccflags-$(CONFIG_KVM_WERROR) += -Werror
KVM := ../../../virt/kvm
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index ddbc619..bc00642 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -191,25 +191,6 @@
#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
#define FASTOP_SIZE 8
-/*
- * fastop functions have a special calling convention:
- *
- * dst: rax (in/out)
- * src: rdx (in/out)
- * src2: rcx (in)
- * flags: rflags (in/out)
- * ex: rsi (in:fastop pointer, out:zero if exception)
- *
- * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
- * different operand sizes can be reached by calculation, rather than a jump
- * table (which would be bigger than the code).
- *
- * fastop functions are declared as taking a never-defined fastop parameter,
- * so they can't be called from C directly.
- */
-
-struct fastop;
-
struct opcode {
u64 flags : 56;
u64 intercept : 8;
@@ -311,8 +292,19 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
#define ON64(x)
#endif
-typedef void (*fastop_t)(struct fastop *);
-
+/*
+ * fastop functions have a special calling convention:
+ *
+ * dst: rax (in/out)
+ * src: rdx (in/out)
+ * src2: rcx (in)
+ * flags: rflags (in/out)
+ * ex: rsi (in:fastop pointer, out:zero if exception)
+ *
+ * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
+ * different operand sizes can be reached by calculation, rather than a jump
+ * table (which would be bigger than the code).
+ */
static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
#define __FOP_FUNC(name) \
@@ -5181,6 +5173,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
ctxt->fetch.ptr = ctxt->fetch.data;
ctxt->fetch.end = ctxt->fetch.data + insn_len;
ctxt->opcode_len = 1;
+ ctxt->intercept = x86_intercept_none;
if (insn_len > 0)
memcpy(ctxt->fetch.data, insn, insn_len);
else {
@@ -5683,7 +5676,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
if (ctxt->execute) {
if (ctxt->d & Fastop)
- rc = fastop(ctxt, (fastop_t)ctxt->execute);
+ rc = fastop(ctxt, ctxt->fop);
else
rc = ctxt->execute(ctxt);
if (rc != X86EMUL_CONTINUE)
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 7668fed..750ff0b 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -378,12 +378,15 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
if (e->fields.delivery_mode == APIC_DM_FIXED) {
struct kvm_lapic_irq irq;
- irq.shorthand = APIC_DEST_NOSHORT;
irq.vector = e->fields.vector;
irq.delivery_mode = e->fields.delivery_mode << 8;
- irq.dest_id = e->fields.dest_id;
irq.dest_mode =
kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
+ irq.level = false;
+ irq.trig_mode = e->fields.trig_mode;
+ irq.shorthand = APIC_DEST_NOSHORT;
+ irq.dest_id = e->fields.dest_id;
+ irq.msi_redir_hint = false;
bitmap_zero(&vcpu_bitmap, 16);
kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
&vcpu_bitmap);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 79afa0b..c47d2ac 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -417,7 +417,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
kvm_set_msi_irq(vcpu->kvm, entry, &irq);
- if (irq.level &&
+ if (irq.trig_mode &&
kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
irq.dest_id, irq.dest_mode))
__set_bit(irq.vector, ioapic_handled_vectors);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index eafc631..7356a56e 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -627,9 +627,11 @@ static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
u8 val;
- if (pv_eoi_get_user(vcpu, &val) < 0)
+ if (pv_eoi_get_user(vcpu, &val) < 0) {
printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
(unsigned long long)vcpu->arch.pv_eoi.msr_val);
+ return false;
+ }
return val & 0x1;
}
@@ -1046,11 +1048,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
apic->regs + APIC_TMR);
}
- if (vcpu->arch.apicv_active)
- kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
- else {
+ if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) {
kvm_lapic_set_irr(vector, apic);
-
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
}
@@ -1080,9 +1079,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
result = 1;
/* assumes that there are only KVM_APIC_INIT/SIPI */
apic->pending_events = (1UL << KVM_APIC_INIT);
- /* make sure pending_events is visible before sending
- * the request */
- smp_wmb();
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
}
@@ -1449,6 +1445,8 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
}
}
+static void cancel_hv_timer(struct kvm_lapic *apic);
+
static void apic_update_lvtt(struct kvm_lapic *apic)
{
u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
@@ -1458,6 +1456,10 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
APIC_LVT_TIMER_TSCDEADLINE)) {
hrtimer_cancel(&apic->lapic_timer.timer);
+ preempt_disable();
+ if (apic->lapic_timer.hv_timer_in_use)
+ cancel_hv_timer(apic);
+ preempt_enable();
kvm_lapic_set_reg(apic, APIC_TMICT, 0);
apic->lapic_timer.period = 0;
apic->lapic_timer.tscdeadline = 0;
@@ -1719,7 +1721,7 @@ static void start_sw_period(struct kvm_lapic *apic)
hrtimer_start(&apic->lapic_timer.timer,
apic->lapic_timer.target_expiration,
- HRTIMER_MODE_ABS);
+ HRTIMER_MODE_ABS_HARD);
}
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index d55674f..a647601 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -102,6 +102,19 @@ static inline void kvm_mmu_load_cr3(struct kvm_vcpu *vcpu)
kvm_get_active_pcid(vcpu));
}
+int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+ bool prefault);
+
+static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ u32 err, bool prefault)
+{
+#ifdef CONFIG_RETPOLINE
+ if (likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault))
+ return kvm_tdp_page_fault(vcpu, cr2_or_gpa, err, prefault);
+#endif
+ return vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa, err, prefault);
+}
+
/*
* Currently, we have two sorts of write-protection, a) the first one
* write-protects guest page to sync the guest modification, b) another one is
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 7011a4e..87e9ba2 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4219,8 +4219,8 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
}
EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
-static int tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
- bool prefault)
+int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+ bool prefault)
{
int max_level;
@@ -4925,7 +4925,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
return;
context->mmu_role.as_u64 = new_role.as_u64;
- context->page_fault = tdp_page_fault;
+ context->page_fault = kvm_tdp_page_fault;
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
context->update_pte = nonpaging_update_pte;
@@ -5436,9 +5436,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
}
if (r == RET_PF_INVALID) {
- r = vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa,
- lower_32_bits(error_code),
- false);
+ r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa,
+ lower_32_bits(error_code), false);
WARN_ON(r == RET_PF_INVALID);
}
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 4e1ef04..e4c8a4c 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -33,7 +33,7 @@
#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
#define PT_HAVE_ACCESSED_DIRTY(mmu) true
#ifdef CONFIG_X86_64
- #define PT_MAX_FULL_LEVELS 4
+ #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
#define CMPXCHG cmpxchg
#else
#define CMPXCHG cmpxchg64
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 3c6522b..ffcd96f 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -339,7 +339,7 @@ TRACE_EVENT(
/* These depend on page entry type, so compute them now. */
__field(bool, r)
__field(bool, x)
- __field(u8, u)
+ __field(signed char, u)
),
TP_fast_assign(
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a3e32d6..50d1eba 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -57,11 +57,13 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
+#ifdef MODULE
static const struct x86_cpu_id svm_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_SVM),
{}
};
MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
+#endif
#define IOPM_ALLOC_ORDER 2
#define MSRPM_ALLOC_ORDER 1
@@ -1005,33 +1007,32 @@ static void svm_cpu_uninit(int cpu)
static int svm_cpu_init(int cpu)
{
struct svm_cpu_data *sd;
- int r;
sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
if (!sd)
return -ENOMEM;
sd->cpu = cpu;
- r = -ENOMEM;
sd->save_area = alloc_page(GFP_KERNEL);
if (!sd->save_area)
- goto err_1;
+ goto free_cpu_data;
if (svm_sev_enabled()) {
- r = -ENOMEM;
sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1,
sizeof(void *),
GFP_KERNEL);
if (!sd->sev_vmcbs)
- goto err_1;
+ goto free_save_area;
}
per_cpu(svm_data, cpu) = sd;
return 0;
-err_1:
+free_save_area:
+ __free_page(sd->save_area);
+free_cpu_data:
kfree(sd);
- return r;
+ return -ENOMEM;
}
@@ -1350,6 +1351,24 @@ static __init void svm_adjust_mmio_mask(void)
kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
}
+static void svm_hardware_teardown(void)
+{
+ int cpu;
+
+ if (svm_sev_enabled()) {
+ bitmap_free(sev_asid_bitmap);
+ bitmap_free(sev_reclaim_asid_bitmap);
+
+ sev_flush_asids();
+ }
+
+ for_each_possible_cpu(cpu)
+ svm_cpu_uninit(cpu);
+
+ __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
+ iopm_base = 0;
+}
+
static __init int svm_hardware_setup(void)
{
int cpu;
@@ -1463,29 +1482,10 @@ static __init int svm_hardware_setup(void)
return 0;
err:
- __free_pages(iopm_pages, IOPM_ALLOC_ORDER);
- iopm_base = 0;
+ svm_hardware_teardown();
return r;
}
-static __exit void svm_hardware_unsetup(void)
-{
- int cpu;
-
- if (svm_sev_enabled()) {
- bitmap_free(sev_asid_bitmap);
- bitmap_free(sev_reclaim_asid_bitmap);
-
- sev_flush_asids();
- }
-
- for_each_possible_cpu(cpu)
- svm_cpu_uninit(cpu);
-
- __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
- iopm_base = 0;
-}
-
static void init_seg(struct vmcb_seg *seg)
{
seg->selector = 0;
@@ -1933,14 +1933,6 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
static void __unregister_enc_region_locked(struct kvm *kvm,
struct enc_region *region)
{
- /*
- * The guest may change the memory encryption attribute from C=0 -> C=1
- * or vice versa for this memory range. Lets make sure caches are
- * flushed to ensure that guest data gets written into memory with
- * correct C-bit.
- */
- sev_clflush_pages(region->pages, region->npages);
-
sev_unpin_memory(kvm, region->pages, region->npages);
list_del(®ion->list);
kfree(region);
@@ -1971,6 +1963,13 @@ static void sev_vm_destroy(struct kvm *kvm)
mutex_lock(&kvm->lock);
/*
+ * Ensure that all guest tagged cache entries are flushed before
+ * releasing the pages back to the system for use. CLFLUSH will
+ * not do this, so issue a WBINVD.
+ */
+ wbinvd_on_all_cpus();
+
+ /*
* if userspace was terminated before unregistering the memory regions
* then lets unpin all the registered memory.
*/
@@ -2175,7 +2174,6 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
u32 dummy;
u32 eax = 1;
- vcpu->arch.microcode_version = 0x01000065;
svm->spec_ctrl = 0;
svm->virt_spec_ctrl = 0;
@@ -2197,8 +2195,9 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
static int avic_init_vcpu(struct vcpu_svm *svm)
{
int ret;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
- if (!kvm_vcpu_apicv_active(&svm->vcpu))
+ if (!avic || !irqchip_in_kernel(vcpu->kvm))
return 0;
ret = avic_init_backing_page(&svm->vcpu);
@@ -2266,6 +2265,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
init_vmcb(svm);
svm_init_osvw(vcpu);
+ vcpu->arch.microcode_version = 0x01000065;
return 0;
@@ -5232,6 +5232,9 @@ static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
struct vmcb *vmcb = svm->vmcb;
bool activated = kvm_vcpu_apicv_active(vcpu);
+ if (!avic)
+ return;
+
if (activated) {
/**
* During AVIC temporary deactivation, guest could update
@@ -5255,8 +5258,11 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
return;
}
-static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
+static int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
{
+ if (!vcpu->arch.apicv_active)
+ return -1;
+
kvm_lapic_set_irr(vec, vcpu->arch.apic);
smp_mb__after_atomic();
@@ -5268,6 +5274,8 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
put_cpu();
} else
kvm_vcpu_wake_up(vcpu);
+
+ return 0;
}
static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
@@ -6303,7 +6311,8 @@ static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu,
enum exit_fastpath_completion *exit_fastpath)
{
if (!is_guest_mode(vcpu) &&
- to_svm(vcpu)->vmcb->control.exit_code == EXIT_REASON_MSR_WRITE)
+ to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
+ to_svm(vcpu)->vmcb->control.exit_info_1)
*exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
}
@@ -7148,6 +7157,9 @@ static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
if (!svm_sev_enabled())
return -ENOTTY;
+ if (!argp)
+ return 0;
+
if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
return -EFAULT;
@@ -7275,6 +7287,13 @@ static int svm_unregister_enc_region(struct kvm *kvm,
goto failed;
}
+ /*
+ * Ensure that all guest tagged cache entries are flushed before
+ * releasing the pages back to the system for use. CLFLUSH will
+ * not do this, so issue a WBINVD.
+ */
+ wbinvd_on_all_cpus();
+
__unregister_enc_region_locked(kvm, region);
mutex_unlock(&kvm->lock);
@@ -7378,7 +7397,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
.hardware_setup = svm_hardware_setup,
- .hardware_unsetup = svm_hardware_unsetup,
+ .hardware_unsetup = svm_hardware_teardown,
.check_processor_compatibility = svm_check_processor_compat,
.hardware_enable = svm_hardware_enable,
.hardware_disable = svm_hardware_disable,
@@ -7433,6 +7452,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.run = svm_vcpu_run,
.handle_exit = handle_exit,
.skip_emulated_instruction = skip_emulated_instruction,
+ .update_emulated_instruction = NULL,
.set_interrupt_shadow = svm_set_interrupt_shadow,
.get_interrupt_shadow = svm_get_interrupt_shadow,
.patch_hypercall = svm_patch_hypercall,
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 283bdb7..f486e26 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -12,6 +12,7 @@ extern bool __read_mostly enable_ept;
extern bool __read_mostly enable_unrestricted_guest;
extern bool __read_mostly enable_ept_ad_bits;
extern bool __read_mostly enable_pml;
+extern bool __read_mostly enable_apicv;
extern int __read_mostly pt_mode;
#define PT_MODE_SYSTEM 0
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 657c2ed..9750e59 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -224,7 +224,7 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
return;
kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
- vmx->nested.hv_evmcs_vmptr = -1ull;
+ vmx->nested.hv_evmcs_vmptr = 0;
vmx->nested.hv_evmcs = NULL;
}
@@ -544,7 +544,8 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
}
}
-static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) {
+static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
+{
int msr;
for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
@@ -1922,7 +1923,8 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa))
return 1;
- if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
+ if (unlikely(!vmx->nested.hv_evmcs ||
+ evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
if (!vmx->nested.hv_evmcs)
vmx->nested.current_vmptr = -1ull;
@@ -1981,7 +1983,7 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
}
/*
- * Clean fields data can't de used on VMLAUNCH and when we switch
+ * Clean fields data can't be used on VMLAUNCH and when we switch
* between different L2 guests as KVM keeps a single VMCS12 per L1.
*/
if (from_launch || evmcs_gpa_changed)
@@ -3160,10 +3162,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume.
*
* Returns:
- * NVMX_ENTRY_SUCCESS: Entered VMX non-root mode
- * NVMX_ENTRY_VMFAIL: Consistency check VMFail
- * NVMX_ENTRY_VMEXIT: Consistency check VMExit
- * NVMX_ENTRY_KVM_INTERNAL_ERROR: KVM internal error
+ * NVMX_VMENTRY_SUCCESS: Entered VMX non-root mode
+ * NVMX_VMENTRY_VMFAIL: Consistency check VMFail
+ * NVMX_VMENTRY_VMEXIT: Consistency check VMExit
+ * NVMX_VMENTRY_KVM_INTERNAL_ERROR: KVM internal error
*/
enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
bool from_vmentry)
@@ -3575,25 +3577,80 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
}
+/*
+ * Returns true if a debug trap is pending delivery.
+ *
+ * In KVM, debug traps bear an exception payload. As such, the class of a #DB
+ * exception may be inferred from the presence of an exception payload.
+ */
+static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.exception.pending &&
+ vcpu->arch.exception.nr == DB_VECTOR &&
+ vcpu->arch.exception.payload;
+}
+
+/*
+ * Certain VM-exits set the 'pending debug exceptions' field to indicate a
+ * recognized #DB (data or single-step) that has yet to be delivered. Since KVM
+ * represents these debug traps with a payload that is said to be compatible
+ * with the 'pending debug exceptions' field, write the payload to the VMCS
+ * field if a VM-exit is delivered before the debug trap.
+ */
+static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
+{
+ if (vmx_pending_dbg_trap(vcpu))
+ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+ vcpu->arch.exception.payload);
+}
+
static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long exit_qual;
bool block_nested_events =
vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
+ bool mtf_pending = vmx->nested.mtf_pending;
struct kvm_lapic *apic = vcpu->arch.apic;
+ /*
+ * Clear the MTF state. If a higher priority VM-exit is delivered first,
+ * this state is discarded.
+ */
+ vmx->nested.mtf_pending = false;
+
if (lapic_in_kernel(vcpu) &&
test_bit(KVM_APIC_INIT, &apic->pending_events)) {
if (block_nested_events)
return -EBUSY;
+ nested_vmx_update_pending_dbg(vcpu);
clear_bit(KVM_APIC_INIT, &apic->pending_events);
nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
return 0;
}
+ /*
+ * Process any exceptions that are not debug traps before MTF.
+ */
if (vcpu->arch.exception.pending &&
- nested_vmx_check_exception(vcpu, &exit_qual)) {
+ !vmx_pending_dbg_trap(vcpu) &&
+ nested_vmx_check_exception(vcpu, &exit_qual)) {
+ if (block_nested_events)
+ return -EBUSY;
+ nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
+ return 0;
+ }
+
+ if (mtf_pending) {
+ if (block_nested_events)
+ return -EBUSY;
+ nested_vmx_update_pending_dbg(vcpu);
+ nested_vmx_vmexit(vcpu, EXIT_REASON_MONITOR_TRAP_FLAG, 0, 0);
+ return 0;
+ }
+
+ if (vcpu->arch.exception.pending &&
+ nested_vmx_check_exception(vcpu, &exit_qual)) {
if (block_nested_events)
return -EBUSY;
nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
@@ -5256,24 +5313,17 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
return 1;
}
-
-static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12)
+/*
+ * Return true if an IO instruction with the specified port and size should cause
+ * a VM-exit into L1.
+ */
+bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
+ int size)
{
- unsigned long exit_qualification;
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
gpa_t bitmap, last_bitmap;
- unsigned int port;
- int size;
u8 b;
- if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
- return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
-
- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-
- port = exit_qualification >> 16;
- size = (exit_qualification & 7) + 1;
-
last_bitmap = (gpa_t)-1;
b = -1;
@@ -5300,8 +5350,26 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
return false;
}
+static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ unsigned long exit_qualification;
+ unsigned short port;
+ int size;
+
+ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
+ return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
+
+ exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+
+ port = exit_qualification >> 16;
+ size = (exit_qualification & 7) + 1;
+
+ return nested_vmx_check_io_bitmaps(vcpu, port, size);
+}
+
/*
- * Return 1 if we should exit from L2 to L1 to handle an MSR access access,
+ * Return 1 if we should exit from L2 to L1 to handle an MSR access,
* rather than handle it ourselves in L0. I.e., check whether L1 expressed
* disinterest in the current event (read or write a specific MSR) by using an
* MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps.
@@ -5683,6 +5751,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
if (vmx->nested.nested_run_pending)
kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
+
+ if (vmx->nested.mtf_pending)
+ kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
}
}
@@ -5863,6 +5934,9 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
vmx->nested.nested_run_pending =
!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
+ vmx->nested.mtf_pending =
+ !!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING);
+
ret = -EINVAL;
if (nested_cpu_has_shadow_vmcs(vmcs12) &&
vmcs12->vmcs_link_pointer != -1ull) {
@@ -5920,8 +5994,7 @@ void nested_vmx_set_vmcs_shadowing_bitmap(void)
* bit in the high half is on if the corresponding bit in the control field
* may be on. See also vmx_control_verify().
*/
-void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
- bool apicv)
+void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
{
/*
* Note that as a general rule, the high half of the MSRs (bits in
@@ -5948,7 +6021,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
PIN_BASED_EXT_INTR_MASK |
PIN_BASED_NMI_EXITING |
PIN_BASED_VIRTUAL_NMIS |
- (apicv ? PIN_BASED_POSTED_INTR : 0);
+ (enable_apicv ? PIN_BASED_POSTED_INTR : 0);
msrs->pinbased_ctls_high |=
PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
PIN_BASED_VMX_PREEMPTION_TIMER;
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index fc874d4..9aeda46 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -17,8 +17,7 @@ enum nvmx_vmentry_status {
};
void vmx_leave_nested(struct kvm_vcpu *vcpu);
-void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
- bool apicv);
+void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps);
void nested_vmx_hardware_unsetup(void);
__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
void nested_vmx_set_vmcs_shadowing_bitmap(void);
@@ -34,6 +33,8 @@ int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
u32 vmx_instruction_info, bool wr, int len, gva_t *ret);
void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu);
+bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
+ int size);
static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
{
@@ -175,6 +176,11 @@ static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
}
+static inline int nested_cpu_has_mtf(struct vmcs12 *vmcs12)
+{
+ return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG);
+}
+
static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
{
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9a66648..079d9fb 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -64,11 +64,13 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
+#ifdef MODULE
static const struct x86_cpu_id vmx_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_VMX),
{}
};
MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
+#endif
bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);
@@ -95,7 +97,7 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
-static bool __read_mostly enable_apicv = 1;
+bool __read_mostly enable_apicv = 1;
module_param(enable_apicv, bool, S_IRUGO);
/*
@@ -1175,6 +1177,10 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
vmx->guest_msrs[i].mask);
}
+
+ if (vmx->nested.need_vmcs12_to_shadow_sync)
+ nested_sync_vmcs12_to_shadow(vcpu);
+
if (vmx->guest_state_loaded)
return;
@@ -1599,6 +1605,40 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
return 1;
}
+
+/*
+ * Recognizes a pending MTF VM-exit and records the nested state for later
+ * delivery.
+ */
+static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu)
+{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (!is_guest_mode(vcpu))
+ return;
+
+ /*
+ * Per the SDM, MTF takes priority over debug-trap exceptions besides
+ * T-bit traps. As instruction emulation is completed (i.e. at the
+ * instruction boundary), any #DB exception pending delivery must be a
+ * debug-trap. Record the pending MTF state to be delivered in
+ * vmx_check_nested_events().
+ */
+ if (nested_cpu_has_mtf(vmcs12) &&
+ (!vcpu->arch.exception.pending ||
+ vcpu->arch.exception.nr == DB_VECTOR))
+ vmx->nested.mtf_pending = true;
+ else
+ vmx->nested.mtf_pending = false;
+}
+
+static int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu)
+{
+ vmx_update_emulated_instruction(vcpu);
+ return skip_emulated_instruction(vcpu);
+}
+
static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
{
/*
@@ -2298,6 +2338,17 @@ static void hardware_disable(void)
kvm_cpu_vmxoff();
}
+/*
+ * There is no X86_FEATURE for SGX yet, but anyway we need to query CPUID
+ * directly instead of going through cpu_has(), to ensure KVM is trapping
+ * ENCLS whenever it's supported in hardware. It does not matter whether
+ * the host OS supports or has enabled SGX.
+ */
+static bool cpu_has_sgx(void)
+{
+ return cpuid_eax(0) >= 0x12 && (cpuid_eax(0x12) & BIT(0));
+}
+
static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
u32 msr, u32 *result)
{
@@ -2378,8 +2429,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
SECONDARY_EXEC_PT_USE_GPA |
SECONDARY_EXEC_PT_CONCEAL_VMX |
- SECONDARY_EXEC_ENABLE_VMFUNC |
- SECONDARY_EXEC_ENCLS_EXITING;
+ SECONDARY_EXEC_ENABLE_VMFUNC;
+ if (cpu_has_sgx())
+ opt2 |= SECONDARY_EXEC_ENCLS_EXITING;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
&_cpu_based_2nd_exec_control) < 0)
@@ -2947,6 +2999,9 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
static int get_ept_level(struct kvm_vcpu *vcpu)
{
+ /* Nested EPT currently only supports 4-level walks. */
+ if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu)))
+ return 4;
if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
return 5;
return 4;
@@ -3815,24 +3870,29 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
* 2. If target vcpu isn't running(root mode), kick it to pick up the
* interrupt from PIR in next vmentry.
*/
-static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
+static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int r;
r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
if (!r)
- return;
+ return 0;
+
+ if (!vcpu->arch.apicv_active)
+ return -1;
if (pi_test_and_set_pir(vector, &vmx->pi_desc))
- return;
+ return 0;
/* If a previous notification has sent the IPI, nothing to do. */
if (pi_test_and_set_on(&vmx->pi_desc))
- return;
+ return 0;
if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
kvm_vcpu_kick(vcpu);
+
+ return 0;
}
/*
@@ -4238,7 +4298,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmx->msr_ia32_umwait_control = 0;
- vcpu->arch.microcode_version = 0x100000000ULL;
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
vmx->hv_deadline_tsc = -1;
kvm_set_cr8(vcpu, 0);
@@ -6228,7 +6287,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
#endif
ASM_CALL_CONSTRAINT
:
- THUNK_TARGET(entry),
+ [thunk_target]"r"(entry),
[ss]"i"(__KERNEL_DS),
[cs]"i"(__KERNEL_CS)
);
@@ -6480,8 +6539,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_write32(PLE_WINDOW, vmx->ple_window);
}
- if (vmx->nested.need_vmcs12_to_shadow_sync)
- nested_sync_vmcs12_to_shadow(vcpu);
+ /*
+ * We did this in prepare_switch_to_guest, because it needs to
+ * be within srcu_read_lock.
+ */
+ WARN_ON_ONCE(vmx->nested.need_vmcs12_to_shadow_sync);
if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP))
vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
@@ -6755,14 +6817,14 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
if (nested)
nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
- vmx_capability.ept,
- kvm_vcpu_apicv_active(vcpu));
+ vmx_capability.ept);
else
memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
vmx->nested.posted_intr_nv = -1;
vmx->nested.current_vmptr = -1ull;
+ vcpu->arch.microcode_version = 0x100000000ULL;
vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED;
/*
@@ -6836,8 +6898,7 @@ static int __init vmx_check_processor_compat(void)
if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
return -EIO;
if (nested)
- nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept,
- enable_apicv);
+ nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept);
if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
smp_processor_id());
@@ -7098,6 +7159,40 @@ static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
to_vmx(vcpu)->req_immediate_exit = true;
}
+static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
+ struct x86_instruction_info *info)
+{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ unsigned short port;
+ bool intercept;
+ int size;
+
+ if (info->intercept == x86_intercept_in ||
+ info->intercept == x86_intercept_ins) {
+ port = info->src_val;
+ size = info->dst_bytes;
+ } else {
+ port = info->dst_val;
+ size = info->src_bytes;
+ }
+
+ /*
+ * If the 'use IO bitmaps' VM-execution control is 0, IO instruction
+ * VM-exits depend on the 'unconditional IO exiting' VM-execution
+ * control.
+ *
+ * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps.
+ */
+ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
+ intercept = nested_cpu_has(vmcs12,
+ CPU_BASED_UNCOND_IO_EXITING);
+ else
+ intercept = nested_vmx_check_io_bitmaps(vcpu, port, size);
+
+ /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */
+ return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
+}
+
static int vmx_check_intercept(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage)
@@ -7105,19 +7200,45 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
+ switch (info->intercept) {
/*
* RDPID causes #UD if disabled through secondary execution controls.
* Because it is marked as EmulateOnUD, we need to intercept it here.
*/
- if (info->intercept == x86_intercept_rdtscp &&
- !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
- ctxt->exception.vector = UD_VECTOR;
- ctxt->exception.error_code_valid = false;
- return X86EMUL_PROPAGATE_FAULT;
- }
+ case x86_intercept_rdtscp:
+ if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
+ ctxt->exception.vector = UD_VECTOR;
+ ctxt->exception.error_code_valid = false;
+ return X86EMUL_PROPAGATE_FAULT;
+ }
+ break;
+
+ case x86_intercept_in:
+ case x86_intercept_ins:
+ case x86_intercept_out:
+ case x86_intercept_outs:
+ return vmx_check_intercept_io(vcpu, info);
+
+ case x86_intercept_lgdt:
+ case x86_intercept_lidt:
+ case x86_intercept_lldt:
+ case x86_intercept_ltr:
+ case x86_intercept_sgdt:
+ case x86_intercept_sidt:
+ case x86_intercept_sldt:
+ case x86_intercept_str:
+ if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC))
+ return X86EMUL_CONTINUE;
+
+ /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */
+ break;
/* TODO: check more intercepts... */
- return X86EMUL_CONTINUE;
+ default:
+ break;
+ }
+
+ return X86EMUL_UNHANDLEABLE;
}
#ifdef CONFIG_X86_64
@@ -7699,7 +7820,7 @@ static __init int hardware_setup(void)
if (nested) {
nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
- vmx_capability.ept, enable_apicv);
+ vmx_capability.ept);
r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
if (r)
@@ -7783,7 +7904,8 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.run = vmx_vcpu_run,
.handle_exit = vmx_handle_exit,
- .skip_emulated_instruction = skip_emulated_instruction,
+ .skip_emulated_instruction = vmx_skip_emulated_instruction,
+ .update_emulated_instruction = vmx_update_emulated_instruction,
.set_interrupt_shadow = vmx_set_interrupt_shadow,
.get_interrupt_shadow = vmx_get_interrupt_shadow,
.patch_hypercall = vmx_patch_hypercall,
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 7f42cf3..e64da06 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -150,6 +150,9 @@ struct nested_vmx {
/* L2 must run next, and mustn't decide to exit to L1. */
bool nested_run_pending;
+ /* Pending MTF VM-exit into L1. */
+ bool mtf_pending;
+
struct loaded_vmcs vmcs02;
/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fbabb2f..cf95c36 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -438,6 +438,14 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
* for #DB exceptions under VMX.
*/
vcpu->arch.dr6 ^= payload & DR6_RTM;
+
+ /*
+ * The #DB payload is defined as compatible with the 'pending
+ * debug exceptions' field under VMX, not DR6. While bit 12 is
+ * defined in the 'pending debug exceptions' field (enabled
+ * breakpoint), it is reserved and must be zero in DR6.
+ */
+ vcpu->arch.dr6 &= ~BIT(12);
break;
case PF_VECTOR:
vcpu->arch.cr2 = payload;
@@ -490,19 +498,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
vcpu->arch.exception.error_code = error_code;
vcpu->arch.exception.has_payload = has_payload;
vcpu->arch.exception.payload = payload;
- /*
- * In guest mode, payload delivery should be deferred,
- * so that the L1 hypervisor can intercept #PF before
- * CR2 is modified (or intercept #DB before DR6 is
- * modified under nVMX). However, for ABI
- * compatibility with KVM_GET_VCPU_EVENTS and
- * KVM_SET_VCPU_EVENTS, we can't delay payload
- * delivery unless userspace has enabled this
- * functionality via the per-VM capability,
- * KVM_CAP_EXCEPTION_PAYLOAD.
- */
- if (!vcpu->kvm->arch.exception_payload_enabled ||
- !is_guest_mode(vcpu))
+ if (!is_guest_mode(vcpu))
kvm_deliver_exception_payload(vcpu);
return;
}
@@ -1558,7 +1554,10 @@ EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
*/
static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
{
- if (lapic_in_kernel(vcpu) && apic_x2apic_mode(vcpu->arch.apic) &&
+ if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic))
+ return 1;
+
+ if (((data & APIC_SHORT_MASK) == APIC_DEST_NOSHORT) &&
((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
((data & APIC_MODE_MASK) == APIC_DM_FIXED)) {
@@ -2448,7 +2447,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
vcpu->last_guest_tsc = tsc_timestamp;
- WARN_ON(vcpu->hv_clock.system_time < 0);
/* If the host uses TSC clocksource, then it is stable */
pvclock_flags = 0;
@@ -3796,6 +3794,21 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
process_nmi(vcpu);
/*
+ * In guest mode, payload delivery should be deferred,
+ * so that the L1 hypervisor can intercept #PF before
+ * CR2 is modified (or intercept #DB before DR6 is
+ * modified under nVMX). Unless the per-VM capability,
+ * KVM_CAP_EXCEPTION_PAYLOAD, is set, we may not defer the delivery of
+ * an exception payload and handle after a KVM_GET_VCPU_EVENTS. Since we
+ * opportunistically defer the exception payload, deliver it if the
+ * capability hasn't been requested before processing a
+ * KVM_GET_VCPU_EVENTS.
+ */
+ if (!vcpu->kvm->arch.exception_payload_enabled &&
+ vcpu->arch.exception.pending && vcpu->arch.exception.has_payload)
+ kvm_deliver_exception_payload(vcpu);
+
+ /*
* The API doesn't provide the instruction length for software
* exceptions, so don't report them. As long as the guest RIP
* isn't advanced, we should expect to encounter the exception
@@ -6880,6 +6893,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
kvm_rip_write(vcpu, ctxt->eip);
if (r && ctxt->tf)
r = kvm_vcpu_do_singlestep(vcpu);
+ if (kvm_x86_ops->update_emulated_instruction)
+ kvm_x86_ops->update_emulated_instruction(vcpu);
__kvm_set_rflags(vcpu, ctxt->eflags);
}
@@ -7177,14 +7192,16 @@ static void kvm_timer_init(void)
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
#ifdef CONFIG_CPU_FREQ
- struct cpufreq_policy policy;
+ struct cpufreq_policy *policy;
int cpu;
- memset(&policy, 0, sizeof(policy));
cpu = get_cpu();
- cpufreq_get_policy(&policy, cpu);
- if (policy.cpuinfo.max_freq)
- max_tsc_khz = policy.cpuinfo.max_freq;
+ policy = cpufreq_cpu_get(cpu);
+ if (policy) {
+ if (policy->cpuinfo.max_freq)
+ max_tsc_khz = policy->cpuinfo.max_freq;
+ cpufreq_cpu_put(policy);
+ }
put_cpu();
#endif
cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
@@ -7295,12 +7312,12 @@ int kvm_arch_init(void *opaque)
}
if (!ops->cpu_has_kvm_support()) {
- printk(KERN_ERR "kvm: no hardware support\n");
+ pr_err_ratelimited("kvm: no hardware support\n");
r = -EOPNOTSUPP;
goto out;
}
if (ops->disabled_by_bios()) {
- printk(KERN_ERR "kvm: disabled by bios\n");
+ pr_err_ratelimited("kvm: disabled by bios\n");
r = -EOPNOTSUPP;
goto out;
}
@@ -8942,7 +8959,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
kvm_rip_write(vcpu, ctxt->eip);
kvm_set_rflags(vcpu, ctxt->eflags);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
return 1;
}
EXPORT_SYMBOL_GPL(kvm_task_switch);
@@ -10182,7 +10198,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu))
return;
- vcpu->arch.mmu->page_fault(vcpu, work->cr2_or_gpa, 0, true);
+ kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
}
static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 64229da..69309cd 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -363,13 +363,8 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m,
{
const struct ptdump_range ptdump_ranges[] = {
#ifdef CONFIG_X86_64
-
-#define normalize_addr_shift (64 - (__VIRTUAL_MASK_SHIFT + 1))
-#define normalize_addr(u) ((signed long)((u) << normalize_addr_shift) >> \
- normalize_addr_shift)
-
{0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2},
- {normalize_addr(PTRS_PER_PGD * PGD_LEVEL_MULT / 2), ~0UL},
+ {GUARD_HOLE_END_ADDR, ~0UL},
#else
{0, ~0UL},
#endif
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index fa4ea09..629fdf1 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -190,7 +190,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
return pmd_k;
}
-void vmalloc_sync_all(void)
+static void vmalloc_sync(void)
{
unsigned long address;
@@ -217,6 +217,16 @@ void vmalloc_sync_all(void)
}
}
+void vmalloc_sync_mappings(void)
+{
+ vmalloc_sync();
+}
+
+void vmalloc_sync_unmappings(void)
+{
+ vmalloc_sync();
+}
+
/*
* 32-bit:
*
@@ -319,11 +329,23 @@ static void dump_pagetable(unsigned long address)
#else /* CONFIG_X86_64: */
-void vmalloc_sync_all(void)
+void vmalloc_sync_mappings(void)
{
+ /*
+ * 64-bit mappings might allocate new p4d/pud pages
+ * that need to be propagated to all tasks' PGDs.
+ */
sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
}
+void vmalloc_sync_unmappings(void)
+{
+ /*
+ * Unmappings never allocate or free p4d/pud pages.
+ * No work is required here.
+ */
+}
+
/*
* 64-bit:
*
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 44e4beb..18c637c 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -106,6 +106,22 @@ static unsigned int __ioremap_check_encrypted(struct resource *res)
return 0;
}
+/*
+ * The EFI runtime services data area is not covered by walk_mem_res(), but must
+ * be mapped encrypted when SEV is active.
+ */
+static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc)
+{
+ if (!sev_active())
+ return;
+
+ if (!IS_ENABLED(CONFIG_EFI))
+ return;
+
+ if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA)
+ desc->flags |= IORES_MAP_ENCRYPTED;
+}
+
static int __ioremap_collect_map_flags(struct resource *res, void *arg)
{
struct ioremap_desc *desc = arg;
@@ -124,6 +140,9 @@ static int __ioremap_collect_map_flags(struct resource *res, void *arg)
* To avoid multiple resource walks, this function walks resources marked as
* IORESOURCE_MEM and IORESOURCE_BUSY and looking for system RAM and/or a
* resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES).
+ *
+ * After that, deal with misc other ranges in __ioremap_check_other() which do
+ * not fall into the above category.
*/
static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
struct ioremap_desc *desc)
@@ -135,6 +154,8 @@ static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
memset(desc, 0, sizeof(struct ioremap_desc));
walk_mem_res(start, end, desc, __ioremap_collect_map_flags);
+
+ __ioremap_check_other(addr, desc);
}
/*
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 393d251..4d2a7a7 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -2039,10 +2039,12 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
}
/* and dreg_lo,sreg_lo */
EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
- /* and dreg_hi,sreg_hi */
- EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
- /* or dreg_lo,dreg_hi */
- EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
+ if (is_jmp64) {
+ /* and dreg_hi,sreg_hi */
+ EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
+ /* or dreg_lo,dreg_hi */
+ EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
+ }
goto emit_cond_jmp;
}
case BPF_JMP | BPF_JSET | BPF_K:
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index fa8506e..d19a2ed 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -180,7 +180,7 @@ void efi_sync_low_kernel_mappings(void)
static inline phys_addr_t
virt_to_phys_or_null_size(void *va, unsigned long size)
{
- bool bad_size;
+ phys_addr_t pa;
if (!va)
return 0;
@@ -188,16 +188,13 @@ virt_to_phys_or_null_size(void *va, unsigned long size)
if (virt_addr_valid(va))
return virt_to_phys(va);
- /*
- * A fully aligned variable on the stack is guaranteed not to
- * cross a page bounary. Try to catch strings on the stack by
- * checking that 'size' is a power of two.
- */
- bad_size = size > PAGE_SIZE || !is_power_of_2(size);
+ pa = slow_virt_to_phys(va);
- WARN_ON(!IS_ALIGNED((unsigned long)va, size) || bad_size);
+ /* check if the object crosses a page boundary */
+ if (WARN_ON((pa ^ (pa + size - 1)) & PAGE_MASK))
+ return 0;
- return slow_virt_to_phys(va);
+ return pa;
}
#define virt_to_phys_or_null(addr) \
@@ -568,85 +565,25 @@ efi_thunk_set_virtual_address_map(unsigned long memory_map_size,
static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc)
{
- efi_status_t status;
- u32 phys_tm, phys_tc;
- unsigned long flags;
-
- spin_lock(&rtc_lock);
- spin_lock_irqsave(&efi_runtime_lock, flags);
-
- phys_tm = virt_to_phys_or_null(tm);
- phys_tc = virt_to_phys_or_null(tc);
-
- status = efi_thunk(get_time, phys_tm, phys_tc);
-
- spin_unlock_irqrestore(&efi_runtime_lock, flags);
- spin_unlock(&rtc_lock);
-
- return status;
+ return EFI_UNSUPPORTED;
}
static efi_status_t efi_thunk_set_time(efi_time_t *tm)
{
- efi_status_t status;
- u32 phys_tm;
- unsigned long flags;
-
- spin_lock(&rtc_lock);
- spin_lock_irqsave(&efi_runtime_lock, flags);
-
- phys_tm = virt_to_phys_or_null(tm);
-
- status = efi_thunk(set_time, phys_tm);
-
- spin_unlock_irqrestore(&efi_runtime_lock, flags);
- spin_unlock(&rtc_lock);
-
- return status;
+ return EFI_UNSUPPORTED;
}
static efi_status_t
efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending,
efi_time_t *tm)
{
- efi_status_t status;
- u32 phys_enabled, phys_pending, phys_tm;
- unsigned long flags;
-
- spin_lock(&rtc_lock);
- spin_lock_irqsave(&efi_runtime_lock, flags);
-
- phys_enabled = virt_to_phys_or_null(enabled);
- phys_pending = virt_to_phys_or_null(pending);
- phys_tm = virt_to_phys_or_null(tm);
-
- status = efi_thunk(get_wakeup_time, phys_enabled,
- phys_pending, phys_tm);
-
- spin_unlock_irqrestore(&efi_runtime_lock, flags);
- spin_unlock(&rtc_lock);
-
- return status;
+ return EFI_UNSUPPORTED;
}
static efi_status_t
efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
{
- efi_status_t status;
- u32 phys_tm;
- unsigned long flags;
-
- spin_lock(&rtc_lock);
- spin_lock_irqsave(&efi_runtime_lock, flags);
-
- phys_tm = virt_to_phys_or_null(tm);
-
- status = efi_thunk(set_wakeup_time, enabled, phys_tm);
-
- spin_unlock_irqrestore(&efi_runtime_lock, flags);
- spin_unlock(&rtc_lock);
-
- return status;
+ return EFI_UNSUPPORTED;
}
static unsigned long efi_name_size(efi_char16_t *name)
@@ -658,6 +595,8 @@ static efi_status_t
efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor,
u32 *attr, unsigned long *data_size, void *data)
{
+ u8 buf[24] __aligned(8);
+ efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
efi_status_t status;
u32 phys_name, phys_vendor, phys_attr;
u32 phys_data_size, phys_data;
@@ -665,14 +604,19 @@ efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor,
spin_lock_irqsave(&efi_runtime_lock, flags);
+ *vnd = *vendor;
+
phys_data_size = virt_to_phys_or_null(data_size);
- phys_vendor = virt_to_phys_or_null(vendor);
+ phys_vendor = virt_to_phys_or_null(vnd);
phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
phys_attr = virt_to_phys_or_null(attr);
phys_data = virt_to_phys_or_null_size(data, *data_size);
- status = efi_thunk(get_variable, phys_name, phys_vendor,
- phys_attr, phys_data_size, phys_data);
+ if (!phys_name || (data && !phys_data))
+ status = EFI_INVALID_PARAMETER;
+ else
+ status = efi_thunk(get_variable, phys_name, phys_vendor,
+ phys_attr, phys_data_size, phys_data);
spin_unlock_irqrestore(&efi_runtime_lock, flags);
@@ -683,19 +627,25 @@ static efi_status_t
efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor,
u32 attr, unsigned long data_size, void *data)
{
+ u8 buf[24] __aligned(8);
+ efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
u32 phys_name, phys_vendor, phys_data;
efi_status_t status;
unsigned long flags;
spin_lock_irqsave(&efi_runtime_lock, flags);
+ *vnd = *vendor;
+
phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
- phys_vendor = virt_to_phys_or_null(vendor);
+ phys_vendor = virt_to_phys_or_null(vnd);
phys_data = virt_to_phys_or_null_size(data, data_size);
- /* If data_size is > sizeof(u32) we've got problems */
- status = efi_thunk(set_variable, phys_name, phys_vendor,
- attr, data_size, phys_data);
+ if (!phys_name || !phys_data)
+ status = EFI_INVALID_PARAMETER;
+ else
+ status = efi_thunk(set_variable, phys_name, phys_vendor,
+ attr, data_size, phys_data);
spin_unlock_irqrestore(&efi_runtime_lock, flags);
@@ -707,6 +657,8 @@ efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor,
u32 attr, unsigned long data_size,
void *data)
{
+ u8 buf[24] __aligned(8);
+ efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
u32 phys_name, phys_vendor, phys_data;
efi_status_t status;
unsigned long flags;
@@ -714,13 +666,17 @@ efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor,
if (!spin_trylock_irqsave(&efi_runtime_lock, flags))
return EFI_NOT_READY;
+ *vnd = *vendor;
+
phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
- phys_vendor = virt_to_phys_or_null(vendor);
+ phys_vendor = virt_to_phys_or_null(vnd);
phys_data = virt_to_phys_or_null_size(data, data_size);
- /* If data_size is > sizeof(u32) we've got problems */
- status = efi_thunk(set_variable, phys_name, phys_vendor,
- attr, data_size, phys_data);
+ if (!phys_name || !phys_data)
+ status = EFI_INVALID_PARAMETER;
+ else
+ status = efi_thunk(set_variable, phys_name, phys_vendor,
+ attr, data_size, phys_data);
spin_unlock_irqrestore(&efi_runtime_lock, flags);
@@ -732,39 +688,36 @@ efi_thunk_get_next_variable(unsigned long *name_size,
efi_char16_t *name,
efi_guid_t *vendor)
{
+ u8 buf[24] __aligned(8);
+ efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
efi_status_t status;
u32 phys_name_size, phys_name, phys_vendor;
unsigned long flags;
spin_lock_irqsave(&efi_runtime_lock, flags);
+ *vnd = *vendor;
+
phys_name_size = virt_to_phys_or_null(name_size);
- phys_vendor = virt_to_phys_or_null(vendor);
+ phys_vendor = virt_to_phys_or_null(vnd);
phys_name = virt_to_phys_or_null_size(name, *name_size);
- status = efi_thunk(get_next_variable, phys_name_size,
- phys_name, phys_vendor);
+ if (!phys_name)
+ status = EFI_INVALID_PARAMETER;
+ else
+ status = efi_thunk(get_next_variable, phys_name_size,
+ phys_name, phys_vendor);
spin_unlock_irqrestore(&efi_runtime_lock, flags);
+ *vendor = *vnd;
return status;
}
static efi_status_t
efi_thunk_get_next_high_mono_count(u32 *count)
{
- efi_status_t status;
- u32 phys_count;
- unsigned long flags;
-
- spin_lock_irqsave(&efi_runtime_lock, flags);
-
- phys_count = virt_to_phys_or_null(count);
- status = efi_thunk(get_next_high_mono_count, phys_count);
-
- spin_unlock_irqrestore(&efi_runtime_lock, flags);
-
- return status;
+ return EFI_UNSUPPORTED;
}
static void
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 1f756ff..507f4fb 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -72,6 +72,9 @@
#include <asm/mwait.h>
#include <asm/pci_x86.h>
#include <asm/cpu.h>
+#ifdef CONFIG_X86_IOPL_IOPERM
+#include <asm/io_bitmap.h>
+#endif
#ifdef CONFIG_ACPI
#include <linux/acpi.h>
@@ -837,6 +840,25 @@ static void xen_load_sp0(unsigned long sp0)
this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
}
+#ifdef CONFIG_X86_IOPL_IOPERM
+static void xen_update_io_bitmap(void)
+{
+ struct physdev_set_iobitmap iobitmap;
+ struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+
+ native_tss_update_io_bitmap();
+
+ iobitmap.bitmap = (uint8_t *)(&tss->x86_tss) +
+ tss->x86_tss.io_bitmap_base;
+ if (tss->x86_tss.io_bitmap_base == IO_BITMAP_OFFSET_INVALID)
+ iobitmap.nr_ports = 0;
+ else
+ iobitmap.nr_ports = IO_BITMAP_BITS;
+
+ HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap);
+}
+#endif
+
static void xen_io_delay(void)
{
}
@@ -896,14 +918,15 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
{
int ret;
+#ifdef CONFIG_X86_64
+ unsigned int which;
+ u64 base;
+#endif
ret = 0;
switch (msr) {
#ifdef CONFIG_X86_64
- unsigned which;
- u64 base;
-
case MSR_FS_BASE: which = SEGBASE_FS; goto set;
case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set;
case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set;
@@ -1046,6 +1069,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.write_idt_entry = xen_write_idt_entry,
.load_sp0 = xen_load_sp0,
+#ifdef CONFIG_X86_IOPL_IOPERM
+ .update_io_bitmap = xen_update_io_bitmap,
+#endif
.io_delay = xen_io_delay,
/* Xen takes care of %gs when switching to usermode for us */
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 8331098..49322b66 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -267,13 +267,12 @@ static int __init simdisk_setup(struct simdisk *dev, int which,
spin_lock_init(&dev->lock);
dev->users = 0;
- dev->queue = blk_alloc_queue(GFP_KERNEL);
+ dev->queue = blk_alloc_queue(simdisk_make_request, NUMA_NO_NODE);
if (dev->queue == NULL) {
pr_err("blk_alloc_queue failed\n");
goto out_alloc_queue;
}
- blk_queue_make_request(dev->queue, simdisk_make_request);
dev->queue->queuedata = dev;
dev->gd = alloc_disk(SIMDISK_MINORS);
diff --git a/block/Makefile b/block/Makefile
index 1a43750..206b96e 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -8,8 +8,7 @@
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
- genhd.o partition-generic.o ioprio.o \
- badblocks.o partitions/ blk-rq-qos.o
+ genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o
obj-$(CONFIG_BOUNCE) += bounce.o
obj-$(CONFIG_BLK_SCSI_REQUEST) += scsi_ioctl.o
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 09b69a3..68882b9 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -610,12 +610,13 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
*/
entity = &bfqg->entity;
for_each_entity(entity) {
- bfqg = container_of(entity, struct bfq_group, entity);
- if (bfqg != bfqd->root_group) {
- parent = bfqg_parent(bfqg);
+ struct bfq_group *curr_bfqg = container_of(entity,
+ struct bfq_group, entity);
+ if (curr_bfqg != bfqd->root_group) {
+ parent = bfqg_parent(curr_bfqg);
if (!parent)
parent = bfqd->root_group;
- bfq_group_set_parent(bfqg, parent);
+ bfq_group_set_parent(curr_bfqg, parent);
}
}
@@ -641,6 +642,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
{
struct bfq_entity *entity = &bfqq->entity;
+ /*
+ * Get extra reference to prevent bfqq from being freed in
+ * next possible expire or deactivate.
+ */
+ bfqq->ref++;
+
/* If bfqq is empty, then bfq_bfqq_expire also invokes
* bfq_del_bfqq_busy, thereby removing bfqq and its entity
* from data structures related to current group. Otherwise we
@@ -651,12 +658,6 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
false, BFQQE_PREEMPTED);
- /*
- * get extra reference to prevent bfqq from being freed in
- * next possible deactivate
- */
- bfqq->ref++;
-
if (bfq_bfqq_busy(bfqq))
bfq_deactivate_bfqq(bfqd, bfqq, false, false);
else if (entity->on_st_or_in_serv)
@@ -676,7 +677,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
bfq_schedule_dispatch(bfqd);
- /* release extra ref taken above */
+ /* release extra ref taken above, bfqq may happen to be freed now */
bfq_put_queue(bfqq);
}
@@ -713,10 +714,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
if (entity->sched_data != &bfqg->sched_data) {
bic_set_bfqq(bic, NULL, 0);
- bfq_log_bfqq(bfqd, async_bfqq,
- "bic_change_group: %p %d",
- async_bfqq, async_bfqq->ref);
- bfq_put_queue(async_bfqq);
+ bfq_release_process_ref(bfqd, async_bfqq);
}
}
@@ -817,39 +815,53 @@ static void bfq_flush_idle_tree(struct bfq_service_tree *st)
/**
* bfq_reparent_leaf_entity - move leaf entity to the root_group.
* @bfqd: the device data structure with the root group.
- * @entity: the entity to move.
+ * @entity: the entity to move, if entity is a leaf; or the parent entity
+ * of an active leaf entity to move, if entity is not a leaf.
*/
static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
- struct bfq_entity *entity)
+ struct bfq_entity *entity,
+ int ioprio_class)
{
- struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+ struct bfq_queue *bfqq;
+ struct bfq_entity *child_entity = entity;
+ while (child_entity->my_sched_data) { /* leaf not reached yet */
+ struct bfq_sched_data *child_sd = child_entity->my_sched_data;
+ struct bfq_service_tree *child_st = child_sd->service_tree +
+ ioprio_class;
+ struct rb_root *child_active = &child_st->active;
+
+ child_entity = bfq_entity_of(rb_first(child_active));
+
+ if (!child_entity)
+ child_entity = child_sd->in_service_entity;
+ }
+
+ bfqq = bfq_entity_to_bfqq(child_entity);
bfq_bfqq_move(bfqd, bfqq, bfqd->root_group);
}
/**
- * bfq_reparent_active_entities - move to the root group all active
- * entities.
+ * bfq_reparent_active_queues - move to the root group all active queues.
* @bfqd: the device data structure with the root group.
* @bfqg: the group to move from.
- * @st: the service tree with the entities.
+ * @st: the service tree to start the search from.
*/
-static void bfq_reparent_active_entities(struct bfq_data *bfqd,
- struct bfq_group *bfqg,
- struct bfq_service_tree *st)
+static void bfq_reparent_active_queues(struct bfq_data *bfqd,
+ struct bfq_group *bfqg,
+ struct bfq_service_tree *st,
+ int ioprio_class)
{
struct rb_root *active = &st->active;
- struct bfq_entity *entity = NULL;
+ struct bfq_entity *entity;
- if (!RB_EMPTY_ROOT(&st->active))
- entity = bfq_entity_of(rb_first(active));
-
- for (; entity ; entity = bfq_entity_of(rb_first(active)))
- bfq_reparent_leaf_entity(bfqd, entity);
+ while ((entity = bfq_entity_of(rb_first(active))))
+ bfq_reparent_leaf_entity(bfqd, entity, ioprio_class);
if (bfqg->sched_data.in_service_entity)
bfq_reparent_leaf_entity(bfqd,
- bfqg->sched_data.in_service_entity);
+ bfqg->sched_data.in_service_entity,
+ ioprio_class);
}
/**
@@ -882,13 +894,6 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
st = bfqg->sched_data.service_tree + i;
/*
- * The idle tree may still contain bfq_queues belonging
- * to exited task because they never migrated to a different
- * cgroup from the one being destroyed now.
- */
- bfq_flush_idle_tree(st);
-
- /*
* It may happen that some queues are still active
* (busy) upon group destruction (if the corresponding
* processes have been forced to terminate). We move
@@ -900,7 +905,20 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
* There is no need to put the sync queues, as the
* scheduler has taken no reference.
*/
- bfq_reparent_active_entities(bfqd, bfqg, st);
+ bfq_reparent_active_queues(bfqd, bfqg, st, i);
+
+ /*
+ * The idle tree may still contain bfq_queues
+ * belonging to exited task because they never
+ * migrated to a different cgroup from the one being
+ * destroyed now. In addition, even
+ * bfq_reparent_active_queues() may happen to add some
+ * entities to the idle tree. It happens if, in some
+ * of the calls to bfq_bfqq_move() performed by
+ * bfq_reparent_active_queues(), the queue to move is
+ * empty and gets expired.
+ */
+ bfq_flush_idle_tree(st);
}
__bfq_deactivate_entity(entity, false);
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 8c436ab..78ba57e 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2716,8 +2716,6 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
}
}
-
-static
void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
/*
@@ -6215,20 +6213,28 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
return bfqq;
}
-static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)
+static void
+bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
- struct bfq_data *bfqd = bfqq->bfqd;
enum bfqq_expiration reason;
unsigned long flags;
spin_lock_irqsave(&bfqd->lock, flags);
- bfq_clear_bfqq_wait_request(bfqq);
+ /*
+ * Considering that bfqq may be in race, we should firstly check
+ * whether bfqq is in service before doing something on it. If
+ * the bfqq in race is not in service, it has already been expired
+ * through __bfq_bfqq_expire func and its wait_request flags has
+ * been cleared in __bfq_bfqd_reset_in_service func.
+ */
if (bfqq != bfqd->in_service_queue) {
spin_unlock_irqrestore(&bfqd->lock, flags);
return;
}
+ bfq_clear_bfqq_wait_request(bfqq);
+
if (bfq_bfqq_budget_timeout(bfqq))
/*
* Also here the queue can be safely expired
@@ -6273,7 +6279,7 @@ static enum hrtimer_restart bfq_idle_slice_timer(struct hrtimer *timer)
* early.
*/
if (bfqq)
- bfq_idle_slice_timer_body(bfqq);
+ bfq_idle_slice_timer_body(bfqd, bfqq);
return HRTIMER_NORESTART;
}
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index d1233af..cd224aa 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -955,6 +955,7 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bool compensate, enum bfqq_expiration reason);
void bfq_put_queue(struct bfq_queue *bfqq);
void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
+void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq);
void bfq_schedule_dispatch(struct bfq_data *bfqd);
void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
diff --git a/block/bio.c b/block/bio.c
index 94d6972..21cbaa6 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -17,6 +17,7 @@
#include <linux/cgroup.h>
#include <linux/blk-cgroup.h>
#include <linux/highmem.h>
+#include <linux/sched/sysctl.h>
#include <trace/events/block.h>
#include "blk.h"
@@ -588,6 +589,49 @@ void bio_truncate(struct bio *bio, unsigned new_size)
}
/**
+ * guard_bio_eod - truncate a BIO to fit the block device
+ * @bio: bio to truncate
+ *
+ * This allows us to do IO even on the odd last sectors of a device, even if the
+ * block size is some multiple of the physical sector size.
+ *
+ * We'll just truncate the bio to the size of the device, and clear the end of
+ * the buffer head manually. Truly out-of-range accesses will turn into actual
+ * I/O errors, this only handles the "we need to be able to do I/O at the final
+ * sector" case.
+ */
+void guard_bio_eod(struct bio *bio)
+{
+ sector_t maxsector;
+ struct hd_struct *part;
+
+ rcu_read_lock();
+ part = __disk_get_part(bio->bi_disk, bio->bi_partno);
+ if (part)
+ maxsector = part_nr_sects_read(part);
+ else
+ maxsector = get_capacity(bio->bi_disk);
+ rcu_read_unlock();
+
+ if (!maxsector)
+ return;
+
+ /*
+ * If the *whole* IO is past the end of the device,
+ * let it through, and the IO layer will turn it into
+ * an EIO.
+ */
+ if (unlikely(bio->bi_iter.bi_sector >= maxsector))
+ return;
+
+ maxsector -= bio->bi_iter.bi_sector;
+ if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
+ return;
+
+ bio_truncate(bio, maxsector << 9);
+}
+
+/**
* bio_put - release a reference to a bio
* @bio: bio to release reference to
*
@@ -679,6 +723,12 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
}
EXPORT_SYMBOL(bio_clone_fast);
+const char *bio_devname(struct bio *bio, char *buf)
+{
+ return disk_name(bio->bi_disk, bio->bi_partno, buf);
+}
+EXPORT_SYMBOL(bio_devname);
+
static inline bool page_is_mergeable(const struct bio_vec *bv,
struct page *page, unsigned int len, unsigned int off,
bool *same_page)
@@ -730,7 +780,7 @@ static bool bio_try_merge_pc_page(struct request_queue *q, struct bio *bio,
*
* This should only be used by passthrough bios.
*/
-static int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
+int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
bool *same_page)
{
@@ -1019,12 +1069,21 @@ static void submit_bio_wait_endio(struct bio *bio)
int submit_bio_wait(struct bio *bio)
{
DECLARE_COMPLETION_ONSTACK_MAP(done, bio->bi_disk->lockdep_map);
+ unsigned long hang_check;
bio->bi_private = &done;
bio->bi_end_io = submit_bio_wait_endio;
bio->bi_opf |= REQ_SYNC;
submit_bio(bio);
- wait_for_completion_io(&done);
+
+ /* Prevent hang_check timer from firing at us during very long I/O */
+ hang_check = sysctl_hung_task_timeout_secs;
+ if (hang_check)
+ while (!wait_for_completion_io_timeout(&done,
+ hang_check * (HZ/2)))
+ ;
+ else
+ wait_for_completion_io(&done);
return blk_status_to_errno(bio->bi_status);
}
@@ -1135,90 +1194,6 @@ void bio_list_copy_data(struct bio *dst, struct bio *src)
}
EXPORT_SYMBOL(bio_list_copy_data);
-struct bio_map_data {
- int is_our_pages;
- struct iov_iter iter;
- struct iovec iov[];
-};
-
-static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
- gfp_t gfp_mask)
-{
- struct bio_map_data *bmd;
- if (data->nr_segs > UIO_MAXIOV)
- return NULL;
-
- bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
- if (!bmd)
- return NULL;
- memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
- bmd->iter = *data;
- bmd->iter.iov = bmd->iov;
- return bmd;
-}
-
-/**
- * bio_copy_from_iter - copy all pages from iov_iter to bio
- * @bio: The &struct bio which describes the I/O as destination
- * @iter: iov_iter as source
- *
- * Copy all pages from iov_iter to bio.
- * Returns 0 on success, or error on failure.
- */
-static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
-{
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
-
- bio_for_each_segment_all(bvec, bio, iter_all) {
- ssize_t ret;
-
- ret = copy_page_from_iter(bvec->bv_page,
- bvec->bv_offset,
- bvec->bv_len,
- iter);
-
- if (!iov_iter_count(iter))
- break;
-
- if (ret < bvec->bv_len)
- return -EFAULT;
- }
-
- return 0;
-}
-
-/**
- * bio_copy_to_iter - copy all pages from bio to iov_iter
- * @bio: The &struct bio which describes the I/O as source
- * @iter: iov_iter as destination
- *
- * Copy all pages from bio to iov_iter.
- * Returns 0 on success, or error on failure.
- */
-static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
-{
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
-
- bio_for_each_segment_all(bvec, bio, iter_all) {
- ssize_t ret;
-
- ret = copy_page_to_iter(bvec->bv_page,
- bvec->bv_offset,
- bvec->bv_len,
- &iter);
-
- if (!iov_iter_count(&iter))
- break;
-
- if (ret < bvec->bv_len)
- return -EFAULT;
- }
-
- return 0;
-}
-
void bio_free_pages(struct bio *bio)
{
struct bio_vec *bvec;
@@ -1229,430 +1204,6 @@ void bio_free_pages(struct bio *bio)
}
EXPORT_SYMBOL(bio_free_pages);
-/**
- * bio_uncopy_user - finish previously mapped bio
- * @bio: bio being terminated
- *
- * Free pages allocated from bio_copy_user_iov() and write back data
- * to user space in case of a read.
- */
-int bio_uncopy_user(struct bio *bio)
-{
- struct bio_map_data *bmd = bio->bi_private;
- int ret = 0;
-
- if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
- /*
- * if we're in a workqueue, the request is orphaned, so
- * don't copy into a random user address space, just free
- * and return -EINTR so user space doesn't expect any data.
- */
- if (!current->mm)
- ret = -EINTR;
- else if (bio_data_dir(bio) == READ)
- ret = bio_copy_to_iter(bio, bmd->iter);
- if (bmd->is_our_pages)
- bio_free_pages(bio);
- }
- kfree(bmd);
- bio_put(bio);
- return ret;
-}
-
-/**
- * bio_copy_user_iov - copy user data to bio
- * @q: destination block queue
- * @map_data: pointer to the rq_map_data holding pages (if necessary)
- * @iter: iovec iterator
- * @gfp_mask: memory allocation flags
- *
- * Prepares and returns a bio for indirect user io, bouncing data
- * to/from kernel pages as necessary. Must be paired with
- * call bio_uncopy_user() on io completion.
- */
-struct bio *bio_copy_user_iov(struct request_queue *q,
- struct rq_map_data *map_data,
- struct iov_iter *iter,
- gfp_t gfp_mask)
-{
- struct bio_map_data *bmd;
- struct page *page;
- struct bio *bio;
- int i = 0, ret;
- int nr_pages;
- unsigned int len = iter->count;
- unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
-
- bmd = bio_alloc_map_data(iter, gfp_mask);
- if (!bmd)
- return ERR_PTR(-ENOMEM);
-
- /*
- * We need to do a deep copy of the iov_iter including the iovecs.
- * The caller provided iov might point to an on-stack or otherwise
- * shortlived one.
- */
- bmd->is_our_pages = map_data ? 0 : 1;
-
- nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
- if (nr_pages > BIO_MAX_PAGES)
- nr_pages = BIO_MAX_PAGES;
-
- ret = -ENOMEM;
- bio = bio_kmalloc(gfp_mask, nr_pages);
- if (!bio)
- goto out_bmd;
-
- ret = 0;
-
- if (map_data) {
- nr_pages = 1 << map_data->page_order;
- i = map_data->offset / PAGE_SIZE;
- }
- while (len) {
- unsigned int bytes = PAGE_SIZE;
-
- bytes -= offset;
-
- if (bytes > len)
- bytes = len;
-
- if (map_data) {
- if (i == map_data->nr_entries * nr_pages) {
- ret = -ENOMEM;
- break;
- }
-
- page = map_data->pages[i / nr_pages];
- page += (i % nr_pages);
-
- i++;
- } else {
- page = alloc_page(q->bounce_gfp | gfp_mask);
- if (!page) {
- ret = -ENOMEM;
- break;
- }
- }
-
- if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
- if (!map_data)
- __free_page(page);
- break;
- }
-
- len -= bytes;
- offset = 0;
- }
-
- if (ret)
- goto cleanup;
-
- if (map_data)
- map_data->offset += bio->bi_iter.bi_size;
-
- /*
- * success
- */
- if ((iov_iter_rw(iter) == WRITE && (!map_data || !map_data->null_mapped)) ||
- (map_data && map_data->from_user)) {
- ret = bio_copy_from_iter(bio, iter);
- if (ret)
- goto cleanup;
- } else {
- if (bmd->is_our_pages)
- zero_fill_bio(bio);
- iov_iter_advance(iter, bio->bi_iter.bi_size);
- }
-
- bio->bi_private = bmd;
- if (map_data && map_data->null_mapped)
- bio_set_flag(bio, BIO_NULL_MAPPED);
- return bio;
-cleanup:
- if (!map_data)
- bio_free_pages(bio);
- bio_put(bio);
-out_bmd:
- kfree(bmd);
- return ERR_PTR(ret);
-}
-
-/**
- * bio_map_user_iov - map user iovec into bio
- * @q: the struct request_queue for the bio
- * @iter: iovec iterator
- * @gfp_mask: memory allocation flags
- *
- * Map the user space address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_map_user_iov(struct request_queue *q,
- struct iov_iter *iter,
- gfp_t gfp_mask)
-{
- int j;
- struct bio *bio;
- int ret;
-
- if (!iov_iter_count(iter))
- return ERR_PTR(-EINVAL);
-
- bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
- if (!bio)
- return ERR_PTR(-ENOMEM);
-
- while (iov_iter_count(iter)) {
- struct page **pages;
- ssize_t bytes;
- size_t offs, added = 0;
- int npages;
-
- bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
- if (unlikely(bytes <= 0)) {
- ret = bytes ? bytes : -EFAULT;
- goto out_unmap;
- }
-
- npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
-
- if (unlikely(offs & queue_dma_alignment(q))) {
- ret = -EINVAL;
- j = 0;
- } else {
- for (j = 0; j < npages; j++) {
- struct page *page = pages[j];
- unsigned int n = PAGE_SIZE - offs;
- bool same_page = false;
-
- if (n > bytes)
- n = bytes;
-
- if (!__bio_add_pc_page(q, bio, page, n, offs,
- &same_page)) {
- if (same_page)
- put_page(page);
- break;
- }
-
- added += n;
- bytes -= n;
- offs = 0;
- }
- iov_iter_advance(iter, added);
- }
- /*
- * release the pages we didn't map into the bio, if any
- */
- while (j < npages)
- put_page(pages[j++]);
- kvfree(pages);
- /* couldn't stuff something into bio? */
- if (bytes)
- break;
- }
-
- bio_set_flag(bio, BIO_USER_MAPPED);
-
- /*
- * subtle -- if bio_map_user_iov() ended up bouncing a bio,
- * it would normally disappear when its bi_end_io is run.
- * however, we need it for the unmap, so grab an extra
- * reference to it
- */
- bio_get(bio);
- return bio;
-
- out_unmap:
- bio_release_pages(bio, false);
- bio_put(bio);
- return ERR_PTR(ret);
-}
-
-/**
- * bio_unmap_user - unmap a bio
- * @bio: the bio being unmapped
- *
- * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
- * process context.
- *
- * bio_unmap_user() may sleep.
- */
-void bio_unmap_user(struct bio *bio)
-{
- bio_release_pages(bio, bio_data_dir(bio) == READ);
- bio_put(bio);
- bio_put(bio);
-}
-
-static void bio_invalidate_vmalloc_pages(struct bio *bio)
-{
-#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
- if (bio->bi_private && !op_is_write(bio_op(bio))) {
- unsigned long i, len = 0;
-
- for (i = 0; i < bio->bi_vcnt; i++)
- len += bio->bi_io_vec[i].bv_len;
- invalidate_kernel_vmap_range(bio->bi_private, len);
- }
-#endif
-}
-
-static void bio_map_kern_endio(struct bio *bio)
-{
- bio_invalidate_vmalloc_pages(bio);
- bio_put(bio);
-}
-
-/**
- * bio_map_kern - map kernel address into bio
- * @q: the struct request_queue for the bio
- * @data: pointer to buffer to map
- * @len: length in bytes
- * @gfp_mask: allocation flags for bio allocation
- *
- * Map the kernel address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
- gfp_t gfp_mask)
-{
- unsigned long kaddr = (unsigned long)data;
- unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long start = kaddr >> PAGE_SHIFT;
- const int nr_pages = end - start;
- bool is_vmalloc = is_vmalloc_addr(data);
- struct page *page;
- int offset, i;
- struct bio *bio;
-
- bio = bio_kmalloc(gfp_mask, nr_pages);
- if (!bio)
- return ERR_PTR(-ENOMEM);
-
- if (is_vmalloc) {
- flush_kernel_vmap_range(data, len);
- bio->bi_private = data;
- }
-
- offset = offset_in_page(kaddr);
- for (i = 0; i < nr_pages; i++) {
- unsigned int bytes = PAGE_SIZE - offset;
-
- if (len <= 0)
- break;
-
- if (bytes > len)
- bytes = len;
-
- if (!is_vmalloc)
- page = virt_to_page(data);
- else
- page = vmalloc_to_page(data);
- if (bio_add_pc_page(q, bio, page, bytes,
- offset) < bytes) {
- /* we don't support partial mappings */
- bio_put(bio);
- return ERR_PTR(-EINVAL);
- }
-
- data += bytes;
- len -= bytes;
- offset = 0;
- }
-
- bio->bi_end_io = bio_map_kern_endio;
- return bio;
-}
-
-static void bio_copy_kern_endio(struct bio *bio)
-{
- bio_free_pages(bio);
- bio_put(bio);
-}
-
-static void bio_copy_kern_endio_read(struct bio *bio)
-{
- char *p = bio->bi_private;
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
-
- bio_for_each_segment_all(bvec, bio, iter_all) {
- memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
- p += bvec->bv_len;
- }
-
- bio_copy_kern_endio(bio);
-}
-
-/**
- * bio_copy_kern - copy kernel address into bio
- * @q: the struct request_queue for the bio
- * @data: pointer to buffer to copy
- * @len: length in bytes
- * @gfp_mask: allocation flags for bio and page allocation
- * @reading: data direction is READ
- *
- * copy the kernel address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
- gfp_t gfp_mask, int reading)
-{
- unsigned long kaddr = (unsigned long)data;
- unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long start = kaddr >> PAGE_SHIFT;
- struct bio *bio;
- void *p = data;
- int nr_pages = 0;
-
- /*
- * Overflow, abort
- */
- if (end < start)
- return ERR_PTR(-EINVAL);
-
- nr_pages = end - start;
- bio = bio_kmalloc(gfp_mask, nr_pages);
- if (!bio)
- return ERR_PTR(-ENOMEM);
-
- while (len) {
- struct page *page;
- unsigned int bytes = PAGE_SIZE;
-
- if (bytes > len)
- bytes = len;
-
- page = alloc_page(q->bounce_gfp | gfp_mask);
- if (!page)
- goto cleanup;
-
- if (!reading)
- memcpy(page_address(page), p, bytes);
-
- if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
- break;
-
- len -= bytes;
- p += bytes;
- }
-
- if (reading) {
- bio->bi_end_io = bio_copy_kern_endio_read;
- bio->bi_private = data;
- } else {
- bio->bi_end_io = bio_copy_kern_endio;
- }
-
- return bio;
-
-cleanup:
- bio_free_pages(bio);
- bio_put(bio);
- return ERR_PTR(-ENOMEM);
-}
-
/*
* bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
* for performing direct-IO in BIOs.
@@ -1752,14 +1303,14 @@ void bio_check_pages_dirty(struct bio *bio)
schedule_work(&bio_dirty_work);
}
-void update_io_ticks(struct hd_struct *part, unsigned long now)
+void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
{
unsigned long stamp;
again:
stamp = READ_ONCE(part->stamp);
if (unlikely(stamp != now)) {
if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) {
- __part_stat_add(part, io_ticks, 1);
+ __part_stat_add(part, io_ticks, end ? now - stamp : 1);
}
}
if (part->partno) {
@@ -1775,7 +1326,7 @@ void generic_start_io_acct(struct request_queue *q, int op,
part_stat_lock();
- update_io_ticks(part, jiffies);
+ update_io_ticks(part, jiffies, false);
part_stat_inc(part, ios[sgrp]);
part_stat_add(part, sectors[sgrp], sectors);
part_inc_in_flight(q, part, op_is_write(op));
@@ -1793,9 +1344,8 @@ void generic_end_io_acct(struct request_queue *q, int req_op,
part_stat_lock();
- update_io_ticks(part, now);
+ update_io_ticks(part, now, true);
part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
- part_stat_add(part, time_in_queue, duration);
part_dec_in_flight(q, part, op_is_write(req_op));
part_stat_unlock();
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index a229b94..c15a260 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1010,7 +1010,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
* blkcg_init_queue - initialize blkcg part of request queue
* @q: request_queue to initialize
*
- * Called from blk_alloc_queue_node(). Responsible for initializing blkcg
+ * Called from __blk_alloc_queue(). Responsible for initializing blkcg
* part of new request_queue @q.
*
* RETURNS:
diff --git a/block/blk-core.c b/block/blk-core.c
index 089e890..7e4a1da 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -346,7 +346,6 @@ void blk_cleanup_queue(struct request_queue *q)
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
- blk_queue_flag_set(QUEUE_FLAG_DYING, q);
/*
* Drain all requests queued before DYING marking. Set DEAD flag to
@@ -389,12 +388,6 @@ void blk_cleanup_queue(struct request_queue *q)
}
EXPORT_SYMBOL(blk_cleanup_queue);
-struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
-{
- return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE);
-}
-EXPORT_SYMBOL(blk_alloc_queue);
-
/**
* blk_queue_enter() - try to increase q->q_usage_counter
* @q: request queue pointer
@@ -471,24 +464,19 @@ static void blk_timeout_work(struct work_struct *work)
{
}
-/**
- * blk_alloc_queue_node - allocate a request queue
- * @gfp_mask: memory allocation flags
- * @node_id: NUMA node to allocate memory from
- */
-struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
+struct request_queue *__blk_alloc_queue(int node_id)
{
struct request_queue *q;
int ret;
q = kmem_cache_alloc_node(blk_requestq_cachep,
- gfp_mask | __GFP_ZERO, node_id);
+ GFP_KERNEL | __GFP_ZERO, node_id);
if (!q)
return NULL;
q->last_merge = NULL;
- q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
+ q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
if (q->id < 0)
goto fail_q;
@@ -496,7 +484,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
if (ret)
goto fail_id;
- q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id);
+ q->backing_dev_info = bdi_alloc_node(GFP_KERNEL, node_id);
if (!q->backing_dev_info)
goto fail_split;
@@ -542,6 +530,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
if (blkcg_init_queue(q))
goto fail_ref;
+ blk_queue_dma_alignment(q, 511);
+ blk_set_default_limits(&q->limits);
+
return q;
fail_ref:
@@ -558,7 +549,22 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
kmem_cache_free(blk_requestq_cachep, q);
return NULL;
}
-EXPORT_SYMBOL(blk_alloc_queue_node);
+
+struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id)
+{
+ struct request_queue *q;
+
+ if (WARN_ON_ONCE(!make_request))
+ return NULL;
+
+ q = __blk_alloc_queue(node_id);
+ if (!q)
+ return NULL;
+ q->make_request_fn = make_request;
+ q->nr_requests = BLKDEV_MAX_RQ;
+ return q;
+}
+EXPORT_SYMBOL(blk_alloc_queue);
bool blk_get_queue(struct request_queue *q)
{
@@ -1121,10 +1127,9 @@ blk_qc_t direct_make_request(struct bio *bio)
if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) {
if (nowait && !blk_queue_dying(q))
- bio->bi_status = BLK_STS_AGAIN;
+ bio_wouldblock_error(bio);
else
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
+ bio_io_error(bio);
return BLK_QC_T_NONE;
}
@@ -1203,7 +1208,7 @@ EXPORT_SYMBOL(submit_bio);
/**
* blk_cloned_rq_check_limits - Helper function to check a cloned request
- * for new the queue limits
+ * for the new queue limits
* @q: the queue
* @rq: the request being checked
*
@@ -1339,10 +1344,9 @@ void blk_account_io_done(struct request *req, u64 now)
part_stat_lock();
part = req->part;
- update_io_ticks(part, jiffies);
+ update_io_ticks(part, jiffies, true);
part_stat_inc(part, ios[sgrp]);
part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
- part_stat_add(part, time_in_queue, nsecs_to_jiffies64(now - req->start_time_ns));
part_dec_in_flight(req->q, part, rq_data_dir(req));
hd_struct_put(part);
@@ -1381,7 +1385,7 @@ void blk_account_io_start(struct request *rq, bool new_io)
rq->part = part;
}
- update_io_ticks(part, jiffies);
+ update_io_ticks(part, jiffies, false);
part_stat_unlock();
}
@@ -1583,23 +1587,6 @@ void blk_rq_unprep_clone(struct request *rq)
}
EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
-/*
- * Copy attributes of the original request to the clone request.
- * The actual data parts (e.g. ->cmd, ->sense) are not copied.
- */
-static void __blk_rq_prep_clone(struct request *dst, struct request *src)
-{
- dst->__sector = blk_rq_pos(src);
- dst->__data_len = blk_rq_bytes(src);
- if (src->rq_flags & RQF_SPECIAL_PAYLOAD) {
- dst->rq_flags |= RQF_SPECIAL_PAYLOAD;
- dst->special_vec = src->special_vec;
- }
- dst->nr_phys_segments = src->nr_phys_segments;
- dst->ioprio = src->ioprio;
- dst->extra_len = src->extra_len;
-}
-
/**
* blk_rq_prep_clone - Helper function to setup clone request
* @rq: the request to be setup
@@ -1612,8 +1599,6 @@ static void __blk_rq_prep_clone(struct request *dst, struct request *src)
*
* Description:
* Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
- * The actual data parts of @rq_src (e.g. ->cmd, ->sense)
- * are not copied, and copying such parts is the caller's responsibility.
* Also, pages which the original bios are pointing to are not copied
* and the cloned bios just point same pages.
* So cloned bios must be completed before original bios, which means
@@ -1644,7 +1629,16 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
rq->bio = rq->biotail = bio;
}
- __blk_rq_prep_clone(rq, rq_src);
+ /* Copy attributes of the original request to the clone request. */
+ rq->__sector = blk_rq_pos(rq_src);
+ rq->__data_len = blk_rq_bytes(rq_src);
+ if (rq_src->rq_flags & RQF_SPECIAL_PAYLOAD) {
+ rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
+ rq->special_vec = rq_src->special_vec;
+ }
+ rq->nr_phys_segments = rq_src->nr_phys_segments;
+ rq->ioprio = rq_src->ioprio;
+ rq->extra_len = rq_src->extra_len;
return 0;
@@ -1663,12 +1657,6 @@ int kblockd_schedule_work(struct work_struct *work)
}
EXPORT_SYMBOL(kblockd_schedule_work);
-int kblockd_schedule_work_on(int cpu, struct work_struct *work)
-{
- return queue_work_on(cpu, kblockd_workqueue, work);
-}
-EXPORT_SYMBOL(kblockd_schedule_work_on);
-
int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork,
unsigned long delay)
{
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 3f977c5..c7f396e 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -160,9 +160,6 @@ static void blk_account_io_flush(struct request *rq)
*
* CONTEXT:
* spin_lock_irq(fq->mq_flush_lock)
- *
- * RETURNS:
- * %true if requests were added to the dispatch queue, %false otherwise.
*/
static void blk_flush_complete_seq(struct request *rq,
struct blk_flush_queue *fq,
@@ -412,7 +409,7 @@ void blk_insert_flush(struct request *rq)
*/
if ((policy & REQ_FSEQ_DATA) &&
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
- blk_mq_request_bypass_insert(rq, false);
+ blk_mq_request_bypass_insert(rq, false, false);
return;
}
@@ -457,15 +454,6 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
if (!q)
return -ENXIO;
- /*
- * some block devices may not have their queue correctly set up here
- * (e.g. loop device without a backing file) and so issuing a flush
- * here will panic. Ensure there is a request function before issuing
- * the flush.
- */
- if (!q->make_request_fn)
- return -ENXIO;
-
bio = bio_alloc(gfp_mask, 0);
bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
@@ -485,8 +473,8 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
}
EXPORT_SYMBOL(blkdev_issue_flush);
-struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
- int node, int cmd_size, gfp_t flags)
+struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
+ gfp_t flags)
{
struct blk_flush_queue *fq;
int rq_sz = sizeof(struct request);
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 5ed59ac..9df50fb 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -84,6 +84,7 @@ static void ioc_destroy_icq(struct io_cq *icq)
* making it impossible to determine icq_cache. Record it in @icq.
*/
icq->__rcu_icq_cache = et->icq_cache;
+ icq->flags |= ICQ_DESTROYED;
call_rcu(&icq->__rcu_head, icq_free_icq_rcu);
}
@@ -212,15 +213,21 @@ static void __ioc_clear_queue(struct list_head *icq_list)
{
unsigned long flags;
+ rcu_read_lock();
while (!list_empty(icq_list)) {
struct io_cq *icq = list_entry(icq_list->next,
struct io_cq, q_node);
struct io_context *ioc = icq->ioc;
spin_lock_irqsave(&ioc->lock, flags);
+ if (icq->flags & ICQ_DESTROYED) {
+ spin_unlock_irqrestore(&ioc->lock, flags);
+ continue;
+ }
ioc_destroy_icq(icq);
spin_unlock_irqrestore(&ioc->lock, flags);
}
+ rcu_read_unlock();
}
/**
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 27ca686..db35ee6 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -46,9 +46,6 @@
* If needed, tools/cgroup/iocost_coef_gen.py can be used to generate
* device-specific coefficients.
*
- * If needed, tools/cgroup/iocost_coef_gen.py can be used to generate
- * device-specific coefficients.
- *
* 2. Control Strategy
*
* The device virtual time (vtime) is used as the primary control metric.
@@ -1318,7 +1315,7 @@ static bool iocg_is_idle(struct ioc_gq *iocg)
return false;
/* is something in flight? */
- if (atomic64_read(&iocg->done_vtime) < atomic64_read(&iocg->vtime))
+ if (atomic64_read(&iocg->done_vtime) != atomic64_read(&iocg->vtime))
return false;
return true;
diff --git a/block/blk-map.c b/block/blk-map.c
index b079026..b72c361 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -11,6 +11,514 @@
#include "blk.h"
+struct bio_map_data {
+ int is_our_pages;
+ struct iov_iter iter;
+ struct iovec iov[];
+};
+
+static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
+ gfp_t gfp_mask)
+{
+ struct bio_map_data *bmd;
+
+ if (data->nr_segs > UIO_MAXIOV)
+ return NULL;
+
+ bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
+ if (!bmd)
+ return NULL;
+ memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
+ bmd->iter = *data;
+ bmd->iter.iov = bmd->iov;
+ return bmd;
+}
+
+/**
+ * bio_copy_from_iter - copy all pages from iov_iter to bio
+ * @bio: The &struct bio which describes the I/O as destination
+ * @iter: iov_iter as source
+ *
+ * Copy all pages from iov_iter to bio.
+ * Returns 0 on success, or error on failure.
+ */
+static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
+{
+ struct bio_vec *bvec;
+ struct bvec_iter_all iter_all;
+
+ bio_for_each_segment_all(bvec, bio, iter_all) {
+ ssize_t ret;
+
+ ret = copy_page_from_iter(bvec->bv_page,
+ bvec->bv_offset,
+ bvec->bv_len,
+ iter);
+
+ if (!iov_iter_count(iter))
+ break;
+
+ if (ret < bvec->bv_len)
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/**
+ * bio_copy_to_iter - copy all pages from bio to iov_iter
+ * @bio: The &struct bio which describes the I/O as source
+ * @iter: iov_iter as destination
+ *
+ * Copy all pages from bio to iov_iter.
+ * Returns 0 on success, or error on failure.
+ */
+static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
+{
+ struct bio_vec *bvec;
+ struct bvec_iter_all iter_all;
+
+ bio_for_each_segment_all(bvec, bio, iter_all) {
+ ssize_t ret;
+
+ ret = copy_page_to_iter(bvec->bv_page,
+ bvec->bv_offset,
+ bvec->bv_len,
+ &iter);
+
+ if (!iov_iter_count(&iter))
+ break;
+
+ if (ret < bvec->bv_len)
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/**
+ * bio_uncopy_user - finish previously mapped bio
+ * @bio: bio being terminated
+ *
+ * Free pages allocated from bio_copy_user_iov() and write back data
+ * to user space in case of a read.
+ */
+static int bio_uncopy_user(struct bio *bio)
+{
+ struct bio_map_data *bmd = bio->bi_private;
+ int ret = 0;
+
+ if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
+ /*
+ * if we're in a workqueue, the request is orphaned, so
+ * don't copy into a random user address space, just free
+ * and return -EINTR so user space doesn't expect any data.
+ */
+ if (!current->mm)
+ ret = -EINTR;
+ else if (bio_data_dir(bio) == READ)
+ ret = bio_copy_to_iter(bio, bmd->iter);
+ if (bmd->is_our_pages)
+ bio_free_pages(bio);
+ }
+ kfree(bmd);
+ bio_put(bio);
+ return ret;
+}
+
+/**
+ * bio_copy_user_iov - copy user data to bio
+ * @q: destination block queue
+ * @map_data: pointer to the rq_map_data holding pages (if necessary)
+ * @iter: iovec iterator
+ * @gfp_mask: memory allocation flags
+ *
+ * Prepares and returns a bio for indirect user io, bouncing data
+ * to/from kernel pages as necessary. Must be paired with
+ * call bio_uncopy_user() on io completion.
+ */
+static struct bio *bio_copy_user_iov(struct request_queue *q,
+ struct rq_map_data *map_data, struct iov_iter *iter,
+ gfp_t gfp_mask)
+{
+ struct bio_map_data *bmd;
+ struct page *page;
+ struct bio *bio;
+ int i = 0, ret;
+ int nr_pages;
+ unsigned int len = iter->count;
+ unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
+
+ bmd = bio_alloc_map_data(iter, gfp_mask);
+ if (!bmd)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * We need to do a deep copy of the iov_iter including the iovecs.
+ * The caller provided iov might point to an on-stack or otherwise
+ * shortlived one.
+ */
+ bmd->is_our_pages = map_data ? 0 : 1;
+
+ nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+ if (nr_pages > BIO_MAX_PAGES)
+ nr_pages = BIO_MAX_PAGES;
+
+ ret = -ENOMEM;
+ bio = bio_kmalloc(gfp_mask, nr_pages);
+ if (!bio)
+ goto out_bmd;
+
+ ret = 0;
+
+ if (map_data) {
+ nr_pages = 1 << map_data->page_order;
+ i = map_data->offset / PAGE_SIZE;
+ }
+ while (len) {
+ unsigned int bytes = PAGE_SIZE;
+
+ bytes -= offset;
+
+ if (bytes > len)
+ bytes = len;
+
+ if (map_data) {
+ if (i == map_data->nr_entries * nr_pages) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ page = map_data->pages[i / nr_pages];
+ page += (i % nr_pages);
+
+ i++;
+ } else {
+ page = alloc_page(q->bounce_gfp | gfp_mask);
+ if (!page) {
+ ret = -ENOMEM;
+ break;
+ }
+ }
+
+ if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
+ if (!map_data)
+ __free_page(page);
+ break;
+ }
+
+ len -= bytes;
+ offset = 0;
+ }
+
+ if (ret)
+ goto cleanup;
+
+ if (map_data)
+ map_data->offset += bio->bi_iter.bi_size;
+
+ /*
+ * success
+ */
+ if ((iov_iter_rw(iter) == WRITE &&
+ (!map_data || !map_data->null_mapped)) ||
+ (map_data && map_data->from_user)) {
+ ret = bio_copy_from_iter(bio, iter);
+ if (ret)
+ goto cleanup;
+ } else {
+ if (bmd->is_our_pages)
+ zero_fill_bio(bio);
+ iov_iter_advance(iter, bio->bi_iter.bi_size);
+ }
+
+ bio->bi_private = bmd;
+ if (map_data && map_data->null_mapped)
+ bio_set_flag(bio, BIO_NULL_MAPPED);
+ return bio;
+cleanup:
+ if (!map_data)
+ bio_free_pages(bio);
+ bio_put(bio);
+out_bmd:
+ kfree(bmd);
+ return ERR_PTR(ret);
+}
+
+/**
+ * bio_map_user_iov - map user iovec into bio
+ * @q: the struct request_queue for the bio
+ * @iter: iovec iterator
+ * @gfp_mask: memory allocation flags
+ *
+ * Map the user space address into a bio suitable for io to a block
+ * device. Returns an error pointer in case of error.
+ */
+static struct bio *bio_map_user_iov(struct request_queue *q,
+ struct iov_iter *iter, gfp_t gfp_mask)
+{
+ int j;
+ struct bio *bio;
+ int ret;
+
+ if (!iov_iter_count(iter))
+ return ERR_PTR(-EINVAL);
+
+ bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
+ if (!bio)
+ return ERR_PTR(-ENOMEM);
+
+ while (iov_iter_count(iter)) {
+ struct page **pages;
+ ssize_t bytes;
+ size_t offs, added = 0;
+ int npages;
+
+ bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
+ if (unlikely(bytes <= 0)) {
+ ret = bytes ? bytes : -EFAULT;
+ goto out_unmap;
+ }
+
+ npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
+
+ if (unlikely(offs & queue_dma_alignment(q))) {
+ ret = -EINVAL;
+ j = 0;
+ } else {
+ for (j = 0; j < npages; j++) {
+ struct page *page = pages[j];
+ unsigned int n = PAGE_SIZE - offs;
+ bool same_page = false;
+
+ if (n > bytes)
+ n = bytes;
+
+ if (!__bio_add_pc_page(q, bio, page, n, offs,
+ &same_page)) {
+ if (same_page)
+ put_page(page);
+ break;
+ }
+
+ added += n;
+ bytes -= n;
+ offs = 0;
+ }
+ iov_iter_advance(iter, added);
+ }
+ /*
+ * release the pages we didn't map into the bio, if any
+ */
+ while (j < npages)
+ put_page(pages[j++]);
+ kvfree(pages);
+ /* couldn't stuff something into bio? */
+ if (bytes)
+ break;
+ }
+
+ bio_set_flag(bio, BIO_USER_MAPPED);
+
+ /*
+ * subtle -- if bio_map_user_iov() ended up bouncing a bio,
+ * it would normally disappear when its bi_end_io is run.
+ * however, we need it for the unmap, so grab an extra
+ * reference to it
+ */
+ bio_get(bio);
+ return bio;
+
+ out_unmap:
+ bio_release_pages(bio, false);
+ bio_put(bio);
+ return ERR_PTR(ret);
+}
+
+/**
+ * bio_unmap_user - unmap a bio
+ * @bio: the bio being unmapped
+ *
+ * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
+ * process context.
+ *
+ * bio_unmap_user() may sleep.
+ */
+static void bio_unmap_user(struct bio *bio)
+{
+ bio_release_pages(bio, bio_data_dir(bio) == READ);
+ bio_put(bio);
+ bio_put(bio);
+}
+
+static void bio_invalidate_vmalloc_pages(struct bio *bio)
+{
+#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
+ if (bio->bi_private && !op_is_write(bio_op(bio))) {
+ unsigned long i, len = 0;
+
+ for (i = 0; i < bio->bi_vcnt; i++)
+ len += bio->bi_io_vec[i].bv_len;
+ invalidate_kernel_vmap_range(bio->bi_private, len);
+ }
+#endif
+}
+
+static void bio_map_kern_endio(struct bio *bio)
+{
+ bio_invalidate_vmalloc_pages(bio);
+ bio_put(bio);
+}
+
+/**
+ * bio_map_kern - map kernel address into bio
+ * @q: the struct request_queue for the bio
+ * @data: pointer to buffer to map
+ * @len: length in bytes
+ * @gfp_mask: allocation flags for bio allocation
+ *
+ * Map the kernel address into a bio suitable for io to a block
+ * device. Returns an error pointer in case of error.
+ */
+static struct bio *bio_map_kern(struct request_queue *q, void *data,
+ unsigned int len, gfp_t gfp_mask)
+{
+ unsigned long kaddr = (unsigned long)data;
+ unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ unsigned long start = kaddr >> PAGE_SHIFT;
+ const int nr_pages = end - start;
+ bool is_vmalloc = is_vmalloc_addr(data);
+ struct page *page;
+ int offset, i;
+ struct bio *bio;
+
+ bio = bio_kmalloc(gfp_mask, nr_pages);
+ if (!bio)
+ return ERR_PTR(-ENOMEM);
+
+ if (is_vmalloc) {
+ flush_kernel_vmap_range(data, len);
+ bio->bi_private = data;
+ }
+
+ offset = offset_in_page(kaddr);
+ for (i = 0; i < nr_pages; i++) {
+ unsigned int bytes = PAGE_SIZE - offset;
+
+ if (len <= 0)
+ break;
+
+ if (bytes > len)
+ bytes = len;
+
+ if (!is_vmalloc)
+ page = virt_to_page(data);
+ else
+ page = vmalloc_to_page(data);
+ if (bio_add_pc_page(q, bio, page, bytes,
+ offset) < bytes) {
+ /* we don't support partial mappings */
+ bio_put(bio);
+ return ERR_PTR(-EINVAL);
+ }
+
+ data += bytes;
+ len -= bytes;
+ offset = 0;
+ }
+
+ bio->bi_end_io = bio_map_kern_endio;
+ return bio;
+}
+
+static void bio_copy_kern_endio(struct bio *bio)
+{
+ bio_free_pages(bio);
+ bio_put(bio);
+}
+
+static void bio_copy_kern_endio_read(struct bio *bio)
+{
+ char *p = bio->bi_private;
+ struct bio_vec *bvec;
+ struct bvec_iter_all iter_all;
+
+ bio_for_each_segment_all(bvec, bio, iter_all) {
+ memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
+ p += bvec->bv_len;
+ }
+
+ bio_copy_kern_endio(bio);
+}
+
+/**
+ * bio_copy_kern - copy kernel address into bio
+ * @q: the struct request_queue for the bio
+ * @data: pointer to buffer to copy
+ * @len: length in bytes
+ * @gfp_mask: allocation flags for bio and page allocation
+ * @reading: data direction is READ
+ *
+ * copy the kernel address into a bio suitable for io to a block
+ * device. Returns an error pointer in case of error.
+ */
+static struct bio *bio_copy_kern(struct request_queue *q, void *data,
+ unsigned int len, gfp_t gfp_mask, int reading)
+{
+ unsigned long kaddr = (unsigned long)data;
+ unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ unsigned long start = kaddr >> PAGE_SHIFT;
+ struct bio *bio;
+ void *p = data;
+ int nr_pages = 0;
+
+ /*
+ * Overflow, abort
+ */
+ if (end < start)
+ return ERR_PTR(-EINVAL);
+
+ nr_pages = end - start;
+ bio = bio_kmalloc(gfp_mask, nr_pages);
+ if (!bio)
+ return ERR_PTR(-ENOMEM);
+
+ while (len) {
+ struct page *page;
+ unsigned int bytes = PAGE_SIZE;
+
+ if (bytes > len)
+ bytes = len;
+
+ page = alloc_page(q->bounce_gfp | gfp_mask);
+ if (!page)
+ goto cleanup;
+
+ if (!reading)
+ memcpy(page_address(page), p, bytes);
+
+ if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
+ break;
+
+ len -= bytes;
+ p += bytes;
+ }
+
+ if (reading) {
+ bio->bi_end_io = bio_copy_kern_endio_read;
+ bio->bi_private = data;
+ } else {
+ bio->bi_end_io = bio_copy_kern_endio;
+ }
+
+ return bio;
+
+cleanup:
+ bio_free_pages(bio);
+ bio_put(bio);
+ return ERR_PTR(-ENOMEM);
+}
+
/*
* Append a bio to a passthrough request. Only works if the bio can be merged
* into the request based on the driver constraints.
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index ca22afd..74cedea 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -361,13 +361,19 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
bool has_sched,
struct request *rq)
{
- /* dispatch flush rq directly */
- if (rq->rq_flags & RQF_FLUSH_SEQ) {
- spin_lock(&hctx->lock);
- list_add(&rq->queuelist, &hctx->dispatch);
- spin_unlock(&hctx->lock);
+ /*
+ * dispatch flush and passthrough rq directly
+ *
+ * passthrough request has to be added to hctx->dispatch directly.
+ * For some reason, device may be in one situation which can't
+ * handle FS request, so STS_RESOURCE is always returned and the
+ * FS request will be added to hctx->dispatch. However passthrough
+ * request may be required at that time for fixing the problem. If
+ * passthrough request is added to scheduler queue, there isn't any
+ * chance to dispatch it given we prioritize requests in hctx->dispatch.
+ */
+ if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq))
return true;
- }
if (has_sched)
rq->rq_flags |= RQF_SORTED;
@@ -391,8 +397,32 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
WARN_ON(e && (rq->tag != -1));
- if (blk_mq_sched_bypass_insert(hctx, !!e, rq))
+ if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) {
+ /*
+ * Firstly normal IO request is inserted to scheduler queue or
+ * sw queue, meantime we add flush request to dispatch queue(
+ * hctx->dispatch) directly and there is at most one in-flight
+ * flush request for each hw queue, so it doesn't matter to add
+ * flush request to tail or front of the dispatch queue.
+ *
+ * Secondly in case of NCQ, flush request belongs to non-NCQ
+ * command, and queueing it will fail when there is any
+ * in-flight normal IO request(NCQ command). When adding flush
+ * rq to the front of hctx->dispatch, it is easier to introduce
+ * extra time to flush rq's latency because of S_SCHED_RESTART
+ * compared with adding to the tail of dispatch queue, then
+ * chance of flush merge is increased, and less flush requests
+ * will be issued to controller. It is observed that ~10% time
+ * is saved in blktests block/004 on disk attached to AHCI/NCQ
+ * drive when adding flush rq to the front of hctx->dispatch.
+ *
+ * Simply queue flush rq to the front of hctx->dispatch so that
+ * intensive flush workloads can benefit in case of NCQ HW.
+ */
+ at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? true : at_head;
+ blk_mq_request_bypass_insert(rq, at_head, false);
goto run;
+ }
if (e && e->type->ops.insert_requests) {
LIST_HEAD(list);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index fbacde4..586c9d6 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -183,8 +183,8 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
return tag + tag_offset;
}
-void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
- struct blk_mq_ctx *ctx, unsigned int tag)
+void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
+ unsigned int tag)
{
if (!blk_mq_tag_is_reserved(tags, tag)) {
const int real_tag = tag - tags->nr_reserved_tags;
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 15bc74a..2b8321e 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -26,8 +26,8 @@ extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int r
extern void blk_mq_free_tags(struct blk_mq_tags *tags);
extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
-extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
- struct blk_mq_ctx *ctx, unsigned int tag);
+extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
+ unsigned int tag);
extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
struct blk_mq_tags **tags,
unsigned int depth, bool can_grow);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a12b176..f6291ce 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -477,9 +477,9 @@ static void __blk_mq_free_request(struct request *rq)
blk_pm_mark_last_busy(rq);
rq->mq_hctx = NULL;
if (rq->tag != -1)
- blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
+ blk_mq_put_tag(hctx->tags, ctx, rq->tag);
if (sched_tag != -1)
- blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag);
+ blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag);
blk_mq_sched_restart(hctx);
blk_queue_exit(q);
}
@@ -735,7 +735,7 @@ static void blk_mq_requeue_work(struct work_struct *work)
* merge.
*/
if (rq->rq_flags & RQF_DONTPREP)
- blk_mq_request_bypass_insert(rq, false);
+ blk_mq_request_bypass_insert(rq, false, false);
else
blk_mq_sched_insert_request(rq, true, false, false);
}
@@ -1178,6 +1178,23 @@ static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
#define BLK_MQ_RESOURCE_DELAY 3 /* ms units */
+static void blk_mq_handle_dev_resource(struct request *rq,
+ struct list_head *list)
+{
+ struct request *next =
+ list_first_entry_or_null(list, struct request, queuelist);
+
+ /*
+ * If an I/O scheduler has been configured and we got a driver tag for
+ * the next request already, free it.
+ */
+ if (next)
+ blk_mq_put_driver_tag(next);
+
+ list_add(&rq->queuelist, list);
+ __blk_mq_requeue_request(rq);
+}
+
/*
* Returns true if we did some work AND can potentially do more.
*/
@@ -1245,17 +1262,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
ret = q->mq_ops->queue_rq(hctx, &bd);
if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
- /*
- * If an I/O scheduler has been configured and we got a
- * driver tag for the next request already, free it
- * again.
- */
- if (!list_empty(list)) {
- nxt = list_first_entry(list, struct request, queuelist);
- blk_mq_put_driver_tag(nxt);
- }
- list_add(&rq->queuelist, list);
- __blk_mq_requeue_request(rq);
+ blk_mq_handle_dev_resource(rq, list);
break;
}
@@ -1286,7 +1293,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
q->mq_ops->commit_rqs(hctx);
spin_lock(&hctx->lock);
- list_splice_init(list, &hctx->dispatch);
+ list_splice_tail_init(list, &hctx->dispatch);
spin_unlock(&hctx->lock);
/*
@@ -1677,12 +1684,16 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
* Should only be used carefully, when the caller knows we want to
* bypass a potential IO scheduler on the target device.
*/
-void blk_mq_request_bypass_insert(struct request *rq, bool run_queue)
+void blk_mq_request_bypass_insert(struct request *rq, bool at_head,
+ bool run_queue)
{
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
spin_lock(&hctx->lock);
- list_add_tail(&rq->queuelist, &hctx->dispatch);
+ if (at_head)
+ list_add(&rq->queuelist, &hctx->dispatch);
+ else
+ list_add_tail(&rq->queuelist, &hctx->dispatch);
spin_unlock(&hctx->lock);
if (run_queue)
@@ -1849,7 +1860,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
if (bypass_insert)
return BLK_STS_RESOURCE;
- blk_mq_request_bypass_insert(rq, run_queue);
+ blk_mq_request_bypass_insert(rq, false, run_queue);
return BLK_STS_OK;
}
@@ -1876,7 +1887,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true);
if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
- blk_mq_request_bypass_insert(rq, true);
+ blk_mq_request_bypass_insert(rq, false, true);
else if (ret != BLK_STS_OK)
blk_mq_end_request(rq, ret);
@@ -1910,7 +1921,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
if (ret != BLK_STS_OK) {
if (ret == BLK_STS_RESOURCE ||
ret == BLK_STS_DEV_RESOURCE) {
- blk_mq_request_bypass_insert(rq,
+ blk_mq_request_bypass_insert(rq, false,
list_empty(list));
break;
}
@@ -2405,8 +2416,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
INIT_LIST_HEAD(&hctx->dispatch_wait.entry);
- hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size,
- gfp);
+ hctx->fq = blk_alloc_flush_queue(hctx->numa_node, set->cmd_size, gfp);
if (!hctx->fq)
goto free_bitmap;
@@ -2714,13 +2724,15 @@ void blk_mq_release(struct request_queue *q)
blk_mq_sysfs_deinit(q);
}
-struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
+struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
+ void *queuedata)
{
struct request_queue *uninit_q, *q;
- uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
+ uninit_q = __blk_alloc_queue(set->numa_node);
if (!uninit_q)
return ERR_PTR(-ENOMEM);
+ uninit_q->queuedata = queuedata;
/*
* Initialize the queue without an elevator. device_add_disk() will do
@@ -2732,6 +2744,12 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
return q;
}
+EXPORT_SYMBOL_GPL(blk_mq_init_queue_data);
+
+struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
+{
+ return blk_mq_init_queue_data(set, NULL);
+}
EXPORT_SYMBOL(blk_mq_init_queue);
/*
@@ -2820,7 +2838,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
memcpy(new_hctxs, hctxs, q->nr_hw_queues *
sizeof(*hctxs));
q->queue_hw_ctx = new_hctxs;
- q->nr_hw_queues = set->nr_hw_queues;
kfree(hctxs);
hctxs = new_hctxs;
}
@@ -2922,11 +2939,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
INIT_LIST_HEAD(&q->requeue_list);
spin_lock_init(&q->requeue_lock);
- blk_queue_make_request(q, blk_mq_make_request);
-
- /*
- * Do this after blk_queue_make_request() overrides it...
- */
+ q->make_request_fn = blk_mq_make_request;
q->nr_requests = set->queue_depth;
/*
@@ -3019,6 +3032,14 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
{
+ /*
+ * blk_mq_map_queues() and multiple .map_queues() implementations
+ * expect that set->map[HCTX_TYPE_DEFAULT].nr_queues is set to the
+ * number of hardware queues.
+ */
+ if (set->nr_maps == 1)
+ set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues;
+
if (set->ops->map_queues && !is_kdump_kernel()) {
int i;
@@ -3398,7 +3419,6 @@ static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb)
}
static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
- struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
unsigned long ret = 0;
@@ -3431,7 +3451,6 @@ static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
}
static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
- struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
struct hrtimer_sleeper hs;
@@ -3451,7 +3470,7 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
if (q->poll_nsec > 0)
nsecs = q->poll_nsec;
else
- nsecs = blk_mq_poll_nsecs(q, hctx, rq);
+ nsecs = blk_mq_poll_nsecs(q, rq);
if (!nsecs)
return false;
@@ -3506,7 +3525,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q,
return false;
}
- return blk_mq_poll_hybrid_sleep(q, hctx, rq);
+ return blk_mq_poll_hybrid_sleep(q, rq);
}
/**
diff --git a/block/blk-mq.h b/block/blk-mq.h
index eaaca8f..10bfdfb 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -66,7 +66,8 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
*/
void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head);
-void blk_mq_request_bypass_insert(struct request *rq, bool run_queue);
+void blk_mq_request_bypass_insert(struct request *rq, bool at_head,
+ bool run_queue);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list);
@@ -199,7 +200,7 @@ static inline bool blk_mq_get_dispatch_budget(struct blk_mq_hw_ctx *hctx)
static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
- blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag);
+ blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag);
rq->tag = -1;
if (rq->rq_flags & RQF_MQ_INFLIGHT) {
diff --git a/block/blk-settings.c b/block/blk-settings.c
index c8eda2e..14397b4 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -87,42 +87,6 @@ void blk_set_stacking_limits(struct queue_limits *lim)
EXPORT_SYMBOL(blk_set_stacking_limits);
/**
- * blk_queue_make_request - define an alternate make_request function for a device
- * @q: the request queue for the device to be affected
- * @mfn: the alternate make_request function
- *
- * Description:
- * The normal way for &struct bios to be passed to a device
- * driver is for them to be collected into requests on a request
- * queue, and then to allow the device driver to select requests
- * off that queue when it is ready. This works well for many block
- * devices. However some block devices (typically virtual devices
- * such as md or lvm) do not benefit from the processing on the
- * request queue, and are served best by having the requests passed
- * directly to them. This can be achieved by providing a function
- * to blk_queue_make_request().
- *
- * Caveat:
- * The driver that does this *must* be able to deal appropriately
- * with buffers in "highmemory". This can be accomplished by either calling
- * kmap_atomic() to get a temporary kernel mapping, or by calling
- * blk_queue_bounce() to create a buffer in normal memory.
- **/
-void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
-{
- /*
- * set defaults
- */
- q->nr_requests = BLKDEV_MAX_RQ;
-
- q->make_request_fn = mfn;
- blk_queue_dma_alignment(q, 511);
-
- blk_set_default_limits(&q->limits);
-}
-EXPORT_SYMBOL(blk_queue_make_request);
-
-/**
* blk_queue_bounce_limit - set bounce buffer limit for queue
* @q: the request queue for the device
* @max_addr: the maximum address the device can handle
@@ -664,6 +628,9 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
top, bottom);
}
+
+ t->backing_dev_info->io_pages =
+ t->limits.max_sectors >> (PAGE_SHIFT - 9);
}
EXPORT_SYMBOL(disk_stack_limits);
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 05741c6..f87956e 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -20,6 +20,38 @@
#include "blk.h"
+#define ZONE_COND_NAME(name) [BLK_ZONE_COND_##name] = #name
+static const char *const zone_cond_name[] = {
+ ZONE_COND_NAME(NOT_WP),
+ ZONE_COND_NAME(EMPTY),
+ ZONE_COND_NAME(IMP_OPEN),
+ ZONE_COND_NAME(EXP_OPEN),
+ ZONE_COND_NAME(CLOSED),
+ ZONE_COND_NAME(READONLY),
+ ZONE_COND_NAME(FULL),
+ ZONE_COND_NAME(OFFLINE),
+};
+#undef ZONE_COND_NAME
+
+/**
+ * blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX.
+ * @zone_cond: BLK_ZONE_COND_XXX.
+ *
+ * Description: Centralize block layer function to convert BLK_ZONE_COND_XXX
+ * into string format. Useful in the debugging and tracing zone conditions. For
+ * invalid BLK_ZONE_COND_XXX it returns string "UNKNOWN".
+ */
+const char *blk_zone_cond_str(enum blk_zone_cond zone_cond)
+{
+ static const char *zone_cond_str = "UNKNOWN";
+
+ if (zone_cond < ARRAY_SIZE(zone_cond_name) && zone_cond_name[zone_cond])
+ zone_cond_str = zone_cond_name[zone_cond];
+
+ return zone_cond_str;
+}
+EXPORT_SYMBOL_GPL(blk_zone_cond_str);
+
static inline sector_t blk_zone_start(struct request_queue *q,
sector_t sector)
{
@@ -173,7 +205,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
if (!op_is_zone_mgmt(op))
return -EOPNOTSUPP;
- if (!nr_sectors || end_sector > capacity)
+ if (end_sector <= sector || end_sector > capacity)
/* Out of range */
return -EINVAL;
diff --git a/block/blk.h b/block/blk.h
index 0b88843..0a94ec6 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -4,6 +4,7 @@
#include <linux/idr.h>
#include <linux/blk-mq.h>
+#include <linux/part_stat.h>
#include <xen/xen.h>
#include "blk-mq.h"
#include "blk-mq-sched.h"
@@ -55,8 +56,8 @@ is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx)
return hctx->fq->flush_rq == req;
}
-struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
- int node, int cmd_size, gfp_t flags);
+struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
+ gfp_t flags);
void blk_free_flush_queue(struct blk_flush_queue *q);
void blk_freeze_queue(struct request_queue *q);
@@ -149,6 +150,9 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
bip_next->bip_vec[0].bv_offset);
}
+
+void blk_integrity_add(struct gendisk *);
+void blk_integrity_del(struct gendisk *);
#else /* CONFIG_BLK_DEV_INTEGRITY */
static inline bool integrity_req_gap_back_merge(struct request *req,
struct bio *next)
@@ -171,6 +175,12 @@ static inline bool bio_integrity_endio(struct bio *bio)
static inline void bio_integrity_free(struct bio *bio)
{
}
+static inline void blk_integrity_add(struct gendisk *disk)
+{
+}
+static inline void blk_integrity_del(struct gendisk *disk)
+{
+}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
unsigned long blk_rq_timeout(unsigned long timeout);
@@ -214,6 +224,17 @@ static inline void elevator_exit(struct request_queue *q,
struct hd_struct *__disk_get_part(struct gendisk *disk, int partno);
+ssize_t part_size_show(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ssize_t part_stat_show(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ssize_t part_fail_show(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ssize_t part_fail_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count);
+
#ifdef CONFIG_FAIL_IO_TIMEOUT
int blk_should_fake_timeout(struct request_queue *);
ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
@@ -354,4 +375,117 @@ void blk_queue_free_zone_bitmaps(struct request_queue *q);
static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {}
#endif
+void part_dec_in_flight(struct request_queue *q, struct hd_struct *part,
+ int rw);
+void part_inc_in_flight(struct request_queue *q, struct hd_struct *part,
+ int rw);
+void update_io_ticks(struct hd_struct *part, unsigned long now, bool end);
+struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector);
+
+int blk_alloc_devt(struct hd_struct *part, dev_t *devt);
+void blk_free_devt(dev_t devt);
+void blk_invalidate_devt(dev_t devt);
+char *disk_name(struct gendisk *hd, int partno, char *buf);
+#define ADDPART_FLAG_NONE 0
+#define ADDPART_FLAG_RAID 1
+#define ADDPART_FLAG_WHOLEDISK 2
+struct hd_struct *__must_check add_partition(struct gendisk *disk, int partno,
+ sector_t start, sector_t len, int flags,
+ struct partition_meta_info *info);
+void __delete_partition(struct percpu_ref *ref);
+void delete_partition(struct gendisk *disk, int partno);
+int disk_expand_part_tbl(struct gendisk *disk, int target);
+
+static inline int hd_ref_init(struct hd_struct *part)
+{
+ if (percpu_ref_init(&part->ref, __delete_partition, 0,
+ GFP_KERNEL))
+ return -ENOMEM;
+ return 0;
+}
+
+static inline void hd_struct_get(struct hd_struct *part)
+{
+ percpu_ref_get(&part->ref);
+}
+
+static inline int hd_struct_try_get(struct hd_struct *part)
+{
+ return percpu_ref_tryget_live(&part->ref);
+}
+
+static inline void hd_struct_put(struct hd_struct *part)
+{
+ percpu_ref_put(&part->ref);
+}
+
+static inline void hd_struct_kill(struct hd_struct *part)
+{
+ percpu_ref_kill(&part->ref);
+}
+
+static inline void hd_free_part(struct hd_struct *part)
+{
+ free_part_stats(part);
+ kfree(part->info);
+ percpu_ref_exit(&part->ref);
+}
+
+/*
+ * Any access of part->nr_sects which is not protected by partition
+ * bd_mutex or gendisk bdev bd_mutex, should be done using this
+ * accessor function.
+ *
+ * Code written along the lines of i_size_read() and i_size_write().
+ * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption
+ * on.
+ */
+static inline sector_t part_nr_sects_read(struct hd_struct *part)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ sector_t nr_sects;
+ unsigned seq;
+ do {
+ seq = read_seqcount_begin(&part->nr_sects_seq);
+ nr_sects = part->nr_sects;
+ } while (read_seqcount_retry(&part->nr_sects_seq, seq));
+ return nr_sects;
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ sector_t nr_sects;
+
+ preempt_disable();
+ nr_sects = part->nr_sects;
+ preempt_enable();
+ return nr_sects;
+#else
+ return part->nr_sects;
+#endif
+}
+
+/*
+ * Should be called with mutex lock held (typically bd_mutex) of partition
+ * to provide mutual exlusion among writers otherwise seqcount might be
+ * left in wrong state leaving the readers spinning infinitely.
+ */
+static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ write_seqcount_begin(&part->nr_sects_seq);
+ part->nr_sects = size;
+ write_seqcount_end(&part->nr_sects_seq);
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ preempt_disable();
+ part->nr_sects = size;
+ preempt_enable();
+#else
+ part->nr_sects = size;
+#endif
+}
+
+struct request_queue *__blk_alloc_queue(int node_id);
+
+int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
+ struct page *page, unsigned int len, unsigned int offset,
+ bool *same_page);
+
#endif /* BLK_INTERNAL_H */
diff --git a/block/genhd.c b/block/genhd.c
index ff626897..06b642b 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -4,6 +4,7 @@
*/
#include <linux/module.h>
+#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/kdev_t.h>
@@ -26,7 +27,7 @@
#include "blk.h"
static DEFINE_MUTEX(block_class_lock);
-struct kobject *block_depr;
+static struct kobject *block_depr;
/* for extended dynamic devt allocation, currently only one major is used */
#define NR_EXT_DEVT (1 << MINORBITS)
@@ -46,6 +47,78 @@ static void disk_add_events(struct gendisk *disk);
static void disk_del_events(struct gendisk *disk);
static void disk_release_events(struct gendisk *disk);
+/*
+ * Set disk capacity and notify if the size is not currently
+ * zero and will not be set to zero
+ */
+void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size,
+ bool revalidate)
+{
+ sector_t capacity = get_capacity(disk);
+
+ set_capacity(disk, size);
+
+ if (revalidate)
+ revalidate_disk(disk);
+
+ if (capacity != size && capacity != 0 && size != 0) {
+ char *envp[] = { "RESIZE=1", NULL };
+
+ kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
+ }
+}
+
+EXPORT_SYMBOL_GPL(set_capacity_revalidate_and_notify);
+
+/*
+ * Format the device name of the indicated disk into the supplied buffer and
+ * return a pointer to that same buffer for convenience.
+ */
+char *disk_name(struct gendisk *hd, int partno, char *buf)
+{
+ if (!partno)
+ snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
+ else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
+ snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
+ else
+ snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
+
+ return buf;
+}
+
+const char *bdevname(struct block_device *bdev, char *buf)
+{
+ return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
+}
+EXPORT_SYMBOL(bdevname);
+
+#ifdef CONFIG_SMP
+static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
+{
+ int cpu;
+
+ memset(stat, 0, sizeof(struct disk_stats));
+ for_each_possible_cpu(cpu) {
+ struct disk_stats *ptr = per_cpu_ptr(part->dkstats, cpu);
+ int group;
+
+ for (group = 0; group < NR_STAT_GROUPS; group++) {
+ stat->nsecs[group] += ptr->nsecs[group];
+ stat->sectors[group] += ptr->sectors[group];
+ stat->ios[group] += ptr->ios[group];
+ stat->merges[group] += ptr->merges[group];
+ }
+
+ stat->io_ticks += ptr->io_ticks;
+ }
+}
+#else /* CONFIG_SMP */
+static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
+{
+ memcpy(stat, &part->dkstats, sizeof(struct disk_stats));
+}
+#endif /* CONFIG_SMP */
+
void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
{
if (queue_is_mq(q))
@@ -66,7 +139,8 @@ void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
part_stat_local_dec(&part_to_disk(part)->part0, in_flight[rw]);
}
-unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part)
+static unsigned int part_in_flight(struct request_queue *q,
+ struct hd_struct *part)
{
int cpu;
unsigned int inflight;
@@ -86,8 +160,8 @@ unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part)
return inflight;
}
-void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
- unsigned int inflight[2])
+static void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
+ unsigned int inflight[2])
{
int cpu;
@@ -143,7 +217,6 @@ struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
return part;
}
-EXPORT_SYMBOL_GPL(disk_get_part);
/**
* disk_part_iter_init - initialize partition iterator
@@ -299,7 +372,42 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
}
return &disk->part0;
}
-EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
+
+/**
+ * disk_has_partitions
+ * @disk: gendisk of interest
+ *
+ * Walk through the partition table and check if valid partition exists.
+ *
+ * CONTEXT:
+ * Don't care.
+ *
+ * RETURNS:
+ * True if the gendisk has at least one valid non-zero size partition.
+ * Otherwise false.
+ */
+bool disk_has_partitions(struct gendisk *disk)
+{
+ struct disk_part_tbl *ptbl;
+ int i;
+ bool ret = false;
+
+ rcu_read_lock();
+ ptbl = rcu_dereference(disk->part_tbl);
+
+ /* Iterate partitions skipping the whole device at index 0 */
+ for (i = 1; i < ptbl->len; i++) {
+ if (rcu_dereference(ptbl->part[i])) {
+ ret = true;
+ break;
+ }
+ }
+
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(disk_has_partitions);
/*
* Can be deleted altogether. Later.
@@ -908,7 +1016,6 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
}
return disk;
}
-EXPORT_SYMBOL(get_gendisk);
/**
* bdget_disk - do bdget() by gendisk and partition number
@@ -1154,6 +1261,67 @@ static ssize_t disk_ro_show(struct device *dev,
return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
}
+ssize_t part_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hd_struct *p = dev_to_part(dev);
+
+ return sprintf(buf, "%llu\n",
+ (unsigned long long)part_nr_sects_read(p));
+}
+
+ssize_t part_stat_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hd_struct *p = dev_to_part(dev);
+ struct request_queue *q = part_to_disk(p)->queue;
+ struct disk_stats stat;
+ unsigned int inflight;
+
+ part_stat_read_all(p, &stat);
+ inflight = part_in_flight(q, p);
+
+ return sprintf(buf,
+ "%8lu %8lu %8llu %8u "
+ "%8lu %8lu %8llu %8u "
+ "%8u %8u %8u "
+ "%8lu %8lu %8llu %8u "
+ "%8lu %8u"
+ "\n",
+ stat.ios[STAT_READ],
+ stat.merges[STAT_READ],
+ (unsigned long long)stat.sectors[STAT_READ],
+ (unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC),
+ stat.ios[STAT_WRITE],
+ stat.merges[STAT_WRITE],
+ (unsigned long long)stat.sectors[STAT_WRITE],
+ (unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
+ inflight,
+ jiffies_to_msecs(stat.io_ticks),
+ (unsigned int)div_u64(stat.nsecs[STAT_READ] +
+ stat.nsecs[STAT_WRITE] +
+ stat.nsecs[STAT_DISCARD] +
+ stat.nsecs[STAT_FLUSH],
+ NSEC_PER_MSEC),
+ stat.ios[STAT_DISCARD],
+ stat.merges[STAT_DISCARD],
+ (unsigned long long)stat.sectors[STAT_DISCARD],
+ (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
+ stat.ios[STAT_FLUSH],
+ (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
+}
+
+ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hd_struct *p = dev_to_part(dev);
+ struct request_queue *q = part_to_disk(p)->queue;
+ unsigned int inflight[2];
+
+ part_in_flight_rw(q, p, inflight);
+ return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
+}
+
static ssize_t disk_capability_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -1192,10 +1360,33 @@ static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
+
#ifdef CONFIG_FAIL_MAKE_REQUEST
+ssize_t part_fail_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hd_struct *p = dev_to_part(dev);
+
+ return sprintf(buf, "%d\n", p->make_it_fail);
+}
+
+ssize_t part_fail_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct hd_struct *p = dev_to_part(dev);
+ int i;
+
+ if (count > 0 && sscanf(buf, "%d", &i) > 0)
+ p->make_it_fail = (i == 0) ? 0 : 1;
+
+ return count;
+}
+
static struct device_attribute dev_attr_fail =
__ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
-#endif
+#endif /* CONFIG_FAIL_MAKE_REQUEST */
+
#ifdef CONFIG_FAIL_IO_TIMEOUT
static struct device_attribute dev_attr_fail_timeout =
__ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
@@ -1342,8 +1533,8 @@ static char *block_devnode(struct device *dev, umode_t *mode,
{
struct gendisk *disk = dev_to_disk(dev);
- if (disk->devnode)
- return disk->devnode(disk, mode);
+ if (disk->fops->devnode)
+ return disk->fops->devnode(disk, mode);
return NULL;
}
@@ -1369,6 +1560,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
struct hd_struct *hd;
char buf[BDEVNAME_SIZE];
unsigned int inflight;
+ struct disk_stats stat;
/*
if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
@@ -1380,7 +1572,9 @@ static int diskstats_show(struct seq_file *seqf, void *v)
disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
while ((hd = disk_part_iter_next(&piter))) {
+ part_stat_read_all(hd, &stat);
inflight = part_in_flight(gp->queue, hd);
+
seq_printf(seqf, "%4d %7d %s "
"%lu %lu %lu %u "
"%lu %lu %lu %u "
@@ -1390,23 +1584,31 @@ static int diskstats_show(struct seq_file *seqf, void *v)
"\n",
MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
disk_name(gp, hd->partno, buf),
- part_stat_read(hd, ios[STAT_READ]),
- part_stat_read(hd, merges[STAT_READ]),
- part_stat_read(hd, sectors[STAT_READ]),
- (unsigned int)part_stat_read_msecs(hd, STAT_READ),
- part_stat_read(hd, ios[STAT_WRITE]),
- part_stat_read(hd, merges[STAT_WRITE]),
- part_stat_read(hd, sectors[STAT_WRITE]),
- (unsigned int)part_stat_read_msecs(hd, STAT_WRITE),
+ stat.ios[STAT_READ],
+ stat.merges[STAT_READ],
+ stat.sectors[STAT_READ],
+ (unsigned int)div_u64(stat.nsecs[STAT_READ],
+ NSEC_PER_MSEC),
+ stat.ios[STAT_WRITE],
+ stat.merges[STAT_WRITE],
+ stat.sectors[STAT_WRITE],
+ (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
+ NSEC_PER_MSEC),
inflight,
- jiffies_to_msecs(part_stat_read(hd, io_ticks)),
- jiffies_to_msecs(part_stat_read(hd, time_in_queue)),
- part_stat_read(hd, ios[STAT_DISCARD]),
- part_stat_read(hd, merges[STAT_DISCARD]),
- part_stat_read(hd, sectors[STAT_DISCARD]),
- (unsigned int)part_stat_read_msecs(hd, STAT_DISCARD),
- part_stat_read(hd, ios[STAT_FLUSH]),
- (unsigned int)part_stat_read_msecs(hd, STAT_FLUSH)
+ jiffies_to_msecs(stat.io_ticks),
+ (unsigned int)div_u64(stat.nsecs[STAT_READ] +
+ stat.nsecs[STAT_WRITE] +
+ stat.nsecs[STAT_DISCARD] +
+ stat.nsecs[STAT_FLUSH],
+ NSEC_PER_MSEC),
+ stat.ios[STAT_DISCARD],
+ stat.merges[STAT_DISCARD],
+ stat.sectors[STAT_DISCARD],
+ (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
+ NSEC_PER_MSEC),
+ stat.ios[STAT_FLUSH],
+ (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
+ NSEC_PER_MSEC)
);
}
disk_part_iter_exit(&piter);
@@ -1463,7 +1665,6 @@ dev_t blk_lookup_devt(const char *name, int partno)
class_dev_iter_exit(&iter);
return devt;
}
-EXPORT_SYMBOL(blk_lookup_devt);
struct gendisk *__alloc_disk_node(int minors, int node_id)
{
diff --git a/block/ioctl.c b/block/ioctl.c
index 127194b..6e827de 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -11,6 +11,7 @@
#include <linux/blktrace_api.h>
#include <linux/pr.h>
#include <linux/uaccess.h>
+#include "blk.h"
static int blkpg_do_ioctl(struct block_device *bdev,
struct blkpg_partition __user *upart, int op)
diff --git a/block/opal_proto.h b/block/opal_proto.h
index 325cbba2..b486b3e 100644
--- a/block/opal_proto.h
+++ b/block/opal_proto.h
@@ -36,6 +36,7 @@ enum opal_response_token {
#define DTAERROR_NO_METHOD_STATUS 0x89
#define GENERIC_HOST_SESSION_NUM 0x41
+#define FIRST_TPER_SESSION_NUM 4096
#define TPER_SYNC_SUPPORTED 0x01
#define MBR_ENABLED_MASK 0x10
diff --git a/block/partitions/Makefile b/block/partitions/Makefile
index 2f276b6..a7f05cd 100644
--- a/block/partitions/Makefile
+++ b/block/partitions/Makefile
@@ -3,8 +3,7 @@
# Makefile for the linux kernel.
#
-obj-$(CONFIG_BLOCK) := check.o
-
+obj-$(CONFIG_BLOCK) += core.o
obj-$(CONFIG_ACORN_PARTITION) += acorn.o
obj-$(CONFIG_AMIGA_PARTITION) += amiga.o
obj-$(CONFIG_ATARI_PARTITION) += atari.o
diff --git a/block/partitions/acorn.c b/block/partitions/acorn.c
index 7587700..c64c57b9 100644
--- a/block/partitions/acorn.c
+++ b/block/partitions/acorn.c
@@ -11,7 +11,6 @@
#include <linux/adfs_fs.h>
#include "check.h"
-#include "acorn.h"
/*
* Partition types. (Oh for reusability)
diff --git a/block/partitions/acorn.h b/block/partitions/acorn.h
deleted file mode 100644
index 67b0660..0000000
--- a/block/partitions/acorn.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/fs/partitions/acorn.h
- *
- * Copyright (C) 1996-2001 Russell King.
- *
- * I _hate_ this partitioning mess - why can't we have one defined
- * format, and everyone stick to it?
- */
-
-int adfspart_check_CUMANA(struct parsed_partitions *state);
-int adfspart_check_ADFS(struct parsed_partitions *state);
-int adfspart_check_ICS(struct parsed_partitions *state);
-int adfspart_check_POWERTEC(struct parsed_partitions *state);
-int adfspart_check_EESOX(struct parsed_partitions *state);
diff --git a/block/partitions/aix.c b/block/partitions/aix.c
index 903f3ed..c7b4fd1 100644
--- a/block/partitions/aix.c
+++ b/block/partitions/aix.c
@@ -6,7 +6,6 @@
*/
#include "check.h"
-#include "aix.h"
struct lvm_rec {
char lvm_id[4]; /* "_LVM" */
diff --git a/block/partitions/aix.h b/block/partitions/aix.h
deleted file mode 100644
index b4449f0..0000000
--- a/block/partitions/aix.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-extern int aix_partition(struct parsed_partitions *state);
diff --git a/block/partitions/amiga.c b/block/partitions/amiga.c
index 5609366..9526491 100644
--- a/block/partitions/amiga.c
+++ b/block/partitions/amiga.c
@@ -14,7 +14,6 @@
#include <linux/affs_hardblocks.h>
#include "check.h"
-#include "amiga.h"
static __inline__ u32
checksum_block(__be32 *m, int size)
@@ -42,9 +41,8 @@ int amiga_partition(struct parsed_partitions *state)
goto rdb_done;
data = read_part_sector(state, blk, §);
if (!data) {
- if (warn_no_part)
- pr_err("Dev %s: unable to read RDB block %d\n",
- bdevname(state->bdev, b), blk);
+ pr_err("Dev %s: unable to read RDB block %d\n",
+ bdevname(state->bdev, b), blk);
res = -1;
goto rdb_done;
}
@@ -85,9 +83,8 @@ int amiga_partition(struct parsed_partitions *state)
blk *= blksize; /* Read in terms partition table understands */
data = read_part_sector(state, blk, §);
if (!data) {
- if (warn_no_part)
- pr_err("Dev %s: unable to read partition block %d\n",
- bdevname(state->bdev, b), blk);
+ pr_err("Dev %s: unable to read partition block %d\n",
+ bdevname(state->bdev, b), blk);
res = -1;
goto rdb_done;
}
diff --git a/block/partitions/amiga.h b/block/partitions/amiga.h
deleted file mode 100644
index 7e63f4d..0000000
--- a/block/partitions/amiga.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/amiga.h
- */
-
-int amiga_partition(struct parsed_partitions *state);
-
diff --git a/block/partitions/atari.h b/block/partitions/atari.h
index 01c2b94..6782024 100644
--- a/block/partitions/atari.h
+++ b/block/partitions/atari.h
@@ -34,4 +34,3 @@ struct rootsector
u16 checksum; /* checksum for bootable disks */
} __packed;
-int atari_partition(struct parsed_partitions *state);
diff --git a/block/partitions/check.c b/block/partitions/check.c
deleted file mode 100644
index ffe408f..0000000
--- a/block/partitions/check.c
+++ /dev/null
@@ -1,198 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * fs/partitions/check.c
- *
- * Code extracted from drivers/block/genhd.c
- * Copyright (C) 1991-1998 Linus Torvalds
- * Re-organised Feb 1998 Russell King
- *
- * We now have independent partition support from the
- * block drivers, which allows all the partition code to
- * be grouped in one location, and it to be mostly self
- * contained.
- *
- * Added needed MAJORS for new pairs, {hdi,hdj}, {hdk,hdl}
- */
-
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/ctype.h>
-#include <linux/genhd.h>
-
-#include "check.h"
-
-#include "acorn.h"
-#include "amiga.h"
-#include "atari.h"
-#include "ldm.h"
-#include "mac.h"
-#include "msdos.h"
-#include "osf.h"
-#include "sgi.h"
-#include "sun.h"
-#include "ibm.h"
-#include "ultrix.h"
-#include "efi.h"
-#include "karma.h"
-#include "sysv68.h"
-#include "cmdline.h"
-
-int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/
-
-static int (*check_part[])(struct parsed_partitions *) = {
- /*
- * Probe partition formats with tables at disk address 0
- * that also have an ADFS boot block at 0xdc0.
- */
-#ifdef CONFIG_ACORN_PARTITION_ICS
- adfspart_check_ICS,
-#endif
-#ifdef CONFIG_ACORN_PARTITION_POWERTEC
- adfspart_check_POWERTEC,
-#endif
-#ifdef CONFIG_ACORN_PARTITION_EESOX
- adfspart_check_EESOX,
-#endif
-
- /*
- * Now move on to formats that only have partition info at
- * disk address 0xdc0. Since these may also have stale
- * PC/BIOS partition tables, they need to come before
- * the msdos entry.
- */
-#ifdef CONFIG_ACORN_PARTITION_CUMANA
- adfspart_check_CUMANA,
-#endif
-#ifdef CONFIG_ACORN_PARTITION_ADFS
- adfspart_check_ADFS,
-#endif
-
-#ifdef CONFIG_CMDLINE_PARTITION
- cmdline_partition,
-#endif
-#ifdef CONFIG_EFI_PARTITION
- efi_partition, /* this must come before msdos */
-#endif
-#ifdef CONFIG_SGI_PARTITION
- sgi_partition,
-#endif
-#ifdef CONFIG_LDM_PARTITION
- ldm_partition, /* this must come before msdos */
-#endif
-#ifdef CONFIG_MSDOS_PARTITION
- msdos_partition,
-#endif
-#ifdef CONFIG_OSF_PARTITION
- osf_partition,
-#endif
-#ifdef CONFIG_SUN_PARTITION
- sun_partition,
-#endif
-#ifdef CONFIG_AMIGA_PARTITION
- amiga_partition,
-#endif
-#ifdef CONFIG_ATARI_PARTITION
- atari_partition,
-#endif
-#ifdef CONFIG_MAC_PARTITION
- mac_partition,
-#endif
-#ifdef CONFIG_ULTRIX_PARTITION
- ultrix_partition,
-#endif
-#ifdef CONFIG_IBM_PARTITION
- ibm_partition,
-#endif
-#ifdef CONFIG_KARMA_PARTITION
- karma_partition,
-#endif
-#ifdef CONFIG_SYSV68_PARTITION
- sysv68_partition,
-#endif
- NULL
-};
-
-static struct parsed_partitions *allocate_partitions(struct gendisk *hd)
-{
- struct parsed_partitions *state;
- int nr;
-
- state = kzalloc(sizeof(*state), GFP_KERNEL);
- if (!state)
- return NULL;
-
- nr = disk_max_parts(hd);
- state->parts = vzalloc(array_size(nr, sizeof(state->parts[0])));
- if (!state->parts) {
- kfree(state);
- return NULL;
- }
-
- state->limit = nr;
-
- return state;
-}
-
-void free_partitions(struct parsed_partitions *state)
-{
- vfree(state->parts);
- kfree(state);
-}
-
-struct parsed_partitions *
-check_partition(struct gendisk *hd, struct block_device *bdev)
-{
- struct parsed_partitions *state;
- int i, res, err;
-
- state = allocate_partitions(hd);
- if (!state)
- return NULL;
- state->pp_buf = (char *)__get_free_page(GFP_KERNEL);
- if (!state->pp_buf) {
- free_partitions(state);
- return NULL;
- }
- state->pp_buf[0] = '\0';
-
- state->bdev = bdev;
- disk_name(hd, 0, state->name);
- snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name);
- if (isdigit(state->name[strlen(state->name)-1]))
- sprintf(state->name, "p");
-
- i = res = err = 0;
- while (!res && check_part[i]) {
- memset(state->parts, 0, state->limit * sizeof(state->parts[0]));
- res = check_part[i++](state);
- if (res < 0) {
- /* We have hit an I/O error which we don't report now.
- * But record it, and let the others do their job.
- */
- err = res;
- res = 0;
- }
-
- }
- if (res > 0) {
- printk(KERN_INFO "%s", state->pp_buf);
-
- free_page((unsigned long)state->pp_buf);
- return state;
- }
- if (state->access_beyond_eod)
- err = -ENOSPC;
- if (err)
- /* The partition is unrecognized. So report I/O errors if there were any */
- res = err;
- if (res) {
- if (warn_no_part)
- strlcat(state->pp_buf,
- " unable to read partition table\n", PAGE_SIZE);
- printk(KERN_INFO "%s", state->pp_buf);
- }
-
- free_page((unsigned long)state->pp_buf);
- free_partitions(state);
- return ERR_PTR(res);
-}
diff --git a/block/partitions/check.h b/block/partitions/check.h
index 6042f76..c577e9e 100644
--- a/block/partitions/check.h
+++ b/block/partitions/check.h
@@ -2,6 +2,7 @@
#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
+#include "../blk.h"
/*
* add_gd_partition adds a partitions details to the devices partition
@@ -23,19 +24,14 @@ struct parsed_partitions {
char *pp_buf;
};
-void free_partitions(struct parsed_partitions *state);
+typedef struct {
+ struct page *v;
+} Sector;
-struct parsed_partitions *
-check_partition(struct gendisk *, struct block_device *);
-
-static inline void *read_part_sector(struct parsed_partitions *state,
- sector_t n, Sector *p)
+void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p);
+static inline void put_dev_sector(Sector p)
{
- if (n >= get_capacity(state->bdev->bd_disk)) {
- state->access_beyond_eod = true;
- return NULL;
- }
- return read_dev_sector(state->bdev, n, p);
+ put_page(p.v);
}
static inline void
@@ -51,5 +47,24 @@ put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size)
}
}
-extern int warn_no_part;
-
+/* detection routines go here in alphabetical order: */
+int adfspart_check_ADFS(struct parsed_partitions *state);
+int adfspart_check_CUMANA(struct parsed_partitions *state);
+int adfspart_check_EESOX(struct parsed_partitions *state);
+int adfspart_check_ICS(struct parsed_partitions *state);
+int adfspart_check_POWERTEC(struct parsed_partitions *state);
+int aix_partition(struct parsed_partitions *state);
+int amiga_partition(struct parsed_partitions *state);
+int atari_partition(struct parsed_partitions *state);
+int cmdline_partition(struct parsed_partitions *state);
+int efi_partition(struct parsed_partitions *state);
+int ibm_partition(struct parsed_partitions *);
+int karma_partition(struct parsed_partitions *state);
+int ldm_partition(struct parsed_partitions *state);
+int mac_partition(struct parsed_partitions *state);
+int msdos_partition(struct parsed_partitions *state);
+int osf_partition(struct parsed_partitions *state);
+int sgi_partition(struct parsed_partitions *state);
+int sun_partition(struct parsed_partitions *state);
+int sysv68_partition(struct parsed_partitions *state);
+int ultrix_partition(struct parsed_partitions *state);
diff --git a/block/partitions/cmdline.c b/block/partitions/cmdline.c
index f1edd54..8f545c3 100644
--- a/block/partitions/cmdline.c
+++ b/block/partitions/cmdline.c
@@ -18,7 +18,6 @@
#include <linux/cmdline-parser.h>
#include "check.h"
-#include "cmdline.h"
static char *cmdline;
static struct cmdline_parts *bdev_parts;
diff --git a/block/partitions/cmdline.h b/block/partitions/cmdline.h
deleted file mode 100644
index e64a316..0000000
--- a/block/partitions/cmdline.h
+++ /dev/null
@@ -1,3 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-int cmdline_partition(struct parsed_partitions *state);
diff --git a/block/partition-generic.c b/block/partitions/core.c
similarity index 72%
rename from block/partition-generic.c
rename to block/partitions/core.c
index 564fae7..b79c451 100644
--- a/block/partition-generic.c
+++ b/block/partitions/core.c
@@ -1,75 +1,176 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Code extracted from drivers/block/genhd.c
- * Copyright (C) 1991-1998 Linus Torvalds
- * Re-organised Feb 1998 Russell King
- *
- * We now have independent partition support from the
- * block drivers, which allows all the partition code to
- * be grouped in one location, and it to be mostly self
- * contained.
+ * Copyright (C) 1991-1998 Linus Torvalds
+ * Re-organised Feb 1998 Russell King
*/
-
-#include <linux/init.h>
-#include <linux/module.h>
#include <linux/fs.h>
#include <linux/slab.h>
-#include <linux/kmod.h>
#include <linux/ctype.h>
#include <linux/genhd.h>
+#include <linux/vmalloc.h>
#include <linux/blktrace_api.h>
+#include <linux/raid/detect.h>
+#include "check.h"
-#include "partitions/check.h"
-
-#ifdef CONFIG_BLK_DEV_MD
-extern void md_autodetect_dev(dev_t dev);
+static int (*check_part[])(struct parsed_partitions *) = {
+ /*
+ * Probe partition formats with tables at disk address 0
+ * that also have an ADFS boot block at 0xdc0.
+ */
+#ifdef CONFIG_ACORN_PARTITION_ICS
+ adfspart_check_ICS,
#endif
-
-/*
- * disk_name() is used by partition check code and the genhd driver.
- * It formats the devicename of the indicated disk into
- * the supplied buffer (of size at least 32), and returns
- * a pointer to that same buffer (for convenience).
- */
+#ifdef CONFIG_ACORN_PARTITION_POWERTEC
+ adfspart_check_POWERTEC,
+#endif
+#ifdef CONFIG_ACORN_PARTITION_EESOX
+ adfspart_check_EESOX,
+#endif
-char *disk_name(struct gendisk *hd, int partno, char *buf)
+ /*
+ * Now move on to formats that only have partition info at
+ * disk address 0xdc0. Since these may also have stale
+ * PC/BIOS partition tables, they need to come before
+ * the msdos entry.
+ */
+#ifdef CONFIG_ACORN_PARTITION_CUMANA
+ adfspart_check_CUMANA,
+#endif
+#ifdef CONFIG_ACORN_PARTITION_ADFS
+ adfspart_check_ADFS,
+#endif
+
+#ifdef CONFIG_CMDLINE_PARTITION
+ cmdline_partition,
+#endif
+#ifdef CONFIG_EFI_PARTITION
+ efi_partition, /* this must come before msdos */
+#endif
+#ifdef CONFIG_SGI_PARTITION
+ sgi_partition,
+#endif
+#ifdef CONFIG_LDM_PARTITION
+ ldm_partition, /* this must come before msdos */
+#endif
+#ifdef CONFIG_MSDOS_PARTITION
+ msdos_partition,
+#endif
+#ifdef CONFIG_OSF_PARTITION
+ osf_partition,
+#endif
+#ifdef CONFIG_SUN_PARTITION
+ sun_partition,
+#endif
+#ifdef CONFIG_AMIGA_PARTITION
+ amiga_partition,
+#endif
+#ifdef CONFIG_ATARI_PARTITION
+ atari_partition,
+#endif
+#ifdef CONFIG_MAC_PARTITION
+ mac_partition,
+#endif
+#ifdef CONFIG_ULTRIX_PARTITION
+ ultrix_partition,
+#endif
+#ifdef CONFIG_IBM_PARTITION
+ ibm_partition,
+#endif
+#ifdef CONFIG_KARMA_PARTITION
+ karma_partition,
+#endif
+#ifdef CONFIG_SYSV68_PARTITION
+ sysv68_partition,
+#endif
+ NULL
+};
+
+static struct parsed_partitions *allocate_partitions(struct gendisk *hd)
{
- if (!partno)
- snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
- else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
- snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
- else
- snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
+ struct parsed_partitions *state;
+ int nr;
- return buf;
+ state = kzalloc(sizeof(*state), GFP_KERNEL);
+ if (!state)
+ return NULL;
+
+ nr = disk_max_parts(hd);
+ state->parts = vzalloc(array_size(nr, sizeof(state->parts[0])));
+ if (!state->parts) {
+ kfree(state);
+ return NULL;
+ }
+
+ state->limit = nr;
+
+ return state;
}
-const char *bdevname(struct block_device *bdev, char *buf)
+static void free_partitions(struct parsed_partitions *state)
{
- return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
+ vfree(state->parts);
+ kfree(state);
}
-EXPORT_SYMBOL(bdevname);
-
-const char *bio_devname(struct bio *bio, char *buf)
+static struct parsed_partitions *check_partition(struct gendisk *hd,
+ struct block_device *bdev)
{
- return disk_name(bio->bi_disk, bio->bi_partno, buf);
-}
-EXPORT_SYMBOL(bio_devname);
+ struct parsed_partitions *state;
+ int i, res, err;
-/*
- * There's very little reason to use this, you should really
- * have a struct block_device just about everywhere and use
- * bdevname() instead.
- */
-const char *__bdevname(dev_t dev, char *buffer)
-{
- scnprintf(buffer, BDEVNAME_SIZE, "unknown-block(%u,%u)",
- MAJOR(dev), MINOR(dev));
- return buffer;
-}
+ state = allocate_partitions(hd);
+ if (!state)
+ return NULL;
+ state->pp_buf = (char *)__get_free_page(GFP_KERNEL);
+ if (!state->pp_buf) {
+ free_partitions(state);
+ return NULL;
+ }
+ state->pp_buf[0] = '\0';
-EXPORT_SYMBOL(__bdevname);
+ state->bdev = bdev;
+ disk_name(hd, 0, state->name);
+ snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name);
+ if (isdigit(state->name[strlen(state->name)-1]))
+ sprintf(state->name, "p");
+
+ i = res = err = 0;
+ while (!res && check_part[i]) {
+ memset(state->parts, 0, state->limit * sizeof(state->parts[0]));
+ res = check_part[i++](state);
+ if (res < 0) {
+ /*
+ * We have hit an I/O error which we don't report now.
+ * But record it, and let the others do their job.
+ */
+ err = res;
+ res = 0;
+ }
+
+ }
+ if (res > 0) {
+ printk(KERN_INFO "%s", state->pp_buf);
+
+ free_page((unsigned long)state->pp_buf);
+ return state;
+ }
+ if (state->access_beyond_eod)
+ err = -ENOSPC;
+ /*
+ * The partition is unrecognized. So report I/O errors if there were any
+ */
+ if (err)
+ res = err;
+ if (res) {
+ strlcat(state->pp_buf,
+ " unable to read partition table\n", PAGE_SIZE);
+ printk(KERN_INFO "%s", state->pp_buf);
+ }
+
+ free_page((unsigned long)state->pp_buf);
+ free_partitions(state);
+ return ERR_PTR(res);
+}
static ssize_t part_partition_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -87,13 +188,6 @@ static ssize_t part_start_show(struct device *dev,
return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect);
}
-ssize_t part_size_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hd_struct *p = dev_to_part(dev);
- return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p));
-}
-
static ssize_t part_ro_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -115,74 +209,6 @@ static ssize_t part_discard_alignment_show(struct device *dev,
return sprintf(buf, "%u\n", p->discard_alignment);
}
-ssize_t part_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hd_struct *p = dev_to_part(dev);
- struct request_queue *q = part_to_disk(p)->queue;
- unsigned int inflight;
-
- inflight = part_in_flight(q, p);
- return sprintf(buf,
- "%8lu %8lu %8llu %8u "
- "%8lu %8lu %8llu %8u "
- "%8u %8u %8u "
- "%8lu %8lu %8llu %8u "
- "%8lu %8u"
- "\n",
- part_stat_read(p, ios[STAT_READ]),
- part_stat_read(p, merges[STAT_READ]),
- (unsigned long long)part_stat_read(p, sectors[STAT_READ]),
- (unsigned int)part_stat_read_msecs(p, STAT_READ),
- part_stat_read(p, ios[STAT_WRITE]),
- part_stat_read(p, merges[STAT_WRITE]),
- (unsigned long long)part_stat_read(p, sectors[STAT_WRITE]),
- (unsigned int)part_stat_read_msecs(p, STAT_WRITE),
- inflight,
- jiffies_to_msecs(part_stat_read(p, io_ticks)),
- jiffies_to_msecs(part_stat_read(p, time_in_queue)),
- part_stat_read(p, ios[STAT_DISCARD]),
- part_stat_read(p, merges[STAT_DISCARD]),
- (unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
- (unsigned int)part_stat_read_msecs(p, STAT_DISCARD),
- part_stat_read(p, ios[STAT_FLUSH]),
- (unsigned int)part_stat_read_msecs(p, STAT_FLUSH));
-}
-
-ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct hd_struct *p = dev_to_part(dev);
- struct request_queue *q = part_to_disk(p)->queue;
- unsigned int inflight[2];
-
- part_in_flight_rw(q, p, inflight);
- return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
-}
-
-#ifdef CONFIG_FAIL_MAKE_REQUEST
-ssize_t part_fail_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hd_struct *p = dev_to_part(dev);
-
- return sprintf(buf, "%d\n", p->make_it_fail);
-}
-
-ssize_t part_fail_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct hd_struct *p = dev_to_part(dev);
- int i;
-
- if (count > 0 && sscanf(buf, "%d", &i) > 0)
- p->make_it_fail = (i == 0) ? 0 : 1;
-
- return count;
-}
-#endif
-
static DEVICE_ATTR(partition, 0444, part_partition_show, NULL);
static DEVICE_ATTR(start, 0444, part_start_show, NULL);
static DEVICE_ATTR(size, 0444, part_size_show, NULL);
@@ -369,7 +395,9 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
p->policy = get_disk_ro(disk);
if (info) {
- struct partition_meta_info *pinfo = alloc_part_info(disk);
+ struct partition_meta_info *pinfo;
+
+ pinfo = kzalloc_node(sizeof(*pinfo), GFP_KERNEL, disk->node_id);
if (!pinfo) {
err = -ENOMEM;
goto out_free_stats;
@@ -428,7 +456,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
return p;
out_free_info:
- free_part_info(p);
+ kfree(p->info);
out_free_stats:
free_part_stats(p);
out_free:
@@ -525,10 +553,10 @@ static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
return true;
}
-#ifdef CONFIG_BLK_DEV_MD
- if (state->parts[p].flags & ADDPART_FLAG_RAID)
+ if (IS_BUILTIN(CONFIG_BLK_DEV_MD) &&
+ (state->parts[p].flags & ADDPART_FLAG_RAID))
md_autodetect_dev(part_to_dev(part)->devt);
-#endif
+
return true;
}
@@ -602,22 +630,29 @@ int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
return ret;
}
-unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
+void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p)
{
- struct address_space *mapping = bdev->bd_inode->i_mapping;
+ struct address_space *mapping = state->bdev->bd_inode->i_mapping;
struct page *page;
- page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)), NULL);
- if (!IS_ERR(page)) {
- if (PageError(page))
- goto fail;
- p->v = page;
- return (unsigned char *)page_address(page) + ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << 9);
-fail:
- put_page(page);
+ if (n >= get_capacity(state->bdev->bd_disk)) {
+ state->access_beyond_eod = true;
+ return NULL;
}
+
+ page = read_mapping_page(mapping,
+ (pgoff_t)(n >> (PAGE_SHIFT - 9)), NULL);
+ if (IS_ERR(page))
+ goto out;
+ if (PageError(page))
+ goto out_put_page;
+
+ p->v = page;
+ return (unsigned char *)page_address(page) +
+ ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << SECTOR_SHIFT);
+out_put_page:
+ put_page(page);
+out:
p->v = NULL;
return NULL;
}
-
-EXPORT_SYMBOL(read_dev_sector);
diff --git a/block/partitions/efi.h b/block/partitions/efi.h
index 3e85761..907bac5 100644
--- a/block/partitions/efi.h
+++ b/block/partitions/efi.h
@@ -113,7 +113,4 @@ typedef struct _legacy_mbr {
__le16 signature;
} __packed legacy_mbr;
-/* Functions */
-extern int efi_partition(struct parsed_partitions *state);
-
#endif
diff --git a/block/partitions/ibm.c b/block/partitions/ibm.c
index a5d480f..073faa6 100644
--- a/block/partitions/ibm.c
+++ b/block/partitions/ibm.c
@@ -15,7 +15,6 @@
#include <asm/vtoc.h>
#include "check.h"
-#include "ibm.h"
union label_t {
diff --git a/block/partitions/ibm.h b/block/partitions/ibm.h
deleted file mode 100644
index 8bf13fe..0000000
--- a/block/partitions/ibm.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-int ibm_partition(struct parsed_partitions *);
diff --git a/block/partitions/karma.c b/block/partitions/karma.c
index 59812d7..4d93512 100644
--- a/block/partitions/karma.c
+++ b/block/partitions/karma.c
@@ -8,9 +8,10 @@
*/
#include "check.h"
-#include "karma.h"
#include <linux/compiler.h>
+#define KARMA_LABEL_MAGIC 0xAB56
+
int karma_partition(struct parsed_partitions *state)
{
int i;
diff --git a/block/partitions/karma.h b/block/partitions/karma.h
deleted file mode 100644
index 48e074d..0000000
--- a/block/partitions/karma.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/karma.h
- */
-
-#define KARMA_LABEL_MAGIC 0xAB56
-
-int karma_partition(struct parsed_partitions *state);
-
diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c
index a2d97ee..6fdfcb4 100644
--- a/block/partitions/ldm.c
+++ b/block/partitions/ldm.c
@@ -14,10 +14,10 @@
#include <linux/stringify.h>
#include <linux/kernel.h>
#include <linux/uuid.h>
+#include <linux/msdos_partition.h>
#include "ldm.h"
#include "check.h"
-#include "msdos.h"
/*
* ldm_debug/info/error/crit - Output an error message
@@ -493,7 +493,7 @@ static bool ldm_validate_partition_table(struct parsed_partitions *state)
{
Sector sect;
u8 *data;
- struct partition *p;
+ struct msdos_partition *p;
int i;
bool result = false;
@@ -508,7 +508,7 @@ static bool ldm_validate_partition_table(struct parsed_partitions *state)
if (*(__le16*) (data + 0x01FE) != cpu_to_le16 (MSDOS_LABEL_MAGIC))
goto out;
- p = (struct partition*)(data + 0x01BE);
+ p = (struct msdos_partition *)(data + 0x01BE);
for (i = 0; i < 4; i++, p++)
if (SYS_IND (p) == LDM_PARTITION) {
result = true;
diff --git a/block/partitions/ldm.h b/block/partitions/ldm.h
index 1ca63e9..841580a 100644
--- a/block/partitions/ldm.h
+++ b/block/partitions/ldm.h
@@ -193,7 +193,5 @@ struct ldmdb { /* Cache of the database */
struct list_head v_part;
};
-int ldm_partition(struct parsed_partitions *state);
-
#endif /* _FS_PT_LDM_H_ */
diff --git a/block/partitions/mac.h b/block/partitions/mac.h
index 453ed29..0e41c9d 100644
--- a/block/partitions/mac.h
+++ b/block/partitions/mac.h
@@ -42,4 +42,3 @@ struct mac_driver_desc {
/* ... more stuff */
};
-int mac_partition(struct parsed_partitions *state);
diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c
index 82c44f7..8f2fcc0 100644
--- a/block/partitions/msdos.c
+++ b/block/partitions/msdos.c
@@ -18,13 +18,18 @@
* Check partition table on IDE disks for common CHS translations
*
* Re-organised Feb 1998 Russell King
+ *
+ * BSD disklabel support by Yossi Gottlieb <yogo@math.tau.ac.il>
+ * updated by Marc Espie <Marc.Espie@openbsd.org>
+ *
+ * Unixware slices support by Andrzej Krzysztofowicz <ankry@mif.pg.gda.pl>
+ * and Krzysztof G. Baranowski <kgb@knm.org.pl>
*/
#include <linux/msdos_fs.h>
+#include <linux/msdos_partition.h>
#include "check.h"
-#include "msdos.h"
#include "efi.h"
-#include "aix.h"
/*
* Many architectures don't like unaligned accesses, while
@@ -35,17 +40,17 @@
#define SYS_IND(p) get_unaligned(&p->sys_ind)
-static inline sector_t nr_sects(struct partition *p)
+static inline sector_t nr_sects(struct msdos_partition *p)
{
return (sector_t)get_unaligned_le32(&p->nr_sects);
}
-static inline sector_t start_sect(struct partition *p)
+static inline sector_t start_sect(struct msdos_partition *p)
{
return (sector_t)get_unaligned_le32(&p->start_sect);
}
-static inline int is_extended_partition(struct partition *p)
+static inline int is_extended_partition(struct msdos_partition *p)
{
return (SYS_IND(p) == DOS_EXTENDED_PARTITION ||
SYS_IND(p) == WIN98_EXTENDED_PARTITION ||
@@ -68,7 +73,7 @@ msdos_magic_present(unsigned char *p)
#define AIX_LABEL_MAGIC4 0xC1
static int aix_magic_present(struct parsed_partitions *state, unsigned char *p)
{
- struct partition *pt = (struct partition *) (p + 0x1be);
+ struct msdos_partition *pt = (struct msdos_partition *) (p + 0x1be);
Sector sect;
unsigned char *d;
int slot, ret = 0;
@@ -78,13 +83,19 @@ static int aix_magic_present(struct parsed_partitions *state, unsigned char *p)
p[2] == AIX_LABEL_MAGIC3 &&
p[3] == AIX_LABEL_MAGIC4))
return 0;
- /* Assume the partition table is valid if Linux partitions exists */
+
+ /*
+ * Assume the partition table is valid if Linux partitions exists.
+ * Note that old Solaris/x86 partitions use the same indicator as
+ * Linux swap partitions, so we consider that a Linux partition as
+ * well.
+ */
for (slot = 1; slot <= 4; slot++, pt++) {
- if (pt->sys_ind == LINUX_SWAP_PARTITION ||
- pt->sys_ind == LINUX_RAID_PARTITION ||
- pt->sys_ind == LINUX_DATA_PARTITION ||
- pt->sys_ind == LINUX_LVM_PARTITION ||
- is_extended_partition(pt))
+ if (pt->sys_ind == SOLARIS_X86_PARTITION ||
+ pt->sys_ind == LINUX_RAID_PARTITION ||
+ pt->sys_ind == LINUX_DATA_PARTITION ||
+ pt->sys_ind == LINUX_LVM_PARTITION ||
+ is_extended_partition(pt))
return 0;
}
d = read_part_sector(state, 7, §);
@@ -122,7 +133,7 @@ static void parse_extended(struct parsed_partitions *state,
sector_t first_sector, sector_t first_size,
u32 disksig)
{
- struct partition *p;
+ struct msdos_partition *p;
Sector sect;
unsigned char *data;
sector_t this_sector, this_size;
@@ -146,7 +157,7 @@ static void parse_extended(struct parsed_partitions *state,
if (!msdos_magic_present(data + 510))
goto done;
- p = (struct partition *) (data + 0x1be);
+ p = (struct msdos_partition *) (data + 0x1be);
/*
* Usually, the first entry is the real data partition,
@@ -210,6 +221,30 @@ static void parse_extended(struct parsed_partitions *state,
put_dev_sector(sect);
}
+#define SOLARIS_X86_NUMSLICE 16
+#define SOLARIS_X86_VTOC_SANE (0x600DDEEEUL)
+
+struct solaris_x86_slice {
+ __le16 s_tag; /* ID tag of partition */
+ __le16 s_flag; /* permission flags */
+ __le32 s_start; /* start sector no of partition */
+ __le32 s_size; /* # of blocks in partition */
+};
+
+struct solaris_x86_vtoc {
+ unsigned int v_bootinfo[3]; /* info needed by mboot */
+ __le32 v_sanity; /* to verify vtoc sanity */
+ __le32 v_version; /* layout version */
+ char v_volume[8]; /* volume name */
+ __le16 v_sectorsz; /* sector size in bytes */
+ __le16 v_nparts; /* number of partitions */
+ unsigned int v_reserved[10]; /* free space */
+ struct solaris_x86_slice
+ v_slice[SOLARIS_X86_NUMSLICE]; /* slice headers */
+ unsigned int timestamp[SOLARIS_X86_NUMSLICE]; /* timestamp */
+ char v_asciilabel[128]; /* for compatibility */
+};
+
/* james@bpgc.com: Solaris has a nasty indicator: 0x82 which also
indicates linux swap. Be careful before believing this is Solaris. */
@@ -265,6 +300,54 @@ static void parse_solaris_x86(struct parsed_partitions *state,
#endif
}
+/* check against BSD src/sys/sys/disklabel.h for consistency */
+#define BSD_DISKMAGIC (0x82564557UL) /* The disk magic number */
+#define BSD_MAXPARTITIONS 16
+#define OPENBSD_MAXPARTITIONS 16
+#define BSD_FS_UNUSED 0 /* disklabel unused partition entry ID */
+struct bsd_disklabel {
+ __le32 d_magic; /* the magic number */
+ __s16 d_type; /* drive type */
+ __s16 d_subtype; /* controller/d_type specific */
+ char d_typename[16]; /* type name, e.g. "eagle" */
+ char d_packname[16]; /* pack identifier */
+ __u32 d_secsize; /* # of bytes per sector */
+ __u32 d_nsectors; /* # of data sectors per track */
+ __u32 d_ntracks; /* # of tracks per cylinder */
+ __u32 d_ncylinders; /* # of data cylinders per unit */
+ __u32 d_secpercyl; /* # of data sectors per cylinder */
+ __u32 d_secperunit; /* # of data sectors per unit */
+ __u16 d_sparespertrack; /* # of spare sectors per track */
+ __u16 d_sparespercyl; /* # of spare sectors per cylinder */
+ __u32 d_acylinders; /* # of alt. cylinders per unit */
+ __u16 d_rpm; /* rotational speed */
+ __u16 d_interleave; /* hardware sector interleave */
+ __u16 d_trackskew; /* sector 0 skew, per track */
+ __u16 d_cylskew; /* sector 0 skew, per cylinder */
+ __u32 d_headswitch; /* head switch time, usec */
+ __u32 d_trkseek; /* track-to-track seek, usec */
+ __u32 d_flags; /* generic flags */
+#define NDDATA 5
+ __u32 d_drivedata[NDDATA]; /* drive-type specific information */
+#define NSPARE 5
+ __u32 d_spare[NSPARE]; /* reserved for future use */
+ __le32 d_magic2; /* the magic number (again) */
+ __le16 d_checksum; /* xor of data incl. partitions */
+
+ /* filesystem and partition information: */
+ __le16 d_npartitions; /* number of partitions in following */
+ __le32 d_bbsize; /* size of boot area at sn0, bytes */
+ __le32 d_sbsize; /* max size of fs superblock, bytes */
+ struct bsd_partition { /* the partition table */
+ __le32 p_size; /* number of sectors in partition */
+ __le32 p_offset; /* starting sector */
+ __le32 p_fsize; /* filesystem basic fragment size */
+ __u8 p_fstype; /* filesystem type, see below */
+ __u8 p_frag; /* filesystem fragments per block */
+ __le16 p_cpg; /* filesystem cylinders per group */
+ } d_partitions[BSD_MAXPARTITIONS]; /* actually may be more */
+};
+
#if defined(CONFIG_BSD_DISKLABEL)
/*
* Create devices for BSD partitions listed in a disklabel, under a
@@ -349,6 +432,51 @@ static void parse_openbsd(struct parsed_partitions *state,
#endif
}
+#define UNIXWARE_DISKMAGIC (0xCA5E600DUL) /* The disk magic number */
+#define UNIXWARE_DISKMAGIC2 (0x600DDEEEUL) /* The slice table magic nr */
+#define UNIXWARE_NUMSLICE 16
+#define UNIXWARE_FS_UNUSED 0 /* Unused slice entry ID */
+
+struct unixware_slice {
+ __le16 s_label; /* label */
+ __le16 s_flags; /* permission flags */
+ __le32 start_sect; /* starting sector */
+ __le32 nr_sects; /* number of sectors in slice */
+};
+
+struct unixware_disklabel {
+ __le32 d_type; /* drive type */
+ __le32 d_magic; /* the magic number */
+ __le32 d_version; /* version number */
+ char d_serial[12]; /* serial number of the device */
+ __le32 d_ncylinders; /* # of data cylinders per device */
+ __le32 d_ntracks; /* # of tracks per cylinder */
+ __le32 d_nsectors; /* # of data sectors per track */
+ __le32 d_secsize; /* # of bytes per sector */
+ __le32 d_part_start; /* # of first sector of this partition*/
+ __le32 d_unknown1[12]; /* ? */
+ __le32 d_alt_tbl; /* byte offset of alternate table */
+ __le32 d_alt_len; /* byte length of alternate table */
+ __le32 d_phys_cyl; /* # of physical cylinders per device */
+ __le32 d_phys_trk; /* # of physical tracks per cylinder */
+ __le32 d_phys_sec; /* # of physical sectors per track */
+ __le32 d_phys_bytes; /* # of physical bytes per sector */
+ __le32 d_unknown2; /* ? */
+ __le32 d_unknown3; /* ? */
+ __le32 d_pad[8]; /* pad */
+
+ struct unixware_vtoc {
+ __le32 v_magic; /* the magic number */
+ __le32 v_version; /* version number */
+ char v_name[8]; /* volume name */
+ __le16 v_nslices; /* # of slices */
+ __le16 v_unknown1; /* ? */
+ __le32 v_reserved[10]; /* reserved */
+ struct unixware_slice
+ v_slice[UNIXWARE_NUMSLICE]; /* slice headers */
+ } vtoc;
+}; /* 408 */
+
/*
* Create devices for Unixware partitions listed in a disklabel, under a
* dos-like partition. See parse_extended() for more information.
@@ -392,6 +520,8 @@ static void parse_unixware(struct parsed_partitions *state,
#endif
}
+#define MINIX_NR_SUBPARTITIONS 4
+
/*
* Minix 2.0.0/2.0.2 subpartition support.
* Anand Krishnamurthy <anandk@wiproge.med.ge.com>
@@ -403,14 +533,14 @@ static void parse_minix(struct parsed_partitions *state,
#ifdef CONFIG_MINIX_SUBPARTITION
Sector sect;
unsigned char *data;
- struct partition *p;
+ struct msdos_partition *p;
int i;
data = read_part_sector(state, offset, §);
if (!data)
return;
- p = (struct partition *)(data + 0x1be);
+ p = (struct msdos_partition *)(data + 0x1be);
/* The first sector of a Minix partition can have either
* a secondary MBR describing its subpartitions, or
@@ -454,7 +584,7 @@ int msdos_partition(struct parsed_partitions *state)
sector_t sector_size = bdev_logical_block_size(state->bdev) / 512;
Sector sect;
unsigned char *data;
- struct partition *p;
+ struct msdos_partition *p;
struct fat_boot_sector *fb;
int slot;
u32 disksig;
@@ -488,7 +618,7 @@ int msdos_partition(struct parsed_partitions *state)
* partition table. Reject this in case the boot indicator
* is not 0 or 0x80.
*/
- p = (struct partition *) (data + 0x1be);
+ p = (struct msdos_partition *) (data + 0x1be);
for (slot = 1; slot <= 4; slot++, p++) {
if (p->boot_ind != 0 && p->boot_ind != 0x80) {
/*
@@ -510,7 +640,7 @@ int msdos_partition(struct parsed_partitions *state)
}
#ifdef CONFIG_EFI_PARTITION
- p = (struct partition *) (data + 0x1be);
+ p = (struct msdos_partition *) (data + 0x1be);
for (slot = 1 ; slot <= 4 ; slot++, p++) {
/* If this is an EFI GPT disk, msdos should ignore it. */
if (SYS_IND(p) == EFI_PMBR_OSTYPE_EFI_GPT) {
@@ -519,7 +649,7 @@ int msdos_partition(struct parsed_partitions *state)
}
}
#endif
- p = (struct partition *) (data + 0x1be);
+ p = (struct msdos_partition *) (data + 0x1be);
disksig = le32_to_cpup((__le32 *)(data + 0x1b8));
@@ -566,7 +696,7 @@ int msdos_partition(struct parsed_partitions *state)
strlcat(state->pp_buf, "\n", PAGE_SIZE);
/* second pass - output for each on a separate line */
- p = (struct partition *) (0x1be + data);
+ p = (struct msdos_partition *) (0x1be + data);
for (slot = 1 ; slot <= 4 ; slot++, p++) {
unsigned char id = SYS_IND(p);
int n;
diff --git a/block/partitions/msdos.h b/block/partitions/msdos.h
deleted file mode 100644
index fcacfc4..0000000
--- a/block/partitions/msdos.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/msdos.h
- */
-
-#define MSDOS_LABEL_MAGIC 0xAA55
-
-int msdos_partition(struct parsed_partitions *state);
-
diff --git a/block/partitions/osf.c b/block/partitions/osf.c
index 4b87397..84560d0 100644
--- a/block/partitions/osf.c
+++ b/block/partitions/osf.c
@@ -9,9 +9,9 @@
*/
#include "check.h"
-#include "osf.h"
#define MAX_OSF_PARTITIONS 18
+#define DISKLABELMAGIC (0x82564557UL)
int osf_partition(struct parsed_partitions *state)
{
diff --git a/block/partitions/osf.h b/block/partitions/osf.h
deleted file mode 100644
index 4d8088e..0000000
--- a/block/partitions/osf.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/osf.h
- */
-
-#define DISKLABELMAGIC (0x82564557UL)
-
-int osf_partition(struct parsed_partitions *state);
diff --git a/block/partitions/sgi.c b/block/partitions/sgi.c
index d7b421c..4273f1b 100644
--- a/block/partitions/sgi.c
+++ b/block/partitions/sgi.c
@@ -6,7 +6,12 @@
*/
#include "check.h"
-#include "sgi.h"
+
+#define SGI_LABEL_MAGIC 0x0be5a941
+
+enum {
+ LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */
+};
struct sgi_disklabel {
__be32 magic_mushroom; /* Big fat spliff... */
diff --git a/block/partitions/sgi.h b/block/partitions/sgi.h
deleted file mode 100644
index a5b77c3..0000000
--- a/block/partitions/sgi.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/sgi.h
- */
-
-extern int sgi_partition(struct parsed_partitions *state);
-
-#define SGI_LABEL_MAGIC 0x0be5a941
-
diff --git a/block/partitions/sun.c b/block/partitions/sun.c
index 90f3672..47dc53e 100644
--- a/block/partitions/sun.c
+++ b/block/partitions/sun.c
@@ -9,7 +9,14 @@
*/
#include "check.h"
-#include "sun.h"
+
+#define SUN_LABEL_MAGIC 0xDABE
+#define SUN_VTOC_SANITY 0x600DDEEE
+
+enum {
+ SUN_WHOLE_DISK = 5,
+ LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */
+};
int sun_partition(struct parsed_partitions *state)
{
diff --git a/block/partitions/sun.h b/block/partitions/sun.h
deleted file mode 100644
index ae1b9ee..0000000
--- a/block/partitions/sun.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/sun.h
- */
-
-#define SUN_LABEL_MAGIC 0xDABE
-#define SUN_VTOC_SANITY 0x600DDEEE
-
-int sun_partition(struct parsed_partitions *state);
diff --git a/block/partitions/sysv68.c b/block/partitions/sysv68.c
index 92e8108..6f6257f 100644
--- a/block/partitions/sysv68.c
+++ b/block/partitions/sysv68.c
@@ -6,7 +6,6 @@
*/
#include "check.h"
-#include "sysv68.h"
/*
* Volume ID structure: on first 256-bytes sector of disk
diff --git a/block/partitions/sysv68.h b/block/partitions/sysv68.h
deleted file mode 100644
index 4fb6b8e..0000000
--- a/block/partitions/sysv68.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-extern int sysv68_partition(struct parsed_partitions *state);
diff --git a/block/partitions/ultrix.c b/block/partitions/ultrix.c
index ecd0d73..4aaa810 100644
--- a/block/partitions/ultrix.c
+++ b/block/partitions/ultrix.c
@@ -8,7 +8,6 @@
*/
#include "check.h"
-#include "ultrix.h"
int ultrix_partition(struct parsed_partitions *state)
{
diff --git a/block/partitions/ultrix.h b/block/partitions/ultrix.h
deleted file mode 100644
index 9f676ce..0000000
--- a/block/partitions/ultrix.h
+++ /dev/null
@@ -1,6 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/ultrix.h
- */
-
-int ultrix_partition(struct parsed_partitions *state);
diff --git a/block/sed-opal.c b/block/sed-opal.c
index 880cc57..daafadb 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -1056,7 +1056,7 @@ static int start_opal_session_cont(struct opal_dev *dev)
hsn = response_get_u64(&dev->parsed, 4);
tsn = response_get_u64(&dev->parsed, 5);
- if (hsn == 0 && tsn == 0) {
+ if (hsn != GENERIC_HOST_SESSION_NUM || tsn < FIRST_TPER_SESSION_NUM) {
pr_debug("Couldn't authenticate session\n");
return -EPERM;
}
diff --git a/crypto/Kconfig b/crypto/Kconfig
index cdb51d4..c24a474 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -136,8 +136,6 @@
Userspace configuration for cryptographic instantiations such as
cbc(aes).
-if CRYPTO_MANAGER2
-
config CRYPTO_MANAGER_DISABLE_TESTS
bool "Disable run-time self tests"
default y
@@ -155,8 +153,6 @@
This is intended for developer use only, as these tests take much
longer to run than the normal self tests.
-endif # if CRYPTO_MANAGER2
-
config CRYPTO_GF128MUL
tristate
diff --git a/crypto/hash_info.c b/crypto/hash_info.c
index c754cb7..a49ff96 100644
--- a/crypto/hash_info.c
+++ b/crypto/hash_info.c
@@ -26,7 +26,7 @@ const char *const hash_algo_name[HASH_ALGO__LAST] = {
[HASH_ALGO_TGR_128] = "tgr128",
[HASH_ALGO_TGR_160] = "tgr160",
[HASH_ALGO_TGR_192] = "tgr192",
- [HASH_ALGO_SM3_256] = "sm3-256",
+ [HASH_ALGO_SM3_256] = "sm3",
[HASH_ALGO_STREEBOG_256] = "streebog256",
[HASH_ALGO_STREEBOG_512] = "streebog512",
};
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 88f33c0..ccb3d60 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -4436,6 +4436,15 @@ static const struct alg_test_desc alg_test_descs[] = {
.cipher = __VECS(tf_cbc_tv_template)
},
}, {
+#if IS_ENABLED(CONFIG_CRYPTO_PAES_S390)
+ .alg = "cbc-paes-s390",
+ .fips_allowed = 1,
+ .test = alg_test_skcipher,
+ .suite = {
+ .cipher = __VECS(aes_cbc_tv_template)
+ }
+ }, {
+#endif
.alg = "cbcmac(aes)",
.fips_allowed = 1,
.test = alg_test_hash,
@@ -4587,6 +4596,15 @@ static const struct alg_test_desc alg_test_descs[] = {
.cipher = __VECS(tf_ctr_tv_template)
}
}, {
+#if IS_ENABLED(CONFIG_CRYPTO_PAES_S390)
+ .alg = "ctr-paes-s390",
+ .fips_allowed = 1,
+ .test = alg_test_skcipher,
+ .suite = {
+ .cipher = __VECS(aes_ctr_tv_template)
+ }
+ }, {
+#endif
.alg = "cts(cbc(aes))",
.test = alg_test_skcipher,
.fips_allowed = 1,
@@ -4879,6 +4897,15 @@ static const struct alg_test_desc alg_test_descs[] = {
.cipher = __VECS(xtea_tv_template)
}
}, {
+#if IS_ENABLED(CONFIG_CRYPTO_PAES_S390)
+ .alg = "ecb-paes-s390",
+ .fips_allowed = 1,
+ .test = alg_test_skcipher,
+ .suite = {
+ .cipher = __VECS(aes_tv_template)
+ }
+ }, {
+#endif
.alg = "ecdh",
.test = alg_test_kpp,
.fips_allowed = 1,
@@ -5465,6 +5492,15 @@ static const struct alg_test_desc alg_test_descs[] = {
.cipher = __VECS(tf_xts_tv_template)
}
}, {
+#if IS_ENABLED(CONFIG_CRYPTO_PAES_S390)
+ .alg = "xts-paes-s390",
+ .fips_allowed = 1,
+ .test = alg_test_skcipher,
+ .suite = {
+ .cipher = __VECS(aes_xts_tv_template)
+ }
+ }, {
+#endif
.alg = "xts4096(paes)",
.test = alg_test_null,
.fips_allowed = 1,
diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c
index b5516b0..6e9ec6e 100644
--- a/drivers/acpi/acpi_watchdog.c
+++ b/drivers/acpi/acpi_watchdog.c
@@ -55,12 +55,14 @@ static bool acpi_watchdog_uses_rtc(const struct acpi_table_wdat *wdat)
}
#endif
+static bool acpi_no_watchdog;
+
static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void)
{
const struct acpi_table_wdat *wdat = NULL;
acpi_status status;
- if (acpi_disabled)
+ if (acpi_disabled || acpi_no_watchdog)
return NULL;
status = acpi_get_table(ACPI_SIG_WDAT, 0,
@@ -88,6 +90,14 @@ bool acpi_has_watchdog(void)
}
EXPORT_SYMBOL_GPL(acpi_has_watchdog);
+/* ACPI watchdog can be disabled on boot command line */
+static int __init disable_acpi_watchdog(char *str)
+{
+ acpi_no_watchdog = true;
+ return 1;
+}
+__setup("acpi_no_watchdog", disable_acpi_watchdog);
+
void __init acpi_watchdog_init(void)
{
const struct acpi_wdat_entry *entries;
@@ -126,12 +136,11 @@ void __init acpi_watchdog_init(void)
gas = &entries[i].register_region;
res.start = gas->address;
+ res.end = res.start + ACPI_ACCESS_BYTE_WIDTH(gas->access_width) - 1;
if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
res.flags = IORESOURCE_MEM;
- res.end = res.start + ALIGN(gas->access_width, 4) - 1;
} else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
res.flags = IORESOURCE_IO;
- res.end = res.start + gas->access_width - 1;
} else {
pr_warn("Unsupported address space: %u\n",
gas->space_id);
diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h
index 67f282e..6ad0517 100644
--- a/drivers/acpi/acpica/achware.h
+++ b/drivers/acpi/acpica/achware.h
@@ -101,6 +101,8 @@ acpi_status acpi_hw_enable_all_runtime_gpes(void);
acpi_status acpi_hw_enable_all_wakeup_gpes(void);
+u8 acpi_hw_check_all_gpes(void);
+
acpi_status
acpi_hw_enable_runtime_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
struct acpi_gpe_block_info *gpe_block,
diff --git a/drivers/acpi/acpica/evevent.c b/drivers/acpi/acpica/evevent.c
index 8c83d8c..789d5e9 100644
--- a/drivers/acpi/acpica/evevent.c
+++ b/drivers/acpi/acpica/evevent.c
@@ -265,4 +265,49 @@ static u32 acpi_ev_fixed_event_dispatch(u32 event)
handler) (acpi_gbl_fixed_event_handlers[event].context));
}
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_any_fixed_event_status_set
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: TRUE or FALSE
+ *
+ * DESCRIPTION: Checks the PM status register for active fixed events
+ *
+ ******************************************************************************/
+
+u32 acpi_any_fixed_event_status_set(void)
+{
+ acpi_status status;
+ u32 in_status;
+ u32 in_enable;
+ u32 i;
+
+ status = acpi_hw_register_read(ACPI_REGISTER_PM1_ENABLE, &in_enable);
+ if (ACPI_FAILURE(status)) {
+ return (FALSE);
+ }
+
+ status = acpi_hw_register_read(ACPI_REGISTER_PM1_STATUS, &in_status);
+ if (ACPI_FAILURE(status)) {
+ return (FALSE);
+ }
+
+ /*
+ * Check for all possible Fixed Events and dispatch those that are active
+ */
+ for (i = 0; i < ACPI_NUM_FIXED_EVENTS; i++) {
+
+ /* Both the status and enable bits must be on for this event */
+
+ if ((in_status & acpi_gbl_fixed_event_info[i].status_bit_mask) &&
+ (in_enable & acpi_gbl_fixed_event_info[i].enable_bit_mask)) {
+ return (TRUE);
+ }
+ }
+
+ return (FALSE);
+}
+
#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c
index 2c39ff2..f2de66b 100644
--- a/drivers/acpi/acpica/evxfgpe.c
+++ b/drivers/acpi/acpica/evxfgpe.c
@@ -795,6 +795,38 @@ acpi_status acpi_enable_all_wakeup_gpes(void)
ACPI_EXPORT_SYMBOL(acpi_enable_all_wakeup_gpes)
+/******************************************************************************
+ *
+ * FUNCTION: acpi_any_gpe_status_set
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: Whether or not the status bit is set for any GPE
+ *
+ * DESCRIPTION: Check the status bits of all enabled GPEs and return TRUE if any
+ * of them is set or FALSE otherwise.
+ *
+ ******************************************************************************/
+u32 acpi_any_gpe_status_set(void)
+{
+ acpi_status status;
+ u8 ret;
+
+ ACPI_FUNCTION_TRACE(acpi_any_gpe_status_set);
+
+ status = acpi_ut_acquire_mutex(ACPI_MTX_EVENTS);
+ if (ACPI_FAILURE(status)) {
+ return (FALSE);
+ }
+
+ ret = acpi_hw_check_all_gpes();
+ (void)acpi_ut_release_mutex(ACPI_MTX_EVENTS);
+
+ return (ret);
+}
+
+ACPI_EXPORT_SYMBOL(acpi_any_gpe_status_set)
+
/*******************************************************************************
*
* FUNCTION: acpi_install_gpe_block
diff --git a/drivers/acpi/acpica/hwgpe.c b/drivers/acpi/acpica/hwgpe.c
index 1b4252b..f4c285c 100644
--- a/drivers/acpi/acpica/hwgpe.c
+++ b/drivers/acpi/acpica/hwgpe.c
@@ -446,6 +446,53 @@ acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
/******************************************************************************
*
+ * FUNCTION: acpi_hw_get_gpe_block_status
+ *
+ * PARAMETERS: gpe_xrupt_info - GPE Interrupt info
+ * gpe_block - Gpe Block info
+ *
+ * RETURN: Success
+ *
+ * DESCRIPTION: Produce a combined GPE status bits mask for the given block.
+ *
+ ******************************************************************************/
+
+static acpi_status
+acpi_hw_get_gpe_block_status(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
+ struct acpi_gpe_block_info *gpe_block,
+ void *ret_ptr)
+{
+ struct acpi_gpe_register_info *gpe_register_info;
+ u64 in_enable, in_status;
+ acpi_status status;
+ u8 *ret = ret_ptr;
+ u32 i;
+
+ /* Examine each GPE Register within the block */
+
+ for (i = 0; i < gpe_block->register_count; i++) {
+ gpe_register_info = &gpe_block->register_info[i];
+
+ status = acpi_hw_read(&in_enable,
+ &gpe_register_info->enable_address);
+ if (ACPI_FAILURE(status)) {
+ continue;
+ }
+
+ status = acpi_hw_read(&in_status,
+ &gpe_register_info->status_address);
+ if (ACPI_FAILURE(status)) {
+ continue;
+ }
+
+ *ret |= in_enable & in_status;
+ }
+
+ return (AE_OK);
+}
+
+/******************************************************************************
+ *
* FUNCTION: acpi_hw_disable_all_gpes
*
* PARAMETERS: None
@@ -510,4 +557,28 @@ acpi_status acpi_hw_enable_all_wakeup_gpes(void)
return_ACPI_STATUS(status);
}
+/******************************************************************************
+ *
+ * FUNCTION: acpi_hw_check_all_gpes
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: Combined status of all GPEs
+ *
+ * DESCRIPTION: Check all enabled GPEs in all GPE blocks and return TRUE if the
+ * status bit is set for at least one of them of FALSE otherwise.
+ *
+ ******************************************************************************/
+
+u8 acpi_hw_check_all_gpes(void)
+{
+ u8 ret = 0;
+
+ ACPI_FUNCTION_TRACE(acpi_hw_check_all_gpes);
+
+ (void)acpi_ev_walk_gpe_list(acpi_hw_get_gpe_block_status, &ret);
+
+ return (ret != 0);
+}
+
#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 103acbb..24c9642 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -171,7 +171,7 @@ int ghes_estatus_pool_init(int num_ghes)
* New allocation must be visible in all pgd before it can be found by
* an NMI allocating from the pool.
*/
- vmalloc_sync_all();
+ vmalloc_sync_mappings();
rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1);
if (rc)
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 08bc975..d1f1cf5 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -179,6 +179,7 @@ EXPORT_SYMBOL(first_ec);
static struct acpi_ec *boot_ec;
static bool boot_ec_is_ecdt = false;
+static struct workqueue_struct *ec_wq;
static struct workqueue_struct *ec_query_wq;
static int EC_FLAGS_QUERY_HANDSHAKE; /* Needs QR_EC issued when SCI_EVT set */
@@ -469,7 +470,7 @@ static void acpi_ec_submit_query(struct acpi_ec *ec)
ec_dbg_evt("Command(%s) submitted/blocked",
acpi_ec_cmd_string(ACPI_EC_COMMAND_QUERY));
ec->nr_pending_queries++;
- schedule_work(&ec->work);
+ queue_work(ec_wq, &ec->work);
}
}
@@ -535,7 +536,7 @@ static void acpi_ec_enable_event(struct acpi_ec *ec)
#ifdef CONFIG_PM_SLEEP
static void __acpi_ec_flush_work(void)
{
- flush_scheduled_work(); /* flush ec->work */
+ drain_workqueue(ec_wq); /* flush ec->work */
flush_workqueue(ec_query_wq); /* flush queries */
}
@@ -556,8 +557,8 @@ static void acpi_ec_disable_event(struct acpi_ec *ec)
void acpi_ec_flush_work(void)
{
- /* Without ec_query_wq there is nothing to flush. */
- if (!ec_query_wq)
+ /* Without ec_wq there is nothing to flush. */
+ if (!ec_wq)
return;
__acpi_ec_flush_work();
@@ -2107,25 +2108,33 @@ static struct acpi_driver acpi_ec_driver = {
.drv.pm = &acpi_ec_pm,
};
-static inline int acpi_ec_query_init(void)
+static void acpi_ec_destroy_workqueues(void)
{
- if (!ec_query_wq) {
- ec_query_wq = alloc_workqueue("kec_query", 0,
- ec_max_queries);
- if (!ec_query_wq)
- return -ENODEV;
+ if (ec_wq) {
+ destroy_workqueue(ec_wq);
+ ec_wq = NULL;
}
- return 0;
-}
-
-static inline void acpi_ec_query_exit(void)
-{
if (ec_query_wq) {
destroy_workqueue(ec_query_wq);
ec_query_wq = NULL;
}
}
+static int acpi_ec_init_workqueues(void)
+{
+ if (!ec_wq)
+ ec_wq = alloc_ordered_workqueue("kec", 0);
+
+ if (!ec_query_wq)
+ ec_query_wq = alloc_workqueue("kec_query", 0, ec_max_queries);
+
+ if (!ec_wq || !ec_query_wq) {
+ acpi_ec_destroy_workqueues();
+ return -ENODEV;
+ }
+ return 0;
+}
+
static const struct dmi_system_id acpi_ec_no_wakeup[] = {
{
.ident = "Thinkpad X1 Carbon 6th",
@@ -2156,8 +2165,7 @@ int __init acpi_ec_init(void)
int result;
int ecdt_fail, dsdt_fail;
- /* register workqueue for _Qxx evaluations */
- result = acpi_ec_query_init();
+ result = acpi_ec_init_workqueues();
if (result)
return result;
@@ -2188,6 +2196,6 @@ static void __exit acpi_ec_exit(void)
{
acpi_bus_unregister_driver(&acpi_ec_driver);
- acpi_ec_query_exit();
+ acpi_ec_destroy_workqueues();
}
#endif /* 0 */
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 43988062..e5f9592 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -990,21 +990,41 @@ static void acpi_s2idle_sync(void)
acpi_os_wait_events_complete(); /* synchronize Notify handling */
}
-static void acpi_s2idle_wake(void)
+static bool acpi_s2idle_wake(void)
{
- /*
- * If IRQD_WAKEUP_ARMED is set for the SCI at this point, the SCI has
- * not triggered while suspended, so bail out.
- */
- if (!acpi_sci_irq_valid() ||
- irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq)))
- return;
+ if (!acpi_sci_irq_valid())
+ return pm_wakeup_pending();
- /*
- * If there are EC events to process, the wakeup may be a spurious one
- * coming from the EC.
- */
- if (acpi_ec_dispatch_gpe()) {
+ while (pm_wakeup_pending()) {
+ /*
+ * If IRQD_WAKEUP_ARMED is set for the SCI at this point, the
+ * SCI has not triggered while suspended, so bail out (the
+ * wakeup is pending anyway and the SCI is not the source of
+ * it).
+ */
+ if (irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq)))
+ return true;
+
+ /*
+ * If the status bit of any enabled fixed event is set, the
+ * wakeup is regarded as valid.
+ */
+ if (acpi_any_fixed_event_status_set())
+ return true;
+
+ /*
+ * If there are no EC events to process and at least one of the
+ * other enabled GPEs is active, the wakeup is regarded as a
+ * genuine one.
+ *
+ * Note that the checks below must be carried out in this order
+ * to avoid returning prematurely due to a change of the EC GPE
+ * status bit from unset to set between the checks with the
+ * status bits of all the other GPEs unset.
+ */
+ if (acpi_any_gpe_status_set() && !acpi_ec_dispatch_gpe())
+ return true;
+
/*
* Cancel the wakeup and process all pending events in case
* there are any wakeup ones in there.
@@ -1017,8 +1037,19 @@ static void acpi_s2idle_wake(void)
acpi_s2idle_sync();
+ /*
+ * The SCI is in the "suspended" state now and it cannot produce
+ * new wakeup events till the rearming below, so if any of them
+ * are pending here, they must be resulting from the processing
+ * of EC events above or coming from somewhere else.
+ */
+ if (pm_wakeup_pending())
+ return true;
+
rearm_wake_irq(acpi_sci_irq);
}
+
+ return false;
}
static void acpi_s2idle_restore_early(void)
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index a6b2082..e47c8a4 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -5228,6 +5228,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
binder_dev = container_of(filp->private_data,
struct binder_device, miscdev);
}
+ refcount_inc(&binder_dev->ref);
proc->context = &binder_dev->context;
binder_alloc_init(&proc->alloc);
@@ -5405,6 +5406,7 @@ static int binder_node_release(struct binder_node *node, int refs)
static void binder_deferred_release(struct binder_proc *proc)
{
struct binder_context *context = proc->context;
+ struct binder_device *device;
struct rb_node *n;
int threads, nodes, incoming_refs, outgoing_refs, active_transactions;
@@ -5421,6 +5423,12 @@ static void binder_deferred_release(struct binder_proc *proc)
context->binder_context_mgr_node = NULL;
}
mutex_unlock(&context->context_mgr_node_lock);
+ device = container_of(proc->context, struct binder_device, context);
+ if (refcount_dec_and_test(&device->ref)) {
+ kfree(context->name);
+ kfree(device);
+ }
+ proc->context = NULL;
binder_inner_proc_lock(proc);
/*
* Make sure proc stays alive after we
@@ -6077,6 +6085,7 @@ static int __init init_binder_device(const char *name)
binder_device->miscdev.minor = MISC_DYNAMIC_MINOR;
binder_device->miscdev.name = name;
+ refcount_set(&binder_device->ref, 1);
binder_device->context.binder_context_mgr_uid = INVALID_UID;
binder_device->context.name = name;
mutex_init(&binder_device->context.context_mgr_node_lock);
diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
index ae99109..283d3cb 100644
--- a/drivers/android/binder_internal.h
+++ b/drivers/android/binder_internal.h
@@ -8,6 +8,7 @@
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
+#include <linux/refcount.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/uidgid.h>
@@ -33,6 +34,7 @@ struct binder_device {
struct miscdevice miscdev;
struct binder_context context;
struct inode *binderfs_inode;
+ refcount_t ref;
};
/**
diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c
index e2580e5..f303106 100644
--- a/drivers/android/binderfs.c
+++ b/drivers/android/binderfs.c
@@ -154,6 +154,7 @@ static int binderfs_binder_device_create(struct inode *ref_inode,
if (!name)
goto err;
+ refcount_set(&device->ref, 1);
device->binderfs_inode = inode;
device->context.binder_context_mgr_uid = INVALID_UID;
device->context.name = name;
@@ -257,8 +258,10 @@ static void binderfs_evict_inode(struct inode *inode)
ida_free(&binderfs_minors, device->miscdev.minor);
mutex_unlock(&binderfs_minors_mutex);
- kfree(device->context.name);
- kfree(device);
+ if (refcount_dec_and_test(&device->ref)) {
+ kfree(device->context.name);
+ kfree(device);
+ }
}
/**
@@ -445,6 +448,7 @@ static int binderfs_binder_ctl_create(struct super_block *sb)
inode->i_uid = info->root_uid;
inode->i_gid = info->root_gid;
+ refcount_set(&device->ref, 1);
device->binderfs_inode = inode;
device->miscdev.minor = minor;
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index a6beb2c..05ecdce 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -34,6 +34,12 @@
config ATA_NONSTANDARD
bool
+config SATA_HOST
+ bool
+
+config PATA_TIMINGS
+ bool
+
config ATA_VERBOSE_ERROR
bool "Verbose ATA error reporting"
default y
@@ -45,9 +51,26 @@
If unsure, say Y.
+config ATA_FORCE
+ bool "\"libata.force=\" kernel parameter support" if EXPERT
+ default y
+ help
+ This option adds support for "libata.force=" kernel parameter for
+ forcing configuration settings.
+
+ For further information, please read
+ <file:Documentation/admin-guide/kernel-parameters.txt>.
+
+ This option will enlarge the kernel by approx. 3KB. Disable it if
+ kernel size is more important than ability to override the default
+ configuration settings.
+
+ If unsure, say Y.
+
config ATA_ACPI
bool "ATA ACPI Support"
depends on ACPI
+ select PATA_TIMINGS
default y
help
This option adds support for ATA-related ACPI objects.
@@ -73,6 +96,7 @@
config SATA_PMP
bool "SATA Port Multiplier support"
+ depends on SATA_HOST
default y
help
This option adds support for SATA Port Multipliers
@@ -85,6 +109,7 @@
config SATA_AHCI
tristate "AHCI SATA support"
depends on PCI
+ select SATA_HOST
help
This option enables support for AHCI Serial ATA.
@@ -111,6 +136,7 @@
config SATA_AHCI_PLATFORM
tristate "Platform AHCI SATA support"
+ select SATA_HOST
help
This option enables support for Platform AHCI Serial ATA
controllers.
@@ -121,6 +147,7 @@
tristate "Broadcom AHCI SATA support"
depends on ARCH_BRCMSTB || BMIPS_GENERIC || ARCH_BCM_NSP || \
ARCH_BCM_63XX
+ select SATA_HOST
help
This option enables support for the AHCI SATA3 controller found on
Broadcom SoC's.
@@ -130,6 +157,7 @@
config AHCI_DA850
tristate "DaVinci DA850 AHCI SATA support"
depends on ARCH_DAVINCI_DA850
+ select SATA_HOST
help
This option enables support for the DaVinci DA850 SoC's
onboard AHCI SATA.
@@ -139,6 +167,7 @@
config AHCI_DM816
tristate "DaVinci DM816 AHCI SATA support"
depends on ARCH_OMAP2PLUS
+ select SATA_HOST
help
This option enables support for the DaVinci DM816 SoC's
onboard AHCI SATA controller.
@@ -148,6 +177,7 @@
config AHCI_ST
tristate "ST AHCI SATA support"
depends on ARCH_STI
+ select SATA_HOST
help
This option enables support for ST AHCI SATA controller.
@@ -157,6 +187,7 @@
tristate "Freescale i.MX AHCI SATA support"
depends on MFD_SYSCON && (ARCH_MXC || COMPILE_TEST)
depends on (HWMON && (THERMAL || !THERMAL_OF)) || !HWMON
+ select SATA_HOST
help
This option enables support for the Freescale i.MX SoC's
onboard AHCI SATA.
@@ -166,6 +197,7 @@
config AHCI_CEVA
tristate "CEVA AHCI SATA support"
depends on OF
+ select SATA_HOST
help
This option enables support for the CEVA AHCI SATA.
It can be found on the Xilinx Zynq UltraScale+ MPSoC.
@@ -176,6 +208,7 @@
tristate "MediaTek AHCI SATA support"
depends on ARCH_MEDIATEK
select MFD_SYSCON
+ select SATA_HOST
help
This option enables support for the MediaTek SoC's
onboard AHCI SATA controller.
@@ -185,6 +218,7 @@
config AHCI_MVEBU
tristate "Marvell EBU AHCI SATA support"
depends on ARCH_MVEBU
+ select SATA_HOST
help
This option enables support for the Marvebu EBU SoC's
onboard AHCI SATA.
@@ -203,6 +237,7 @@
config AHCI_SUNXI
tristate "Allwinner sunxi AHCI SATA support"
depends on ARCH_SUNXI
+ select SATA_HOST
help
This option enables support for the Allwinner sunxi SoC's
onboard AHCI SATA.
@@ -212,6 +247,7 @@
config AHCI_TEGRA
tristate "NVIDIA Tegra AHCI SATA support"
depends on ARCH_TEGRA
+ select SATA_HOST
help
This option enables support for the NVIDIA Tegra SoC's
onboard AHCI SATA.
@@ -221,12 +257,14 @@
config AHCI_XGENE
tristate "APM X-Gene 6.0Gbps AHCI SATA host controller support"
depends on PHY_XGENE
+ select SATA_HOST
help
This option enables support for APM X-Gene SoC SATA host controller.
config AHCI_QORIQ
tristate "Freescale QorIQ AHCI SATA support"
depends on OF
+ select SATA_HOST
help
This option enables support for the Freescale QorIQ AHCI SoC's
onboard AHCI SATA.
@@ -236,6 +274,7 @@
config SATA_FSL
tristate "Freescale 3.0Gbps SATA support"
depends on FSL_SOC
+ select SATA_HOST
help
This option enables support for Freescale 3.0Gbps SATA controller.
It can be found on MPC837x and MPC8315.
@@ -245,6 +284,7 @@
config SATA_GEMINI
tristate "Gemini SATA bridge support"
depends on ARCH_GEMINI || COMPILE_TEST
+ select SATA_HOST
default ARCH_GEMINI
help
This enabled support for the FTIDE010 to SATA bridge
@@ -255,6 +295,7 @@
config SATA_AHCI_SEATTLE
tristate "AMD Seattle 6.0Gbps AHCI SATA host controller support"
depends on ARCH_SEATTLE
+ select SATA_HOST
help
This option enables support for AMD Seattle SATA host controller.
@@ -263,12 +304,14 @@
config SATA_INIC162X
tristate "Initio 162x SATA support (Very Experimental)"
depends on PCI
+ select SATA_HOST
help
This option enables support for Initio 162x Serial ATA.
config SATA_ACARD_AHCI
tristate "ACard AHCI variant (ATP 8620)"
depends on PCI
+ select SATA_HOST
help
This option enables support for Acard.
@@ -277,6 +320,7 @@
config SATA_SIL24
tristate "Silicon Image 3124/3132 SATA support"
depends on PCI
+ select SATA_HOST
help
This option enables support for Silicon Image 3124/3132 Serial ATA.
@@ -317,6 +361,7 @@
config PATA_OCTEON_CF
tristate "OCTEON Boot Bus Compact Flash support"
depends on CAVIUM_OCTEON_SOC
+ select PATA_TIMINGS
help
This option enables a polled compact flash driver for use with
compact flash cards attached to the OCTEON boot bus.
@@ -326,6 +371,7 @@
config SATA_QSTOR
tristate "Pacific Digital SATA QStor support"
depends on PCI
+ select SATA_HOST
help
This option enables support for Pacific Digital Serial ATA QStor.
@@ -334,6 +380,7 @@
config SATA_SX4
tristate "Promise SATA SX4 support (Experimental)"
depends on PCI
+ select SATA_HOST
help
This option enables support for Promise Serial ATA SX4.
@@ -357,6 +404,7 @@
config ATA_PIIX
tristate "Intel ESB, ICH, PIIX3, PIIX4 PATA/SATA support"
depends on PCI
+ select SATA_HOST
help
This option enables support for ICH5/6/7/8 Serial ATA
and support for PATA on the Intel ESB/ICH/PIIX3/PIIX4 series
@@ -368,6 +416,7 @@
tristate "DesignWare Cores SATA support"
depends on DMADEVICES
select GENERIC_PHY
+ select SATA_HOST
help
This option enables support for the on-chip SATA controller of the
AppliedMicro processor 460EX.
@@ -398,6 +447,7 @@
config SATA_HIGHBANK
tristate "Calxeda Highbank SATA support"
depends on ARCH_HIGHBANK || COMPILE_TEST
+ select SATA_HOST
help
This option enables support for the Calxeda Highbank SoC's
onboard SATA.
@@ -409,6 +459,7 @@
depends on PCI || ARCH_DOVE || ARCH_MV78XX0 || \
ARCH_MVEBU || ARCH_ORION5X || COMPILE_TEST
select GENERIC_PHY
+ select SATA_HOST
help
This option enables support for the Marvell Serial ATA family.
Currently supports 88SX[56]0[48][01] PCI(-X) chips,
@@ -419,6 +470,7 @@
config SATA_NV
tristate "NVIDIA SATA support"
depends on PCI
+ select SATA_HOST
help
This option enables support for NVIDIA Serial ATA.
@@ -427,6 +479,7 @@
config SATA_PROMISE
tristate "Promise SATA TX2/TX4 support"
depends on PCI
+ select SATA_HOST
help
This option enables support for Promise Serial ATA TX2/TX4.
@@ -435,6 +488,7 @@
config SATA_RCAR
tristate "Renesas R-Car SATA support"
depends on ARCH_RENESAS || COMPILE_TEST
+ select SATA_HOST
help
This option enables support for Renesas R-Car Serial ATA.
@@ -443,6 +497,7 @@
config SATA_SIL
tristate "Silicon Image SATA support"
depends on PCI
+ select SATA_HOST
help
This option enables support for Silicon Image Serial ATA.
@@ -452,6 +507,7 @@
tristate "SiS 964/965/966/180 SATA support"
depends on PCI
select PATA_SIS
+ select SATA_HOST
help
This option enables support for SiS Serial ATA on
SiS 964/965/966/180 and Parallel ATA on SiS 180.
@@ -462,6 +518,7 @@
config SATA_SVW
tristate "ServerWorks Frodo / Apple K2 SATA support"
depends on PCI
+ select SATA_HOST
help
This option enables support for Broadcom/Serverworks/Apple K2
SATA support.
@@ -471,6 +528,7 @@
config SATA_ULI
tristate "ULi Electronics SATA support"
depends on PCI
+ select SATA_HOST
help
This option enables support for ULi Electronics SATA.
@@ -479,6 +537,7 @@
config SATA_VIA
tristate "VIA SATA support"
depends on PCI
+ select SATA_HOST
help
This option enables support for VIA Serial ATA.
@@ -487,6 +546,7 @@
config SATA_VITESSE
tristate "VITESSE VSC-7174 / INTEL 31244 SATA support"
depends on PCI
+ select SATA_HOST
help
This option enables support for Vitesse VSC7174 and Intel 31244 Serial ATA.
@@ -497,6 +557,7 @@
config PATA_ALI
tristate "ALi PATA support"
depends on PCI
+ select PATA_TIMINGS
help
This option enables support for the ALi ATA interfaces
found on the many ALi chipsets.
@@ -506,6 +567,7 @@
config PATA_AMD
tristate "AMD/NVidia PATA support"
depends on PCI
+ select PATA_TIMINGS
help
This option enables support for the AMD and NVidia PATA
interfaces found on the chipsets for Athlon/Athlon64.
@@ -540,6 +602,7 @@
config PATA_ATP867X
tristate "ARTOP/Acard ATP867X PATA support"
depends on PCI
+ select PATA_TIMINGS
help
This option enables support for ARTOP/Acard ATP867X PATA
controllers.
@@ -549,6 +612,7 @@
config PATA_BK3710
tristate "Palmchip BK3710 PATA support"
depends on ARCH_DAVINCI
+ select PATA_TIMINGS
help
This option enables support for the integrated IDE controller on
the TI DaVinci SoC.
@@ -558,6 +622,7 @@
config PATA_CMD64X
tristate "CMD64x PATA support"
depends on PCI
+ select PATA_TIMINGS
help
This option enables support for the CMD64x series chips
except for the CMD640.
@@ -603,6 +668,7 @@
config PATA_CYPRESS
tristate "Cypress CY82C693 PATA support (Very Experimental)"
depends on PCI
+ select PATA_TIMINGS
help
This option enables support for the Cypress/Contaq CY82C693
chipset found in some Alpha systems
@@ -621,6 +687,7 @@
config PATA_EP93XX
tristate "Cirrus Logic EP93xx PATA support"
depends on ARCH_EP93XX
+ select PATA_TIMINGS
help
This option enables support for the PATA controller in
the Cirrus Logic EP9312 and EP9315 ARM CPU.
@@ -685,6 +752,7 @@
config PATA_ICSIDE
tristate "Acorn ICS PATA support"
depends on ARM && ARCH_ACORN
+ select PATA_TIMINGS
help
On Acorn systems, say Y here if you wish to use the ICS PATA
interface card. This is not required for ICS partition support.
@@ -693,6 +761,7 @@
config PATA_IMX
tristate "PATA support for Freescale iMX"
depends on ARCH_MXC
+ select PATA_TIMINGS
help
This option enables support for the PATA host available on Freescale
iMX SoCs.
@@ -778,6 +847,7 @@
config PATA_NS87415
tristate "Nat Semi NS87415 PATA support"
depends on PCI
+ select PATA_TIMINGS
help
This option enables support for the National Semiconductor
NS87415 PCI-IDE controller.
@@ -902,6 +972,7 @@
config PATA_VIA
tristate "VIA PATA support"
depends on PCI
+ select PATA_TIMINGS
help
This option enables support for the VIA PATA interfaces
found on the many VIA chipsets.
@@ -935,6 +1006,7 @@
config PATA_CMD640_PCI
tristate "CMD640 PCI PATA support (Experimental)"
depends on PCI
+ select PATA_TIMINGS
help
This option enables support for the CMD640 PCI IDE
interface chip. Only the primary channel is currently
@@ -1005,6 +1077,7 @@
config PATA_NS87410
tristate "Nat Semi NS87410 PATA support"
depends on PCI
+ select PATA_TIMINGS
help
This option enables support for the National Semiconductor
NS87410 PCI-IDE controller.
@@ -1085,6 +1158,7 @@
config PATA_SAMSUNG_CF
tristate "Samsung SoC PATA support"
depends on SAMSUNG_DEV_IDE
+ select PATA_TIMINGS
help
This option enables basic support for Samsung's S3C/S5P board
PATA controllers via the new ATA layer
@@ -1104,6 +1178,7 @@
config PATA_ACPI
tristate "ACPI firmware driver for PATA"
depends on ATA_ACPI && ATA_BMDMA && PCI
+ select PATA_TIMINGS
help
This option enables an ACPI method driver which drives
motherboard PATA controller interfaces through the ACPI
@@ -1113,6 +1188,7 @@
config ATA_GENERIC
tristate "Generic ATA support"
depends on PCI && ATA_BMDMA
+ select SATA_HOST
help
This option enables support for generic BIOS configured
ATA controllers via the new ATA layer
@@ -1122,6 +1198,7 @@
config PATA_LEGACY
tristate "Legacy ISA PATA support (Experimental)"
depends on (ISA || PCI)
+ select PATA_TIMINGS
help
This option enables support for ISA/VLB/PCI bus legacy PATA
ports and allows them to be accessed via the new ATA layer.
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index d8cc2e0..b8aebfb 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -123,7 +123,9 @@
libata-y := libata-core.o libata-scsi.o libata-eh.o \
libata-transport.o libata-trace.o
+libata-$(CONFIG_SATA_HOST) += libata-sata.o
libata-$(CONFIG_ATA_SFF) += libata-sff.o
libata-$(CONFIG_SATA_PMP) += libata-pmp.o
libata-$(CONFIG_ATA_ACPI) += libata-acpi.o
libata-$(CONFIG_SATA_ZPODD) += libata-zpodd.o
+libata-$(CONFIG_PATA_TIMINGS) += libata-pata-timings.o
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 11ea1af..ad0185c 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -40,6 +40,7 @@
enum {
AHCI_PCI_BAR_STA2X11 = 0,
AHCI_PCI_BAR_CAVIUM = 0,
+ AHCI_PCI_BAR_LOONGSON = 0,
AHCI_PCI_BAR_ENMOTUS = 2,
AHCI_PCI_BAR_CAVIUM_GEN5 = 4,
AHCI_PCI_BAR_STANDARD = 5,
@@ -245,6 +246,7 @@ static const struct ata_port_info ahci_port_info[] = {
static const struct pci_device_id ahci_pci_tbl[] = {
/* Intel */
+ { PCI_VDEVICE(INTEL, 0x06d6), board_ahci }, /* Comet Lake PCH-H RAID */
{ PCI_VDEVICE(INTEL, 0x2652), board_ahci }, /* ICH6 */
{ PCI_VDEVICE(INTEL, 0x2653), board_ahci }, /* ICH6M */
{ PCI_VDEVICE(INTEL, 0x27c1), board_ahci }, /* ICH7 */
@@ -401,6 +403,8 @@ static const struct pci_device_id ahci_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/
{ PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/
{ PCI_VDEVICE(INTEL, 0xa356), board_ahci }, /* Cannon Lake PCH-H RAID */
+ { PCI_VDEVICE(INTEL, 0x06d7), board_ahci }, /* Comet Lake-H RAID */
+ { PCI_VDEVICE(INTEL, 0xa386), board_ahci }, /* Comet Lake PCH-V RAID */
{ PCI_VDEVICE(INTEL, 0x0f22), board_ahci_mobile }, /* Bay Trail AHCI */
{ PCI_VDEVICE(INTEL, 0x0f23), board_ahci_mobile }, /* Bay Trail AHCI */
{ PCI_VDEVICE(INTEL, 0x22a3), board_ahci_mobile }, /* Cherry Tr. AHCI */
@@ -589,6 +593,9 @@ static const struct pci_device_id ahci_pci_tbl[] = {
/* Enmotus */
{ PCI_DEVICE(0x1c44, 0x8000), board_ahci },
+ /* Loongson */
+ { PCI_VDEVICE(LOONGSON, 0x7a08), board_ahci },
+
/* Generic, PCI class code for AHCI */
{ PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
PCI_CLASS_STORAGE_SATA_AHCI, 0xffffff, board_ahci },
@@ -1680,6 +1687,9 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
ahci_pci_bar = AHCI_PCI_BAR_CAVIUM;
if (pdev->device == 0xa084)
ahci_pci_bar = AHCI_PCI_BAR_CAVIUM_GEN5;
+ } else if (pdev->vendor == PCI_VENDOR_ID_LOONGSON) {
+ if (pdev->device == 0x7a08)
+ ahci_pci_bar = AHCI_PCI_BAR_LOONGSON;
}
/* acquire resources */
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 42c8728..beca5f9 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2,10 +2,6 @@
/*
* libata-core.c - helper library for ATA
*
- * Maintained by: Tejun Heo <tj@kernel.org>
- * Please ALWAYS copy linux-ide@vger.kernel.org
- * on emails.
- *
* Copyright 2003-2004 Red Hat, Inc. All rights reserved.
* Copyright 2003-2004 Jeff Garzik
*
@@ -22,6 +18,11 @@
* http://www.compactflash.org (CF)
* http://www.qic.org (QIC157 - Tape and DSC)
* http://www.ce-ata.org (CE-ATA: not supported)
+ *
+ * libata is essentially a library of internal helper functions for
+ * low-level ATA host controller drivers. As such, the API/ABI is
+ * likely to change as new drivers are added and updated.
+ * Do not depend on ABI/API stability.
*/
#include <linux/kernel.h>
@@ -56,6 +57,7 @@
#include <linux/leds.h>
#include <linux/pm_runtime.h>
#include <linux/platform_device.h>
+#include <asm/setup.h>
#define CREATE_TRACE_POINTS
#include <trace/events/libata.h>
@@ -63,11 +65,6 @@
#include "libata.h"
#include "libata-transport.h"
-/* debounce timing parameters in msecs { interval, duration, timeout } */
-const unsigned long sata_deb_timing_normal[] = { 5, 100, 2000 };
-const unsigned long sata_deb_timing_hotplug[] = { 25, 500, 2000 };
-const unsigned long sata_deb_timing_long[] = { 100, 2000, 5000 };
-
const struct ata_port_operations ata_base_port_ops = {
.prereset = ata_std_prereset,
.postreset = ata_std_postreset,
@@ -82,6 +79,7 @@ const struct ata_port_operations sata_port_ops = {
.qc_defer = ata_std_qc_defer,
.hardreset = sata_std_hardreset,
};
+EXPORT_SYMBOL_GPL(sata_port_ops);
static unsigned int ata_dev_init_params(struct ata_device *dev,
u16 heads, u16 sectors);
@@ -91,14 +89,15 @@ static unsigned long ata_dev_blacklisted(const struct ata_device *dev);
atomic_t ata_print_id = ATOMIC_INIT(0);
+#ifdef CONFIG_ATA_FORCE
struct ata_force_param {
const char *name;
- unsigned int cbl;
- int spd_limit;
+ u8 cbl;
+ u8 spd_limit;
unsigned long xfer_mask;
unsigned int horkage_on;
unsigned int horkage_off;
- unsigned int lflags;
+ u16 lflags;
};
struct ata_force_ent {
@@ -110,10 +109,11 @@ struct ata_force_ent {
static struct ata_force_ent *ata_force_tbl;
static int ata_force_tbl_size;
-static char ata_force_param_buf[PAGE_SIZE] __initdata;
+static char ata_force_param_buf[COMMAND_LINE_SIZE] __initdata;
/* param_buf is thrown away after initialization, disallow read */
module_param_string(force, ata_force_param_buf, sizeof(ata_force_param_buf), 0);
MODULE_PARM_DESC(force, "Force ATA configurations including cable type, link speed and transfer mode (see Documentation/admin-guide/kernel-parameters.rst for details)");
+#endif
static int atapi_enabled = 1;
module_param(atapi_enabled, int, 0444);
@@ -224,6 +224,7 @@ struct ata_link *ata_link_next(struct ata_link *link, struct ata_port *ap,
return NULL;
}
+EXPORT_SYMBOL_GPL(ata_link_next);
/**
* ata_dev_next - device iteration helper
@@ -277,6 +278,7 @@ struct ata_device *ata_dev_next(struct ata_device *dev, struct ata_link *link,
goto next;
return dev;
}
+EXPORT_SYMBOL_GPL(ata_dev_next);
/**
* ata_dev_phys_link - find physical link for a device
@@ -303,6 +305,7 @@ struct ata_link *ata_dev_phys_link(struct ata_device *dev)
return ap->slave_link;
}
+#ifdef CONFIG_ATA_FORCE
/**
* ata_force_cbl - force cable type according to libata.force
* @ap: ATA port of interest
@@ -483,6 +486,11 @@ static void ata_force_horkage(struct ata_device *dev)
fe->param.name);
}
}
+#else
+static inline void ata_force_link_limits(struct ata_link *link) { }
+static inline void ata_force_xfermask(struct ata_device *dev) { }
+static inline void ata_force_horkage(struct ata_device *dev) { }
+#endif
/**
* atapi_cmd_type - Determine ATAPI command type from SCSI opcode
@@ -521,79 +529,7 @@ int atapi_cmd_type(u8 opcode)
return ATAPI_MISC;
}
}
-
-/**
- * ata_tf_to_fis - Convert ATA taskfile to SATA FIS structure
- * @tf: Taskfile to convert
- * @pmp: Port multiplier port
- * @is_cmd: This FIS is for command
- * @fis: Buffer into which data will output
- *
- * Converts a standard ATA taskfile to a Serial ATA
- * FIS structure (Register - Host to Device).
- *
- * LOCKING:
- * Inherited from caller.
- */
-void ata_tf_to_fis(const struct ata_taskfile *tf, u8 pmp, int is_cmd, u8 *fis)
-{
- fis[0] = 0x27; /* Register - Host to Device FIS */
- fis[1] = pmp & 0xf; /* Port multiplier number*/
- if (is_cmd)
- fis[1] |= (1 << 7); /* bit 7 indicates Command FIS */
-
- fis[2] = tf->command;
- fis[3] = tf->feature;
-
- fis[4] = tf->lbal;
- fis[5] = tf->lbam;
- fis[6] = tf->lbah;
- fis[7] = tf->device;
-
- fis[8] = tf->hob_lbal;
- fis[9] = tf->hob_lbam;
- fis[10] = tf->hob_lbah;
- fis[11] = tf->hob_feature;
-
- fis[12] = tf->nsect;
- fis[13] = tf->hob_nsect;
- fis[14] = 0;
- fis[15] = tf->ctl;
-
- fis[16] = tf->auxiliary & 0xff;
- fis[17] = (tf->auxiliary >> 8) & 0xff;
- fis[18] = (tf->auxiliary >> 16) & 0xff;
- fis[19] = (tf->auxiliary >> 24) & 0xff;
-}
-
-/**
- * ata_tf_from_fis - Convert SATA FIS to ATA taskfile
- * @fis: Buffer from which data will be input
- * @tf: Taskfile to output
- *
- * Converts a serial ATA FIS structure to a standard ATA taskfile.
- *
- * LOCKING:
- * Inherited from caller.
- */
-
-void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf)
-{
- tf->command = fis[2]; /* status */
- tf->feature = fis[3]; /* error */
-
- tf->lbal = fis[4];
- tf->lbam = fis[5];
- tf->lbah = fis[6];
- tf->device = fis[7];
-
- tf->hob_lbal = fis[8];
- tf->hob_lbam = fis[9];
- tf->hob_lbah = fis[10];
-
- tf->nsect = fis[12];
- tf->hob_nsect = fis[13];
-}
+EXPORT_SYMBOL_GPL(atapi_cmd_type);
static const u8 ata_rw_cmds[] = {
/* pio multi */
@@ -868,6 +804,7 @@ unsigned long ata_pack_xfermask(unsigned long pio_mask,
((mwdma_mask << ATA_SHIFT_MWDMA) & ATA_MASK_MWDMA) |
((udma_mask << ATA_SHIFT_UDMA) & ATA_MASK_UDMA);
}
+EXPORT_SYMBOL_GPL(ata_pack_xfermask);
/**
* ata_unpack_xfermask - Unpack xfer_mask into pio, mwdma and udma masks
@@ -923,6 +860,7 @@ u8 ata_xfer_mask2mode(unsigned long xfer_mask)
return ent->base + highbit - ent->shift;
return 0xff;
}
+EXPORT_SYMBOL_GPL(ata_xfer_mask2mode);
/**
* ata_xfer_mode2mask - Find matching xfer_mask for XFER_*
@@ -946,6 +884,7 @@ unsigned long ata_xfer_mode2mask(u8 xfer_mode)
& ~((1 << ent->shift) - 1);
return 0;
}
+EXPORT_SYMBOL_GPL(ata_xfer_mode2mask);
/**
* ata_xfer_mode2shift - Find matching xfer_shift for XFER_*
@@ -968,6 +907,7 @@ int ata_xfer_mode2shift(unsigned long xfer_mode)
return ent->shift;
return -1;
}
+EXPORT_SYMBOL_GPL(ata_xfer_mode2shift);
/**
* ata_mode_string - convert xfer_mask to string
@@ -1014,6 +954,7 @@ const char *ata_mode_string(unsigned long xfer_mask)
return xfer_mode_str[highbit];
return "<n/a>";
}
+EXPORT_SYMBOL_GPL(ata_mode_string);
const char *sata_spd_string(unsigned int spd)
{
@@ -1094,6 +1035,7 @@ unsigned int ata_dev_classify(const struct ata_taskfile *tf)
DPRINTK("unknown device\n");
return ATA_DEV_UNKNOWN;
}
+EXPORT_SYMBOL_GPL(ata_dev_classify);
/**
* ata_id_string - Convert IDENTIFY DEVICE page into string
@@ -1130,6 +1072,7 @@ void ata_id_string(const u16 *id, unsigned char *s,
len -= 2;
}
}
+EXPORT_SYMBOL_GPL(ata_id_string);
/**
* ata_id_c_string - Convert IDENTIFY DEVICE page into C string
@@ -1157,6 +1100,7 @@ void ata_id_c_string(const u16 *id, unsigned char *s,
p--;
*p = '\0';
}
+EXPORT_SYMBOL_GPL(ata_id_c_string);
static u64 ata_id_n_sectors(const u16 *id)
{
@@ -1514,6 +1458,7 @@ unsigned long ata_id_xfermask(const u16 *id)
return ata_pack_xfermask(pio_mask, mwdma_mask, udma_mask);
}
+EXPORT_SYMBOL_GPL(ata_id_xfermask);
static void ata_qc_complete_internal(struct ata_queued_cmd *qc)
{
@@ -1771,6 +1716,7 @@ unsigned int ata_pio_need_iordy(const struct ata_device *adev)
return 1;
return 0;
}
+EXPORT_SYMBOL_GPL(ata_pio_need_iordy);
/**
* ata_pio_mask_no_iordy - Return the non IORDY mask
@@ -1811,6 +1757,7 @@ unsigned int ata_do_dev_read_id(struct ata_device *dev,
return ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE,
id, sizeof(id[0]) * ATA_ID_WORDS, 0);
}
+EXPORT_SYMBOL_GPL(ata_do_dev_read_id);
/**
* ata_dev_read_id - Read ID data from the specified device
@@ -2265,6 +2212,8 @@ static int ata_dev_config_ncq(struct ata_device *dev,
desc[0] = '\0';
return 0;
}
+ if (!IS_ENABLED(CONFIG_SATA_HOST))
+ return 0;
if (dev->horkage & ATA_HORKAGE_NONCQ) {
snprintf(desc, desc_sz, "NCQ (not used)");
return 0;
@@ -2783,6 +2732,7 @@ int ata_cable_40wire(struct ata_port *ap)
{
return ATA_CBL_PATA40;
}
+EXPORT_SYMBOL_GPL(ata_cable_40wire);
/**
* ata_cable_80wire - return 80 wire cable type
@@ -2796,6 +2746,7 @@ int ata_cable_80wire(struct ata_port *ap)
{
return ATA_CBL_PATA80;
}
+EXPORT_SYMBOL_GPL(ata_cable_80wire);
/**
* ata_cable_unknown - return unknown PATA cable.
@@ -2808,6 +2759,7 @@ int ata_cable_unknown(struct ata_port *ap)
{
return ATA_CBL_PATA_UNK;
}
+EXPORT_SYMBOL_GPL(ata_cable_unknown);
/**
* ata_cable_ignore - return ignored PATA cable.
@@ -2820,6 +2772,7 @@ int ata_cable_ignore(struct ata_port *ap)
{
return ATA_CBL_PATA_IGN;
}
+EXPORT_SYMBOL_GPL(ata_cable_ignore);
/**
* ata_cable_sata - return SATA cable type
@@ -2832,6 +2785,7 @@ int ata_cable_sata(struct ata_port *ap)
{
return ATA_CBL_SATA;
}
+EXPORT_SYMBOL_GPL(ata_cable_sata);
/**
* ata_bus_probe - Reset and probe ATA bus
@@ -3014,6 +2968,7 @@ struct ata_device *ata_dev_pair(struct ata_device *adev)
return NULL;
return pair;
}
+EXPORT_SYMBOL_GPL(ata_dev_pair);
/**
* sata_down_spd_limit - adjust SATA spd limit downward
@@ -3095,252 +3050,7 @@ int sata_down_spd_limit(struct ata_link *link, u32 spd_limit)
return 0;
}
-static int __sata_set_spd_needed(struct ata_link *link, u32 *scontrol)
-{
- struct ata_link *host_link = &link->ap->link;
- u32 limit, target, spd;
-
- limit = link->sata_spd_limit;
-
- /* Don't configure downstream link faster than upstream link.
- * It doesn't speed up anything and some PMPs choke on such
- * configuration.
- */
- if (!ata_is_host_link(link) && host_link->sata_spd)
- limit &= (1 << host_link->sata_spd) - 1;
-
- if (limit == UINT_MAX)
- target = 0;
- else
- target = fls(limit);
-
- spd = (*scontrol >> 4) & 0xf;
- *scontrol = (*scontrol & ~0xf0) | ((target & 0xf) << 4);
-
- return spd != target;
-}
-
-/**
- * sata_set_spd_needed - is SATA spd configuration needed
- * @link: Link in question
- *
- * Test whether the spd limit in SControl matches
- * @link->sata_spd_limit. This function is used to determine
- * whether hardreset is necessary to apply SATA spd
- * configuration.
- *
- * LOCKING:
- * Inherited from caller.
- *
- * RETURNS:
- * 1 if SATA spd configuration is needed, 0 otherwise.
- */
-static int sata_set_spd_needed(struct ata_link *link)
-{
- u32 scontrol;
-
- if (sata_scr_read(link, SCR_CONTROL, &scontrol))
- return 1;
-
- return __sata_set_spd_needed(link, &scontrol);
-}
-
-/**
- * sata_set_spd - set SATA spd according to spd limit
- * @link: Link to set SATA spd for
- *
- * Set SATA spd of @link according to sata_spd_limit.
- *
- * LOCKING:
- * Inherited from caller.
- *
- * RETURNS:
- * 0 if spd doesn't need to be changed, 1 if spd has been
- * changed. Negative errno if SCR registers are inaccessible.
- */
-int sata_set_spd(struct ata_link *link)
-{
- u32 scontrol;
- int rc;
-
- if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
- return rc;
-
- if (!__sata_set_spd_needed(link, &scontrol))
- return 0;
-
- if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
- return rc;
-
- return 1;
-}
-
-/*
- * This mode timing computation functionality is ported over from
- * drivers/ide/ide-timing.h and was originally written by Vojtech Pavlik
- */
-/*
- * PIO 0-4, MWDMA 0-2 and UDMA 0-6 timings (in nanoseconds).
- * These were taken from ATA/ATAPI-6 standard, rev 0a, except
- * for UDMA6, which is currently supported only by Maxtor drives.
- *
- * For PIO 5/6 MWDMA 3/4 see the CFA specification 3.0.
- */
-
-static const struct ata_timing ata_timing[] = {
-/* { XFER_PIO_SLOW, 120, 290, 240, 960, 290, 240, 0, 960, 0 }, */
- { XFER_PIO_0, 70, 290, 240, 600, 165, 150, 0, 600, 0 },
- { XFER_PIO_1, 50, 290, 93, 383, 125, 100, 0, 383, 0 },
- { XFER_PIO_2, 30, 290, 40, 330, 100, 90, 0, 240, 0 },
- { XFER_PIO_3, 30, 80, 70, 180, 80, 70, 0, 180, 0 },
- { XFER_PIO_4, 25, 70, 25, 120, 70, 25, 0, 120, 0 },
- { XFER_PIO_5, 15, 65, 25, 100, 65, 25, 0, 100, 0 },
- { XFER_PIO_6, 10, 55, 20, 80, 55, 20, 0, 80, 0 },
-
- { XFER_SW_DMA_0, 120, 0, 0, 0, 480, 480, 50, 960, 0 },
- { XFER_SW_DMA_1, 90, 0, 0, 0, 240, 240, 30, 480, 0 },
- { XFER_SW_DMA_2, 60, 0, 0, 0, 120, 120, 20, 240, 0 },
-
- { XFER_MW_DMA_0, 60, 0, 0, 0, 215, 215, 20, 480, 0 },
- { XFER_MW_DMA_1, 45, 0, 0, 0, 80, 50, 5, 150, 0 },
- { XFER_MW_DMA_2, 25, 0, 0, 0, 70, 25, 5, 120, 0 },
- { XFER_MW_DMA_3, 25, 0, 0, 0, 65, 25, 5, 100, 0 },
- { XFER_MW_DMA_4, 25, 0, 0, 0, 55, 20, 5, 80, 0 },
-
-/* { XFER_UDMA_SLOW, 0, 0, 0, 0, 0, 0, 0, 0, 150 }, */
- { XFER_UDMA_0, 0, 0, 0, 0, 0, 0, 0, 0, 120 },
- { XFER_UDMA_1, 0, 0, 0, 0, 0, 0, 0, 0, 80 },
- { XFER_UDMA_2, 0, 0, 0, 0, 0, 0, 0, 0, 60 },
- { XFER_UDMA_3, 0, 0, 0, 0, 0, 0, 0, 0, 45 },
- { XFER_UDMA_4, 0, 0, 0, 0, 0, 0, 0, 0, 30 },
- { XFER_UDMA_5, 0, 0, 0, 0, 0, 0, 0, 0, 20 },
- { XFER_UDMA_6, 0, 0, 0, 0, 0, 0, 0, 0, 15 },
-
- { 0xFF }
-};
-
-#define ENOUGH(v, unit) (((v)-1)/(unit)+1)
-#define EZ(v, unit) ((v)?ENOUGH(((v) * 1000), unit):0)
-
-static void ata_timing_quantize(const struct ata_timing *t, struct ata_timing *q, int T, int UT)
-{
- q->setup = EZ(t->setup, T);
- q->act8b = EZ(t->act8b, T);
- q->rec8b = EZ(t->rec8b, T);
- q->cyc8b = EZ(t->cyc8b, T);
- q->active = EZ(t->active, T);
- q->recover = EZ(t->recover, T);
- q->dmack_hold = EZ(t->dmack_hold, T);
- q->cycle = EZ(t->cycle, T);
- q->udma = EZ(t->udma, UT);
-}
-
-void ata_timing_merge(const struct ata_timing *a, const struct ata_timing *b,
- struct ata_timing *m, unsigned int what)
-{
- if (what & ATA_TIMING_SETUP ) m->setup = max(a->setup, b->setup);
- if (what & ATA_TIMING_ACT8B ) m->act8b = max(a->act8b, b->act8b);
- if (what & ATA_TIMING_REC8B ) m->rec8b = max(a->rec8b, b->rec8b);
- if (what & ATA_TIMING_CYC8B ) m->cyc8b = max(a->cyc8b, b->cyc8b);
- if (what & ATA_TIMING_ACTIVE ) m->active = max(a->active, b->active);
- if (what & ATA_TIMING_RECOVER) m->recover = max(a->recover, b->recover);
- if (what & ATA_TIMING_DMACK_HOLD) m->dmack_hold = max(a->dmack_hold, b->dmack_hold);
- if (what & ATA_TIMING_CYCLE ) m->cycle = max(a->cycle, b->cycle);
- if (what & ATA_TIMING_UDMA ) m->udma = max(a->udma, b->udma);
-}
-
-const struct ata_timing *ata_timing_find_mode(u8 xfer_mode)
-{
- const struct ata_timing *t = ata_timing;
-
- while (xfer_mode > t->mode)
- t++;
-
- if (xfer_mode == t->mode)
- return t;
-
- WARN_ONCE(true, "%s: unable to find timing for xfer_mode 0x%x\n",
- __func__, xfer_mode);
-
- return NULL;
-}
-
-int ata_timing_compute(struct ata_device *adev, unsigned short speed,
- struct ata_timing *t, int T, int UT)
-{
- const u16 *id = adev->id;
- const struct ata_timing *s;
- struct ata_timing p;
-
- /*
- * Find the mode.
- */
-
- if (!(s = ata_timing_find_mode(speed)))
- return -EINVAL;
-
- memcpy(t, s, sizeof(*s));
-
- /*
- * If the drive is an EIDE drive, it can tell us it needs extended
- * PIO/MW_DMA cycle timing.
- */
-
- if (id[ATA_ID_FIELD_VALID] & 2) { /* EIDE drive */
- memset(&p, 0, sizeof(p));
-
- if (speed >= XFER_PIO_0 && speed < XFER_SW_DMA_0) {
- if (speed <= XFER_PIO_2)
- p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO];
- else if ((speed <= XFER_PIO_4) ||
- (speed == XFER_PIO_5 && !ata_id_is_cfa(id)))
- p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO_IORDY];
- } else if (speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2)
- p.cycle = id[ATA_ID_EIDE_DMA_MIN];
-
- ata_timing_merge(&p, t, t, ATA_TIMING_CYCLE | ATA_TIMING_CYC8B);
- }
-
- /*
- * Convert the timing to bus clock counts.
- */
-
- ata_timing_quantize(t, t, T, UT);
-
- /*
- * Even in DMA/UDMA modes we still use PIO access for IDENTIFY,
- * S.M.A.R.T * and some other commands. We have to ensure that the
- * DMA cycle timing is slower/equal than the fastest PIO timing.
- */
-
- if (speed > XFER_PIO_6) {
- ata_timing_compute(adev, adev->pio_mode, &p, T, UT);
- ata_timing_merge(&p, t, t, ATA_TIMING_ALL);
- }
-
- /*
- * Lengthen active & recovery time so that cycle time is correct.
- */
-
- if (t->act8b + t->rec8b < t->cyc8b) {
- t->act8b += (t->cyc8b - (t->act8b + t->rec8b)) / 2;
- t->rec8b = t->cyc8b - t->act8b;
- }
-
- if (t->active + t->recover < t->cycle) {
- t->active += (t->cycle - (t->active + t->recover)) / 2;
- t->recover = t->cycle - t->active;
- }
-
- /* In a few cases quantisation may produce enough errors to
- leave t->cycle too low for the sum of active and recovery
- if so we must correct this */
- if (t->active + t->recover > t->cycle)
- t->cycle = t->active + t->recover;
-
- return 0;
-}
-
+#ifdef CONFIG_ATA_ACPI
/**
* ata_timing_cycle2mode - find xfer mode for the specified cycle duration
* @xfer_shift: ATA_SHIFT_* value for transfer type to examine.
@@ -3391,6 +3101,7 @@ u8 ata_timing_cycle2mode(unsigned int xfer_shift, int cycle)
return last_mode;
}
+#endif
/**
* ata_down_xfermask_limit - adjust dev xfer masks downward
@@ -3662,6 +3373,7 @@ int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
*r_failed_dev = dev;
return rc;
}
+EXPORT_SYMBOL_GPL(ata_do_set_mode);
/**
* ata_wait_ready - wait for link to become ready
@@ -3771,216 +3483,7 @@ int ata_wait_after_reset(struct ata_link *link, unsigned long deadline,
return ata_wait_ready(link, deadline, check_ready);
}
-
-/**
- * sata_link_debounce - debounce SATA phy status
- * @link: ATA link to debounce SATA phy status for
- * @params: timing parameters { interval, duration, timeout } in msec
- * @deadline: deadline jiffies for the operation
- *
- * Make sure SStatus of @link reaches stable state, determined by
- * holding the same value where DET is not 1 for @duration polled
- * every @interval, before @timeout. Timeout constraints the
- * beginning of the stable state. Because DET gets stuck at 1 on
- * some controllers after hot unplugging, this functions waits
- * until timeout then returns 0 if DET is stable at 1.
- *
- * @timeout is further limited by @deadline. The sooner of the
- * two is used.
- *
- * LOCKING:
- * Kernel thread context (may sleep)
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int sata_link_debounce(struct ata_link *link, const unsigned long *params,
- unsigned long deadline)
-{
- unsigned long interval = params[0];
- unsigned long duration = params[1];
- unsigned long last_jiffies, t;
- u32 last, cur;
- int rc;
-
- t = ata_deadline(jiffies, params[2]);
- if (time_before(t, deadline))
- deadline = t;
-
- if ((rc = sata_scr_read(link, SCR_STATUS, &cur)))
- return rc;
- cur &= 0xf;
-
- last = cur;
- last_jiffies = jiffies;
-
- while (1) {
- ata_msleep(link->ap, interval);
- if ((rc = sata_scr_read(link, SCR_STATUS, &cur)))
- return rc;
- cur &= 0xf;
-
- /* DET stable? */
- if (cur == last) {
- if (cur == 1 && time_before(jiffies, deadline))
- continue;
- if (time_after(jiffies,
- ata_deadline(last_jiffies, duration)))
- return 0;
- continue;
- }
-
- /* unstable, start over */
- last = cur;
- last_jiffies = jiffies;
-
- /* Check deadline. If debouncing failed, return
- * -EPIPE to tell upper layer to lower link speed.
- */
- if (time_after(jiffies, deadline))
- return -EPIPE;
- }
-}
-
-/**
- * sata_link_resume - resume SATA link
- * @link: ATA link to resume SATA
- * @params: timing parameters { interval, duration, timeout } in msec
- * @deadline: deadline jiffies for the operation
- *
- * Resume SATA phy @link and debounce it.
- *
- * LOCKING:
- * Kernel thread context (may sleep)
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int sata_link_resume(struct ata_link *link, const unsigned long *params,
- unsigned long deadline)
-{
- int tries = ATA_LINK_RESUME_TRIES;
- u32 scontrol, serror;
- int rc;
-
- if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
- return rc;
-
- /*
- * Writes to SControl sometimes get ignored under certain
- * controllers (ata_piix SIDPR). Make sure DET actually is
- * cleared.
- */
- do {
- scontrol = (scontrol & 0x0f0) | 0x300;
- if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
- return rc;
- /*
- * Some PHYs react badly if SStatus is pounded
- * immediately after resuming. Delay 200ms before
- * debouncing.
- */
- if (!(link->flags & ATA_LFLAG_NO_DB_DELAY))
- ata_msleep(link->ap, 200);
-
- /* is SControl restored correctly? */
- if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
- return rc;
- } while ((scontrol & 0xf0f) != 0x300 && --tries);
-
- if ((scontrol & 0xf0f) != 0x300) {
- ata_link_warn(link, "failed to resume link (SControl %X)\n",
- scontrol);
- return 0;
- }
-
- if (tries < ATA_LINK_RESUME_TRIES)
- ata_link_warn(link, "link resume succeeded after %d retries\n",
- ATA_LINK_RESUME_TRIES - tries);
-
- if ((rc = sata_link_debounce(link, params, deadline)))
- return rc;
-
- /* clear SError, some PHYs require this even for SRST to work */
- if (!(rc = sata_scr_read(link, SCR_ERROR, &serror)))
- rc = sata_scr_write(link, SCR_ERROR, serror);
-
- return rc != -EINVAL ? rc : 0;
-}
-
-/**
- * sata_link_scr_lpm - manipulate SControl IPM and SPM fields
- * @link: ATA link to manipulate SControl for
- * @policy: LPM policy to configure
- * @spm_wakeup: initiate LPM transition to active state
- *
- * Manipulate the IPM field of the SControl register of @link
- * according to @policy. If @policy is ATA_LPM_MAX_POWER and
- * @spm_wakeup is %true, the SPM field is manipulated to wake up
- * the link. This function also clears PHYRDY_CHG before
- * returning.
- *
- * LOCKING:
- * EH context.
- *
- * RETURNS:
- * 0 on success, -errno otherwise.
- */
-int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy,
- bool spm_wakeup)
-{
- struct ata_eh_context *ehc = &link->eh_context;
- bool woken_up = false;
- u32 scontrol;
- int rc;
-
- rc = sata_scr_read(link, SCR_CONTROL, &scontrol);
- if (rc)
- return rc;
-
- switch (policy) {
- case ATA_LPM_MAX_POWER:
- /* disable all LPM transitions */
- scontrol |= (0x7 << 8);
- /* initiate transition to active state */
- if (spm_wakeup) {
- scontrol |= (0x4 << 12);
- woken_up = true;
- }
- break;
- case ATA_LPM_MED_POWER:
- /* allow LPM to PARTIAL */
- scontrol &= ~(0x1 << 8);
- scontrol |= (0x6 << 8);
- break;
- case ATA_LPM_MED_POWER_WITH_DIPM:
- case ATA_LPM_MIN_POWER_WITH_PARTIAL:
- case ATA_LPM_MIN_POWER:
- if (ata_link_nr_enabled(link) > 0)
- /* no restrictions on LPM transitions */
- scontrol &= ~(0x7 << 8);
- else {
- /* empty port, power off */
- scontrol &= ~0xf;
- scontrol |= (0x1 << 2);
- }
- break;
- default:
- WARN_ON(1);
- }
-
- rc = sata_scr_write(link, SCR_CONTROL, scontrol);
- if (rc)
- return rc;
-
- /* give the link time to transit out of LPM state */
- if (woken_up)
- msleep(10);
-
- /* clear PHYRDY_CHG from SError */
- ehc->i.serror &= ~SERR_PHYRDY_CHG;
- return sata_scr_write(link, SCR_ERROR, SERR_PHYRDY_CHG);
-}
+EXPORT_SYMBOL_GPL(ata_wait_after_reset);
/**
* ata_std_prereset - prepare for reset
@@ -4026,118 +3529,7 @@ int ata_std_prereset(struct ata_link *link, unsigned long deadline)
return 0;
}
-
-/**
- * sata_link_hardreset - reset link via SATA phy reset
- * @link: link to reset
- * @timing: timing parameters { interval, duration, timeout } in msec
- * @deadline: deadline jiffies for the operation
- * @online: optional out parameter indicating link onlineness
- * @check_ready: optional callback to check link readiness
- *
- * SATA phy-reset @link using DET bits of SControl register.
- * After hardreset, link readiness is waited upon using
- * ata_wait_ready() if @check_ready is specified. LLDs are
- * allowed to not specify @check_ready and wait itself after this
- * function returns. Device classification is LLD's
- * responsibility.
- *
- * *@online is set to one iff reset succeeded and @link is online
- * after reset.
- *
- * LOCKING:
- * Kernel thread context (may sleep)
- *
- * RETURNS:
- * 0 on success, -errno otherwise.
- */
-int sata_link_hardreset(struct ata_link *link, const unsigned long *timing,
- unsigned long deadline,
- bool *online, int (*check_ready)(struct ata_link *))
-{
- u32 scontrol;
- int rc;
-
- DPRINTK("ENTER\n");
-
- if (online)
- *online = false;
-
- if (sata_set_spd_needed(link)) {
- /* SATA spec says nothing about how to reconfigure
- * spd. To be on the safe side, turn off phy during
- * reconfiguration. This works for at least ICH7 AHCI
- * and Sil3124.
- */
- if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
- goto out;
-
- scontrol = (scontrol & 0x0f0) | 0x304;
-
- if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
- goto out;
-
- sata_set_spd(link);
- }
-
- /* issue phy wake/reset */
- if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
- goto out;
-
- scontrol = (scontrol & 0x0f0) | 0x301;
-
- if ((rc = sata_scr_write_flush(link, SCR_CONTROL, scontrol)))
- goto out;
-
- /* Couldn't find anything in SATA I/II specs, but AHCI-1.1
- * 10.4.2 says at least 1 ms.
- */
- ata_msleep(link->ap, 1);
-
- /* bring link back */
- rc = sata_link_resume(link, timing, deadline);
- if (rc)
- goto out;
- /* if link is offline nothing more to do */
- if (ata_phys_link_offline(link))
- goto out;
-
- /* Link is online. From this point, -ENODEV too is an error. */
- if (online)
- *online = true;
-
- if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) {
- /* If PMP is supported, we have to do follow-up SRST.
- * Some PMPs don't send D2H Reg FIS after hardreset if
- * the first port is empty. Wait only for
- * ATA_TMOUT_PMP_SRST_WAIT.
- */
- if (check_ready) {
- unsigned long pmp_deadline;
-
- pmp_deadline = ata_deadline(jiffies,
- ATA_TMOUT_PMP_SRST_WAIT);
- if (time_after(pmp_deadline, deadline))
- pmp_deadline = deadline;
- ata_wait_ready(link, pmp_deadline, check_ready);
- }
- rc = -EAGAIN;
- goto out;
- }
-
- rc = 0;
- if (check_ready)
- rc = ata_wait_ready(link, deadline, check_ready);
- out:
- if (rc && rc != -EAGAIN) {
- /* online is set iff link is online && reset succeeded */
- if (online)
- *online = false;
- ata_link_err(link, "COMRESET failed (errno=%d)\n", rc);
- }
- DPRINTK("EXIT, rc=%d\n", rc);
- return rc;
-}
+EXPORT_SYMBOL_GPL(ata_std_prereset);
/**
* sata_std_hardreset - COMRESET w/o waiting or classification
@@ -4164,6 +3556,7 @@ int sata_std_hardreset(struct ata_link *link, unsigned int *class,
rc = sata_link_hardreset(link, timing, deadline, &online, NULL);
return online ? -EAGAIN : rc;
}
+EXPORT_SYMBOL_GPL(sata_std_hardreset);
/**
* ata_std_postreset - standard postreset callback
@@ -4192,6 +3585,7 @@ void ata_std_postreset(struct ata_link *link, unsigned int *classes)
DPRINTK("EXIT\n");
}
+EXPORT_SYMBOL_GPL(ata_std_postreset);
/**
* ata_dev_same_device - Determine whether new ID matches configured device
@@ -4979,11 +4373,13 @@ int ata_std_qc_defer(struct ata_queued_cmd *qc)
return ATA_DEFER_LINK;
}
+EXPORT_SYMBOL_GPL(ata_std_qc_defer);
enum ata_completion_errors ata_noop_qc_prep(struct ata_queued_cmd *qc)
{
return AC_ERR_OK;
}
+EXPORT_SYMBOL_GPL(ata_noop_qc_prep);
/**
* ata_sg_init - Associate command with scatter-gather table.
@@ -5327,6 +4723,7 @@ void ata_qc_complete(struct ata_queued_cmd *qc)
__ata_qc_complete(qc);
}
}
+EXPORT_SYMBOL_GPL(ata_qc_complete);
/**
* ata_qc_get_active - get bitmask of active qcs
@@ -5353,64 +4750,6 @@ u64 ata_qc_get_active(struct ata_port *ap)
EXPORT_SYMBOL_GPL(ata_qc_get_active);
/**
- * ata_qc_complete_multiple - Complete multiple qcs successfully
- * @ap: port in question
- * @qc_active: new qc_active mask
- *
- * Complete in-flight commands. This functions is meant to be
- * called from low-level driver's interrupt routine to complete
- * requests normally. ap->qc_active and @qc_active is compared
- * and commands are completed accordingly.
- *
- * Always use this function when completing multiple NCQ commands
- * from IRQ handlers instead of calling ata_qc_complete()
- * multiple times to keep IRQ expect status properly in sync.
- *
- * LOCKING:
- * spin_lock_irqsave(host lock)
- *
- * RETURNS:
- * Number of completed commands on success, -errno otherwise.
- */
-int ata_qc_complete_multiple(struct ata_port *ap, u64 qc_active)
-{
- u64 done_mask, ap_qc_active = ap->qc_active;
- int nr_done = 0;
-
- /*
- * If the internal tag is set on ap->qc_active, then we care about
- * bit0 on the passed in qc_active mask. Move that bit up to match
- * the internal tag.
- */
- if (ap_qc_active & (1ULL << ATA_TAG_INTERNAL)) {
- qc_active |= (qc_active & 0x01) << ATA_TAG_INTERNAL;
- qc_active ^= qc_active & 0x01;
- }
-
- done_mask = ap_qc_active ^ qc_active;
-
- if (unlikely(done_mask & qc_active)) {
- ata_port_err(ap, "illegal qc_active transition (%08llx->%08llx)\n",
- ap->qc_active, qc_active);
- return -EINVAL;
- }
-
- while (done_mask) {
- struct ata_queued_cmd *qc;
- unsigned int tag = __ffs64(done_mask);
-
- qc = ata_qc_from_tag(ap, tag);
- if (qc) {
- ata_qc_complete(qc);
- nr_done++;
- }
- done_mask &= ~(1ULL << tag);
- }
-
- return nr_done;
-}
-
-/**
* ata_qc_issue - issue taskfile to device
* @qc: command to issue to device
*
@@ -5486,111 +4825,6 @@ void ata_qc_issue(struct ata_queued_cmd *qc)
}
/**
- * sata_scr_valid - test whether SCRs are accessible
- * @link: ATA link to test SCR accessibility for
- *
- * Test whether SCRs are accessible for @link.
- *
- * LOCKING:
- * None.
- *
- * RETURNS:
- * 1 if SCRs are accessible, 0 otherwise.
- */
-int sata_scr_valid(struct ata_link *link)
-{
- struct ata_port *ap = link->ap;
-
- return (ap->flags & ATA_FLAG_SATA) && ap->ops->scr_read;
-}
-
-/**
- * sata_scr_read - read SCR register of the specified port
- * @link: ATA link to read SCR for
- * @reg: SCR to read
- * @val: Place to store read value
- *
- * Read SCR register @reg of @link into *@val. This function is
- * guaranteed to succeed if @link is ap->link, the cable type of
- * the port is SATA and the port implements ->scr_read.
- *
- * LOCKING:
- * None if @link is ap->link. Kernel thread context otherwise.
- *
- * RETURNS:
- * 0 on success, negative errno on failure.
- */
-int sata_scr_read(struct ata_link *link, int reg, u32 *val)
-{
- if (ata_is_host_link(link)) {
- if (sata_scr_valid(link))
- return link->ap->ops->scr_read(link, reg, val);
- return -EOPNOTSUPP;
- }
-
- return sata_pmp_scr_read(link, reg, val);
-}
-
-/**
- * sata_scr_write - write SCR register of the specified port
- * @link: ATA link to write SCR for
- * @reg: SCR to write
- * @val: value to write
- *
- * Write @val to SCR register @reg of @link. This function is
- * guaranteed to succeed if @link is ap->link, the cable type of
- * the port is SATA and the port implements ->scr_read.
- *
- * LOCKING:
- * None if @link is ap->link. Kernel thread context otherwise.
- *
- * RETURNS:
- * 0 on success, negative errno on failure.
- */
-int sata_scr_write(struct ata_link *link, int reg, u32 val)
-{
- if (ata_is_host_link(link)) {
- if (sata_scr_valid(link))
- return link->ap->ops->scr_write(link, reg, val);
- return -EOPNOTSUPP;
- }
-
- return sata_pmp_scr_write(link, reg, val);
-}
-
-/**
- * sata_scr_write_flush - write SCR register of the specified port and flush
- * @link: ATA link to write SCR for
- * @reg: SCR to write
- * @val: value to write
- *
- * This function is identical to sata_scr_write() except that this
- * function performs flush after writing to the register.
- *
- * LOCKING:
- * None if @link is ap->link. Kernel thread context otherwise.
- *
- * RETURNS:
- * 0 on success, negative errno on failure.
- */
-int sata_scr_write_flush(struct ata_link *link, int reg, u32 val)
-{
- if (ata_is_host_link(link)) {
- int rc;
-
- if (sata_scr_valid(link)) {
- rc = link->ap->ops->scr_write(link, reg, val);
- if (rc == 0)
- rc = link->ap->ops->scr_read(link, reg, &val);
- return rc;
- }
- return -EOPNOTSUPP;
- }
-
- return sata_pmp_scr_write(link, reg, val);
-}
-
-/**
* ata_phys_link_online - test whether the given link is online
* @link: ATA link to test
*
@@ -5663,6 +4897,7 @@ bool ata_link_online(struct ata_link *link)
return ata_phys_link_online(link) ||
(slave && ata_phys_link_online(slave));
}
+EXPORT_SYMBOL_GPL(ata_link_online);
/**
* ata_link_offline - test whether the given link is offline
@@ -5689,6 +4924,7 @@ bool ata_link_offline(struct ata_link *link)
return ata_phys_link_offline(link) &&
(!slave || ata_phys_link_offline(slave));
}
+EXPORT_SYMBOL_GPL(ata_link_offline);
#ifdef CONFIG_PM
static void ata_port_request_pm(struct ata_port *ap, pm_message_t mesg,
@@ -5875,6 +5111,7 @@ int ata_host_suspend(struct ata_host *host, pm_message_t mesg)
host->dev->power.power_state = mesg;
return 0;
}
+EXPORT_SYMBOL_GPL(ata_host_suspend);
/**
* ata_host_resume - resume host
@@ -5886,6 +5123,7 @@ void ata_host_resume(struct ata_host *host)
{
host->dev->power.power_state = PMSG_ON;
}
+EXPORT_SYMBOL_GPL(ata_host_resume);
#endif
const struct device_type ata_port_type = {
@@ -6105,6 +5343,7 @@ void ata_host_put(struct ata_host *host)
{
kref_put(&host->kref, ata_host_release);
}
+EXPORT_SYMBOL_GPL(ata_host_put);
/**
* ata_host_alloc - allocate and init basic ATA host resources
@@ -6178,6 +5417,7 @@ struct ata_host *ata_host_alloc(struct device *dev, int max_ports)
kfree(host);
return NULL;
}
+EXPORT_SYMBOL_GPL(ata_host_alloc);
/**
* ata_host_alloc_pinfo - alloc host and init with port_info array
@@ -6226,68 +5466,7 @@ struct ata_host *ata_host_alloc_pinfo(struct device *dev,
return host;
}
-
-/**
- * ata_slave_link_init - initialize slave link
- * @ap: port to initialize slave link for
- *
- * Create and initialize slave link for @ap. This enables slave
- * link handling on the port.
- *
- * In libata, a port contains links and a link contains devices.
- * There is single host link but if a PMP is attached to it,
- * there can be multiple fan-out links. On SATA, there's usually
- * a single device connected to a link but PATA and SATA
- * controllers emulating TF based interface can have two - master
- * and slave.
- *
- * However, there are a few controllers which don't fit into this
- * abstraction too well - SATA controllers which emulate TF
- * interface with both master and slave devices but also have
- * separate SCR register sets for each device. These controllers
- * need separate links for physical link handling
- * (e.g. onlineness, link speed) but should be treated like a
- * traditional M/S controller for everything else (e.g. command
- * issue, softreset).
- *
- * slave_link is libata's way of handling this class of
- * controllers without impacting core layer too much. For
- * anything other than physical link handling, the default host
- * link is used for both master and slave. For physical link
- * handling, separate @ap->slave_link is used. All dirty details
- * are implemented inside libata core layer. From LLD's POV, the
- * only difference is that prereset, hardreset and postreset are
- * called once more for the slave link, so the reset sequence
- * looks like the following.
- *
- * prereset(M) -> prereset(S) -> hardreset(M) -> hardreset(S) ->
- * softreset(M) -> postreset(M) -> postreset(S)
- *
- * Note that softreset is called only for the master. Softreset
- * resets both M/S by definition, so SRST on master should handle
- * both (the standard method will work just fine).
- *
- * LOCKING:
- * Should be called before host is registered.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int ata_slave_link_init(struct ata_port *ap)
-{
- struct ata_link *link;
-
- WARN_ON(ap->slave_link);
- WARN_ON(ap->flags & ATA_FLAG_PMP);
-
- link = kzalloc(sizeof(*link), GFP_KERNEL);
- if (!link)
- return -ENOMEM;
-
- ata_link_init(ap, link, 1);
- ap->slave_link = link;
- return 0;
-}
+EXPORT_SYMBOL_GPL(ata_host_alloc_pinfo);
static void ata_host_stop(struct device *gendev, void *res)
{
@@ -6436,6 +5615,7 @@ int ata_host_start(struct ata_host *host)
devres_free(start_dr);
return rc;
}
+EXPORT_SYMBOL_GPL(ata_host_start);
/**
* ata_sas_host_init - Initialize a host struct for sas (ipr, libsas)
@@ -6454,6 +5634,7 @@ void ata_host_init(struct ata_host *host, struct device *dev,
host->ops = ops;
kref_init(&host->kref);
}
+EXPORT_SYMBOL_GPL(ata_host_init);
void __ata_port_probe(struct ata_port *ap)
{
@@ -6609,6 +5790,7 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht)
return rc;
}
+EXPORT_SYMBOL_GPL(ata_host_register);
/**
* ata_host_activate - start host, request IRQ and register it
@@ -6671,6 +5853,7 @@ int ata_host_activate(struct ata_host *host, int irq,
return rc;
}
+EXPORT_SYMBOL_GPL(ata_host_activate);
/**
* ata_port_detach - Detach ATA port in preparation of device removal
@@ -6746,6 +5929,7 @@ void ata_host_detach(struct ata_host *host)
/* the host is dead now, dissociate ACPI */
ata_acpi_dissociate(host);
}
+EXPORT_SYMBOL_GPL(ata_host_detach);
#ifdef CONFIG_PCI
@@ -6766,6 +5950,7 @@ void ata_pci_remove_one(struct pci_dev *pdev)
ata_host_detach(host);
}
+EXPORT_SYMBOL_GPL(ata_pci_remove_one);
void ata_pci_shutdown_one(struct pci_dev *pdev)
{
@@ -6786,6 +5971,7 @@ void ata_pci_shutdown_one(struct pci_dev *pdev)
ap->ops->port_stop(ap);
}
}
+EXPORT_SYMBOL_GPL(ata_pci_shutdown_one);
/* move to PCI subsystem */
int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits)
@@ -6820,6 +6006,7 @@ int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits)
return (tmp == bits->val) ? 1 : 0;
}
+EXPORT_SYMBOL_GPL(pci_test_config_bits);
#ifdef CONFIG_PM
void ata_pci_device_do_suspend(struct pci_dev *pdev, pm_message_t mesg)
@@ -6830,6 +6017,7 @@ void ata_pci_device_do_suspend(struct pci_dev *pdev, pm_message_t mesg)
if (mesg.event & PM_EVENT_SLEEP)
pci_set_power_state(pdev, PCI_D3hot);
}
+EXPORT_SYMBOL_GPL(ata_pci_device_do_suspend);
int ata_pci_device_do_resume(struct pci_dev *pdev)
{
@@ -6848,6 +6036,7 @@ int ata_pci_device_do_resume(struct pci_dev *pdev)
pci_set_master(pdev);
return 0;
}
+EXPORT_SYMBOL_GPL(ata_pci_device_do_resume);
int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg)
{
@@ -6862,6 +6051,7 @@ int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg)
return 0;
}
+EXPORT_SYMBOL_GPL(ata_pci_device_suspend);
int ata_pci_device_resume(struct pci_dev *pdev)
{
@@ -6873,8 +6063,8 @@ int ata_pci_device_resume(struct pci_dev *pdev)
ata_host_resume(host);
return rc;
}
+EXPORT_SYMBOL_GPL(ata_pci_device_resume);
#endif /* CONFIG_PM */
-
#endif /* CONFIG_PCI */
/**
@@ -6896,7 +6086,9 @@ int ata_platform_remove_one(struct platform_device *pdev)
return 0;
}
+EXPORT_SYMBOL_GPL(ata_platform_remove_one);
+#ifdef CONFIG_ATA_FORCE
static int __init ata_parse_force_one(char **cur,
struct ata_force_ent *force_ent,
const char **reason)
@@ -7076,6 +6268,15 @@ static void __init ata_parse_force_param(void)
ata_force_tbl_size = idx;
}
+static void ata_free_force_param(void)
+{
+ kfree(ata_force_tbl);
+}
+#else
+static inline void ata_parse_force_param(void) { }
+static inline void ata_free_force_param(void) { }
+#endif
+
static int __init ata_init(void)
{
int rc;
@@ -7084,7 +6285,7 @@ static int __init ata_init(void)
rc = ata_sff_init();
if (rc) {
- kfree(ata_force_tbl);
+ ata_free_force_param();
return rc;
}
@@ -7108,7 +6309,7 @@ static void __exit ata_exit(void)
ata_release_transport(ata_scsi_transport_template);
libata_transport_exit();
ata_sff_exit();
- kfree(ata_force_tbl);
+ ata_free_force_param();
}
subsys_initcall(ata_init);
@@ -7120,6 +6321,7 @@ int ata_ratelimit(void)
{
return __ratelimit(&ratelimit);
}
+EXPORT_SYMBOL_GPL(ata_ratelimit);
/**
* ata_msleep - ATA EH owner aware msleep
@@ -7152,6 +6354,7 @@ void ata_msleep(struct ata_port *ap, unsigned int msecs)
if (owns_eh)
ata_eh_acquire(ap);
}
+EXPORT_SYMBOL_GPL(ata_msleep);
/**
* ata_wait_register - wait until register value changes
@@ -7198,38 +6401,7 @@ u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask, u32 val,
return tmp;
}
-
-/**
- * sata_lpm_ignore_phy_events - test if PHY event should be ignored
- * @link: Link receiving the event
- *
- * Test whether the received PHY event has to be ignored or not.
- *
- * LOCKING:
- * None:
- *
- * RETURNS:
- * True if the event has to be ignored.
- */
-bool sata_lpm_ignore_phy_events(struct ata_link *link)
-{
- unsigned long lpm_timeout = link->last_lpm_change +
- msecs_to_jiffies(ATA_TMOUT_SPURIOUS_PHY);
-
- /* if LPM is enabled, PHYRDY doesn't mean anything */
- if (link->lpm_policy > ATA_LPM_MAX_POWER)
- return true;
-
- /* ignore the first PHY event after the LPM policy changed
- * as it is might be spurious
- */
- if ((link->flags & ATA_LFLAG_CHANGED) &&
- time_before(jiffies, lpm_timeout))
- return true;
-
- return false;
-}
-EXPORT_SYMBOL_GPL(sata_lpm_ignore_phy_events);
+EXPORT_SYMBOL_GPL(ata_wait_register);
/*
* Dummy port_ops
@@ -7251,10 +6423,12 @@ struct ata_port_operations ata_dummy_port_ops = {
.sched_eh = ata_std_sched_eh,
.end_eh = ata_std_end_eh,
};
+EXPORT_SYMBOL_GPL(ata_dummy_port_ops);
const struct ata_port_info ata_dummy_port_info = {
.port_ops = &ata_dummy_port_ops,
};
+EXPORT_SYMBOL_GPL(ata_dummy_port_info);
/*
* Utility print functions
@@ -7322,127 +6496,3 @@ void ata_print_version(const struct device *dev, const char *version)
dev_printk(KERN_DEBUG, dev, "version %s\n", version);
}
EXPORT_SYMBOL(ata_print_version);
-
-/*
- * libata is essentially a library of internal helper functions for
- * low-level ATA host controller drivers. As such, the API/ABI is
- * likely to change as new drivers are added and updated.
- * Do not depend on ABI/API stability.
- */
-EXPORT_SYMBOL_GPL(sata_deb_timing_normal);
-EXPORT_SYMBOL_GPL(sata_deb_timing_hotplug);
-EXPORT_SYMBOL_GPL(sata_deb_timing_long);
-EXPORT_SYMBOL_GPL(ata_base_port_ops);
-EXPORT_SYMBOL_GPL(sata_port_ops);
-EXPORT_SYMBOL_GPL(ata_dummy_port_ops);
-EXPORT_SYMBOL_GPL(ata_dummy_port_info);
-EXPORT_SYMBOL_GPL(ata_link_next);
-EXPORT_SYMBOL_GPL(ata_dev_next);
-EXPORT_SYMBOL_GPL(ata_std_bios_param);
-EXPORT_SYMBOL_GPL(ata_scsi_unlock_native_capacity);
-EXPORT_SYMBOL_GPL(ata_host_init);
-EXPORT_SYMBOL_GPL(ata_host_alloc);
-EXPORT_SYMBOL_GPL(ata_host_alloc_pinfo);
-EXPORT_SYMBOL_GPL(ata_slave_link_init);
-EXPORT_SYMBOL_GPL(ata_host_start);
-EXPORT_SYMBOL_GPL(ata_host_register);
-EXPORT_SYMBOL_GPL(ata_host_activate);
-EXPORT_SYMBOL_GPL(ata_host_detach);
-EXPORT_SYMBOL_GPL(ata_sg_init);
-EXPORT_SYMBOL_GPL(ata_qc_complete);
-EXPORT_SYMBOL_GPL(ata_qc_complete_multiple);
-EXPORT_SYMBOL_GPL(atapi_cmd_type);
-EXPORT_SYMBOL_GPL(ata_tf_to_fis);
-EXPORT_SYMBOL_GPL(ata_tf_from_fis);
-EXPORT_SYMBOL_GPL(ata_pack_xfermask);
-EXPORT_SYMBOL_GPL(ata_unpack_xfermask);
-EXPORT_SYMBOL_GPL(ata_xfer_mask2mode);
-EXPORT_SYMBOL_GPL(ata_xfer_mode2mask);
-EXPORT_SYMBOL_GPL(ata_xfer_mode2shift);
-EXPORT_SYMBOL_GPL(ata_mode_string);
-EXPORT_SYMBOL_GPL(ata_id_xfermask);
-EXPORT_SYMBOL_GPL(ata_do_set_mode);
-EXPORT_SYMBOL_GPL(ata_std_qc_defer);
-EXPORT_SYMBOL_GPL(ata_noop_qc_prep);
-EXPORT_SYMBOL_GPL(ata_dev_disable);
-EXPORT_SYMBOL_GPL(sata_set_spd);
-EXPORT_SYMBOL_GPL(ata_wait_after_reset);
-EXPORT_SYMBOL_GPL(sata_link_debounce);
-EXPORT_SYMBOL_GPL(sata_link_resume);
-EXPORT_SYMBOL_GPL(sata_link_scr_lpm);
-EXPORT_SYMBOL_GPL(ata_std_prereset);
-EXPORT_SYMBOL_GPL(sata_link_hardreset);
-EXPORT_SYMBOL_GPL(sata_std_hardreset);
-EXPORT_SYMBOL_GPL(ata_std_postreset);
-EXPORT_SYMBOL_GPL(ata_dev_classify);
-EXPORT_SYMBOL_GPL(ata_dev_pair);
-EXPORT_SYMBOL_GPL(ata_ratelimit);
-EXPORT_SYMBOL_GPL(ata_msleep);
-EXPORT_SYMBOL_GPL(ata_wait_register);
-EXPORT_SYMBOL_GPL(ata_scsi_queuecmd);
-EXPORT_SYMBOL_GPL(ata_scsi_slave_config);
-EXPORT_SYMBOL_GPL(ata_scsi_slave_destroy);
-EXPORT_SYMBOL_GPL(ata_scsi_change_queue_depth);
-EXPORT_SYMBOL_GPL(__ata_change_queue_depth);
-EXPORT_SYMBOL_GPL(sata_scr_valid);
-EXPORT_SYMBOL_GPL(sata_scr_read);
-EXPORT_SYMBOL_GPL(sata_scr_write);
-EXPORT_SYMBOL_GPL(sata_scr_write_flush);
-EXPORT_SYMBOL_GPL(ata_link_online);
-EXPORT_SYMBOL_GPL(ata_link_offline);
-#ifdef CONFIG_PM
-EXPORT_SYMBOL_GPL(ata_host_suspend);
-EXPORT_SYMBOL_GPL(ata_host_resume);
-#endif /* CONFIG_PM */
-EXPORT_SYMBOL_GPL(ata_id_string);
-EXPORT_SYMBOL_GPL(ata_id_c_string);
-EXPORT_SYMBOL_GPL(ata_do_dev_read_id);
-EXPORT_SYMBOL_GPL(ata_scsi_simulate);
-
-EXPORT_SYMBOL_GPL(ata_pio_need_iordy);
-EXPORT_SYMBOL_GPL(ata_timing_find_mode);
-EXPORT_SYMBOL_GPL(ata_timing_compute);
-EXPORT_SYMBOL_GPL(ata_timing_merge);
-EXPORT_SYMBOL_GPL(ata_timing_cycle2mode);
-
-#ifdef CONFIG_PCI
-EXPORT_SYMBOL_GPL(pci_test_config_bits);
-EXPORT_SYMBOL_GPL(ata_pci_shutdown_one);
-EXPORT_SYMBOL_GPL(ata_pci_remove_one);
-#ifdef CONFIG_PM
-EXPORT_SYMBOL_GPL(ata_pci_device_do_suspend);
-EXPORT_SYMBOL_GPL(ata_pci_device_do_resume);
-EXPORT_SYMBOL_GPL(ata_pci_device_suspend);
-EXPORT_SYMBOL_GPL(ata_pci_device_resume);
-#endif /* CONFIG_PM */
-#endif /* CONFIG_PCI */
-
-EXPORT_SYMBOL_GPL(ata_platform_remove_one);
-
-EXPORT_SYMBOL_GPL(__ata_ehi_push_desc);
-EXPORT_SYMBOL_GPL(ata_ehi_push_desc);
-EXPORT_SYMBOL_GPL(ata_ehi_clear_desc);
-EXPORT_SYMBOL_GPL(ata_port_desc);
-#ifdef CONFIG_PCI
-EXPORT_SYMBOL_GPL(ata_port_pbar_desc);
-#endif /* CONFIG_PCI */
-EXPORT_SYMBOL_GPL(ata_port_schedule_eh);
-EXPORT_SYMBOL_GPL(ata_link_abort);
-EXPORT_SYMBOL_GPL(ata_port_abort);
-EXPORT_SYMBOL_GPL(ata_port_freeze);
-EXPORT_SYMBOL_GPL(sata_async_notification);
-EXPORT_SYMBOL_GPL(ata_eh_freeze_port);
-EXPORT_SYMBOL_GPL(ata_eh_thaw_port);
-EXPORT_SYMBOL_GPL(ata_eh_qc_complete);
-EXPORT_SYMBOL_GPL(ata_eh_qc_retry);
-EXPORT_SYMBOL_GPL(ata_eh_analyze_ncq_error);
-EXPORT_SYMBOL_GPL(ata_do_eh);
-EXPORT_SYMBOL_GPL(ata_std_error_handler);
-
-EXPORT_SYMBOL_GPL(ata_cable_40wire);
-EXPORT_SYMBOL_GPL(ata_cable_80wire);
-EXPORT_SYMBOL_GPL(ata_cable_unknown);
-EXPORT_SYMBOL_GPL(ata_cable_ignore);
-EXPORT_SYMBOL_GPL(ata_cable_sata);
-EXPORT_SYMBOL_GPL(ata_host_get);
-EXPORT_SYMBOL_GPL(ata_host_put);
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 3bfd9da5..474c6c3 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -2,10 +2,6 @@
/*
* libata-eh.c - libata error handling
*
- * Maintained by: Tejun Heo <tj@kernel.org>
- * Please ALWAYS copy linux-ide@vger.kernel.org
- * on emails.
- *
* Copyright 2006 Tejun Heo <htejun@gmail.com>
*
* libata documentation is available via 'make {ps|pdf}docs',
@@ -184,6 +180,7 @@ void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
__ata_ehi_pushv_desc(ehi, fmt, args);
va_end(args);
}
+EXPORT_SYMBOL_GPL(__ata_ehi_push_desc);
/**
* ata_ehi_push_desc - push error description with separator
@@ -207,6 +204,7 @@ void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
__ata_ehi_pushv_desc(ehi, fmt, args);
va_end(args);
}
+EXPORT_SYMBOL_GPL(ata_ehi_push_desc);
/**
* ata_ehi_clear_desc - clean error description
@@ -222,6 +220,7 @@ void ata_ehi_clear_desc(struct ata_eh_info *ehi)
ehi->desc[0] = '\0';
ehi->desc_len = 0;
}
+EXPORT_SYMBOL_GPL(ata_ehi_clear_desc);
/**
* ata_port_desc - append port description
@@ -249,9 +248,9 @@ void ata_port_desc(struct ata_port *ap, const char *fmt, ...)
__ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args);
va_end(args);
}
+EXPORT_SYMBOL_GPL(ata_port_desc);
#ifdef CONFIG_PCI
-
/**
* ata_port_pbar_desc - append PCI BAR description
* @ap: target ATA port
@@ -288,7 +287,7 @@ void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
ata_port_desc(ap, "%s 0x%llx", name,
start + (unsigned long long)offset);
}
-
+EXPORT_SYMBOL_GPL(ata_port_pbar_desc);
#endif /* CONFIG_PCI */
static int ata_lookup_timeout_table(u8 cmd)
@@ -973,6 +972,7 @@ void ata_port_schedule_eh(struct ata_port *ap)
/* see: ata_std_sched_eh, unless you know better */
ap->ops->sched_eh(ap);
}
+EXPORT_SYMBOL_GPL(ata_port_schedule_eh);
static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
{
@@ -1015,6 +1015,7 @@ int ata_link_abort(struct ata_link *link)
{
return ata_do_link_abort(link->ap, link);
}
+EXPORT_SYMBOL_GPL(ata_link_abort);
/**
* ata_port_abort - abort all qc's on the port
@@ -1032,6 +1033,7 @@ int ata_port_abort(struct ata_port *ap)
{
return ata_do_link_abort(ap, NULL);
}
+EXPORT_SYMBOL_GPL(ata_port_abort);
/**
* __ata_port_freeze - freeze port
@@ -1088,79 +1090,7 @@ int ata_port_freeze(struct ata_port *ap)
return nr_aborted;
}
-
-/**
- * sata_async_notification - SATA async notification handler
- * @ap: ATA port where async notification is received
- *
- * Handler to be called when async notification via SDB FIS is
- * received. This function schedules EH if necessary.
- *
- * LOCKING:
- * spin_lock_irqsave(host lock)
- *
- * RETURNS:
- * 1 if EH is scheduled, 0 otherwise.
- */
-int sata_async_notification(struct ata_port *ap)
-{
- u32 sntf;
- int rc;
-
- if (!(ap->flags & ATA_FLAG_AN))
- return 0;
-
- rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf);
- if (rc == 0)
- sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf);
-
- if (!sata_pmp_attached(ap) || rc) {
- /* PMP is not attached or SNTF is not available */
- if (!sata_pmp_attached(ap)) {
- /* PMP is not attached. Check whether ATAPI
- * AN is configured. If so, notify media
- * change.
- */
- struct ata_device *dev = ap->link.device;
-
- if ((dev->class == ATA_DEV_ATAPI) &&
- (dev->flags & ATA_DFLAG_AN))
- ata_scsi_media_change_notify(dev);
- return 0;
- } else {
- /* PMP is attached but SNTF is not available.
- * ATAPI async media change notification is
- * not used. The PMP must be reporting PHY
- * status change, schedule EH.
- */
- ata_port_schedule_eh(ap);
- return 1;
- }
- } else {
- /* PMP is attached and SNTF is available */
- struct ata_link *link;
-
- /* check and notify ATAPI AN */
- ata_for_each_link(link, ap, EDGE) {
- if (!(sntf & (1 << link->pmp)))
- continue;
-
- if ((link->device->class == ATA_DEV_ATAPI) &&
- (link->device->flags & ATA_DFLAG_AN))
- ata_scsi_media_change_notify(link->device);
- }
-
- /* If PMP is reporting that PHY status of some
- * downstream ports has changed, schedule EH.
- */
- if (sntf & (1 << SATA_PMP_CTRL_PORT)) {
- ata_port_schedule_eh(ap);
- return 1;
- }
-
- return 0;
- }
-}
+EXPORT_SYMBOL_GPL(ata_port_freeze);
/**
* ata_eh_freeze_port - EH helper to freeze port
@@ -1182,6 +1112,7 @@ void ata_eh_freeze_port(struct ata_port *ap)
__ata_port_freeze(ap);
spin_unlock_irqrestore(ap->lock, flags);
}
+EXPORT_SYMBOL_GPL(ata_eh_freeze_port);
/**
* ata_port_thaw_port - EH helper to thaw port
@@ -1289,6 +1220,7 @@ void ata_dev_disable(struct ata_device *dev)
*/
ata_ering_clear(&dev->ering);
}
+EXPORT_SYMBOL_GPL(ata_dev_disable);
/**
* ata_eh_detach_dev - detach ATA device
@@ -1420,62 +1352,6 @@ static const char *ata_err_string(unsigned int err_mask)
}
/**
- * ata_eh_read_log_10h - Read log page 10h for NCQ error details
- * @dev: Device to read log page 10h from
- * @tag: Resulting tag of the failed command
- * @tf: Resulting taskfile registers of the failed command
- *
- * Read log page 10h to obtain NCQ error details and clear error
- * condition.
- *
- * LOCKING:
- * Kernel thread context (may sleep).
- *
- * RETURNS:
- * 0 on success, -errno otherwise.
- */
-static int ata_eh_read_log_10h(struct ata_device *dev,
- int *tag, struct ata_taskfile *tf)
-{
- u8 *buf = dev->link->ap->sector_buf;
- unsigned int err_mask;
- u8 csum;
- int i;
-
- err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, 0, buf, 1);
- if (err_mask)
- return -EIO;
-
- csum = 0;
- for (i = 0; i < ATA_SECT_SIZE; i++)
- csum += buf[i];
- if (csum)
- ata_dev_warn(dev, "invalid checksum 0x%x on log page 10h\n",
- csum);
-
- if (buf[0] & 0x80)
- return -ENOENT;
-
- *tag = buf[0] & 0x1f;
-
- tf->command = buf[2];
- tf->feature = buf[3];
- tf->lbal = buf[4];
- tf->lbam = buf[5];
- tf->lbah = buf[6];
- tf->device = buf[7];
- tf->hob_lbal = buf[8];
- tf->hob_lbam = buf[9];
- tf->hob_lbah = buf[10];
- tf->nsect = buf[12];
- tf->hob_nsect = buf[13];
- if (dev->class == ATA_DEV_ZAC && ata_id_has_ncq_autosense(dev->id))
- tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16];
-
- return 0;
-}
-
-/**
* atapi_eh_tur - perform ATAPI TEST_UNIT_READY
* @dev: target ATAPI device
* @r_sense_key: out parameter for sense_key
@@ -1659,80 +1535,6 @@ static void ata_eh_analyze_serror(struct ata_link *link)
}
/**
- * ata_eh_analyze_ncq_error - analyze NCQ error
- * @link: ATA link to analyze NCQ error for
- *
- * Read log page 10h, determine the offending qc and acquire
- * error status TF. For NCQ device errors, all LLDDs have to do
- * is setting AC_ERR_DEV in ehi->err_mask. This function takes
- * care of the rest.
- *
- * LOCKING:
- * Kernel thread context (may sleep).
- */
-void ata_eh_analyze_ncq_error(struct ata_link *link)
-{
- struct ata_port *ap = link->ap;
- struct ata_eh_context *ehc = &link->eh_context;
- struct ata_device *dev = link->device;
- struct ata_queued_cmd *qc;
- struct ata_taskfile tf;
- int tag, rc;
-
- /* if frozen, we can't do much */
- if (ap->pflags & ATA_PFLAG_FROZEN)
- return;
-
- /* is it NCQ device error? */
- if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
- return;
-
- /* has LLDD analyzed already? */
- ata_qc_for_each_raw(ap, qc, tag) {
- if (!(qc->flags & ATA_QCFLAG_FAILED))
- continue;
-
- if (qc->err_mask)
- return;
- }
-
- /* okay, this error is ours */
- memset(&tf, 0, sizeof(tf));
- rc = ata_eh_read_log_10h(dev, &tag, &tf);
- if (rc) {
- ata_link_err(link, "failed to read log page 10h (errno=%d)\n",
- rc);
- return;
- }
-
- if (!(link->sactive & (1 << tag))) {
- ata_link_err(link, "log page 10h reported inactive tag %d\n",
- tag);
- return;
- }
-
- /* we've got the perpetrator, condemn it */
- qc = __ata_qc_from_tag(ap, tag);
- memcpy(&qc->result_tf, &tf, sizeof(tf));
- qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
- qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;
- if (dev->class == ATA_DEV_ZAC &&
- ((qc->result_tf.command & ATA_SENSE) || qc->result_tf.auxiliary)) {
- char sense_key, asc, ascq;
-
- sense_key = (qc->result_tf.auxiliary >> 16) & 0xff;
- asc = (qc->result_tf.auxiliary >> 8) & 0xff;
- ascq = qc->result_tf.auxiliary & 0xff;
- ata_scsi_set_sense(dev, qc->scsicmd, sense_key, asc, ascq);
- ata_scsi_set_sense_information(dev, qc->scsicmd,
- &qc->result_tf);
- qc->flags |= ATA_QCFLAG_SENSE_VALID;
- }
-
- ehc->i.err_mask &= ~AC_ERR_DEV;
-}
-
-/**
* ata_eh_analyze_tf - analyze taskfile of a failed qc
* @qc: qc to analyze
* @tf: Taskfile registers to analyze
@@ -3436,7 +3238,8 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
int rc;
/* if the link or host doesn't do LPM, noop */
- if ((link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm))
+ if (!IS_ENABLED(CONFIG_SATA_HOST) ||
+ (link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm))
return 0;
/*
@@ -4052,6 +3855,7 @@ void ata_std_error_handler(struct ata_port *ap)
ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset);
}
+EXPORT_SYMBOL_GPL(ata_std_error_handler);
#ifdef CONFIG_PM
/**
diff --git a/drivers/ata/libata-pata-timings.c b/drivers/ata/libata-pata-timings.c
new file mode 100644
index 0000000..af34122
--- /dev/null
+++ b/drivers/ata/libata-pata-timings.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Helper library for PATA timings
+ *
+ * Copyright 2003-2004 Red Hat, Inc. All rights reserved.
+ * Copyright 2003-2004 Jeff Garzik
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/libata.h>
+
+/*
+ * This mode timing computation functionality is ported over from
+ * drivers/ide/ide-timing.h and was originally written by Vojtech Pavlik
+ */
+/*
+ * PIO 0-4, MWDMA 0-2 and UDMA 0-6 timings (in nanoseconds).
+ * These were taken from ATA/ATAPI-6 standard, rev 0a, except
+ * for UDMA6, which is currently supported only by Maxtor drives.
+ *
+ * For PIO 5/6 MWDMA 3/4 see the CFA specification 3.0.
+ */
+
+static const struct ata_timing ata_timing[] = {
+/* { XFER_PIO_SLOW, 120, 290, 240, 960, 290, 240, 0, 960, 0 }, */
+ { XFER_PIO_0, 70, 290, 240, 600, 165, 150, 0, 600, 0 },
+ { XFER_PIO_1, 50, 290, 93, 383, 125, 100, 0, 383, 0 },
+ { XFER_PIO_2, 30, 290, 40, 330, 100, 90, 0, 240, 0 },
+ { XFER_PIO_3, 30, 80, 70, 180, 80, 70, 0, 180, 0 },
+ { XFER_PIO_4, 25, 70, 25, 120, 70, 25, 0, 120, 0 },
+ { XFER_PIO_5, 15, 65, 25, 100, 65, 25, 0, 100, 0 },
+ { XFER_PIO_6, 10, 55, 20, 80, 55, 20, 0, 80, 0 },
+
+ { XFER_SW_DMA_0, 120, 0, 0, 0, 480, 480, 50, 960, 0 },
+ { XFER_SW_DMA_1, 90, 0, 0, 0, 240, 240, 30, 480, 0 },
+ { XFER_SW_DMA_2, 60, 0, 0, 0, 120, 120, 20, 240, 0 },
+
+ { XFER_MW_DMA_0, 60, 0, 0, 0, 215, 215, 20, 480, 0 },
+ { XFER_MW_DMA_1, 45, 0, 0, 0, 80, 50, 5, 150, 0 },
+ { XFER_MW_DMA_2, 25, 0, 0, 0, 70, 25, 5, 120, 0 },
+ { XFER_MW_DMA_3, 25, 0, 0, 0, 65, 25, 5, 100, 0 },
+ { XFER_MW_DMA_4, 25, 0, 0, 0, 55, 20, 5, 80, 0 },
+
+/* { XFER_UDMA_SLOW, 0, 0, 0, 0, 0, 0, 0, 0, 150 }, */
+ { XFER_UDMA_0, 0, 0, 0, 0, 0, 0, 0, 0, 120 },
+ { XFER_UDMA_1, 0, 0, 0, 0, 0, 0, 0, 0, 80 },
+ { XFER_UDMA_2, 0, 0, 0, 0, 0, 0, 0, 0, 60 },
+ { XFER_UDMA_3, 0, 0, 0, 0, 0, 0, 0, 0, 45 },
+ { XFER_UDMA_4, 0, 0, 0, 0, 0, 0, 0, 0, 30 },
+ { XFER_UDMA_5, 0, 0, 0, 0, 0, 0, 0, 0, 20 },
+ { XFER_UDMA_6, 0, 0, 0, 0, 0, 0, 0, 0, 15 },
+
+ { 0xFF }
+};
+
+#define ENOUGH(v, unit) (((v)-1)/(unit)+1)
+#define EZ(v, unit) ((v)?ENOUGH(((v) * 1000), unit):0)
+
+static void ata_timing_quantize(const struct ata_timing *t,
+ struct ata_timing *q, int T, int UT)
+{
+ q->setup = EZ(t->setup, T);
+ q->act8b = EZ(t->act8b, T);
+ q->rec8b = EZ(t->rec8b, T);
+ q->cyc8b = EZ(t->cyc8b, T);
+ q->active = EZ(t->active, T);
+ q->recover = EZ(t->recover, T);
+ q->dmack_hold = EZ(t->dmack_hold, T);
+ q->cycle = EZ(t->cycle, T);
+ q->udma = EZ(t->udma, UT);
+}
+
+void ata_timing_merge(const struct ata_timing *a, const struct ata_timing *b,
+ struct ata_timing *m, unsigned int what)
+{
+ if (what & ATA_TIMING_SETUP)
+ m->setup = max(a->setup, b->setup);
+ if (what & ATA_TIMING_ACT8B)
+ m->act8b = max(a->act8b, b->act8b);
+ if (what & ATA_TIMING_REC8B)
+ m->rec8b = max(a->rec8b, b->rec8b);
+ if (what & ATA_TIMING_CYC8B)
+ m->cyc8b = max(a->cyc8b, b->cyc8b);
+ if (what & ATA_TIMING_ACTIVE)
+ m->active = max(a->active, b->active);
+ if (what & ATA_TIMING_RECOVER)
+ m->recover = max(a->recover, b->recover);
+ if (what & ATA_TIMING_DMACK_HOLD)
+ m->dmack_hold = max(a->dmack_hold, b->dmack_hold);
+ if (what & ATA_TIMING_CYCLE)
+ m->cycle = max(a->cycle, b->cycle);
+ if (what & ATA_TIMING_UDMA)
+ m->udma = max(a->udma, b->udma);
+}
+EXPORT_SYMBOL_GPL(ata_timing_merge);
+
+const struct ata_timing *ata_timing_find_mode(u8 xfer_mode)
+{
+ const struct ata_timing *t = ata_timing;
+
+ while (xfer_mode > t->mode)
+ t++;
+
+ if (xfer_mode == t->mode)
+ return t;
+
+ WARN_ONCE(true, "%s: unable to find timing for xfer_mode 0x%x\n",
+ __func__, xfer_mode);
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(ata_timing_find_mode);
+
+int ata_timing_compute(struct ata_device *adev, unsigned short speed,
+ struct ata_timing *t, int T, int UT)
+{
+ const u16 *id = adev->id;
+ const struct ata_timing *s;
+ struct ata_timing p;
+
+ /*
+ * Find the mode.
+ */
+ s = ata_timing_find_mode(speed);
+ if (!s)
+ return -EINVAL;
+
+ memcpy(t, s, sizeof(*s));
+
+ /*
+ * If the drive is an EIDE drive, it can tell us it needs extended
+ * PIO/MW_DMA cycle timing.
+ */
+
+ if (id[ATA_ID_FIELD_VALID] & 2) { /* EIDE drive */
+ memset(&p, 0, sizeof(p));
+
+ if (speed >= XFER_PIO_0 && speed < XFER_SW_DMA_0) {
+ if (speed <= XFER_PIO_2)
+ p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO];
+ else if ((speed <= XFER_PIO_4) ||
+ (speed == XFER_PIO_5 && !ata_id_is_cfa(id)))
+ p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO_IORDY];
+ } else if (speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2)
+ p.cycle = id[ATA_ID_EIDE_DMA_MIN];
+
+ ata_timing_merge(&p, t, t, ATA_TIMING_CYCLE | ATA_TIMING_CYC8B);
+ }
+
+ /*
+ * Convert the timing to bus clock counts.
+ */
+
+ ata_timing_quantize(t, t, T, UT);
+
+ /*
+ * Even in DMA/UDMA modes we still use PIO access for IDENTIFY,
+ * S.M.A.R.T * and some other commands. We have to ensure that the
+ * DMA cycle timing is slower/equal than the fastest PIO timing.
+ */
+
+ if (speed > XFER_PIO_6) {
+ ata_timing_compute(adev, adev->pio_mode, &p, T, UT);
+ ata_timing_merge(&p, t, t, ATA_TIMING_ALL);
+ }
+
+ /*
+ * Lengthen active & recovery time so that cycle time is correct.
+ */
+
+ if (t->act8b + t->rec8b < t->cyc8b) {
+ t->act8b += (t->cyc8b - (t->act8b + t->rec8b)) / 2;
+ t->rec8b = t->cyc8b - t->act8b;
+ }
+
+ if (t->active + t->recover < t->cycle) {
+ t->active += (t->cycle - (t->active + t->recover)) / 2;
+ t->recover = t->cycle - t->active;
+ }
+
+ /*
+ * In a few cases quantisation may produce enough errors to
+ * leave t->cycle too low for the sum of active and recovery
+ * if so we must correct this.
+ */
+ if (t->active + t->recover > t->cycle)
+ t->cycle = t->active + t->recover;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ata_timing_compute);
diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c
new file mode 100644
index 0000000..c16423e
--- /dev/null
+++ b/drivers/ata/libata-sata.c
@@ -0,0 +1,1483 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SATA specific part of ATA helper library
+ *
+ * Copyright 2003-2004 Red Hat, Inc. All rights reserved.
+ * Copyright 2003-2004 Jeff Garzik
+ * Copyright 2006 Tejun Heo <htejun@gmail.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <linux/libata.h>
+
+#include "libata.h"
+#include "libata-transport.h"
+
+/* debounce timing parameters in msecs { interval, duration, timeout } */
+const unsigned long sata_deb_timing_normal[] = { 5, 100, 2000 };
+EXPORT_SYMBOL_GPL(sata_deb_timing_normal);
+const unsigned long sata_deb_timing_hotplug[] = { 25, 500, 2000 };
+EXPORT_SYMBOL_GPL(sata_deb_timing_hotplug);
+const unsigned long sata_deb_timing_long[] = { 100, 2000, 5000 };
+EXPORT_SYMBOL_GPL(sata_deb_timing_long);
+
+/**
+ * sata_scr_valid - test whether SCRs are accessible
+ * @link: ATA link to test SCR accessibility for
+ *
+ * Test whether SCRs are accessible for @link.
+ *
+ * LOCKING:
+ * None.
+ *
+ * RETURNS:
+ * 1 if SCRs are accessible, 0 otherwise.
+ */
+int sata_scr_valid(struct ata_link *link)
+{
+ struct ata_port *ap = link->ap;
+
+ return (ap->flags & ATA_FLAG_SATA) && ap->ops->scr_read;
+}
+EXPORT_SYMBOL_GPL(sata_scr_valid);
+
+/**
+ * sata_scr_read - read SCR register of the specified port
+ * @link: ATA link to read SCR for
+ * @reg: SCR to read
+ * @val: Place to store read value
+ *
+ * Read SCR register @reg of @link into *@val. This function is
+ * guaranteed to succeed if @link is ap->link, the cable type of
+ * the port is SATA and the port implements ->scr_read.
+ *
+ * LOCKING:
+ * None if @link is ap->link. Kernel thread context otherwise.
+ *
+ * RETURNS:
+ * 0 on success, negative errno on failure.
+ */
+int sata_scr_read(struct ata_link *link, int reg, u32 *val)
+{
+ if (ata_is_host_link(link)) {
+ if (sata_scr_valid(link))
+ return link->ap->ops->scr_read(link, reg, val);
+ return -EOPNOTSUPP;
+ }
+
+ return sata_pmp_scr_read(link, reg, val);
+}
+EXPORT_SYMBOL_GPL(sata_scr_read);
+
+/**
+ * sata_scr_write - write SCR register of the specified port
+ * @link: ATA link to write SCR for
+ * @reg: SCR to write
+ * @val: value to write
+ *
+ * Write @val to SCR register @reg of @link. This function is
+ * guaranteed to succeed if @link is ap->link, the cable type of
+ * the port is SATA and the port implements ->scr_read.
+ *
+ * LOCKING:
+ * None if @link is ap->link. Kernel thread context otherwise.
+ *
+ * RETURNS:
+ * 0 on success, negative errno on failure.
+ */
+int sata_scr_write(struct ata_link *link, int reg, u32 val)
+{
+ if (ata_is_host_link(link)) {
+ if (sata_scr_valid(link))
+ return link->ap->ops->scr_write(link, reg, val);
+ return -EOPNOTSUPP;
+ }
+
+ return sata_pmp_scr_write(link, reg, val);
+}
+EXPORT_SYMBOL_GPL(sata_scr_write);
+
+/**
+ * sata_scr_write_flush - write SCR register of the specified port and flush
+ * @link: ATA link to write SCR for
+ * @reg: SCR to write
+ * @val: value to write
+ *
+ * This function is identical to sata_scr_write() except that this
+ * function performs flush after writing to the register.
+ *
+ * LOCKING:
+ * None if @link is ap->link. Kernel thread context otherwise.
+ *
+ * RETURNS:
+ * 0 on success, negative errno on failure.
+ */
+int sata_scr_write_flush(struct ata_link *link, int reg, u32 val)
+{
+ if (ata_is_host_link(link)) {
+ int rc;
+
+ if (sata_scr_valid(link)) {
+ rc = link->ap->ops->scr_write(link, reg, val);
+ if (rc == 0)
+ rc = link->ap->ops->scr_read(link, reg, &val);
+ return rc;
+ }
+ return -EOPNOTSUPP;
+ }
+
+ return sata_pmp_scr_write(link, reg, val);
+}
+EXPORT_SYMBOL_GPL(sata_scr_write_flush);
+
+/**
+ * ata_tf_to_fis - Convert ATA taskfile to SATA FIS structure
+ * @tf: Taskfile to convert
+ * @pmp: Port multiplier port
+ * @is_cmd: This FIS is for command
+ * @fis: Buffer into which data will output
+ *
+ * Converts a standard ATA taskfile to a Serial ATA
+ * FIS structure (Register - Host to Device).
+ *
+ * LOCKING:
+ * Inherited from caller.
+ */
+void ata_tf_to_fis(const struct ata_taskfile *tf, u8 pmp, int is_cmd, u8 *fis)
+{
+ fis[0] = 0x27; /* Register - Host to Device FIS */
+ fis[1] = pmp & 0xf; /* Port multiplier number*/
+ if (is_cmd)
+ fis[1] |= (1 << 7); /* bit 7 indicates Command FIS */
+
+ fis[2] = tf->command;
+ fis[3] = tf->feature;
+
+ fis[4] = tf->lbal;
+ fis[5] = tf->lbam;
+ fis[6] = tf->lbah;
+ fis[7] = tf->device;
+
+ fis[8] = tf->hob_lbal;
+ fis[9] = tf->hob_lbam;
+ fis[10] = tf->hob_lbah;
+ fis[11] = tf->hob_feature;
+
+ fis[12] = tf->nsect;
+ fis[13] = tf->hob_nsect;
+ fis[14] = 0;
+ fis[15] = tf->ctl;
+
+ fis[16] = tf->auxiliary & 0xff;
+ fis[17] = (tf->auxiliary >> 8) & 0xff;
+ fis[18] = (tf->auxiliary >> 16) & 0xff;
+ fis[19] = (tf->auxiliary >> 24) & 0xff;
+}
+EXPORT_SYMBOL_GPL(ata_tf_to_fis);
+
+/**
+ * ata_tf_from_fis - Convert SATA FIS to ATA taskfile
+ * @fis: Buffer from which data will be input
+ * @tf: Taskfile to output
+ *
+ * Converts a serial ATA FIS structure to a standard ATA taskfile.
+ *
+ * LOCKING:
+ * Inherited from caller.
+ */
+
+void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf)
+{
+ tf->command = fis[2]; /* status */
+ tf->feature = fis[3]; /* error */
+
+ tf->lbal = fis[4];
+ tf->lbam = fis[5];
+ tf->lbah = fis[6];
+ tf->device = fis[7];
+
+ tf->hob_lbal = fis[8];
+ tf->hob_lbam = fis[9];
+ tf->hob_lbah = fis[10];
+
+ tf->nsect = fis[12];
+ tf->hob_nsect = fis[13];
+}
+EXPORT_SYMBOL_GPL(ata_tf_from_fis);
+
+/**
+ * sata_link_debounce - debounce SATA phy status
+ * @link: ATA link to debounce SATA phy status for
+ * @params: timing parameters { interval, duration, timeout } in msec
+ * @deadline: deadline jiffies for the operation
+ *
+ * Make sure SStatus of @link reaches stable state, determined by
+ * holding the same value where DET is not 1 for @duration polled
+ * every @interval, before @timeout. Timeout constraints the
+ * beginning of the stable state. Because DET gets stuck at 1 on
+ * some controllers after hot unplugging, this functions waits
+ * until timeout then returns 0 if DET is stable at 1.
+ *
+ * @timeout is further limited by @deadline. The sooner of the
+ * two is used.
+ *
+ * LOCKING:
+ * Kernel thread context (may sleep)
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int sata_link_debounce(struct ata_link *link, const unsigned long *params,
+ unsigned long deadline)
+{
+ unsigned long interval = params[0];
+ unsigned long duration = params[1];
+ unsigned long last_jiffies, t;
+ u32 last, cur;
+ int rc;
+
+ t = ata_deadline(jiffies, params[2]);
+ if (time_before(t, deadline))
+ deadline = t;
+
+ if ((rc = sata_scr_read(link, SCR_STATUS, &cur)))
+ return rc;
+ cur &= 0xf;
+
+ last = cur;
+ last_jiffies = jiffies;
+
+ while (1) {
+ ata_msleep(link->ap, interval);
+ if ((rc = sata_scr_read(link, SCR_STATUS, &cur)))
+ return rc;
+ cur &= 0xf;
+
+ /* DET stable? */
+ if (cur == last) {
+ if (cur == 1 && time_before(jiffies, deadline))
+ continue;
+ if (time_after(jiffies,
+ ata_deadline(last_jiffies, duration)))
+ return 0;
+ continue;
+ }
+
+ /* unstable, start over */
+ last = cur;
+ last_jiffies = jiffies;
+
+ /* Check deadline. If debouncing failed, return
+ * -EPIPE to tell upper layer to lower link speed.
+ */
+ if (time_after(jiffies, deadline))
+ return -EPIPE;
+ }
+}
+EXPORT_SYMBOL_GPL(sata_link_debounce);
+
+/**
+ * sata_link_resume - resume SATA link
+ * @link: ATA link to resume SATA
+ * @params: timing parameters { interval, duration, timeout } in msec
+ * @deadline: deadline jiffies for the operation
+ *
+ * Resume SATA phy @link and debounce it.
+ *
+ * LOCKING:
+ * Kernel thread context (may sleep)
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int sata_link_resume(struct ata_link *link, const unsigned long *params,
+ unsigned long deadline)
+{
+ int tries = ATA_LINK_RESUME_TRIES;
+ u32 scontrol, serror;
+ int rc;
+
+ if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
+ return rc;
+
+ /*
+ * Writes to SControl sometimes get ignored under certain
+ * controllers (ata_piix SIDPR). Make sure DET actually is
+ * cleared.
+ */
+ do {
+ scontrol = (scontrol & 0x0f0) | 0x300;
+ if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
+ return rc;
+ /*
+ * Some PHYs react badly if SStatus is pounded
+ * immediately after resuming. Delay 200ms before
+ * debouncing.
+ */
+ if (!(link->flags & ATA_LFLAG_NO_DB_DELAY))
+ ata_msleep(link->ap, 200);
+
+ /* is SControl restored correctly? */
+ if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
+ return rc;
+ } while ((scontrol & 0xf0f) != 0x300 && --tries);
+
+ if ((scontrol & 0xf0f) != 0x300) {
+ ata_link_warn(link, "failed to resume link (SControl %X)\n",
+ scontrol);
+ return 0;
+ }
+
+ if (tries < ATA_LINK_RESUME_TRIES)
+ ata_link_warn(link, "link resume succeeded after %d retries\n",
+ ATA_LINK_RESUME_TRIES - tries);
+
+ if ((rc = sata_link_debounce(link, params, deadline)))
+ return rc;
+
+ /* clear SError, some PHYs require this even for SRST to work */
+ if (!(rc = sata_scr_read(link, SCR_ERROR, &serror)))
+ rc = sata_scr_write(link, SCR_ERROR, serror);
+
+ return rc != -EINVAL ? rc : 0;
+}
+EXPORT_SYMBOL_GPL(sata_link_resume);
+
+/**
+ * sata_link_scr_lpm - manipulate SControl IPM and SPM fields
+ * @link: ATA link to manipulate SControl for
+ * @policy: LPM policy to configure
+ * @spm_wakeup: initiate LPM transition to active state
+ *
+ * Manipulate the IPM field of the SControl register of @link
+ * according to @policy. If @policy is ATA_LPM_MAX_POWER and
+ * @spm_wakeup is %true, the SPM field is manipulated to wake up
+ * the link. This function also clears PHYRDY_CHG before
+ * returning.
+ *
+ * LOCKING:
+ * EH context.
+ *
+ * RETURNS:
+ * 0 on success, -errno otherwise.
+ */
+int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy,
+ bool spm_wakeup)
+{
+ struct ata_eh_context *ehc = &link->eh_context;
+ bool woken_up = false;
+ u32 scontrol;
+ int rc;
+
+ rc = sata_scr_read(link, SCR_CONTROL, &scontrol);
+ if (rc)
+ return rc;
+
+ switch (policy) {
+ case ATA_LPM_MAX_POWER:
+ /* disable all LPM transitions */
+ scontrol |= (0x7 << 8);
+ /* initiate transition to active state */
+ if (spm_wakeup) {
+ scontrol |= (0x4 << 12);
+ woken_up = true;
+ }
+ break;
+ case ATA_LPM_MED_POWER:
+ /* allow LPM to PARTIAL */
+ scontrol &= ~(0x1 << 8);
+ scontrol |= (0x6 << 8);
+ break;
+ case ATA_LPM_MED_POWER_WITH_DIPM:
+ case ATA_LPM_MIN_POWER_WITH_PARTIAL:
+ case ATA_LPM_MIN_POWER:
+ if (ata_link_nr_enabled(link) > 0)
+ /* no restrictions on LPM transitions */
+ scontrol &= ~(0x7 << 8);
+ else {
+ /* empty port, power off */
+ scontrol &= ~0xf;
+ scontrol |= (0x1 << 2);
+ }
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ rc = sata_scr_write(link, SCR_CONTROL, scontrol);
+ if (rc)
+ return rc;
+
+ /* give the link time to transit out of LPM state */
+ if (woken_up)
+ msleep(10);
+
+ /* clear PHYRDY_CHG from SError */
+ ehc->i.serror &= ~SERR_PHYRDY_CHG;
+ return sata_scr_write(link, SCR_ERROR, SERR_PHYRDY_CHG);
+}
+EXPORT_SYMBOL_GPL(sata_link_scr_lpm);
+
+static int __sata_set_spd_needed(struct ata_link *link, u32 *scontrol)
+{
+ struct ata_link *host_link = &link->ap->link;
+ u32 limit, target, spd;
+
+ limit = link->sata_spd_limit;
+
+ /* Don't configure downstream link faster than upstream link.
+ * It doesn't speed up anything and some PMPs choke on such
+ * configuration.
+ */
+ if (!ata_is_host_link(link) && host_link->sata_spd)
+ limit &= (1 << host_link->sata_spd) - 1;
+
+ if (limit == UINT_MAX)
+ target = 0;
+ else
+ target = fls(limit);
+
+ spd = (*scontrol >> 4) & 0xf;
+ *scontrol = (*scontrol & ~0xf0) | ((target & 0xf) << 4);
+
+ return spd != target;
+}
+
+/**
+ * sata_set_spd_needed - is SATA spd configuration needed
+ * @link: Link in question
+ *
+ * Test whether the spd limit in SControl matches
+ * @link->sata_spd_limit. This function is used to determine
+ * whether hardreset is necessary to apply SATA spd
+ * configuration.
+ *
+ * LOCKING:
+ * Inherited from caller.
+ *
+ * RETURNS:
+ * 1 if SATA spd configuration is needed, 0 otherwise.
+ */
+static int sata_set_spd_needed(struct ata_link *link)
+{
+ u32 scontrol;
+
+ if (sata_scr_read(link, SCR_CONTROL, &scontrol))
+ return 1;
+
+ return __sata_set_spd_needed(link, &scontrol);
+}
+
+/**
+ * sata_set_spd - set SATA spd according to spd limit
+ * @link: Link to set SATA spd for
+ *
+ * Set SATA spd of @link according to sata_spd_limit.
+ *
+ * LOCKING:
+ * Inherited from caller.
+ *
+ * RETURNS:
+ * 0 if spd doesn't need to be changed, 1 if spd has been
+ * changed. Negative errno if SCR registers are inaccessible.
+ */
+int sata_set_spd(struct ata_link *link)
+{
+ u32 scontrol;
+ int rc;
+
+ if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
+ return rc;
+
+ if (!__sata_set_spd_needed(link, &scontrol))
+ return 0;
+
+ if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
+ return rc;
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(sata_set_spd);
+
+/**
+ * sata_link_hardreset - reset link via SATA phy reset
+ * @link: link to reset
+ * @timing: timing parameters { interval, duration, timeout } in msec
+ * @deadline: deadline jiffies for the operation
+ * @online: optional out parameter indicating link onlineness
+ * @check_ready: optional callback to check link readiness
+ *
+ * SATA phy-reset @link using DET bits of SControl register.
+ * After hardreset, link readiness is waited upon using
+ * ata_wait_ready() if @check_ready is specified. LLDs are
+ * allowed to not specify @check_ready and wait itself after this
+ * function returns. Device classification is LLD's
+ * responsibility.
+ *
+ * *@online is set to one iff reset succeeded and @link is online
+ * after reset.
+ *
+ * LOCKING:
+ * Kernel thread context (may sleep)
+ *
+ * RETURNS:
+ * 0 on success, -errno otherwise.
+ */
+int sata_link_hardreset(struct ata_link *link, const unsigned long *timing,
+ unsigned long deadline,
+ bool *online, int (*check_ready)(struct ata_link *))
+{
+ u32 scontrol;
+ int rc;
+
+ DPRINTK("ENTER\n");
+
+ if (online)
+ *online = false;
+
+ if (sata_set_spd_needed(link)) {
+ /* SATA spec says nothing about how to reconfigure
+ * spd. To be on the safe side, turn off phy during
+ * reconfiguration. This works for at least ICH7 AHCI
+ * and Sil3124.
+ */
+ if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
+ goto out;
+
+ scontrol = (scontrol & 0x0f0) | 0x304;
+
+ if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
+ goto out;
+
+ sata_set_spd(link);
+ }
+
+ /* issue phy wake/reset */
+ if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
+ goto out;
+
+ scontrol = (scontrol & 0x0f0) | 0x301;
+
+ if ((rc = sata_scr_write_flush(link, SCR_CONTROL, scontrol)))
+ goto out;
+
+ /* Couldn't find anything in SATA I/II specs, but AHCI-1.1
+ * 10.4.2 says at least 1 ms.
+ */
+ ata_msleep(link->ap, 1);
+
+ /* bring link back */
+ rc = sata_link_resume(link, timing, deadline);
+ if (rc)
+ goto out;
+ /* if link is offline nothing more to do */
+ if (ata_phys_link_offline(link))
+ goto out;
+
+ /* Link is online. From this point, -ENODEV too is an error. */
+ if (online)
+ *online = true;
+
+ if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) {
+ /* If PMP is supported, we have to do follow-up SRST.
+ * Some PMPs don't send D2H Reg FIS after hardreset if
+ * the first port is empty. Wait only for
+ * ATA_TMOUT_PMP_SRST_WAIT.
+ */
+ if (check_ready) {
+ unsigned long pmp_deadline;
+
+ pmp_deadline = ata_deadline(jiffies,
+ ATA_TMOUT_PMP_SRST_WAIT);
+ if (time_after(pmp_deadline, deadline))
+ pmp_deadline = deadline;
+ ata_wait_ready(link, pmp_deadline, check_ready);
+ }
+ rc = -EAGAIN;
+ goto out;
+ }
+
+ rc = 0;
+ if (check_ready)
+ rc = ata_wait_ready(link, deadline, check_ready);
+ out:
+ if (rc && rc != -EAGAIN) {
+ /* online is set iff link is online && reset succeeded */
+ if (online)
+ *online = false;
+ ata_link_err(link, "COMRESET failed (errno=%d)\n", rc);
+ }
+ DPRINTK("EXIT, rc=%d\n", rc);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(sata_link_hardreset);
+
+/**
+ * ata_qc_complete_multiple - Complete multiple qcs successfully
+ * @ap: port in question
+ * @qc_active: new qc_active mask
+ *
+ * Complete in-flight commands. This functions is meant to be
+ * called from low-level driver's interrupt routine to complete
+ * requests normally. ap->qc_active and @qc_active is compared
+ * and commands are completed accordingly.
+ *
+ * Always use this function when completing multiple NCQ commands
+ * from IRQ handlers instead of calling ata_qc_complete()
+ * multiple times to keep IRQ expect status properly in sync.
+ *
+ * LOCKING:
+ * spin_lock_irqsave(host lock)
+ *
+ * RETURNS:
+ * Number of completed commands on success, -errno otherwise.
+ */
+int ata_qc_complete_multiple(struct ata_port *ap, u64 qc_active)
+{
+ u64 done_mask, ap_qc_active = ap->qc_active;
+ int nr_done = 0;
+
+ /*
+ * If the internal tag is set on ap->qc_active, then we care about
+ * bit0 on the passed in qc_active mask. Move that bit up to match
+ * the internal tag.
+ */
+ if (ap_qc_active & (1ULL << ATA_TAG_INTERNAL)) {
+ qc_active |= (qc_active & 0x01) << ATA_TAG_INTERNAL;
+ qc_active ^= qc_active & 0x01;
+ }
+
+ done_mask = ap_qc_active ^ qc_active;
+
+ if (unlikely(done_mask & qc_active)) {
+ ata_port_err(ap, "illegal qc_active transition (%08llx->%08llx)\n",
+ ap->qc_active, qc_active);
+ return -EINVAL;
+ }
+
+ while (done_mask) {
+ struct ata_queued_cmd *qc;
+ unsigned int tag = __ffs64(done_mask);
+
+ qc = ata_qc_from_tag(ap, tag);
+ if (qc) {
+ ata_qc_complete(qc);
+ nr_done++;
+ }
+ done_mask &= ~(1ULL << tag);
+ }
+
+ return nr_done;
+}
+EXPORT_SYMBOL_GPL(ata_qc_complete_multiple);
+
+/**
+ * ata_slave_link_init - initialize slave link
+ * @ap: port to initialize slave link for
+ *
+ * Create and initialize slave link for @ap. This enables slave
+ * link handling on the port.
+ *
+ * In libata, a port contains links and a link contains devices.
+ * There is single host link but if a PMP is attached to it,
+ * there can be multiple fan-out links. On SATA, there's usually
+ * a single device connected to a link but PATA and SATA
+ * controllers emulating TF based interface can have two - master
+ * and slave.
+ *
+ * However, there are a few controllers which don't fit into this
+ * abstraction too well - SATA controllers which emulate TF
+ * interface with both master and slave devices but also have
+ * separate SCR register sets for each device. These controllers
+ * need separate links for physical link handling
+ * (e.g. onlineness, link speed) but should be treated like a
+ * traditional M/S controller for everything else (e.g. command
+ * issue, softreset).
+ *
+ * slave_link is libata's way of handling this class of
+ * controllers without impacting core layer too much. For
+ * anything other than physical link handling, the default host
+ * link is used for both master and slave. For physical link
+ * handling, separate @ap->slave_link is used. All dirty details
+ * are implemented inside libata core layer. From LLD's POV, the
+ * only difference is that prereset, hardreset and postreset are
+ * called once more for the slave link, so the reset sequence
+ * looks like the following.
+ *
+ * prereset(M) -> prereset(S) -> hardreset(M) -> hardreset(S) ->
+ * softreset(M) -> postreset(M) -> postreset(S)
+ *
+ * Note that softreset is called only for the master. Softreset
+ * resets both M/S by definition, so SRST on master should handle
+ * both (the standard method will work just fine).
+ *
+ * LOCKING:
+ * Should be called before host is registered.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int ata_slave_link_init(struct ata_port *ap)
+{
+ struct ata_link *link;
+
+ WARN_ON(ap->slave_link);
+ WARN_ON(ap->flags & ATA_FLAG_PMP);
+
+ link = kzalloc(sizeof(*link), GFP_KERNEL);
+ if (!link)
+ return -ENOMEM;
+
+ ata_link_init(ap, link, 1);
+ ap->slave_link = link;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ata_slave_link_init);
+
+/**
+ * sata_lpm_ignore_phy_events - test if PHY event should be ignored
+ * @link: Link receiving the event
+ *
+ * Test whether the received PHY event has to be ignored or not.
+ *
+ * LOCKING:
+ * None:
+ *
+ * RETURNS:
+ * True if the event has to be ignored.
+ */
+bool sata_lpm_ignore_phy_events(struct ata_link *link)
+{
+ unsigned long lpm_timeout = link->last_lpm_change +
+ msecs_to_jiffies(ATA_TMOUT_SPURIOUS_PHY);
+
+ /* if LPM is enabled, PHYRDY doesn't mean anything */
+ if (link->lpm_policy > ATA_LPM_MAX_POWER)
+ return true;
+
+ /* ignore the first PHY event after the LPM policy changed
+ * as it is might be spurious
+ */
+ if ((link->flags & ATA_LFLAG_CHANGED) &&
+ time_before(jiffies, lpm_timeout))
+ return true;
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(sata_lpm_ignore_phy_events);
+
+static const char *ata_lpm_policy_names[] = {
+ [ATA_LPM_UNKNOWN] = "max_performance",
+ [ATA_LPM_MAX_POWER] = "max_performance",
+ [ATA_LPM_MED_POWER] = "medium_power",
+ [ATA_LPM_MED_POWER_WITH_DIPM] = "med_power_with_dipm",
+ [ATA_LPM_MIN_POWER_WITH_PARTIAL] = "min_power_with_partial",
+ [ATA_LPM_MIN_POWER] = "min_power",
+};
+
+static ssize_t ata_scsi_lpm_store(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct Scsi_Host *shost = class_to_shost(device);
+ struct ata_port *ap = ata_shost_to_port(shost);
+ struct ata_link *link;
+ struct ata_device *dev;
+ enum ata_lpm_policy policy;
+ unsigned long flags;
+
+ /* UNKNOWN is internal state, iterate from MAX_POWER */
+ for (policy = ATA_LPM_MAX_POWER;
+ policy < ARRAY_SIZE(ata_lpm_policy_names); policy++) {
+ const char *name = ata_lpm_policy_names[policy];
+
+ if (strncmp(name, buf, strlen(name)) == 0)
+ break;
+ }
+ if (policy == ARRAY_SIZE(ata_lpm_policy_names))
+ return -EINVAL;
+
+ spin_lock_irqsave(ap->lock, flags);
+
+ ata_for_each_link(link, ap, EDGE) {
+ ata_for_each_dev(dev, &ap->link, ENABLED) {
+ if (dev->horkage & ATA_HORKAGE_NOLPM) {
+ count = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+ }
+ }
+
+ ap->target_lpm_policy = policy;
+ ata_port_schedule_eh(ap);
+out_unlock:
+ spin_unlock_irqrestore(ap->lock, flags);
+ return count;
+}
+
+static ssize_t ata_scsi_lpm_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct Scsi_Host *shost = class_to_shost(dev);
+ struct ata_port *ap = ata_shost_to_port(shost);
+
+ if (ap->target_lpm_policy >= ARRAY_SIZE(ata_lpm_policy_names))
+ return -EINVAL;
+
+ return snprintf(buf, PAGE_SIZE, "%s\n",
+ ata_lpm_policy_names[ap->target_lpm_policy]);
+}
+DEVICE_ATTR(link_power_management_policy, S_IRUGO | S_IWUSR,
+ ata_scsi_lpm_show, ata_scsi_lpm_store);
+EXPORT_SYMBOL_GPL(dev_attr_link_power_management_policy);
+
+static ssize_t ata_ncq_prio_enable_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct scsi_device *sdev = to_scsi_device(device);
+ struct ata_port *ap;
+ struct ata_device *dev;
+ bool ncq_prio_enable;
+ int rc = 0;
+
+ ap = ata_shost_to_port(sdev->host);
+
+ spin_lock_irq(ap->lock);
+ dev = ata_scsi_find_dev(ap, sdev);
+ if (!dev) {
+ rc = -ENODEV;
+ goto unlock;
+ }
+
+ ncq_prio_enable = dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLE;
+
+unlock:
+ spin_unlock_irq(ap->lock);
+
+ return rc ? rc : snprintf(buf, 20, "%u\n", ncq_prio_enable);
+}
+
+static ssize_t ata_ncq_prio_enable_store(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct scsi_device *sdev = to_scsi_device(device);
+ struct ata_port *ap;
+ struct ata_device *dev;
+ long int input;
+ int rc;
+
+ rc = kstrtol(buf, 10, &input);
+ if (rc)
+ return rc;
+ if ((input < 0) || (input > 1))
+ return -EINVAL;
+
+ ap = ata_shost_to_port(sdev->host);
+ dev = ata_scsi_find_dev(ap, sdev);
+ if (unlikely(!dev))
+ return -ENODEV;
+
+ spin_lock_irq(ap->lock);
+ if (input)
+ dev->flags |= ATA_DFLAG_NCQ_PRIO_ENABLE;
+ else
+ dev->flags &= ~ATA_DFLAG_NCQ_PRIO_ENABLE;
+
+ dev->link->eh_info.action |= ATA_EH_REVALIDATE;
+ dev->link->eh_info.flags |= ATA_EHI_QUIET;
+ ata_port_schedule_eh(ap);
+ spin_unlock_irq(ap->lock);
+
+ ata_port_wait_eh(ap);
+
+ if (input) {
+ spin_lock_irq(ap->lock);
+ if (!(dev->flags & ATA_DFLAG_NCQ_PRIO)) {
+ dev->flags &= ~ATA_DFLAG_NCQ_PRIO_ENABLE;
+ rc = -EIO;
+ }
+ spin_unlock_irq(ap->lock);
+ }
+
+ return rc ? rc : len;
+}
+
+DEVICE_ATTR(ncq_prio_enable, S_IRUGO | S_IWUSR,
+ ata_ncq_prio_enable_show, ata_ncq_prio_enable_store);
+EXPORT_SYMBOL_GPL(dev_attr_ncq_prio_enable);
+
+struct device_attribute *ata_ncq_sdev_attrs[] = {
+ &dev_attr_unload_heads,
+ &dev_attr_ncq_prio_enable,
+ NULL
+};
+EXPORT_SYMBOL_GPL(ata_ncq_sdev_attrs);
+
+static ssize_t
+ata_scsi_em_message_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct Scsi_Host *shost = class_to_shost(dev);
+ struct ata_port *ap = ata_shost_to_port(shost);
+ if (ap->ops->em_store && (ap->flags & ATA_FLAG_EM))
+ return ap->ops->em_store(ap, buf, count);
+ return -EINVAL;
+}
+
+static ssize_t
+ata_scsi_em_message_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct Scsi_Host *shost = class_to_shost(dev);
+ struct ata_port *ap = ata_shost_to_port(shost);
+
+ if (ap->ops->em_show && (ap->flags & ATA_FLAG_EM))
+ return ap->ops->em_show(ap, buf);
+ return -EINVAL;
+}
+DEVICE_ATTR(em_message, S_IRUGO | S_IWUSR,
+ ata_scsi_em_message_show, ata_scsi_em_message_store);
+EXPORT_SYMBOL_GPL(dev_attr_em_message);
+
+static ssize_t
+ata_scsi_em_message_type_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct Scsi_Host *shost = class_to_shost(dev);
+ struct ata_port *ap = ata_shost_to_port(shost);
+
+ return snprintf(buf, 23, "%d\n", ap->em_message_type);
+}
+DEVICE_ATTR(em_message_type, S_IRUGO,
+ ata_scsi_em_message_type_show, NULL);
+EXPORT_SYMBOL_GPL(dev_attr_em_message_type);
+
+static ssize_t
+ata_scsi_activity_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct scsi_device *sdev = to_scsi_device(dev);
+ struct ata_port *ap = ata_shost_to_port(sdev->host);
+ struct ata_device *atadev = ata_scsi_find_dev(ap, sdev);
+
+ if (atadev && ap->ops->sw_activity_show &&
+ (ap->flags & ATA_FLAG_SW_ACTIVITY))
+ return ap->ops->sw_activity_show(atadev, buf);
+ return -EINVAL;
+}
+
+static ssize_t
+ata_scsi_activity_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct scsi_device *sdev = to_scsi_device(dev);
+ struct ata_port *ap = ata_shost_to_port(sdev->host);
+ struct ata_device *atadev = ata_scsi_find_dev(ap, sdev);
+ enum sw_activity val;
+ int rc;
+
+ if (atadev && ap->ops->sw_activity_store &&
+ (ap->flags & ATA_FLAG_SW_ACTIVITY)) {
+ val = simple_strtoul(buf, NULL, 0);
+ switch (val) {
+ case OFF: case BLINK_ON: case BLINK_OFF:
+ rc = ap->ops->sw_activity_store(atadev, val);
+ if (!rc)
+ return count;
+ else
+ return rc;
+ }
+ }
+ return -EINVAL;
+}
+DEVICE_ATTR(sw_activity, S_IWUSR | S_IRUGO, ata_scsi_activity_show,
+ ata_scsi_activity_store);
+EXPORT_SYMBOL_GPL(dev_attr_sw_activity);
+
+/**
+ * __ata_change_queue_depth - helper for ata_scsi_change_queue_depth
+ * @ap: ATA port to which the device change the queue depth
+ * @sdev: SCSI device to configure queue depth for
+ * @queue_depth: new queue depth
+ *
+ * libsas and libata have different approaches for associating a sdev to
+ * its ata_port.
+ *
+ */
+int __ata_change_queue_depth(struct ata_port *ap, struct scsi_device *sdev,
+ int queue_depth)
+{
+ struct ata_device *dev;
+ unsigned long flags;
+
+ if (queue_depth < 1 || queue_depth == sdev->queue_depth)
+ return sdev->queue_depth;
+
+ dev = ata_scsi_find_dev(ap, sdev);
+ if (!dev || !ata_dev_enabled(dev))
+ return sdev->queue_depth;
+
+ /* NCQ enabled? */
+ spin_lock_irqsave(ap->lock, flags);
+ dev->flags &= ~ATA_DFLAG_NCQ_OFF;
+ if (queue_depth == 1 || !ata_ncq_enabled(dev)) {
+ dev->flags |= ATA_DFLAG_NCQ_OFF;
+ queue_depth = 1;
+ }
+ spin_unlock_irqrestore(ap->lock, flags);
+
+ /* limit and apply queue depth */
+ queue_depth = min(queue_depth, sdev->host->can_queue);
+ queue_depth = min(queue_depth, ata_id_queue_depth(dev->id));
+ queue_depth = min(queue_depth, ATA_MAX_QUEUE);
+
+ if (sdev->queue_depth == queue_depth)
+ return -EINVAL;
+
+ return scsi_change_queue_depth(sdev, queue_depth);
+}
+EXPORT_SYMBOL_GPL(__ata_change_queue_depth);
+
+/**
+ * ata_scsi_change_queue_depth - SCSI callback for queue depth config
+ * @sdev: SCSI device to configure queue depth for
+ * @queue_depth: new queue depth
+ *
+ * This is libata standard hostt->change_queue_depth callback.
+ * SCSI will call into this callback when user tries to set queue
+ * depth via sysfs.
+ *
+ * LOCKING:
+ * SCSI layer (we don't care)
+ *
+ * RETURNS:
+ * Newly configured queue depth.
+ */
+int ata_scsi_change_queue_depth(struct scsi_device *sdev, int queue_depth)
+{
+ struct ata_port *ap = ata_shost_to_port(sdev->host);
+
+ return __ata_change_queue_depth(ap, sdev, queue_depth);
+}
+EXPORT_SYMBOL_GPL(ata_scsi_change_queue_depth);
+
+/**
+ * port_alloc - Allocate port for a SAS attached SATA device
+ * @host: ATA host container for all SAS ports
+ * @port_info: Information from low-level host driver
+ * @shost: SCSI host that the scsi device is attached to
+ *
+ * LOCKING:
+ * PCI/etc. bus probe sem.
+ *
+ * RETURNS:
+ * ata_port pointer on success / NULL on failure.
+ */
+
+struct ata_port *ata_sas_port_alloc(struct ata_host *host,
+ struct ata_port_info *port_info,
+ struct Scsi_Host *shost)
+{
+ struct ata_port *ap;
+
+ ap = ata_port_alloc(host);
+ if (!ap)
+ return NULL;
+
+ ap->port_no = 0;
+ ap->lock = &host->lock;
+ ap->pio_mask = port_info->pio_mask;
+ ap->mwdma_mask = port_info->mwdma_mask;
+ ap->udma_mask = port_info->udma_mask;
+ ap->flags |= port_info->flags;
+ ap->ops = port_info->port_ops;
+ ap->cbl = ATA_CBL_SATA;
+
+ return ap;
+}
+EXPORT_SYMBOL_GPL(ata_sas_port_alloc);
+
+/**
+ * ata_sas_port_start - Set port up for dma.
+ * @ap: Port to initialize
+ *
+ * Called just after data structures for each port are
+ * initialized.
+ *
+ * May be used as the port_start() entry in ata_port_operations.
+ *
+ * LOCKING:
+ * Inherited from caller.
+ */
+int ata_sas_port_start(struct ata_port *ap)
+{
+ /*
+ * the port is marked as frozen at allocation time, but if we don't
+ * have new eh, we won't thaw it
+ */
+ if (!ap->ops->error_handler)
+ ap->pflags &= ~ATA_PFLAG_FROZEN;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ata_sas_port_start);
+
+/**
+ * ata_port_stop - Undo ata_sas_port_start()
+ * @ap: Port to shut down
+ *
+ * May be used as the port_stop() entry in ata_port_operations.
+ *
+ * LOCKING:
+ * Inherited from caller.
+ */
+
+void ata_sas_port_stop(struct ata_port *ap)
+{
+}
+EXPORT_SYMBOL_GPL(ata_sas_port_stop);
+
+/**
+ * ata_sas_async_probe - simply schedule probing and return
+ * @ap: Port to probe
+ *
+ * For batch scheduling of probe for sas attached ata devices, assumes
+ * the port has already been through ata_sas_port_init()
+ */
+void ata_sas_async_probe(struct ata_port *ap)
+{
+ __ata_port_probe(ap);
+}
+EXPORT_SYMBOL_GPL(ata_sas_async_probe);
+
+int ata_sas_sync_probe(struct ata_port *ap)
+{
+ return ata_port_probe(ap);
+}
+EXPORT_SYMBOL_GPL(ata_sas_sync_probe);
+
+
+/**
+ * ata_sas_port_init - Initialize a SATA device
+ * @ap: SATA port to initialize
+ *
+ * LOCKING:
+ * PCI/etc. bus probe sem.
+ *
+ * RETURNS:
+ * Zero on success, non-zero on error.
+ */
+
+int ata_sas_port_init(struct ata_port *ap)
+{
+ int rc = ap->ops->port_start(ap);
+
+ if (rc)
+ return rc;
+ ap->print_id = atomic_inc_return(&ata_print_id);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ata_sas_port_init);
+
+int ata_sas_tport_add(struct device *parent, struct ata_port *ap)
+{
+ return ata_tport_add(parent, ap);
+}
+EXPORT_SYMBOL_GPL(ata_sas_tport_add);
+
+void ata_sas_tport_delete(struct ata_port *ap)
+{
+ ata_tport_delete(ap);
+}
+EXPORT_SYMBOL_GPL(ata_sas_tport_delete);
+
+/**
+ * ata_sas_port_destroy - Destroy a SATA port allocated by ata_sas_port_alloc
+ * @ap: SATA port to destroy
+ *
+ */
+
+void ata_sas_port_destroy(struct ata_port *ap)
+{
+ if (ap->ops->port_stop)
+ ap->ops->port_stop(ap);
+ kfree(ap);
+}
+EXPORT_SYMBOL_GPL(ata_sas_port_destroy);
+
+/**
+ * ata_sas_slave_configure - Default slave_config routine for libata devices
+ * @sdev: SCSI device to configure
+ * @ap: ATA port to which SCSI device is attached
+ *
+ * RETURNS:
+ * Zero.
+ */
+
+int ata_sas_slave_configure(struct scsi_device *sdev, struct ata_port *ap)
+{
+ ata_scsi_sdev_config(sdev);
+ ata_scsi_dev_config(sdev, ap->link.device);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ata_sas_slave_configure);
+
+/**
+ * ata_sas_queuecmd - Issue SCSI cdb to libata-managed device
+ * @cmd: SCSI command to be sent
+ * @ap: ATA port to which the command is being sent
+ *
+ * RETURNS:
+ * Return value from __ata_scsi_queuecmd() if @cmd can be queued,
+ * 0 otherwise.
+ */
+
+int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap)
+{
+ int rc = 0;
+
+ ata_scsi_dump_cdb(ap, cmd);
+
+ if (likely(ata_dev_enabled(ap->link.device)))
+ rc = __ata_scsi_queuecmd(cmd, ap->link.device);
+ else {
+ cmd->result = (DID_BAD_TARGET << 16);
+ cmd->scsi_done(cmd);
+ }
+ return rc;
+}
+EXPORT_SYMBOL_GPL(ata_sas_queuecmd);
+
+int ata_sas_allocate_tag(struct ata_port *ap)
+{
+ unsigned int max_queue = ap->host->n_tags;
+ unsigned int i, tag;
+
+ for (i = 0, tag = ap->sas_last_tag + 1; i < max_queue; i++, tag++) {
+ tag = tag < max_queue ? tag : 0;
+
+ /* the last tag is reserved for internal command. */
+ if (ata_tag_internal(tag))
+ continue;
+
+ if (!test_and_set_bit(tag, &ap->sas_tag_allocated)) {
+ ap->sas_last_tag = tag;
+ return tag;
+ }
+ }
+ return -1;
+}
+
+void ata_sas_free_tag(unsigned int tag, struct ata_port *ap)
+{
+ clear_bit(tag, &ap->sas_tag_allocated);
+}
+
+/**
+ * sata_async_notification - SATA async notification handler
+ * @ap: ATA port where async notification is received
+ *
+ * Handler to be called when async notification via SDB FIS is
+ * received. This function schedules EH if necessary.
+ *
+ * LOCKING:
+ * spin_lock_irqsave(host lock)
+ *
+ * RETURNS:
+ * 1 if EH is scheduled, 0 otherwise.
+ */
+int sata_async_notification(struct ata_port *ap)
+{
+ u32 sntf;
+ int rc;
+
+ if (!(ap->flags & ATA_FLAG_AN))
+ return 0;
+
+ rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf);
+ if (rc == 0)
+ sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf);
+
+ if (!sata_pmp_attached(ap) || rc) {
+ /* PMP is not attached or SNTF is not available */
+ if (!sata_pmp_attached(ap)) {
+ /* PMP is not attached. Check whether ATAPI
+ * AN is configured. If so, notify media
+ * change.
+ */
+ struct ata_device *dev = ap->link.device;
+
+ if ((dev->class == ATA_DEV_ATAPI) &&
+ (dev->flags & ATA_DFLAG_AN))
+ ata_scsi_media_change_notify(dev);
+ return 0;
+ } else {
+ /* PMP is attached but SNTF is not available.
+ * ATAPI async media change notification is
+ * not used. The PMP must be reporting PHY
+ * status change, schedule EH.
+ */
+ ata_port_schedule_eh(ap);
+ return 1;
+ }
+ } else {
+ /* PMP is attached and SNTF is available */
+ struct ata_link *link;
+
+ /* check and notify ATAPI AN */
+ ata_for_each_link(link, ap, EDGE) {
+ if (!(sntf & (1 << link->pmp)))
+ continue;
+
+ if ((link->device->class == ATA_DEV_ATAPI) &&
+ (link->device->flags & ATA_DFLAG_AN))
+ ata_scsi_media_change_notify(link->device);
+ }
+
+ /* If PMP is reporting that PHY status of some
+ * downstream ports has changed, schedule EH.
+ */
+ if (sntf & (1 << SATA_PMP_CTRL_PORT)) {
+ ata_port_schedule_eh(ap);
+ return 1;
+ }
+
+ return 0;
+ }
+}
+EXPORT_SYMBOL_GPL(sata_async_notification);
+
+/**
+ * ata_eh_read_log_10h - Read log page 10h for NCQ error details
+ * @dev: Device to read log page 10h from
+ * @tag: Resulting tag of the failed command
+ * @tf: Resulting taskfile registers of the failed command
+ *
+ * Read log page 10h to obtain NCQ error details and clear error
+ * condition.
+ *
+ * LOCKING:
+ * Kernel thread context (may sleep).
+ *
+ * RETURNS:
+ * 0 on success, -errno otherwise.
+ */
+static int ata_eh_read_log_10h(struct ata_device *dev,
+ int *tag, struct ata_taskfile *tf)
+{
+ u8 *buf = dev->link->ap->sector_buf;
+ unsigned int err_mask;
+ u8 csum;
+ int i;
+
+ err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, 0, buf, 1);
+ if (err_mask)
+ return -EIO;
+
+ csum = 0;
+ for (i = 0; i < ATA_SECT_SIZE; i++)
+ csum += buf[i];
+ if (csum)
+ ata_dev_warn(dev, "invalid checksum 0x%x on log page 10h\n",
+ csum);
+
+ if (buf[0] & 0x80)
+ return -ENOENT;
+
+ *tag = buf[0] & 0x1f;
+
+ tf->command = buf[2];
+ tf->feature = buf[3];
+ tf->lbal = buf[4];
+ tf->lbam = buf[5];
+ tf->lbah = buf[6];
+ tf->device = buf[7];
+ tf->hob_lbal = buf[8];
+ tf->hob_lbam = buf[9];
+ tf->hob_lbah = buf[10];
+ tf->nsect = buf[12];
+ tf->hob_nsect = buf[13];
+ if (dev->class == ATA_DEV_ZAC && ata_id_has_ncq_autosense(dev->id))
+ tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16];
+
+ return 0;
+}
+
+/**
+ * ata_eh_analyze_ncq_error - analyze NCQ error
+ * @link: ATA link to analyze NCQ error for
+ *
+ * Read log page 10h, determine the offending qc and acquire
+ * error status TF. For NCQ device errors, all LLDDs have to do
+ * is setting AC_ERR_DEV in ehi->err_mask. This function takes
+ * care of the rest.
+ *
+ * LOCKING:
+ * Kernel thread context (may sleep).
+ */
+void ata_eh_analyze_ncq_error(struct ata_link *link)
+{
+ struct ata_port *ap = link->ap;
+ struct ata_eh_context *ehc = &link->eh_context;
+ struct ata_device *dev = link->device;
+ struct ata_queued_cmd *qc;
+ struct ata_taskfile tf;
+ int tag, rc;
+
+ /* if frozen, we can't do much */
+ if (ap->pflags & ATA_PFLAG_FROZEN)
+ return;
+
+ /* is it NCQ device error? */
+ if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
+ return;
+
+ /* has LLDD analyzed already? */
+ ata_qc_for_each_raw(ap, qc, tag) {
+ if (!(qc->flags & ATA_QCFLAG_FAILED))
+ continue;
+
+ if (qc->err_mask)
+ return;
+ }
+
+ /* okay, this error is ours */
+ memset(&tf, 0, sizeof(tf));
+ rc = ata_eh_read_log_10h(dev, &tag, &tf);
+ if (rc) {
+ ata_link_err(link, "failed to read log page 10h (errno=%d)\n",
+ rc);
+ return;
+ }
+
+ if (!(link->sactive & (1 << tag))) {
+ ata_link_err(link, "log page 10h reported inactive tag %d\n",
+ tag);
+ return;
+ }
+
+ /* we've got the perpetrator, condemn it */
+ qc = __ata_qc_from_tag(ap, tag);
+ memcpy(&qc->result_tf, &tf, sizeof(tf));
+ qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
+ qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;
+ if (dev->class == ATA_DEV_ZAC &&
+ ((qc->result_tf.command & ATA_SENSE) || qc->result_tf.auxiliary)) {
+ char sense_key, asc, ascq;
+
+ sense_key = (qc->result_tf.auxiliary >> 16) & 0xff;
+ asc = (qc->result_tf.auxiliary >> 8) & 0xff;
+ ascq = qc->result_tf.auxiliary & 0xff;
+ ata_scsi_set_sense(dev, qc->scsicmd, sense_key, asc, ascq);
+ ata_scsi_set_sense_information(dev, qc->scsicmd,
+ &qc->result_tf);
+ qc->flags |= ATA_QCFLAG_SENSE_VALID;
+ }
+
+ ehc->i.err_mask &= ~AC_ERR_DEV;
+}
+EXPORT_SYMBOL_GPL(ata_eh_analyze_ncq_error);
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index eb2eb599..36e588d 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -2,10 +2,6 @@
/*
* libata-scsi.c - helper library for ATA
*
- * Maintained by: Tejun Heo <tj@kernel.org>
- * Please ALWAYS copy linux-ide@vger.kernel.org
- * on emails.
- *
* Copyright 2003-2004 Red Hat, Inc. All rights reserved.
* Copyright 2003-2004 Jeff Garzik
*
@@ -36,11 +32,12 @@
#include <linux/suspend.h>
#include <asm/unaligned.h>
#include <linux/ioprio.h>
+#include <linux/of.h>
#include "libata.h"
#include "libata-transport.h"
-#define ATA_SCSI_RBUF_SIZE 4096
+#define ATA_SCSI_RBUF_SIZE 576
static DEFINE_SPINLOCK(ata_scsi_rbuf_lock);
static u8 ata_scsi_rbuf[ATA_SCSI_RBUF_SIZE];
@@ -49,8 +46,6 @@ typedef unsigned int (*ata_xlat_func_t)(struct ata_queued_cmd *qc);
static struct ata_device *__ata_scsi_find_dev(struct ata_port *ap,
const struct scsi_device *scsidev);
-static struct ata_device *ata_scsi_find_dev(struct ata_port *ap,
- const struct scsi_device *scsidev);
#define RW_RECOVERY_MPAGE 0x1
#define RW_RECOVERY_MPAGE_LEN 12
@@ -90,71 +85,6 @@ static const u8 def_control_mpage[CONTROL_MPAGE_LEN] = {
0, 30 /* extended self test time, see 05-359r1 */
};
-static const char *ata_lpm_policy_names[] = {
- [ATA_LPM_UNKNOWN] = "max_performance",
- [ATA_LPM_MAX_POWER] = "max_performance",
- [ATA_LPM_MED_POWER] = "medium_power",
- [ATA_LPM_MED_POWER_WITH_DIPM] = "med_power_with_dipm",
- [ATA_LPM_MIN_POWER_WITH_PARTIAL] = "min_power_with_partial",
- [ATA_LPM_MIN_POWER] = "min_power",
-};
-
-static ssize_t ata_scsi_lpm_store(struct device *device,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct Scsi_Host *shost = class_to_shost(device);
- struct ata_port *ap = ata_shost_to_port(shost);
- struct ata_link *link;
- struct ata_device *dev;
- enum ata_lpm_policy policy;
- unsigned long flags;
-
- /* UNKNOWN is internal state, iterate from MAX_POWER */
- for (policy = ATA_LPM_MAX_POWER;
- policy < ARRAY_SIZE(ata_lpm_policy_names); policy++) {
- const char *name = ata_lpm_policy_names[policy];
-
- if (strncmp(name, buf, strlen(name)) == 0)
- break;
- }
- if (policy == ARRAY_SIZE(ata_lpm_policy_names))
- return -EINVAL;
-
- spin_lock_irqsave(ap->lock, flags);
-
- ata_for_each_link(link, ap, EDGE) {
- ata_for_each_dev(dev, &ap->link, ENABLED) {
- if (dev->horkage & ATA_HORKAGE_NOLPM) {
- count = -EOPNOTSUPP;
- goto out_unlock;
- }
- }
- }
-
- ap->target_lpm_policy = policy;
- ata_port_schedule_eh(ap);
-out_unlock:
- spin_unlock_irqrestore(ap->lock, flags);
- return count;
-}
-
-static ssize_t ata_scsi_lpm_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct Scsi_Host *shost = class_to_shost(dev);
- struct ata_port *ap = ata_shost_to_port(shost);
-
- if (ap->target_lpm_policy >= ARRAY_SIZE(ata_lpm_policy_names))
- return -EINVAL;
-
- return snprintf(buf, PAGE_SIZE, "%s\n",
- ata_lpm_policy_names[ap->target_lpm_policy]);
-}
-DEVICE_ATTR(link_power_management_policy, S_IRUGO | S_IWUSR,
- ata_scsi_lpm_show, ata_scsi_lpm_store);
-EXPORT_SYMBOL_GPL(dev_attr_link_power_management_policy);
-
static ssize_t ata_scsi_park_show(struct device *device,
struct device_attribute *attr, char *buf)
{
@@ -258,83 +188,6 @@ DEVICE_ATTR(unload_heads, S_IRUGO | S_IWUSR,
ata_scsi_park_show, ata_scsi_park_store);
EXPORT_SYMBOL_GPL(dev_attr_unload_heads);
-static ssize_t ata_ncq_prio_enable_show(struct device *device,
- struct device_attribute *attr,
- char *buf)
-{
- struct scsi_device *sdev = to_scsi_device(device);
- struct ata_port *ap;
- struct ata_device *dev;
- bool ncq_prio_enable;
- int rc = 0;
-
- ap = ata_shost_to_port(sdev->host);
-
- spin_lock_irq(ap->lock);
- dev = ata_scsi_find_dev(ap, sdev);
- if (!dev) {
- rc = -ENODEV;
- goto unlock;
- }
-
- ncq_prio_enable = dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLE;
-
-unlock:
- spin_unlock_irq(ap->lock);
-
- return rc ? rc : snprintf(buf, 20, "%u\n", ncq_prio_enable);
-}
-
-static ssize_t ata_ncq_prio_enable_store(struct device *device,
- struct device_attribute *attr,
- const char *buf, size_t len)
-{
- struct scsi_device *sdev = to_scsi_device(device);
- struct ata_port *ap;
- struct ata_device *dev;
- long int input;
- int rc;
-
- rc = kstrtol(buf, 10, &input);
- if (rc)
- return rc;
- if ((input < 0) || (input > 1))
- return -EINVAL;
-
- ap = ata_shost_to_port(sdev->host);
- dev = ata_scsi_find_dev(ap, sdev);
- if (unlikely(!dev))
- return -ENODEV;
-
- spin_lock_irq(ap->lock);
- if (input)
- dev->flags |= ATA_DFLAG_NCQ_PRIO_ENABLE;
- else
- dev->flags &= ~ATA_DFLAG_NCQ_PRIO_ENABLE;
-
- dev->link->eh_info.action |= ATA_EH_REVALIDATE;
- dev->link->eh_info.flags |= ATA_EHI_QUIET;
- ata_port_schedule_eh(ap);
- spin_unlock_irq(ap->lock);
-
- ata_port_wait_eh(ap);
-
- if (input) {
- spin_lock_irq(ap->lock);
- if (!(dev->flags & ATA_DFLAG_NCQ_PRIO)) {
- dev->flags &= ~ATA_DFLAG_NCQ_PRIO_ENABLE;
- rc = -EIO;
- }
- spin_unlock_irq(ap->lock);
- }
-
- return rc ? rc : len;
-}
-
-DEVICE_ATTR(ncq_prio_enable, S_IRUGO | S_IWUSR,
- ata_ncq_prio_enable_show, ata_ncq_prio_enable_store);
-EXPORT_SYMBOL_GPL(dev_attr_ncq_prio_enable);
-
void ata_scsi_set_sense(struct ata_device *dev, struct scsi_cmnd *cmd,
u8 sk, u8 asc, u8 ascq)
{
@@ -383,90 +236,8 @@ static void ata_scsi_set_invalid_parameter(struct ata_device *dev,
field, 0xff, 0);
}
-static ssize_t
-ata_scsi_em_message_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct Scsi_Host *shost = class_to_shost(dev);
- struct ata_port *ap = ata_shost_to_port(shost);
- if (ap->ops->em_store && (ap->flags & ATA_FLAG_EM))
- return ap->ops->em_store(ap, buf, count);
- return -EINVAL;
-}
-
-static ssize_t
-ata_scsi_em_message_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct Scsi_Host *shost = class_to_shost(dev);
- struct ata_port *ap = ata_shost_to_port(shost);
-
- if (ap->ops->em_show && (ap->flags & ATA_FLAG_EM))
- return ap->ops->em_show(ap, buf);
- return -EINVAL;
-}
-DEVICE_ATTR(em_message, S_IRUGO | S_IWUSR,
- ata_scsi_em_message_show, ata_scsi_em_message_store);
-EXPORT_SYMBOL_GPL(dev_attr_em_message);
-
-static ssize_t
-ata_scsi_em_message_type_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct Scsi_Host *shost = class_to_shost(dev);
- struct ata_port *ap = ata_shost_to_port(shost);
-
- return snprintf(buf, 23, "%d\n", ap->em_message_type);
-}
-DEVICE_ATTR(em_message_type, S_IRUGO,
- ata_scsi_em_message_type_show, NULL);
-EXPORT_SYMBOL_GPL(dev_attr_em_message_type);
-
-static ssize_t
-ata_scsi_activity_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct scsi_device *sdev = to_scsi_device(dev);
- struct ata_port *ap = ata_shost_to_port(sdev->host);
- struct ata_device *atadev = ata_scsi_find_dev(ap, sdev);
-
- if (atadev && ap->ops->sw_activity_show &&
- (ap->flags & ATA_FLAG_SW_ACTIVITY))
- return ap->ops->sw_activity_show(atadev, buf);
- return -EINVAL;
-}
-
-static ssize_t
-ata_scsi_activity_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct scsi_device *sdev = to_scsi_device(dev);
- struct ata_port *ap = ata_shost_to_port(sdev->host);
- struct ata_device *atadev = ata_scsi_find_dev(ap, sdev);
- enum sw_activity val;
- int rc;
-
- if (atadev && ap->ops->sw_activity_store &&
- (ap->flags & ATA_FLAG_SW_ACTIVITY)) {
- val = simple_strtoul(buf, NULL, 0);
- switch (val) {
- case OFF: case BLINK_ON: case BLINK_OFF:
- rc = ap->ops->sw_activity_store(atadev, val);
- if (!rc)
- return count;
- else
- return rc;
- }
- }
- return -EINVAL;
-}
-DEVICE_ATTR(sw_activity, S_IWUSR | S_IRUGO, ata_scsi_activity_show,
- ata_scsi_activity_store);
-EXPORT_SYMBOL_GPL(dev_attr_sw_activity);
-
struct device_attribute *ata_common_sdev_attrs[] = {
&dev_attr_unload_heads,
- &dev_attr_ncq_prio_enable,
NULL
};
EXPORT_SYMBOL_GPL(ata_common_sdev_attrs);
@@ -499,6 +270,7 @@ int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev,
return 0;
}
+EXPORT_SYMBOL_GPL(ata_std_bios_param);
/**
* ata_scsi_unlock_native_capacity - unlock native capacity
@@ -528,6 +300,7 @@ void ata_scsi_unlock_native_capacity(struct scsi_device *sdev)
spin_unlock_irqrestore(ap->lock, flags);
ata_port_wait_eh(ap);
}
+EXPORT_SYMBOL_GPL(ata_scsi_unlock_native_capacity);
/**
* ata_get_identity - Handler for HDIO_GET_IDENTITY ioctl
@@ -1215,7 +988,7 @@ static void ata_gen_ata_sense(struct ata_queued_cmd *qc)
scsi_set_sense_information(sb, SCSI_SENSE_BUFFERSIZE, block);
}
-static void ata_scsi_sdev_config(struct scsi_device *sdev)
+void ata_scsi_sdev_config(struct scsi_device *sdev)
{
sdev->use_10_for_rw = 1;
sdev->use_10_for_ms = 1;
@@ -1255,8 +1028,7 @@ static int atapi_drain_needed(struct request *rq)
return atapi_cmd_type(scsi_req(rq)->cmd[0]) == ATAPI_MISC;
}
-static int ata_scsi_dev_config(struct scsi_device *sdev,
- struct ata_device *dev)
+int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev)
{
struct request_queue *q = sdev->request_queue;
@@ -1344,6 +1116,7 @@ int ata_scsi_slave_config(struct scsi_device *sdev)
return rc;
}
+EXPORT_SYMBOL_GPL(ata_scsi_slave_config);
/**
* ata_scsi_slave_destroy - SCSI device is about to be destroyed
@@ -1383,71 +1156,7 @@ void ata_scsi_slave_destroy(struct scsi_device *sdev)
q->dma_drain_buffer = NULL;
q->dma_drain_size = 0;
}
-
-/**
- * __ata_change_queue_depth - helper for ata_scsi_change_queue_depth
- * @ap: ATA port to which the device change the queue depth
- * @sdev: SCSI device to configure queue depth for
- * @queue_depth: new queue depth
- *
- * libsas and libata have different approaches for associating a sdev to
- * its ata_port.
- *
- */
-int __ata_change_queue_depth(struct ata_port *ap, struct scsi_device *sdev,
- int queue_depth)
-{
- struct ata_device *dev;
- unsigned long flags;
-
- if (queue_depth < 1 || queue_depth == sdev->queue_depth)
- return sdev->queue_depth;
-
- dev = ata_scsi_find_dev(ap, sdev);
- if (!dev || !ata_dev_enabled(dev))
- return sdev->queue_depth;
-
- /* NCQ enabled? */
- spin_lock_irqsave(ap->lock, flags);
- dev->flags &= ~ATA_DFLAG_NCQ_OFF;
- if (queue_depth == 1 || !ata_ncq_enabled(dev)) {
- dev->flags |= ATA_DFLAG_NCQ_OFF;
- queue_depth = 1;
- }
- spin_unlock_irqrestore(ap->lock, flags);
-
- /* limit and apply queue depth */
- queue_depth = min(queue_depth, sdev->host->can_queue);
- queue_depth = min(queue_depth, ata_id_queue_depth(dev->id));
- queue_depth = min(queue_depth, ATA_MAX_QUEUE);
-
- if (sdev->queue_depth == queue_depth)
- return -EINVAL;
-
- return scsi_change_queue_depth(sdev, queue_depth);
-}
-
-/**
- * ata_scsi_change_queue_depth - SCSI callback for queue depth config
- * @sdev: SCSI device to configure queue depth for
- * @queue_depth: new queue depth
- *
- * This is libata standard hostt->change_queue_depth callback.
- * SCSI will call into this callback when user tries to set queue
- * depth via sysfs.
- *
- * LOCKING:
- * SCSI layer (we don't care)
- *
- * RETURNS:
- * Newly configured queue depth.
- */
-int ata_scsi_change_queue_depth(struct scsi_device *sdev, int queue_depth)
-{
- struct ata_port *ap = ata_shost_to_port(sdev->host);
-
- return __ata_change_queue_depth(ap, sdev, queue_depth);
-}
+EXPORT_SYMBOL_GPL(ata_scsi_slave_destroy);
/**
* ata_scsi_start_stop_xlat - Translate SCSI START STOP UNIT command
@@ -2354,10 +2063,6 @@ static unsigned int ata_scsiop_inq_83(struct ata_scsi_args *args, u8 *rbuf)
*/
static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf)
{
- struct ata_taskfile tf;
-
- memset(&tf, 0, sizeof(tf));
-
rbuf[1] = 0x89; /* our page code */
rbuf[2] = (0x238 >> 8); /* page size fixed at 238h */
rbuf[3] = (0x238 & 0xff);
@@ -2366,14 +2071,14 @@ static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf)
memcpy(&rbuf[16], "libata ", 16);
memcpy(&rbuf[32], DRV_VERSION, 4);
- /* we don't store the ATA device signature, so we fake it */
-
- tf.command = ATA_DRDY; /* really, this is Status reg */
- tf.lbal = 0x1;
- tf.nsect = 0x1;
-
- ata_tf_to_fis(&tf, 0, 1, &rbuf[36]); /* TODO: PMP? */
rbuf[36] = 0x34; /* force D2H Reg FIS (34h) */
+ rbuf[37] = (1 << 7); /* bit 7 indicates Command FIS */
+ /* TODO: PMP? */
+
+ /* we don't store the ATA device signature, so we fake it */
+ rbuf[38] = ATA_DRDY; /* really, this is Status reg */
+ rbuf[40] = 0x1;
+ rbuf[48] = 0x1;
rbuf[56] = ATA_CMD_ID_ATA;
@@ -3089,7 +2794,7 @@ static struct ata_device *__ata_scsi_find_dev(struct ata_port *ap,
* RETURNS:
* Associated ATA device, or %NULL if not found.
*/
-static struct ata_device *
+struct ata_device *
ata_scsi_find_dev(struct ata_port *ap, const struct scsi_device *scsidev)
{
struct ata_device *dev = __ata_scsi_find_dev(ap, scsidev);
@@ -4299,8 +4004,7 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd)
* Prints the contents of a SCSI command via printk().
*/
-static inline void ata_scsi_dump_cdb(struct ata_port *ap,
- struct scsi_cmnd *cmd)
+void ata_scsi_dump_cdb(struct ata_port *ap, struct scsi_cmnd *cmd)
{
#ifdef ATA_VERBOSE_DEBUG
struct scsi_device *scsidev = cmd->device;
@@ -4312,8 +4016,7 @@ static inline void ata_scsi_dump_cdb(struct ata_port *ap,
#endif
}
-static inline int __ata_scsi_queuecmd(struct scsi_cmnd *scmd,
- struct ata_device *dev)
+int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev)
{
u8 scsi_op = scmd->cmnd[0];
ata_xlat_func_t xlat_func;
@@ -4407,6 +4110,7 @@ int ata_scsi_queuecmd(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
return rc;
}
+EXPORT_SYMBOL_GPL(ata_scsi_queuecmd);
/**
* ata_scsi_simulate - simulate SCSI command on ATA device
@@ -4562,26 +4266,51 @@ int ata_scsi_add_hosts(struct ata_host *host, struct scsi_host_template *sht)
*/
shost->max_host_blocked = 1;
- rc = scsi_add_host_with_dma(ap->scsi_host,
- &ap->tdev, ap->host->dev);
+ rc = scsi_add_host_with_dma(shost, &ap->tdev, ap->host->dev);
if (rc)
- goto err_add;
+ goto err_alloc;
}
return 0;
- err_add:
- scsi_host_put(host->ports[i]->scsi_host);
err_alloc:
while (--i >= 0) {
struct Scsi_Host *shost = host->ports[i]->scsi_host;
+ /* scsi_host_put() is in ata_devres_release() */
scsi_remove_host(shost);
- scsi_host_put(shost);
}
return rc;
}
+#ifdef CONFIG_OF
+static void ata_scsi_assign_ofnode(struct ata_device *dev, struct ata_port *ap)
+{
+ struct scsi_device *sdev = dev->sdev;
+ struct device *d = ap->host->dev;
+ struct device_node *np = d->of_node;
+ struct device_node *child;
+
+ for_each_available_child_of_node(np, child) {
+ int ret;
+ u32 val;
+
+ ret = of_property_read_u32(child, "reg", &val);
+ if (ret)
+ continue;
+ if (val == dev->devno) {
+ dev_dbg(d, "found matching device node\n");
+ sdev->sdev_gendev.of_node = child;
+ return;
+ }
+ }
+}
+#else
+static void ata_scsi_assign_ofnode(struct ata_device *dev, struct ata_port *ap)
+{
+}
+#endif
+
void ata_scsi_scan_host(struct ata_port *ap, int sync)
{
int tries = 5;
@@ -4607,6 +4336,7 @@ void ata_scsi_scan_host(struct ata_port *ap, int sync)
NULL);
if (!IS_ERR(sdev)) {
dev->sdev = sdev;
+ ata_scsi_assign_ofnode(dev, ap);
scsi_device_put(sdev);
} else {
dev->sdev = NULL;
@@ -4929,214 +4659,3 @@ void ata_scsi_dev_rescan(struct work_struct *work)
spin_unlock_irqrestore(ap->lock, flags);
mutex_unlock(&ap->scsi_scan_mutex);
}
-
-/**
- * ata_sas_port_alloc - Allocate port for a SAS attached SATA device
- * @host: ATA host container for all SAS ports
- * @port_info: Information from low-level host driver
- * @shost: SCSI host that the scsi device is attached to
- *
- * LOCKING:
- * PCI/etc. bus probe sem.
- *
- * RETURNS:
- * ata_port pointer on success / NULL on failure.
- */
-
-struct ata_port *ata_sas_port_alloc(struct ata_host *host,
- struct ata_port_info *port_info,
- struct Scsi_Host *shost)
-{
- struct ata_port *ap;
-
- ap = ata_port_alloc(host);
- if (!ap)
- return NULL;
-
- ap->port_no = 0;
- ap->lock = &host->lock;
- ap->pio_mask = port_info->pio_mask;
- ap->mwdma_mask = port_info->mwdma_mask;
- ap->udma_mask = port_info->udma_mask;
- ap->flags |= port_info->flags;
- ap->ops = port_info->port_ops;
- ap->cbl = ATA_CBL_SATA;
-
- return ap;
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_alloc);
-
-/**
- * ata_sas_port_start - Set port up for dma.
- * @ap: Port to initialize
- *
- * Called just after data structures for each port are
- * initialized.
- *
- * May be used as the port_start() entry in ata_port_operations.
- *
- * LOCKING:
- * Inherited from caller.
- */
-int ata_sas_port_start(struct ata_port *ap)
-{
- /*
- * the port is marked as frozen at allocation time, but if we don't
- * have new eh, we won't thaw it
- */
- if (!ap->ops->error_handler)
- ap->pflags &= ~ATA_PFLAG_FROZEN;
- return 0;
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_start);
-
-/**
- * ata_port_stop - Undo ata_sas_port_start()
- * @ap: Port to shut down
- *
- * May be used as the port_stop() entry in ata_port_operations.
- *
- * LOCKING:
- * Inherited from caller.
- */
-
-void ata_sas_port_stop(struct ata_port *ap)
-{
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_stop);
-
-/**
- * ata_sas_async_probe - simply schedule probing and return
- * @ap: Port to probe
- *
- * For batch scheduling of probe for sas attached ata devices, assumes
- * the port has already been through ata_sas_port_init()
- */
-void ata_sas_async_probe(struct ata_port *ap)
-{
- __ata_port_probe(ap);
-}
-EXPORT_SYMBOL_GPL(ata_sas_async_probe);
-
-int ata_sas_sync_probe(struct ata_port *ap)
-{
- return ata_port_probe(ap);
-}
-EXPORT_SYMBOL_GPL(ata_sas_sync_probe);
-
-
-/**
- * ata_sas_port_init - Initialize a SATA device
- * @ap: SATA port to initialize
- *
- * LOCKING:
- * PCI/etc. bus probe sem.
- *
- * RETURNS:
- * Zero on success, non-zero on error.
- */
-
-int ata_sas_port_init(struct ata_port *ap)
-{
- int rc = ap->ops->port_start(ap);
-
- if (rc)
- return rc;
- ap->print_id = atomic_inc_return(&ata_print_id);
- return 0;
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_init);
-
-int ata_sas_tport_add(struct device *parent, struct ata_port *ap)
-{
- return ata_tport_add(parent, ap);
-}
-EXPORT_SYMBOL_GPL(ata_sas_tport_add);
-
-void ata_sas_tport_delete(struct ata_port *ap)
-{
- ata_tport_delete(ap);
-}
-EXPORT_SYMBOL_GPL(ata_sas_tport_delete);
-
-/**
- * ata_sas_port_destroy - Destroy a SATA port allocated by ata_sas_port_alloc
- * @ap: SATA port to destroy
- *
- */
-
-void ata_sas_port_destroy(struct ata_port *ap)
-{
- if (ap->ops->port_stop)
- ap->ops->port_stop(ap);
- kfree(ap);
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_destroy);
-
-/**
- * ata_sas_slave_configure - Default slave_config routine for libata devices
- * @sdev: SCSI device to configure
- * @ap: ATA port to which SCSI device is attached
- *
- * RETURNS:
- * Zero.
- */
-
-int ata_sas_slave_configure(struct scsi_device *sdev, struct ata_port *ap)
-{
- ata_scsi_sdev_config(sdev);
- ata_scsi_dev_config(sdev, ap->link.device);
- return 0;
-}
-EXPORT_SYMBOL_GPL(ata_sas_slave_configure);
-
-/**
- * ata_sas_queuecmd - Issue SCSI cdb to libata-managed device
- * @cmd: SCSI command to be sent
- * @ap: ATA port to which the command is being sent
- *
- * RETURNS:
- * Return value from __ata_scsi_queuecmd() if @cmd can be queued,
- * 0 otherwise.
- */
-
-int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap)
-{
- int rc = 0;
-
- ata_scsi_dump_cdb(ap, cmd);
-
- if (likely(ata_dev_enabled(ap->link.device)))
- rc = __ata_scsi_queuecmd(cmd, ap->link.device);
- else {
- cmd->result = (DID_BAD_TARGET << 16);
- cmd->scsi_done(cmd);
- }
- return rc;
-}
-EXPORT_SYMBOL_GPL(ata_sas_queuecmd);
-
-int ata_sas_allocate_tag(struct ata_port *ap)
-{
- unsigned int max_queue = ap->host->n_tags;
- unsigned int i, tag;
-
- for (i = 0, tag = ap->sas_last_tag + 1; i < max_queue; i++, tag++) {
- tag = tag < max_queue ? tag : 0;
-
- /* the last tag is reserved for internal command. */
- if (ata_tag_internal(tag))
- continue;
-
- if (!test_and_set_bit(tag, &ap->sas_tag_allocated)) {
- ap->sas_last_tag = tag;
- return tag;
- }
- }
- return -1;
-}
-
-void ata_sas_free_tag(unsigned int tag, struct ata_port *ap)
-{
- clear_bit(tag, &ap->sas_tag_allocated);
-}
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 038db94..ae7189d 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -2,10 +2,6 @@
/*
* libata-sff.c - helper library for PCI IDE BMDMA
*
- * Maintained by: Tejun Heo <tj@kernel.org>
- * Please ALWAYS copy linux-ide@vger.kernel.org
- * on emails.
- *
* Copyright 2003-2006 Red Hat, Inc. All rights reserved.
* Copyright 2003-2006 Jeff Garzik
*
diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c
index 12a505bb..6a40e3c 100644
--- a/drivers/ata/libata-transport.c
+++ b/drivers/ata/libata-transport.c
@@ -208,7 +208,7 @@ show_ata_port_##name(struct device *dev, \
{ \
struct ata_port *ap = transport_class_to_port(dev); \
\
- return snprintf(buf, 20, format_string, cast ap->field); \
+ return scnprintf(buf, 20, format_string, cast ap->field); \
}
#define ata_port_simple_attr(field, name, format_string, type) \
@@ -479,7 +479,7 @@ show_ata_dev_##field(struct device *dev, \
{ \
struct ata_device *ata_dev = transport_class_to_dev(dev); \
\
- return snprintf(buf, 20, format_string, cast ata_dev->field); \
+ return scnprintf(buf, 20, format_string, cast ata_dev->field); \
}
#define ata_dev_simple_attr(field, format_string, type) \
@@ -533,7 +533,7 @@ show_ata_dev_id(struct device *dev,
if (ata_dev->class == ATA_DEV_PMP)
return 0;
for(i=0;i<ATA_ID_WORDS;i++) {
- written += snprintf(buf+written, 20, "%04x%c",
+ written += scnprintf(buf+written, 20, "%04x%c",
ata_dev->id[i],
((i+1) & 7) ? ' ' : '\n');
}
@@ -552,7 +552,7 @@ show_ata_dev_gscr(struct device *dev,
if (ata_dev->class != ATA_DEV_PMP)
return 0;
for(i=0;i<SATA_PMP_GSCR_DWORDS;i++) {
- written += snprintf(buf+written, 20, "%08x%c",
+ written += scnprintf(buf+written, 20, "%08x%c",
ata_dev->gscr[i],
((i+1) & 3) ? ' ' : '\n');
}
@@ -581,7 +581,7 @@ show_ata_dev_trim(struct device *dev,
else
mode = "unqueued";
- return snprintf(buf, 20, "%s\n", mode);
+ return scnprintf(buf, 20, "%s\n", mode);
}
static DEVICE_ATTR(trim, S_IRUGO, show_ata_dev_trim, NULL);
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index cd8090a..68cdd81 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -37,7 +37,11 @@ extern int libata_noacpi;
extern int libata_allow_tpm;
extern const struct device_type ata_port_type;
extern struct ata_link *ata_dev_phys_link(struct ata_device *dev);
+#ifdef CONFIG_ATA_FORCE
extern void ata_force_cbl(struct ata_port *ap);
+#else
+static inline void ata_force_cbl(struct ata_port *ap) { }
+#endif
extern u64 ata_tf_to_lba(const struct ata_taskfile *tf);
extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf);
extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag);
@@ -87,6 +91,18 @@ extern unsigned int ata_read_log_page(struct ata_device *dev, u8 log,
#define to_ata_port(d) container_of(d, struct ata_port, tdev)
+/* libata-sata.c */
+#ifdef CONFIG_SATA_HOST
+int ata_sas_allocate_tag(struct ata_port *ap);
+void ata_sas_free_tag(unsigned int tag, struct ata_port *ap);
+#else
+static inline int ata_sas_allocate_tag(struct ata_port *ap)
+{
+ return -EOPNOTSUPP;
+}
+static inline void ata_sas_free_tag(unsigned int tag, struct ata_port *ap) { }
+#endif
+
/* libata-acpi.c */
#ifdef CONFIG_ATA_ACPI
extern unsigned int ata_acpi_gtf_filter;
@@ -112,6 +128,8 @@ static inline void ata_acpi_bind_dev(struct ata_device *dev) {}
#endif
/* libata-scsi.c */
+extern struct ata_device *ata_scsi_find_dev(struct ata_port *ap,
+ const struct scsi_device *scsidev);
extern int ata_scsi_add_hosts(struct ata_host *host,
struct scsi_host_template *sht);
extern void ata_scsi_scan_host(struct ata_port *ap, int sync);
@@ -128,9 +146,10 @@ extern void ata_scsi_dev_rescan(struct work_struct *work);
extern int ata_bus_probe(struct ata_port *ap);
extern int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
unsigned int id, u64 lun);
-int ata_sas_allocate_tag(struct ata_port *ap);
-void ata_sas_free_tag(unsigned int tag, struct ata_port *ap);
-
+void ata_scsi_sdev_config(struct scsi_device *sdev);
+int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev);
+void ata_scsi_dump_cdb(struct ata_port *ap, struct scsi_cmnd *cmd);
+int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev);
/* libata-eh.c */
extern unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd);
diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index c451d7d1..8729f78 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -157,7 +157,6 @@ static int pdc_sata_hardreset(struct ata_link *link, unsigned int *class,
static void pdc_error_handler(struct ata_port *ap);
static void pdc_post_internal_cmd(struct ata_queued_cmd *qc);
static int pdc_pata_cable_detect(struct ata_port *ap);
-static int pdc_sata_cable_detect(struct ata_port *ap);
static struct scsi_host_template pdc_ata_sht = {
ATA_BASE_SHT(DRV_NAME),
@@ -183,7 +182,7 @@ static const struct ata_port_operations pdc_common_ops = {
static struct ata_port_operations pdc_sata_ops = {
.inherits = &pdc_common_ops,
- .cable_detect = pdc_sata_cable_detect,
+ .cable_detect = ata_cable_sata,
.freeze = pdc_sata_freeze,
.thaw = pdc_sata_thaw,
.scr_read = pdc_sata_scr_read,
@@ -459,11 +458,6 @@ static int pdc_pata_cable_detect(struct ata_port *ap)
return ATA_CBL_PATA80;
}
-static int pdc_sata_cable_detect(struct ata_port *ap)
-{
- return ATA_CBL_SATA;
-}
-
static int pdc_sata_scr_read(struct ata_link *link,
unsigned int sc_reg, u32 *val)
{
diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index 8db8c0f..7af74fb 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -91,7 +91,7 @@
#ifdef GENERAL_DEBUG
#define PRINTK(args...) printk(args)
#else
-#define PRINTK(args...)
+#define PRINTK(args...) do {} while (0)
#endif /* GENERAL_DEBUG */
#ifdef EXTRA_DEBUG
diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig
index b8313a0..48efa7a 100644
--- a/drivers/auxdisplay/Kconfig
+++ b/drivers/auxdisplay/Kconfig
@@ -111,7 +111,7 @@
If unsure, say N.
config CFAG12864B_RATE
- int "Refresh rate (hertz)"
+ int "Refresh rate (hertz)"
depends on CFAG12864B
default "20"
---help---
@@ -329,7 +329,7 @@
config PANEL_LCD_PIN_E
depends on PANEL_PROFILE="0" && PANEL_LCD="1" && PANEL_LCD_PROTO="0"
- int "Parallel port pin number & polarity connected to the LCD E signal (-17...17) "
+ int "Parallel port pin number & polarity connected to the LCD E signal (-17...17) "
range -17 17
default 14
---help---
@@ -344,7 +344,7 @@
config PANEL_LCD_PIN_RS
depends on PANEL_PROFILE="0" && PANEL_LCD="1" && PANEL_LCD_PROTO="0"
- int "Parallel port pin number & polarity connected to the LCD RS signal (-17...17) "
+ int "Parallel port pin number & polarity connected to the LCD RS signal (-17...17) "
range -17 17
default 17
---help---
@@ -359,7 +359,7 @@
config PANEL_LCD_PIN_RW
depends on PANEL_PROFILE="0" && PANEL_LCD="1" && PANEL_LCD_PROTO="0"
- int "Parallel port pin number & polarity connected to the LCD RW signal (-17...17) "
+ int "Parallel port pin number & polarity connected to the LCD RW signal (-17...17) "
range -17 17
default 16
---help---
@@ -374,7 +374,7 @@
config PANEL_LCD_PIN_SCL
depends on PANEL_PROFILE="0" && PANEL_LCD="1" && PANEL_LCD_PROTO!="0"
- int "Parallel port pin number & polarity connected to the LCD SCL signal (-17...17) "
+ int "Parallel port pin number & polarity connected to the LCD SCL signal (-17...17) "
range -17 17
default 1
---help---
@@ -389,7 +389,7 @@
config PANEL_LCD_PIN_SDA
depends on PANEL_PROFILE="0" && PANEL_LCD="1" && PANEL_LCD_PROTO!="0"
- int "Parallel port pin number & polarity connected to the LCD SDA signal (-17...17) "
+ int "Parallel port pin number & polarity connected to the LCD SDA signal (-17...17) "
range -17 17
default 2
---help---
@@ -404,12 +404,12 @@
config PANEL_LCD_PIN_BL
depends on PANEL_PROFILE="0" && PANEL_LCD="1"
- int "Parallel port pin number & polarity connected to the LCD backlight signal (-17...17) "
+ int "Parallel port pin number & polarity connected to the LCD backlight signal (-17...17) "
range -17 17
default 0
---help---
This describes the number of the parallel port pin to which the LCD 'BL' signal
- has been connected. It can be :
+ has been connected. It can be :
0 : no connection (eg: connected to ground)
1..17 : directly connected to any of these pins on the DB25 plug
diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c
index 874c259..c0da382 100644
--- a/drivers/auxdisplay/charlcd.c
+++ b/drivers/auxdisplay/charlcd.c
@@ -88,7 +88,7 @@ struct charlcd_priv {
int len;
} esc_seq;
- unsigned long long drvdata[0];
+ unsigned long long drvdata[];
};
#define charlcd_to_priv(p) container_of(p, struct charlcd_priv, lcd)
diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c
index efb928e..1cce409 100644
--- a/drivers/auxdisplay/img-ascii-lcd.c
+++ b/drivers/auxdisplay/img-ascii-lcd.c
@@ -356,7 +356,6 @@ static int img_ascii_lcd_probe(struct platform_device *pdev)
const struct of_device_id *match;
const struct img_ascii_lcd_config *cfg;
struct img_ascii_lcd_ctx *ctx;
- struct resource *res;
int err;
match = of_match_device(img_ascii_lcd_matches, &pdev->dev);
@@ -378,8 +377,7 @@ static int img_ascii_lcd_probe(struct platform_device *pdev)
&ctx->offset))
return -EINVAL;
} else {
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- ctx->base = devm_ioremap_resource(&pdev->dev, res);
+ ctx->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(ctx->base))
return PTR_ERR(ctx->base);
}
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 42a6724..dbb0f91 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -718,6 +718,8 @@ static void __device_links_queue_sync_state(struct device *dev,
{
struct device_link *link;
+ if (!dev_has_sync_state(dev))
+ return;
if (dev->state_synced)
return;
@@ -745,25 +747,31 @@ static void __device_links_queue_sync_state(struct device *dev,
/**
* device_links_flush_sync_list - Call sync_state() on a list of devices
* @list: List of devices to call sync_state() on
+ * @dont_lock_dev: Device for which lock is already held by the caller
*
* Calls sync_state() on all the devices that have been queued for it. This
- * function is used in conjunction with __device_links_queue_sync_state().
+ * function is used in conjunction with __device_links_queue_sync_state(). The
+ * @dont_lock_dev parameter is useful when this function is called from a
+ * context where a device lock is already held.
*/
-static void device_links_flush_sync_list(struct list_head *list)
+static void device_links_flush_sync_list(struct list_head *list,
+ struct device *dont_lock_dev)
{
struct device *dev, *tmp;
list_for_each_entry_safe(dev, tmp, list, links.defer_sync) {
list_del_init(&dev->links.defer_sync);
- device_lock(dev);
+ if (dev != dont_lock_dev)
+ device_lock(dev);
if (dev->bus->sync_state)
dev->bus->sync_state(dev);
else if (dev->driver && dev->driver->sync_state)
dev->driver->sync_state(dev);
- device_unlock(dev);
+ if (dev != dont_lock_dev)
+ device_unlock(dev);
put_device(dev);
}
@@ -801,7 +809,7 @@ void device_links_supplier_sync_state_resume(void)
out:
device_links_write_unlock();
- device_links_flush_sync_list(&sync_list);
+ device_links_flush_sync_list(&sync_list, NULL);
}
static int sync_state_resume_initcall(void)
@@ -813,7 +821,7 @@ late_initcall(sync_state_resume_initcall);
static void __device_links_supplier_defer_sync(struct device *sup)
{
- if (list_empty(&sup->links.defer_sync))
+ if (list_empty(&sup->links.defer_sync) && dev_has_sync_state(sup))
list_add_tail(&sup->links.defer_sync, &deferred_sync);
}
@@ -865,6 +873,11 @@ void device_links_driver_bound(struct device *dev)
driver_deferred_probe_add(link->consumer);
}
+ if (defer_sync_state_count)
+ __device_links_supplier_defer_sync(dev);
+ else
+ __device_links_queue_sync_state(dev, &sync_list);
+
list_for_each_entry(link, &dev->links.suppliers, c_node) {
if (!(link->flags & DL_FLAG_MANAGED))
continue;
@@ -883,7 +896,7 @@ void device_links_driver_bound(struct device *dev)
device_links_write_unlock();
- device_links_flush_sync_list(&sync_list);
+ device_links_flush_sync_list(&sync_list, dev);
}
static void device_link_drop_managed(struct device_link *link)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index b9f474c..4086718 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -97,30 +97,13 @@ static ssize_t phys_index_show(struct device *dev,
}
/*
- * Show whether the memory block is likely to be offlineable (or is already
- * offline). Once offline, the memory block could be removed. The return
- * value does, however, not indicate that there is a way to remove the
- * memory block.
+ * Legacy interface that we cannot remove. Always indicate "removable"
+ * with CONFIG_MEMORY_HOTREMOVE - bad heuristic.
*/
static ssize_t removable_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
- struct memory_block *mem = to_memory_block(dev);
- unsigned long pfn;
- int ret = 1, i;
-
- if (mem->state != MEM_ONLINE)
- goto out;
-
- for (i = 0; i < sections_per_block; i++) {
- if (!present_section_nr(mem->start_section_nr + i))
- continue;
- pfn = section_nr_to_pfn(mem->start_section_nr + i);
- ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
- }
-
-out:
- return sprintf(buf, "%d\n", ret);
+ return sprintf(buf, "%d\n", (int)IS_ENABLED(CONFIG_MEMORY_HOTREMOVE));
}
/*
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 7fa654f..b5ce7b0 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -363,10 +363,10 @@ static void setup_pdev_dma_masks(struct platform_device *pdev)
{
if (!pdev->dev.coherent_dma_mask)
pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
- if (!pdev->dma_mask)
- pdev->dma_mask = DMA_BIT_MASK(32);
- if (!pdev->dev.dma_mask)
- pdev->dev.dma_mask = &pdev->dma_mask;
+ if (!pdev->dev.dma_mask) {
+ pdev->platform_dma_mask = DMA_BIT_MASK(32);
+ pdev->dev.dma_mask = &pdev->platform_dma_mask;
+ }
};
/**
@@ -662,20 +662,8 @@ struct platform_device *platform_device_register_full(
pdev->dev.of_node_reused = pdevinfo->of_node_reused;
if (pdevinfo->dma_mask) {
- /*
- * This memory isn't freed when the device is put,
- * I don't have a nice idea for that though. Conceptually
- * dma_mask in struct device should not be a pointer.
- * See http://thread.gmane.org/gmane.linux.kernel.pci/9081
- */
- pdev->dev.dma_mask =
- kmalloc(sizeof(*pdev->dev.dma_mask), GFP_KERNEL);
- if (!pdev->dev.dma_mask)
- goto err;
-
- kmemleak_ignore(pdev->dev.dma_mask);
-
- *pdev->dev.dma_mask = pdevinfo->dma_mask;
+ pdev->platform_dma_mask = pdevinfo->dma_mask;
+ pdev->dev.dma_mask = &pdev->platform_dma_mask;
pdev->dev.coherent_dma_mask = pdevinfo->dma_mask;
}
@@ -700,7 +688,6 @@ struct platform_device *platform_device_register_full(
if (ret) {
err:
ACPI_COMPANION_SET(&pdev->dev, NULL);
- kfree(pdev->dev.dma_mask);
platform_device_put(pdev);
return ERR_PTR(ret);
}
diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c
index 0b081de..de8d354 100644
--- a/drivers/base/swnode.c
+++ b/drivers/base/swnode.c
@@ -608,6 +608,13 @@ static void software_node_release(struct kobject *kobj)
{
struct swnode *swnode = kobj_to_swnode(kobj);
+ if (swnode->parent) {
+ ida_simple_remove(&swnode->parent->child_ids, swnode->id);
+ list_del(&swnode->entry);
+ } else {
+ ida_simple_remove(&swnode_root_ids, swnode->id);
+ }
+
if (swnode->allocated) {
property_entries_free(swnode->node->properties);
kfree(swnode->node);
@@ -773,13 +780,6 @@ void fwnode_remove_software_node(struct fwnode_handle *fwnode)
if (!swnode)
return;
- if (swnode->parent) {
- ida_simple_remove(&swnode->parent->child_ids, swnode->id);
- list_del(&swnode->entry);
- } else {
- ida_simple_remove(&swnode_root_ids, swnode->id);
- }
-
kobject_put(&swnode->kobj);
}
EXPORT_SYMBOL_GPL(fwnode_remove_software_node);
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index a53cc1e..795facd 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -6,6 +6,9 @@
# Rewritten to use lists instead of if-statements.
#
+# needed for trace events
+ccflags-y += -I$(src)
+
obj-$(CONFIG_MAC_FLOPPY) += swim3.o
obj-$(CONFIG_BLK_DEV_SWIM) += swim_mod.o
obj-$(CONFIG_BLK_DEV_FD) += floppy.o
@@ -39,6 +42,9 @@
obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o
null_blk-objs := null_blk_main.o
+ifeq ($(CONFIG_BLK_DEV_ZONED), y)
+null_blk-$(CONFIG_TRACING) += null_blk_trace.o
+endif
null_blk-$(CONFIG_BLK_DEV_ZONED) += null_blk_zoned.o
skd-y := skd_main.o
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 7b32fb6..a27804d 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -87,9 +87,9 @@ static ssize_t aoedisk_show_netif(struct device *dev,
if (*nd == NULL)
return snprintf(page, PAGE_SIZE, "none\n");
for (p = page; nd < ne; nd++)
- p += snprintf(p, PAGE_SIZE - (p-page), "%s%s",
+ p += scnprintf(p, PAGE_SIZE - (p-page), "%s%s",
p == page ? "" : ",", (*nd)->name);
- p += snprintf(p, PAGE_SIZE - (p-page), "\n");
+ p += scnprintf(p, PAGE_SIZE - (p-page), "\n");
return p-page;
}
/* firmware version */
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 220c5e1..2fb25c3 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -381,12 +381,10 @@ static struct brd_device *brd_alloc(int i)
spin_lock_init(&brd->brd_lock);
INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);
- brd->brd_queue = blk_alloc_queue(GFP_KERNEL);
+ brd->brd_queue = blk_alloc_queue(brd_make_request, NUMA_NO_NODE);
if (!brd->brd_queue)
goto out_free_dev;
- blk_queue_make_request(brd->brd_queue, brd_make_request);
-
/* This is so fdisk will align partitions on 4k, because of
* direct_access API needing 4k alignment, returning a PFN
* (This is only a problem on very small devices <= 4M,
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index a18155c..c094c3c2 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2801,7 +2801,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
drbd_init_set_defaults(device);
- q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
+ q = blk_alloc_queue(drbd_make_request, NUMA_NO_NODE);
if (!q)
goto out_no_q;
device->rq_queue = q;
@@ -2828,7 +2828,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
q->backing_dev_info->congested_fn = drbd_congested;
q->backing_dev_info->congested_data = device;
- blk_queue_make_request(q, drbd_make_request);
blk_queue_write_cache(q, true, true);
/* Setting the max_hw_sectors to an odd value of 8kibyte here
This triggers a max_bio_size message upon first attach or connect */
@@ -3414,22 +3413,11 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev)
* the meta-data super block. This function sets MD_DIRTY, and starts a
* timer that ensures that within five seconds you have to call drbd_md_sync().
*/
-#ifdef DEBUG
-void drbd_md_mark_dirty_(struct drbd_device *device, unsigned int line, const char *func)
-{
- if (!test_and_set_bit(MD_DIRTY, &device->flags)) {
- mod_timer(&device->md_sync_timer, jiffies + HZ);
- device->last_md_mark_dirty.line = line;
- device->last_md_mark_dirty.func = func;
- }
-}
-#else
void drbd_md_mark_dirty(struct drbd_device *device)
{
if (!test_and_set_bit(MD_DIRTY, &device->flags))
mod_timer(&device->md_sync_timer, jiffies + 5*HZ);
}
-#endif
void drbd_uuid_move_history(struct drbd_device *device) __must_hold(local)
{
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 79e2164..c15e708 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -33,6 +33,7 @@
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
+#include <linux/part_stat.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index b7f605c..0dc019d 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -22,6 +22,7 @@
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
+#include <linux/part_stat.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index cd3612e..c3daa64 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -171,7 +171,6 @@ static int print_unex = 1;
#include <linux/kernel.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
-#define FDPATCHES
#include <linux/fdreg.h>
#include <linux/fd.h>
#include <linux/hdreg.h>
@@ -306,36 +305,26 @@ static bool initialized;
/* reverse mapping from unit and fdc to drive */
#define REVDRIVE(fdc, unit) ((unit) + ((fdc) << 2))
-#define DP (&drive_params[current_drive])
-#define DRS (&drive_state[current_drive])
-#define DRWE (&write_errors[current_drive])
-#define FDCS (&fdc_state[fdc])
-
-#define UDP (&drive_params[drive])
-#define UDRS (&drive_state[drive])
-#define UDRWE (&write_errors[drive])
-#define UFDCS (&fdc_state[FDC(drive)])
-
#define PH_HEAD(floppy, head) (((((floppy)->stretch & 2) >> 1) ^ head) << 2)
#define STRETCH(floppy) ((floppy)->stretch & FD_STRETCH)
-/* read/write */
-#define COMMAND (raw_cmd->cmd[0])
-#define DR_SELECT (raw_cmd->cmd[1])
-#define TRACK (raw_cmd->cmd[2])
-#define HEAD (raw_cmd->cmd[3])
-#define SECTOR (raw_cmd->cmd[4])
-#define SIZECODE (raw_cmd->cmd[5])
-#define SECT_PER_TRACK (raw_cmd->cmd[6])
-#define GAP (raw_cmd->cmd[7])
-#define SIZECODE2 (raw_cmd->cmd[8])
+/* read/write commands */
+#define COMMAND 0
+#define DR_SELECT 1
+#define TRACK 2
+#define HEAD 3
+#define SECTOR 4
+#define SIZECODE 5
+#define SECT_PER_TRACK 6
+#define GAP 7
+#define SIZECODE2 8
#define NR_RW 9
-/* format */
-#define F_SIZECODE (raw_cmd->cmd[2])
-#define F_SECT_PER_TRACK (raw_cmd->cmd[3])
-#define F_GAP (raw_cmd->cmd[4])
-#define F_FILL (raw_cmd->cmd[5])
+/* format commands */
+#define F_SIZECODE 2
+#define F_SECT_PER_TRACK 3
+#define F_GAP 4
+#define F_FILL 5
#define NR_F 6
/*
@@ -351,14 +340,14 @@ static bool initialized;
#define MAX_REPLIES 16
static unsigned char reply_buffer[MAX_REPLIES];
static int inr; /* size of reply buffer, when called from interrupt */
-#define ST0 (reply_buffer[0])
-#define ST1 (reply_buffer[1])
-#define ST2 (reply_buffer[2])
-#define ST3 (reply_buffer[0]) /* result of GETSTATUS */
-#define R_TRACK (reply_buffer[3])
-#define R_HEAD (reply_buffer[4])
-#define R_SECTOR (reply_buffer[5])
-#define R_SIZECODE (reply_buffer[6])
+#define ST0 0
+#define ST1 1
+#define ST2 2
+#define ST3 0 /* result of GETSTATUS */
+#define R_TRACK 3
+#define R_HEAD 4
+#define R_SECTOR 5
+#define R_SIZECODE 6
#define SEL_DLY (2 * HZ / 100)
@@ -593,7 +582,7 @@ static int buffer_max = -1;
/* fdc related variables, should end up in a struct */
static struct floppy_fdc_state fdc_state[N_FDC];
-static int fdc; /* current fdc */
+static int current_fdc; /* current fdc */
static struct workqueue_struct *floppy_wq;
@@ -604,9 +593,19 @@ static unsigned char fsector_t; /* sector in track */
static unsigned char in_sector_offset; /* offset within physical sector,
* expressed in units of 512 bytes */
+static inline unsigned char fdc_inb(int fdc, int reg)
+{
+ return fd_inb(fdc_state[fdc].address + reg);
+}
+
+static inline void fdc_outb(unsigned char value, int fdc, int reg)
+{
+ fd_outb(value, fdc_state[fdc].address + reg);
+}
+
static inline bool drive_no_geom(int drive)
{
- return !current_type[drive] && !ITYPE(UDRS->fd_device);
+ return !current_type[drive] && !ITYPE(drive_state[drive].fd_device);
}
#ifndef fd_eject
@@ -630,7 +629,7 @@ static inline void set_debugt(void)
static inline void debugt(const char *func, const char *msg)
{
- if (DP->flags & DEBUGT)
+ if (drive_params[current_drive].flags & DEBUGT)
pr_info("%s:%s dtime=%lu\n", func, msg, jiffies - debugtimer);
}
#else
@@ -683,10 +682,10 @@ static void __reschedule_timeout(int drive, const char *message)
delay = 20UL * HZ;
drive = 0;
} else
- delay = UDP->timeout;
+ delay = drive_params[drive].timeout;
mod_delayed_work(floppy_wq, &fd_timeout, delay);
- if (UDP->flags & FD_DEBUG)
+ if (drive_params[drive].flags & FD_DEBUG)
DPRINT("reschedule timeout %s\n", message);
timeout_message = message;
}
@@ -740,33 +739,37 @@ static int disk_change(int drive)
{
int fdc = FDC(drive);
- if (time_before(jiffies, UDRS->select_date + UDP->select_delay))
+ if (time_before(jiffies, drive_state[drive].select_date + drive_params[drive].select_delay))
DPRINT("WARNING disk change called early\n");
- if (!(FDCS->dor & (0x10 << UNIT(drive))) ||
- (FDCS->dor & 3) != UNIT(drive) || fdc != FDC(drive)) {
+ if (!(fdc_state[fdc].dor & (0x10 << UNIT(drive))) ||
+ (fdc_state[fdc].dor & 3) != UNIT(drive) || fdc != FDC(drive)) {
DPRINT("probing disk change on unselected drive\n");
DPRINT("drive=%d fdc=%d dor=%x\n", drive, FDC(drive),
- (unsigned int)FDCS->dor);
+ (unsigned int)fdc_state[fdc].dor);
}
- debug_dcl(UDP->flags,
+ debug_dcl(drive_params[drive].flags,
"checking disk change line for drive %d\n", drive);
- debug_dcl(UDP->flags, "jiffies=%lu\n", jiffies);
- debug_dcl(UDP->flags, "disk change line=%x\n", fd_inb(FD_DIR) & 0x80);
- debug_dcl(UDP->flags, "flags=%lx\n", UDRS->flags);
+ debug_dcl(drive_params[drive].flags, "jiffies=%lu\n", jiffies);
+ debug_dcl(drive_params[drive].flags, "disk change line=%x\n",
+ fdc_inb(fdc, FD_DIR) & 0x80);
+ debug_dcl(drive_params[drive].flags, "flags=%lx\n",
+ drive_state[drive].flags);
- if (UDP->flags & FD_BROKEN_DCL)
- return test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags);
- if ((fd_inb(FD_DIR) ^ UDP->flags) & 0x80) {
- set_bit(FD_VERIFY_BIT, &UDRS->flags);
+ if (drive_params[drive].flags & FD_BROKEN_DCL)
+ return test_bit(FD_DISK_CHANGED_BIT,
+ &drive_state[drive].flags);
+ if ((fdc_inb(fdc, FD_DIR) ^ drive_params[drive].flags) & 0x80) {
+ set_bit(FD_VERIFY_BIT, &drive_state[drive].flags);
/* verify write protection */
- if (UDRS->maxblock) /* mark it changed */
- set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags);
+ if (drive_state[drive].maxblock) /* mark it changed */
+ set_bit(FD_DISK_CHANGED_BIT,
+ &drive_state[drive].flags);
/* invalidate its geometry */
- if (UDRS->keep_data >= 0) {
- if ((UDP->flags & FTD_MSG) &&
+ if (drive_state[drive].keep_data >= 0) {
+ if ((drive_params[drive].flags & FTD_MSG) &&
current_type[drive] != NULL)
DPRINT("Disk type is undefined after disk change\n");
current_type[drive] = NULL;
@@ -775,8 +778,8 @@ static int disk_change(int drive)
return 1;
} else {
- UDRS->last_checked = jiffies;
- clear_bit(FD_DISK_NEWCHANGE_BIT, &UDRS->flags);
+ drive_state[drive].last_checked = jiffies;
+ clear_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[drive].flags);
}
return 0;
}
@@ -799,26 +802,26 @@ static int set_dor(int fdc, char mask, char data)
unsigned char newdor;
unsigned char olddor;
- if (FDCS->address == -1)
+ if (fdc_state[fdc].address == -1)
return -1;
- olddor = FDCS->dor;
+ olddor = fdc_state[fdc].dor;
newdor = (olddor & mask) | data;
if (newdor != olddor) {
unit = olddor & 0x3;
if (is_selected(olddor, unit) && !is_selected(newdor, unit)) {
drive = REVDRIVE(fdc, unit);
- debug_dcl(UDP->flags,
+ debug_dcl(drive_params[drive].flags,
"calling disk change from set_dor\n");
disk_change(drive);
}
- FDCS->dor = newdor;
- fd_outb(newdor, FD_DOR);
+ fdc_state[fdc].dor = newdor;
+ fdc_outb(newdor, fdc, FD_DOR);
unit = newdor & 0x3;
if (!is_selected(olddor, unit) && is_selected(newdor, unit)) {
drive = REVDRIVE(fdc, unit);
- UDRS->select_date = jiffies;
+ drive_state[drive].select_date = jiffies;
}
}
return olddor;
@@ -826,11 +829,12 @@ static int set_dor(int fdc, char mask, char data)
static void twaddle(void)
{
- if (DP->select_delay)
+ if (drive_params[current_drive].select_delay)
return;
- fd_outb(FDCS->dor & ~(0x10 << UNIT(current_drive)), FD_DOR);
- fd_outb(FDCS->dor, FD_DOR);
- DRS->select_date = jiffies;
+ fdc_outb(fdc_state[current_fdc].dor & ~(0x10 << UNIT(current_drive)),
+ current_fdc, FD_DOR);
+ fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR);
+ drive_state[current_drive].select_date = jiffies;
}
/*
@@ -841,34 +845,38 @@ static void reset_fdc_info(int mode)
{
int drive;
- FDCS->spec1 = FDCS->spec2 = -1;
- FDCS->need_configure = 1;
- FDCS->perp_mode = 1;
- FDCS->rawcmd = 0;
+ fdc_state[current_fdc].spec1 = fdc_state[current_fdc].spec2 = -1;
+ fdc_state[current_fdc].need_configure = 1;
+ fdc_state[current_fdc].perp_mode = 1;
+ fdc_state[current_fdc].rawcmd = 0;
for (drive = 0; drive < N_DRIVE; drive++)
- if (FDC(drive) == fdc && (mode || UDRS->track != NEED_1_RECAL))
- UDRS->track = NEED_2_RECAL;
+ if (FDC(drive) == current_fdc &&
+ (mode || drive_state[drive].track != NEED_1_RECAL))
+ drive_state[drive].track = NEED_2_RECAL;
}
/* selects the fdc and drive, and enables the fdc's input/dma. */
static void set_fdc(int drive)
{
+ unsigned int new_fdc = current_fdc;
+
if (drive >= 0 && drive < N_DRIVE) {
- fdc = FDC(drive);
+ new_fdc = FDC(drive);
current_drive = drive;
}
- if (fdc != 1 && fdc != 0) {
+ if (new_fdc >= N_FDC) {
pr_info("bad fdc value\n");
return;
}
- set_dor(fdc, ~0, 8);
+ current_fdc = new_fdc;
+ set_dor(current_fdc, ~0, 8);
#if N_FDC > 1
- set_dor(1 - fdc, ~8, 0);
+ set_dor(1 - current_fdc, ~8, 0);
#endif
- if (FDCS->rawcmd == 2)
+ if (fdc_state[current_fdc].rawcmd == 2)
reset_fdc_info(1);
- if (fd_inb(FD_STATUS) != STATUS_READY)
- FDCS->reset = 1;
+ if (fdc_inb(current_fdc, FD_STATUS) != STATUS_READY)
+ fdc_state[current_fdc].reset = 1;
}
/* locks the driver */
@@ -921,19 +929,19 @@ static void floppy_off(unsigned int drive)
unsigned long volatile delta;
int fdc = FDC(drive);
- if (!(FDCS->dor & (0x10 << UNIT(drive))))
+ if (!(fdc_state[fdc].dor & (0x10 << UNIT(drive))))
return;
del_timer(motor_off_timer + drive);
/* make spindle stop in a position which minimizes spinup time
* next time */
- if (UDP->rps) {
- delta = jiffies - UDRS->first_read_date + HZ -
- UDP->spindown_offset;
- delta = ((delta * UDP->rps) % HZ) / UDP->rps;
+ if (drive_params[drive].rps) {
+ delta = jiffies - drive_state[drive].first_read_date + HZ -
+ drive_params[drive].spindown_offset;
+ delta = ((delta * drive_params[drive].rps) % HZ) / drive_params[drive].rps;
motor_off_timer[drive].expires =
- jiffies + UDP->spindown - delta;
+ jiffies + drive_params[drive].spindown - delta;
}
add_timer(motor_off_timer + drive);
}
@@ -949,20 +957,20 @@ static void scandrives(void)
int drive;
int saved_drive;
- if (DP->select_delay)
+ if (drive_params[current_drive].select_delay)
return;
saved_drive = current_drive;
for (i = 0; i < N_DRIVE; i++) {
drive = (saved_drive + i + 1) % N_DRIVE;
- if (UDRS->fd_ref == 0 || UDP->select_delay != 0)
+ if (drive_state[drive].fd_ref == 0 || drive_params[drive].select_delay != 0)
continue; /* skip closed drives */
set_fdc(drive);
- if (!(set_dor(fdc, ~3, UNIT(drive) | (0x10 << UNIT(drive))) &
+ if (!(set_dor(current_fdc, ~3, UNIT(drive) | (0x10 << UNIT(drive))) &
(0x10 << UNIT(drive))))
/* switch the motor off again, if it was off to
* begin with */
- set_dor(fdc, ~(0x10 << UNIT(drive)), 0);
+ set_dor(current_fdc, ~(0x10 << UNIT(drive)), 0);
}
set_fdc(saved_drive);
}
@@ -1008,7 +1016,8 @@ static void cancel_activity(void)
* transfer */
static void fd_watchdog(void)
{
- debug_dcl(DP->flags, "calling disk change from watchdog\n");
+ debug_dcl(drive_params[current_drive].flags,
+ "calling disk change from watchdog\n");
if (disk_change(current_drive)) {
DPRINT("disk removed during i/o\n");
@@ -1032,7 +1041,7 @@ static void main_command_interrupt(void)
static int fd_wait_for_completion(unsigned long expires,
void (*function)(void))
{
- if (FDCS->reset) {
+ if (fdc_state[current_fdc].reset) {
reset_fdc(); /* do the reset during sleep to win time
* if we don't need to sleep, it's a good
* occasion anyways */
@@ -1060,13 +1069,13 @@ static void setup_DMA(void)
pr_cont("%x,", raw_cmd->cmd[i]);
pr_cont("\n");
cont->done(0);
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
return;
}
if (((unsigned long)raw_cmd->kernel_data) % 512) {
pr_info("non aligned address: %p\n", raw_cmd->kernel_data);
cont->done(0);
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
return;
}
f = claim_dma_lock();
@@ -1074,10 +1083,11 @@ static void setup_DMA(void)
#ifdef fd_dma_setup
if (fd_dma_setup(raw_cmd->kernel_data, raw_cmd->length,
(raw_cmd->flags & FD_RAW_READ) ?
- DMA_MODE_READ : DMA_MODE_WRITE, FDCS->address) < 0) {
+ DMA_MODE_READ : DMA_MODE_WRITE,
+ fdc_state[current_fdc].address) < 0) {
release_dma_lock(f);
cont->done(0);
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
return;
}
release_dma_lock(f);
@@ -1088,7 +1098,7 @@ static void setup_DMA(void)
DMA_MODE_READ : DMA_MODE_WRITE);
fd_set_dma_addr(raw_cmd->kernel_data);
fd_set_dma_count(raw_cmd->length);
- virtual_dma_port = FDCS->address;
+ virtual_dma_port = fdc_state[current_fdc].address;
fd_enable_dma();
release_dma_lock(f);
#endif
@@ -1102,18 +1112,18 @@ static int wait_til_ready(void)
int status;
int counter;
- if (FDCS->reset)
+ if (fdc_state[current_fdc].reset)
return -1;
for (counter = 0; counter < 10000; counter++) {
- status = fd_inb(FD_STATUS);
+ status = fdc_inb(current_fdc, FD_STATUS);
if (status & STATUS_READY)
return status;
}
if (initialized) {
- DPRINT("Getstatus times out (%x) on fdc %d\n", status, fdc);
+ DPRINT("Getstatus times out (%x) on fdc %d\n", status, current_fdc);
show_floppy();
}
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
return -1;
}
@@ -1126,17 +1136,17 @@ static int output_byte(char byte)
return -1;
if (is_ready_state(status)) {
- fd_outb(byte, FD_DATA);
+ fdc_outb(byte, current_fdc, FD_DATA);
output_log[output_log_pos].data = byte;
output_log[output_log_pos].status = status;
output_log[output_log_pos].jiffies = jiffies;
output_log_pos = (output_log_pos + 1) % OLOGSIZE;
return 0;
}
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
if (initialized) {
DPRINT("Unable to send byte %x to FDC. Fdc=%x Status=%x\n",
- byte, fdc, status);
+ byte, current_fdc, status);
show_floppy();
}
return -1;
@@ -1159,16 +1169,16 @@ static int result(void)
return i;
}
if (status == (STATUS_DIR | STATUS_READY | STATUS_BUSY))
- reply_buffer[i] = fd_inb(FD_DATA);
+ reply_buffer[i] = fdc_inb(current_fdc, FD_DATA);
else
break;
}
if (initialized) {
DPRINT("get result error. Fdc=%d Last status=%x Read bytes=%d\n",
- fdc, status, i);
+ current_fdc, status, i);
show_floppy();
}
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
return -1;
}
@@ -1205,7 +1215,7 @@ static void perpendicular_mode(void)
default:
DPRINT("Invalid data rate for perpendicular mode!\n");
cont->done(0);
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
/*
* convenient way to return to
* redo without too much hassle
@@ -1216,12 +1226,12 @@ static void perpendicular_mode(void)
} else
perp_mode = 0;
- if (FDCS->perp_mode == perp_mode)
+ if (fdc_state[current_fdc].perp_mode == perp_mode)
return;
- if (FDCS->version >= FDC_82077_ORIG) {
+ if (fdc_state[current_fdc].version >= FDC_82077_ORIG) {
output_byte(FD_PERPENDICULAR);
output_byte(perp_mode);
- FDCS->perp_mode = perp_mode;
+ fdc_state[current_fdc].perp_mode = perp_mode;
} else if (perp_mode) {
DPRINT("perpendicular mode not supported by this FDC.\n");
}
@@ -1276,9 +1286,10 @@ static void fdc_specify(void)
int hlt_max_code = 0x7f;
int hut_max_code = 0xf;
- if (FDCS->need_configure && FDCS->version >= FDC_82072A) {
+ if (fdc_state[current_fdc].need_configure &&
+ fdc_state[current_fdc].version >= FDC_82072A) {
fdc_configure();
- FDCS->need_configure = 0;
+ fdc_state[current_fdc].need_configure = 0;
}
switch (raw_cmd->rate & 0x03) {
@@ -1287,7 +1298,7 @@ static void fdc_specify(void)
break;
case 1:
dtr = 300;
- if (FDCS->version >= FDC_82078) {
+ if (fdc_state[current_fdc].version >= FDC_82078) {
/* chose the default rate table, not the one
* where 1 = 2 Mbps */
output_byte(FD_DRIVESPEC);
@@ -1302,27 +1313,30 @@ static void fdc_specify(void)
break;
}
- if (FDCS->version >= FDC_82072) {
+ if (fdc_state[current_fdc].version >= FDC_82072) {
scale_dtr = dtr;
hlt_max_code = 0x00; /* 0==256msec*dtr0/dtr (not linear!) */
hut_max_code = 0x0; /* 0==256msec*dtr0/dtr (not linear!) */
}
/* Convert step rate from microseconds to milliseconds and 4 bits */
- srt = 16 - DIV_ROUND_UP(DP->srt * scale_dtr / 1000, NOMINAL_DTR);
+ srt = 16 - DIV_ROUND_UP(drive_params[current_drive].srt * scale_dtr / 1000,
+ NOMINAL_DTR);
if (slow_floppy)
srt = srt / 4;
SUPBOUND(srt, 0xf);
INFBOUND(srt, 0);
- hlt = DIV_ROUND_UP(DP->hlt * scale_dtr / 2, NOMINAL_DTR);
+ hlt = DIV_ROUND_UP(drive_params[current_drive].hlt * scale_dtr / 2,
+ NOMINAL_DTR);
if (hlt < 0x01)
hlt = 0x01;
else if (hlt > 0x7f)
hlt = hlt_max_code;
- hut = DIV_ROUND_UP(DP->hut * scale_dtr / 16, NOMINAL_DTR);
+ hut = DIV_ROUND_UP(drive_params[current_drive].hut * scale_dtr / 16,
+ NOMINAL_DTR);
if (hut < 0x1)
hut = 0x1;
else if (hut > 0xf)
@@ -1332,11 +1346,12 @@ static void fdc_specify(void)
spec2 = (hlt << 1) | (use_virtual_dma & 1);
/* If these parameters did not change, just return with success */
- if (FDCS->spec1 != spec1 || FDCS->spec2 != spec2) {
+ if (fdc_state[current_fdc].spec1 != spec1 ||
+ fdc_state[current_fdc].spec2 != spec2) {
/* Go ahead and set spec1 and spec2 */
output_byte(FD_SPECIFY);
- output_byte(FDCS->spec1 = spec1);
- output_byte(FDCS->spec2 = spec2);
+ output_byte(fdc_state[current_fdc].spec1 = spec1);
+ output_byte(fdc_state[current_fdc].spec2 = spec2);
}
} /* fdc_specify */
@@ -1347,52 +1362,55 @@ static void fdc_specify(void)
static int fdc_dtr(void)
{
/* If data rate not already set to desired value, set it. */
- if ((raw_cmd->rate & 3) == FDCS->dtr)
+ if ((raw_cmd->rate & 3) == fdc_state[current_fdc].dtr)
return 0;
/* Set dtr */
- fd_outb(raw_cmd->rate & 3, FD_DCR);
+ fdc_outb(raw_cmd->rate & 3, current_fdc, FD_DCR);
/* TODO: some FDC/drive combinations (C&T 82C711 with TEAC 1.2MB)
* need a stabilization period of several milliseconds to be
* enforced after data rate changes before R/W operations.
* Pause 5 msec to avoid trouble. (Needs to be 2 jiffies)
*/
- FDCS->dtr = raw_cmd->rate & 3;
+ fdc_state[current_fdc].dtr = raw_cmd->rate & 3;
return fd_wait_for_completion(jiffies + 2UL * HZ / 100, floppy_ready);
} /* fdc_dtr */
static void tell_sector(void)
{
pr_cont(": track %d, head %d, sector %d, size %d",
- R_TRACK, R_HEAD, R_SECTOR, R_SIZECODE);
+ reply_buffer[R_TRACK], reply_buffer[R_HEAD],
+ reply_buffer[R_SECTOR],
+ reply_buffer[R_SIZECODE]);
} /* tell_sector */
static void print_errors(void)
{
DPRINT("");
- if (ST0 & ST0_ECE) {
+ if (reply_buffer[ST0] & ST0_ECE) {
pr_cont("Recalibrate failed!");
- } else if (ST2 & ST2_CRC) {
+ } else if (reply_buffer[ST2] & ST2_CRC) {
pr_cont("data CRC error");
tell_sector();
- } else if (ST1 & ST1_CRC) {
+ } else if (reply_buffer[ST1] & ST1_CRC) {
pr_cont("CRC error");
tell_sector();
- } else if ((ST1 & (ST1_MAM | ST1_ND)) ||
- (ST2 & ST2_MAM)) {
+ } else if ((reply_buffer[ST1] & (ST1_MAM | ST1_ND)) ||
+ (reply_buffer[ST2] & ST2_MAM)) {
if (!probing) {
pr_cont("sector not found");
tell_sector();
} else
pr_cont("probe failed...");
- } else if (ST2 & ST2_WC) { /* seek error */
+ } else if (reply_buffer[ST2] & ST2_WC) { /* seek error */
pr_cont("wrong cylinder");
- } else if (ST2 & ST2_BC) { /* cylinder marked as bad */
+ } else if (reply_buffer[ST2] & ST2_BC) { /* cylinder marked as bad */
pr_cont("bad cylinder");
} else {
pr_cont("unknown error. ST[0..2] are: 0x%x 0x%x 0x%x",
- ST0, ST1, ST2);
+ reply_buffer[ST0], reply_buffer[ST1],
+ reply_buffer[ST2]);
tell_sector();
}
pr_cont("\n");
@@ -1411,33 +1429,35 @@ static int interpret_errors(void)
if (inr != 7) {
DPRINT("-- FDC reply error\n");
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
return 1;
}
/* check IC to find cause of interrupt */
- switch (ST0 & ST0_INTR) {
+ switch (reply_buffer[ST0] & ST0_INTR) {
case 0x40: /* error occurred during command execution */
- if (ST1 & ST1_EOC)
+ if (reply_buffer[ST1] & ST1_EOC)
return 0; /* occurs with pseudo-DMA */
bad = 1;
- if (ST1 & ST1_WP) {
+ if (reply_buffer[ST1] & ST1_WP) {
DPRINT("Drive is write protected\n");
- clear_bit(FD_DISK_WRITABLE_BIT, &DRS->flags);
+ clear_bit(FD_DISK_WRITABLE_BIT,
+ &drive_state[current_drive].flags);
cont->done(0);
bad = 2;
- } else if (ST1 & ST1_ND) {
- set_bit(FD_NEED_TWADDLE_BIT, &DRS->flags);
- } else if (ST1 & ST1_OR) {
- if (DP->flags & FTD_MSG)
+ } else if (reply_buffer[ST1] & ST1_ND) {
+ set_bit(FD_NEED_TWADDLE_BIT,
+ &drive_state[current_drive].flags);
+ } else if (reply_buffer[ST1] & ST1_OR) {
+ if (drive_params[current_drive].flags & FTD_MSG)
DPRINT("Over/Underrun - retrying\n");
bad = 0;
- } else if (*errors >= DP->max_errors.reporting) {
+ } else if (*errors >= drive_params[current_drive].max_errors.reporting) {
print_errors();
}
- if (ST2 & ST2_WC || ST2 & ST2_BC)
+ if (reply_buffer[ST2] & ST2_WC || reply_buffer[ST2] & ST2_BC)
/* wrong cylinder => recal */
- DRS->track = NEED_2_RECAL;
+ drive_state[current_drive].track = NEED_2_RECAL;
return bad;
case 0x80: /* invalid command given */
DPRINT("Invalid FDC command given!\n");
@@ -1470,13 +1490,13 @@ static void setup_rw_floppy(void)
flags |= FD_RAW_INTR;
if ((flags & FD_RAW_SPIN) && !(flags & FD_RAW_NO_MOTOR)) {
- ready_date = DRS->spinup_date + DP->spinup;
+ ready_date = drive_state[current_drive].spinup_date + drive_params[current_drive].spinup;
/* If spinup will take a long time, rerun scandrives
* again just before spinup completion. Beware that
* after scandrives, we must again wait for selection.
*/
- if (time_after(ready_date, jiffies + DP->select_delay)) {
- ready_date -= DP->select_delay;
+ if (time_after(ready_date, jiffies + drive_params[current_drive].select_delay)) {
+ ready_date -= drive_params[current_drive].select_delay;
function = floppy_start;
} else
function = setup_rw_floppy;
@@ -1519,44 +1539,52 @@ static int blind_seek;
static void seek_interrupt(void)
{
debugt(__func__, "");
- if (inr != 2 || (ST0 & 0xF8) != 0x20) {
+ if (inr != 2 || (reply_buffer[ST0] & 0xF8) != 0x20) {
DPRINT("seek failed\n");
- DRS->track = NEED_2_RECAL;
+ drive_state[current_drive].track = NEED_2_RECAL;
cont->error();
cont->redo();
return;
}
- if (DRS->track >= 0 && DRS->track != ST1 && !blind_seek) {
- debug_dcl(DP->flags,
+ if (drive_state[current_drive].track >= 0 &&
+ drive_state[current_drive].track != reply_buffer[ST1] &&
+ !blind_seek) {
+ debug_dcl(drive_params[current_drive].flags,
"clearing NEWCHANGE flag because of effective seek\n");
- debug_dcl(DP->flags, "jiffies=%lu\n", jiffies);
- clear_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags);
+ debug_dcl(drive_params[current_drive].flags, "jiffies=%lu\n",
+ jiffies);
+ clear_bit(FD_DISK_NEWCHANGE_BIT,
+ &drive_state[current_drive].flags);
/* effective seek */
- DRS->select_date = jiffies;
+ drive_state[current_drive].select_date = jiffies;
}
- DRS->track = ST1;
+ drive_state[current_drive].track = reply_buffer[ST1];
floppy_ready();
}
static void check_wp(void)
{
- if (test_bit(FD_VERIFY_BIT, &DRS->flags)) {
+ if (test_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags)) {
/* check write protection */
output_byte(FD_GETSTATUS);
output_byte(UNIT(current_drive));
if (result() != 1) {
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
return;
}
- clear_bit(FD_VERIFY_BIT, &DRS->flags);
- clear_bit(FD_NEED_TWADDLE_BIT, &DRS->flags);
- debug_dcl(DP->flags,
+ clear_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags);
+ clear_bit(FD_NEED_TWADDLE_BIT,
+ &drive_state[current_drive].flags);
+ debug_dcl(drive_params[current_drive].flags,
"checking whether disk is write protected\n");
- debug_dcl(DP->flags, "wp=%x\n", ST3 & 0x40);
- if (!(ST3 & 0x40))
- set_bit(FD_DISK_WRITABLE_BIT, &DRS->flags);
+ debug_dcl(drive_params[current_drive].flags, "wp=%x\n",
+ reply_buffer[ST3] & 0x40);
+ if (!(reply_buffer[ST3] & 0x40))
+ set_bit(FD_DISK_WRITABLE_BIT,
+ &drive_state[current_drive].flags);
else
- clear_bit(FD_DISK_WRITABLE_BIT, &DRS->flags);
+ clear_bit(FD_DISK_WRITABLE_BIT,
+ &drive_state[current_drive].flags);
}
}
@@ -1566,32 +1594,34 @@ static void seek_floppy(void)
blind_seek = 0;
- debug_dcl(DP->flags, "calling disk change from %s\n", __func__);
+ debug_dcl(drive_params[current_drive].flags,
+ "calling disk change from %s\n", __func__);
- if (!test_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags) &&
+ if (!test_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[current_drive].flags) &&
disk_change(current_drive) && (raw_cmd->flags & FD_RAW_NEED_DISK)) {
/* the media changed flag should be cleared after the seek.
* If it isn't, this means that there is really no disk in
* the drive.
*/
- set_bit(FD_DISK_CHANGED_BIT, &DRS->flags);
+ set_bit(FD_DISK_CHANGED_BIT,
+ &drive_state[current_drive].flags);
cont->done(0);
cont->redo();
return;
}
- if (DRS->track <= NEED_1_RECAL) {
+ if (drive_state[current_drive].track <= NEED_1_RECAL) {
recalibrate_floppy();
return;
- } else if (test_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags) &&
+ } else if (test_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[current_drive].flags) &&
(raw_cmd->flags & FD_RAW_NEED_DISK) &&
- (DRS->track <= NO_TRACK || DRS->track == raw_cmd->track)) {
+ (drive_state[current_drive].track <= NO_TRACK || drive_state[current_drive].track == raw_cmd->track)) {
/* we seek to clear the media-changed condition. Does anybody
* know a more elegant way, which works on all drives? */
if (raw_cmd->track)
track = raw_cmd->track - 1;
else {
- if (DP->flags & FD_SILENT_DCL_CLEAR) {
- set_dor(fdc, ~(0x10 << UNIT(current_drive)), 0);
+ if (drive_params[current_drive].flags & FD_SILENT_DCL_CLEAR) {
+ set_dor(current_fdc, ~(0x10 << UNIT(current_drive)), 0);
blind_seek = 1;
raw_cmd->flags |= FD_RAW_NEED_SEEK;
}
@@ -1599,7 +1629,7 @@ static void seek_floppy(void)
}
} else {
check_wp();
- if (raw_cmd->track != DRS->track &&
+ if (raw_cmd->track != drive_state[current_drive].track &&
(raw_cmd->flags & FD_RAW_NEED_SEEK))
track = raw_cmd->track;
else {
@@ -1622,9 +1652,9 @@ static void recal_interrupt(void)
{
debugt(__func__, "");
if (inr != 2)
- FDCS->reset = 1;
- else if (ST0 & ST0_ECE) {
- switch (DRS->track) {
+ fdc_state[current_fdc].reset = 1;
+ else if (reply_buffer[ST0] & ST0_ECE) {
+ switch (drive_state[current_drive].track) {
case NEED_1_RECAL:
debugt(__func__, "need 1 recal");
/* after a second recalibrate, we still haven't
@@ -1642,11 +1672,12 @@ static void recal_interrupt(void)
* not to move at recalibration is to
* be already at track 0.) Clear the
* new change flag */
- debug_dcl(DP->flags,
+ debug_dcl(drive_params[current_drive].flags,
"clearing NEWCHANGE flag because of second recalibrate\n");
- clear_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags);
- DRS->select_date = jiffies;
+ clear_bit(FD_DISK_NEWCHANGE_BIT,
+ &drive_state[current_drive].flags);
+ drive_state[current_drive].select_date = jiffies;
/* fall through */
default:
debugt(__func__, "default");
@@ -1656,11 +1687,11 @@ static void recal_interrupt(void)
* track 0, this might mean that we
* started beyond track 80. Try
* again. */
- DRS->track = NEED_1_RECAL;
+ drive_state[current_drive].track = NEED_1_RECAL;
break;
}
} else
- DRS->track = ST1;
+ drive_state[current_drive].track = reply_buffer[ST1];
floppy_ready();
}
@@ -1690,20 +1721,20 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id)
release_dma_lock(f);
do_floppy = NULL;
- if (fdc >= N_FDC || FDCS->address == -1) {
+ if (current_fdc >= N_FDC || fdc_state[current_fdc].address == -1) {
/* we don't even know which FDC is the culprit */
pr_info("DOR0=%x\n", fdc_state[0].dor);
- pr_info("floppy interrupt on bizarre fdc %d\n", fdc);
+ pr_info("floppy interrupt on bizarre fdc %d\n", current_fdc);
pr_info("handler=%ps\n", handler);
is_alive(__func__, "bizarre fdc");
return IRQ_NONE;
}
- FDCS->reset = 0;
+ fdc_state[current_fdc].reset = 0;
/* We have to clear the reset flag here, because apparently on boxes
* with level triggered interrupts (PS/2, Sparc, ...), it is needed to
- * emit SENSEI's to clear the interrupt line. And FDCS->reset blocks the
- * emission of the SENSEI's.
+ * emit SENSEI's to clear the interrupt line. And fdc_state[fdc].reset
+ * blocks the emission of the SENSEI's.
* It is OK to emit floppy commands because we are in an interrupt
* handler here, and thus we have to fear no interference of other
* activity.
@@ -1722,11 +1753,11 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id)
if (do_print)
print_result("sensei", inr);
max_sensei--;
- } while ((ST0 & 0x83) != UNIT(current_drive) &&
+ } while ((reply_buffer[ST0] & 0x83) != UNIT(current_drive) &&
inr == 2 && max_sensei);
}
if (!handler) {
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
return IRQ_NONE;
}
schedule_bh(handler);
@@ -1752,7 +1783,7 @@ static void reset_interrupt(void)
{
debugt(__func__, "");
result(); /* get the status ready for set_fdc */
- if (FDCS->reset) {
+ if (fdc_state[current_fdc].reset) {
pr_info("reset set in interrupt, calling %ps\n", cont->error);
cont->error(); /* a reset just after a reset. BAD! */
}
@@ -1768,7 +1799,7 @@ static void reset_fdc(void)
unsigned long flags;
do_floppy = reset_interrupt;
- FDCS->reset = 0;
+ fdc_state[current_fdc].reset = 0;
reset_fdc_info(0);
/* Pseudo-DMA may intercept 'reset finished' interrupt. */
@@ -1778,12 +1809,13 @@ static void reset_fdc(void)
fd_disable_dma();
release_dma_lock(flags);
- if (FDCS->version >= FDC_82072A)
- fd_outb(0x80 | (FDCS->dtr & 3), FD_STATUS);
+ if (fdc_state[current_fdc].version >= FDC_82072A)
+ fdc_outb(0x80 | (fdc_state[current_fdc].dtr & 3),
+ current_fdc, FD_STATUS);
else {
- fd_outb(FDCS->dor & ~0x04, FD_DOR);
+ fdc_outb(fdc_state[current_fdc].dor & ~0x04, current_fdc, FD_DOR);
udelay(FD_RESET_DELAY);
- fd_outb(FDCS->dor, FD_DOR);
+ fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR);
}
}
@@ -1810,7 +1842,7 @@ static void show_floppy(void)
print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1,
reply_buffer, resultsize, true);
- pr_info("status=%x\n", fd_inb(FD_STATUS));
+ pr_info("status=%x\n", fdc_inb(current_fdc, FD_STATUS));
pr_info("fdc_busy=%lu\n", fdc_busy);
if (do_floppy)
pr_info("do_floppy=%ps\n", do_floppy);
@@ -1847,7 +1879,7 @@ static void floppy_shutdown(struct work_struct *arg)
if (initialized)
DPRINT("floppy timeout called\n");
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
if (cont) {
cont->done(0);
cont->redo(); /* this will recall reset when needed */
@@ -1867,29 +1899,29 @@ static int start_motor(void (*function)(void))
mask = 0xfc;
data = UNIT(current_drive);
if (!(raw_cmd->flags & FD_RAW_NO_MOTOR)) {
- if (!(FDCS->dor & (0x10 << UNIT(current_drive)))) {
+ if (!(fdc_state[current_fdc].dor & (0x10 << UNIT(current_drive)))) {
set_debugt();
/* no read since this drive is running */
- DRS->first_read_date = 0;
+ drive_state[current_drive].first_read_date = 0;
/* note motor start time if motor is not yet running */
- DRS->spinup_date = jiffies;
+ drive_state[current_drive].spinup_date = jiffies;
data |= (0x10 << UNIT(current_drive));
}
- } else if (FDCS->dor & (0x10 << UNIT(current_drive)))
+ } else if (fdc_state[current_fdc].dor & (0x10 << UNIT(current_drive)))
mask &= ~(0x10 << UNIT(current_drive));
/* starts motor and selects floppy */
del_timer(motor_off_timer + current_drive);
- set_dor(fdc, mask, data);
+ set_dor(current_fdc, mask, data);
/* wait_for_completion also schedules reset if needed. */
- return fd_wait_for_completion(DRS->select_date + DP->select_delay,
+ return fd_wait_for_completion(drive_state[current_drive].select_date + drive_params[current_drive].select_delay,
function);
}
static void floppy_ready(void)
{
- if (FDCS->reset) {
+ if (fdc_state[current_fdc].reset) {
reset_fdc();
return;
}
@@ -1898,9 +1930,10 @@ static void floppy_ready(void)
if (fdc_dtr())
return;
- debug_dcl(DP->flags, "calling disk change from floppy_ready\n");
+ debug_dcl(drive_params[current_drive].flags,
+ "calling disk change from floppy_ready\n");
if (!(raw_cmd->flags & FD_RAW_NO_MOTOR) &&
- disk_change(current_drive) && !DP->select_delay)
+ disk_change(current_drive) && !drive_params[current_drive].select_delay)
twaddle(); /* this clears the dcl on certain
* drive/controller combinations */
@@ -1929,8 +1962,9 @@ static void floppy_start(void)
reschedule_timeout(current_reqD, "floppy start");
scandrives();
- debug_dcl(DP->flags, "setting NEWCHANGE in floppy_start\n");
- set_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags);
+ debug_dcl(drive_params[current_drive].flags,
+ "setting NEWCHANGE in floppy_start\n");
+ set_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[current_drive].flags);
floppy_ready();
}
@@ -1988,7 +2022,7 @@ static int wait_til_done(void (*handler)(void), bool interruptible)
return -EINTR;
}
- if (FDCS->reset)
+ if (fdc_state[current_fdc].reset)
command_status = FD_COMMAND_ERROR;
if (command_status == FD_COMMAND_OKAY)
ret = 0;
@@ -2029,14 +2063,14 @@ static int next_valid_format(void)
{
int probed_format;
- probed_format = DRS->probed_format;
+ probed_format = drive_state[current_drive].probed_format;
while (1) {
- if (probed_format >= 8 || !DP->autodetect[probed_format]) {
- DRS->probed_format = 0;
+ if (probed_format >= 8 || !drive_params[current_drive].autodetect[probed_format]) {
+ drive_state[current_drive].probed_format = 0;
return 1;
}
- if (floppy_type[DP->autodetect[probed_format]].sect) {
- DRS->probed_format = probed_format;
+ if (floppy_type[drive_params[current_drive].autodetect[probed_format]].sect) {
+ drive_state[current_drive].probed_format = probed_format;
return 0;
}
probed_format++;
@@ -2048,23 +2082,23 @@ static void bad_flp_intr(void)
int err_count;
if (probing) {
- DRS->probed_format++;
+ drive_state[current_drive].probed_format++;
if (!next_valid_format())
return;
}
err_count = ++(*errors);
- INFBOUND(DRWE->badness, err_count);
- if (err_count > DP->max_errors.abort)
+ INFBOUND(write_errors[current_drive].badness, err_count);
+ if (err_count > drive_params[current_drive].max_errors.abort)
cont->done(0);
- if (err_count > DP->max_errors.reset)
- FDCS->reset = 1;
- else if (err_count > DP->max_errors.recal)
- DRS->track = NEED_2_RECAL;
+ if (err_count > drive_params[current_drive].max_errors.reset)
+ fdc_state[current_fdc].reset = 1;
+ else if (err_count > drive_params[current_drive].max_errors.recal)
+ drive_state[current_drive].track = NEED_2_RECAL;
}
static void set_floppy(int drive)
{
- int type = ITYPE(UDRS->fd_device);
+ int type = ITYPE(drive_state[drive].fd_device);
if (type)
_floppy = floppy_type + type;
@@ -2110,28 +2144,28 @@ static void setup_format_params(int track)
FD_RAW_NEED_DISK | FD_RAW_NEED_SEEK);
raw_cmd->rate = _floppy->rate & 0x43;
raw_cmd->cmd_count = NR_F;
- COMMAND = FM_MODE(_floppy, FD_FORMAT);
- DR_SELECT = UNIT(current_drive) + PH_HEAD(_floppy, format_req.head);
- F_SIZECODE = FD_SIZECODE(_floppy);
- F_SECT_PER_TRACK = _floppy->sect << 2 >> F_SIZECODE;
- F_GAP = _floppy->fmt_gap;
- F_FILL = FD_FILL_BYTE;
+ raw_cmd->cmd[COMMAND] = FM_MODE(_floppy, FD_FORMAT);
+ raw_cmd->cmd[DR_SELECT] = UNIT(current_drive) + PH_HEAD(_floppy, format_req.head);
+ raw_cmd->cmd[F_SIZECODE] = FD_SIZECODE(_floppy);
+ raw_cmd->cmd[F_SECT_PER_TRACK] = _floppy->sect << 2 >> raw_cmd->cmd[F_SIZECODE];
+ raw_cmd->cmd[F_GAP] = _floppy->fmt_gap;
+ raw_cmd->cmd[F_FILL] = FD_FILL_BYTE;
raw_cmd->kernel_data = floppy_track_buffer;
- raw_cmd->length = 4 * F_SECT_PER_TRACK;
+ raw_cmd->length = 4 * raw_cmd->cmd[F_SECT_PER_TRACK];
- if (!F_SECT_PER_TRACK)
+ if (!raw_cmd->cmd[F_SECT_PER_TRACK])
return;
/* allow for about 30ms for data transport per track */
- head_shift = (F_SECT_PER_TRACK + 5) / 6;
+ head_shift = (raw_cmd->cmd[F_SECT_PER_TRACK] + 5) / 6;
/* a ``cylinder'' is two tracks plus a little stepping time */
track_shift = 2 * head_shift + 3;
/* position of logical sector 1 on this track */
n = (track_shift * format_req.track + head_shift * format_req.head)
- % F_SECT_PER_TRACK;
+ % raw_cmd->cmd[F_SECT_PER_TRACK];
/* determine interleave */
il = 1;
@@ -2139,27 +2173,27 @@ static void setup_format_params(int track)
il++;
/* initialize field */
- for (count = 0; count < F_SECT_PER_TRACK; ++count) {
+ for (count = 0; count < raw_cmd->cmd[F_SECT_PER_TRACK]; ++count) {
here[count].track = format_req.track;
here[count].head = format_req.head;
here[count].sect = 0;
- here[count].size = F_SIZECODE;
+ here[count].size = raw_cmd->cmd[F_SIZECODE];
}
/* place logical sectors */
- for (count = 1; count <= F_SECT_PER_TRACK; ++count) {
+ for (count = 1; count <= raw_cmd->cmd[F_SECT_PER_TRACK]; ++count) {
here[n].sect = count;
- n = (n + il) % F_SECT_PER_TRACK;
+ n = (n + il) % raw_cmd->cmd[F_SECT_PER_TRACK];
if (here[n].sect) { /* sector busy, find next free sector */
++n;
- if (n >= F_SECT_PER_TRACK) {
- n -= F_SECT_PER_TRACK;
+ if (n >= raw_cmd->cmd[F_SECT_PER_TRACK]) {
+ n -= raw_cmd->cmd[F_SECT_PER_TRACK];
while (here[n].sect)
++n;
}
}
}
if (_floppy->stretch & FD_SECTBASEMASK) {
- for (count = 0; count < F_SECT_PER_TRACK; count++)
+ for (count = 0; count < raw_cmd->cmd[F_SECT_PER_TRACK]; count++)
here[count].sect += FD_SECTBASE(_floppy) - 1;
}
}
@@ -2188,7 +2222,7 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
set_floppy(drive);
if (!_floppy ||
- _floppy->track > DP->tracks ||
+ _floppy->track > drive_params[current_drive].tracks ||
tmp_format_req->track >= _floppy->track ||
tmp_format_req->head >= _floppy->head ||
(_floppy->sect << 2) % (1 << FD_SIZECODE(_floppy)) ||
@@ -2250,21 +2284,21 @@ static void request_done(int uptodate)
/* maintain values for invalidation on geometry
* change */
block = current_count_sectors + blk_rq_pos(req);
- INFBOUND(DRS->maxblock, block);
+ INFBOUND(drive_state[current_drive].maxblock, block);
if (block > _floppy->sect)
- DRS->maxtrack = 1;
+ drive_state[current_drive].maxtrack = 1;
floppy_end_request(req, 0);
} else {
if (rq_data_dir(req) == WRITE) {
/* record write error information */
- DRWE->write_errors++;
- if (DRWE->write_errors == 1) {
- DRWE->first_error_sector = blk_rq_pos(req);
- DRWE->first_error_generation = DRS->generation;
+ write_errors[current_drive].write_errors++;
+ if (write_errors[current_drive].write_errors == 1) {
+ write_errors[current_drive].first_error_sector = blk_rq_pos(req);
+ write_errors[current_drive].first_error_generation = drive_state[current_drive].generation;
}
- DRWE->last_error_sector = blk_rq_pos(req);
- DRWE->last_error_generation = DRS->generation;
+ write_errors[current_drive].last_error_sector = blk_rq_pos(req);
+ write_errors[current_drive].last_error_generation = drive_state[current_drive].generation;
}
floppy_end_request(req, BLK_STS_IOERR);
}
@@ -2278,43 +2312,46 @@ static void rw_interrupt(void)
int heads;
int nr_sectors;
- if (R_HEAD >= 2) {
+ if (reply_buffer[R_HEAD] >= 2) {
/* some Toshiba floppy controllers occasionnally seem to
* return bogus interrupts after read/write operations, which
* can be recognized by a bad head number (>= 2) */
return;
}
- if (!DRS->first_read_date)
- DRS->first_read_date = jiffies;
+ if (!drive_state[current_drive].first_read_date)
+ drive_state[current_drive].first_read_date = jiffies;
nr_sectors = 0;
- ssize = DIV_ROUND_UP(1 << SIZECODE, 4);
+ ssize = DIV_ROUND_UP(1 << raw_cmd->cmd[SIZECODE], 4);
- if (ST1 & ST1_EOC)
+ if (reply_buffer[ST1] & ST1_EOC)
eoc = 1;
else
eoc = 0;
- if (COMMAND & 0x80)
+ if (raw_cmd->cmd[COMMAND] & 0x80)
heads = 2;
else
heads = 1;
- nr_sectors = (((R_TRACK - TRACK) * heads +
- R_HEAD - HEAD) * SECT_PER_TRACK +
- R_SECTOR - SECTOR + eoc) << SIZECODE >> 2;
+ nr_sectors = (((reply_buffer[R_TRACK] - raw_cmd->cmd[TRACK]) * heads +
+ reply_buffer[R_HEAD] - raw_cmd->cmd[HEAD]) * raw_cmd->cmd[SECT_PER_TRACK] +
+ reply_buffer[R_SECTOR] - raw_cmd->cmd[SECTOR] + eoc) << raw_cmd->cmd[SIZECODE] >> 2;
if (nr_sectors / ssize >
DIV_ROUND_UP(in_sector_offset + current_count_sectors, ssize)) {
DPRINT("long rw: %x instead of %lx\n",
nr_sectors, current_count_sectors);
- pr_info("rs=%d s=%d\n", R_SECTOR, SECTOR);
- pr_info("rh=%d h=%d\n", R_HEAD, HEAD);
- pr_info("rt=%d t=%d\n", R_TRACK, TRACK);
+ pr_info("rs=%d s=%d\n", reply_buffer[R_SECTOR],
+ raw_cmd->cmd[SECTOR]);
+ pr_info("rh=%d h=%d\n", reply_buffer[R_HEAD],
+ raw_cmd->cmd[HEAD]);
+ pr_info("rt=%d t=%d\n", reply_buffer[R_TRACK],
+ raw_cmd->cmd[TRACK]);
pr_info("heads=%d eoc=%d\n", heads, eoc);
pr_info("spt=%d st=%d ss=%d\n",
- SECT_PER_TRACK, fsector_t, ssize);
+ raw_cmd->cmd[SECT_PER_TRACK], fsector_t, ssize);
pr_info("in_sector_offset=%d\n", in_sector_offset);
}
@@ -2344,7 +2381,7 @@ static void rw_interrupt(void)
}
if (probing) {
- if (DP->flags & FTD_MSG)
+ if (drive_params[current_drive].flags & FTD_MSG)
DPRINT("Auto-detected floppy type %s in fd%d\n",
_floppy->name, current_drive);
current_type[current_drive] = _floppy;
@@ -2352,11 +2389,11 @@ static void rw_interrupt(void)
probing = 0;
}
- if (CT(COMMAND) != FD_READ ||
+ if (CT(raw_cmd->cmd[COMMAND]) != FD_READ ||
raw_cmd->kernel_data == bio_data(current_req->bio)) {
/* transfer directly from buffer */
cont->done(1);
- } else if (CT(COMMAND) == FD_READ) {
+ } else if (CT(raw_cmd->cmd[COMMAND]) == FD_READ) {
buffer_track = raw_cmd->track;
buffer_drive = current_drive;
INFBOUND(buffer_max, nr_sectors + fsector_t);
@@ -2415,13 +2452,13 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
min(max_sector, max_sector_2),
blk_rq_sectors(current_req));
- if (current_count_sectors <= 0 && CT(COMMAND) == FD_WRITE &&
+ if (current_count_sectors <= 0 && CT(raw_cmd->cmd[COMMAND]) == FD_WRITE &&
buffer_max > fsector_t + blk_rq_sectors(current_req))
current_count_sectors = min_t(int, buffer_max - fsector_t,
blk_rq_sectors(current_req));
remaining = current_count_sectors << 9;
- if (remaining > blk_rq_bytes(current_req) && CT(COMMAND) == FD_WRITE) {
+ if (remaining > blk_rq_bytes(current_req) && CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) {
DPRINT("in copy buffer\n");
pr_info("current_count_sectors=%ld\n", current_count_sectors);
pr_info("remaining=%d\n", remaining >> 9);
@@ -2456,16 +2493,16 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
fsector_t, buffer_min);
pr_info("current_count_sectors=%ld\n",
current_count_sectors);
- if (CT(COMMAND) == FD_READ)
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_READ)
pr_info("read\n");
- if (CT(COMMAND) == FD_WRITE)
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE)
pr_info("write\n");
break;
}
if (((unsigned long)buffer) % 512)
DPRINT("%p buffer not aligned\n", buffer);
- if (CT(COMMAND) == FD_READ)
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_READ)
memcpy(buffer, dma_buffer, size);
else
memcpy(dma_buffer, buffer, size);
@@ -2483,7 +2520,7 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
/* work around a bug in pseudo DMA
* (on some FDCs) pseudo DMA does not stop when the CPU stops
* sending data. Hence we need a different way to signal the
- * transfer length: We use SECT_PER_TRACK. Unfortunately, this
+ * transfer length: We use raw_cmd->cmd[SECT_PER_TRACK]. Unfortunately, this
* does not work with MT, hence we can only transfer one head at
* a time
*/
@@ -2492,18 +2529,18 @@ static void virtualdmabug_workaround(void)
int hard_sectors;
int end_sector;
- if (CT(COMMAND) == FD_WRITE) {
- COMMAND &= ~0x80; /* switch off multiple track mode */
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) {
+ raw_cmd->cmd[COMMAND] &= ~0x80; /* switch off multiple track mode */
- hard_sectors = raw_cmd->length >> (7 + SIZECODE);
- end_sector = SECTOR + hard_sectors - 1;
- if (end_sector > SECT_PER_TRACK) {
+ hard_sectors = raw_cmd->length >> (7 + raw_cmd->cmd[SIZECODE]);
+ end_sector = raw_cmd->cmd[SECTOR] + hard_sectors - 1;
+ if (end_sector > raw_cmd->cmd[SECT_PER_TRACK]) {
pr_info("too many sectors %d > %d\n",
- end_sector, SECT_PER_TRACK);
+ end_sector, raw_cmd->cmd[SECT_PER_TRACK]);
return;
}
- SECT_PER_TRACK = end_sector;
- /* make sure SECT_PER_TRACK
+ raw_cmd->cmd[SECT_PER_TRACK] = end_sector;
+ /* make sure raw_cmd->cmd[SECT_PER_TRACK]
* points to end of transfer */
}
}
@@ -2536,10 +2573,10 @@ static int make_raw_rw_request(void)
raw_cmd->cmd_count = NR_RW;
if (rq_data_dir(current_req) == READ) {
raw_cmd->flags |= FD_RAW_READ;
- COMMAND = FM_MODE(_floppy, FD_READ);
+ raw_cmd->cmd[COMMAND] = FM_MODE(_floppy, FD_READ);
} else if (rq_data_dir(current_req) == WRITE) {
raw_cmd->flags |= FD_RAW_WRITE;
- COMMAND = FM_MODE(_floppy, FD_WRITE);
+ raw_cmd->cmd[COMMAND] = FM_MODE(_floppy, FD_WRITE);
} else {
DPRINT("%s: unknown command\n", __func__);
return 0;
@@ -2547,24 +2584,24 @@ static int make_raw_rw_request(void)
max_sector = _floppy->sect * _floppy->head;
- TRACK = (int)blk_rq_pos(current_req) / max_sector;
+ raw_cmd->cmd[TRACK] = (int)blk_rq_pos(current_req) / max_sector;
fsector_t = (int)blk_rq_pos(current_req) % max_sector;
- if (_floppy->track && TRACK >= _floppy->track) {
+ if (_floppy->track && raw_cmd->cmd[TRACK] >= _floppy->track) {
if (blk_rq_cur_sectors(current_req) & 1) {
current_count_sectors = 1;
return 1;
} else
return 0;
}
- HEAD = fsector_t / _floppy->sect;
+ raw_cmd->cmd[HEAD] = fsector_t / _floppy->sect;
if (((_floppy->stretch & (FD_SWAPSIDES | FD_SECTBASEMASK)) ||
- test_bit(FD_NEED_TWADDLE_BIT, &DRS->flags)) &&
+ test_bit(FD_NEED_TWADDLE_BIT, &drive_state[current_drive].flags)) &&
fsector_t < _floppy->sect)
max_sector = _floppy->sect;
/* 2M disks have phantom sectors on the first track */
- if ((_floppy->rate & FD_2M) && (!TRACK) && (!HEAD)) {
+ if ((_floppy->rate & FD_2M) && (!raw_cmd->cmd[TRACK]) && (!raw_cmd->cmd[HEAD])) {
max_sector = 2 * _floppy->sect / 3;
if (fsector_t >= max_sector) {
current_count_sectors =
@@ -2572,23 +2609,24 @@ static int make_raw_rw_request(void)
blk_rq_sectors(current_req));
return 1;
}
- SIZECODE = 2;
+ raw_cmd->cmd[SIZECODE] = 2;
} else
- SIZECODE = FD_SIZECODE(_floppy);
+ raw_cmd->cmd[SIZECODE] = FD_SIZECODE(_floppy);
raw_cmd->rate = _floppy->rate & 0x43;
- if ((_floppy->rate & FD_2M) && (TRACK || HEAD) && raw_cmd->rate == 2)
+ if ((_floppy->rate & FD_2M) &&
+ (raw_cmd->cmd[TRACK] || raw_cmd->cmd[HEAD]) && raw_cmd->rate == 2)
raw_cmd->rate = 1;
- if (SIZECODE)
- SIZECODE2 = 0xff;
+ if (raw_cmd->cmd[SIZECODE])
+ raw_cmd->cmd[SIZECODE2] = 0xff;
else
- SIZECODE2 = 0x80;
- raw_cmd->track = TRACK << STRETCH(_floppy);
- DR_SELECT = UNIT(current_drive) + PH_HEAD(_floppy, HEAD);
- GAP = _floppy->gap;
- ssize = DIV_ROUND_UP(1 << SIZECODE, 4);
- SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE;
- SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) +
+ raw_cmd->cmd[SIZECODE2] = 0x80;
+ raw_cmd->track = raw_cmd->cmd[TRACK] << STRETCH(_floppy);
+ raw_cmd->cmd[DR_SELECT] = UNIT(current_drive) + PH_HEAD(_floppy, raw_cmd->cmd[HEAD]);
+ raw_cmd->cmd[GAP] = _floppy->gap;
+ ssize = DIV_ROUND_UP(1 << raw_cmd->cmd[SIZECODE], 4);
+ raw_cmd->cmd[SECT_PER_TRACK] = _floppy->sect << 2 >> raw_cmd->cmd[SIZECODE];
+ raw_cmd->cmd[SECTOR] = ((fsector_t % _floppy->sect) << 2 >> raw_cmd->cmd[SIZECODE]) +
FD_SECTBASE(_floppy);
/* tracksize describes the size which can be filled up with sectors
@@ -2596,24 +2634,24 @@ static int make_raw_rw_request(void)
*/
tracksize = _floppy->sect - _floppy->sect % ssize;
if (tracksize < _floppy->sect) {
- SECT_PER_TRACK++;
+ raw_cmd->cmd[SECT_PER_TRACK]++;
if (tracksize <= fsector_t % _floppy->sect)
- SECTOR--;
+ raw_cmd->cmd[SECTOR]--;
/* if we are beyond tracksize, fill up using smaller sectors */
while (tracksize <= fsector_t % _floppy->sect) {
while (tracksize + ssize > _floppy->sect) {
- SIZECODE--;
+ raw_cmd->cmd[SIZECODE]--;
ssize >>= 1;
}
- SECTOR++;
- SECT_PER_TRACK++;
+ raw_cmd->cmd[SECTOR]++;
+ raw_cmd->cmd[SECT_PER_TRACK]++;
tracksize += ssize;
}
- max_sector = HEAD * _floppy->sect + tracksize;
- } else if (!TRACK && !HEAD && !(_floppy->rate & FD_2M) && probing) {
+ max_sector = raw_cmd->cmd[HEAD] * _floppy->sect + tracksize;
+ } else if (!raw_cmd->cmd[TRACK] && !raw_cmd->cmd[HEAD] && !(_floppy->rate & FD_2M) && probing) {
max_sector = _floppy->sect;
- } else if (!HEAD && CT(COMMAND) == FD_WRITE) {
+ } else if (!raw_cmd->cmd[HEAD] && CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) {
/* for virtual DMA bug workaround */
max_sector = _floppy->sect;
}
@@ -2625,12 +2663,12 @@ static int make_raw_rw_request(void)
(current_drive == buffer_drive) &&
(fsector_t >= buffer_min) && (fsector_t < buffer_max)) {
/* data already in track buffer */
- if (CT(COMMAND) == FD_READ) {
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_READ) {
copy_buffer(1, max_sector, buffer_max);
return 1;
}
} else if (in_sector_offset || blk_rq_sectors(current_req) < ssize) {
- if (CT(COMMAND) == FD_WRITE) {
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) {
unsigned int sectors;
sectors = fsector_t + blk_rq_sectors(current_req);
@@ -2641,7 +2679,7 @@ static int make_raw_rw_request(void)
}
raw_cmd->flags &= ~FD_RAW_WRITE;
raw_cmd->flags |= FD_RAW_READ;
- COMMAND = FM_MODE(_floppy, FD_READ);
+ raw_cmd->cmd[COMMAND] = FM_MODE(_floppy, FD_READ);
} else if ((unsigned long)bio_data(current_req->bio) < MAX_DMA_ADDRESS) {
unsigned long dma_limit;
int direct, indirect;
@@ -2674,9 +2712,9 @@ static int make_raw_rw_request(void)
*/
if (!direct ||
(indirect * 2 > direct * 3 &&
- *errors < DP->max_errors.read_track &&
+ *errors < drive_params[current_drive].max_errors.read_track &&
((!probing ||
- (DP->read_track & (1 << DRS->probed_format)))))) {
+ (drive_params[current_drive].read_track & (1 << drive_state[current_drive].probed_format)))))) {
max_size = blk_rq_sectors(current_req);
} else {
raw_cmd->kernel_data = bio_data(current_req->bio);
@@ -2692,7 +2730,7 @@ static int make_raw_rw_request(void)
}
}
- if (CT(COMMAND) == FD_READ)
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_READ)
max_size = max_sector; /* unbounded */
/* claim buffer track if needed */
@@ -2700,7 +2738,7 @@ static int make_raw_rw_request(void)
buffer_drive != current_drive || /* bad drive */
fsector_t > buffer_max ||
fsector_t < buffer_min ||
- ((CT(COMMAND) == FD_READ ||
+ ((CT(raw_cmd->cmd[COMMAND]) == FD_READ ||
(!in_sector_offset && blk_rq_sectors(current_req) >= ssize)) &&
max_sector > 2 * max_buffer_sectors + buffer_min &&
max_size + fsector_t > 2 * max_buffer_sectors + buffer_min)) {
@@ -2712,7 +2750,7 @@ static int make_raw_rw_request(void)
raw_cmd->kernel_data = floppy_track_buffer +
((aligned_sector_t - buffer_min) << 9);
- if (CT(COMMAND) == FD_WRITE) {
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) {
/* copy write buffer to track buffer.
* if we get here, we know that the write
* is either aligned or the data already in the buffer
@@ -2734,10 +2772,10 @@ static int make_raw_rw_request(void)
raw_cmd->length <<= 9;
if ((raw_cmd->length < current_count_sectors << 9) ||
(raw_cmd->kernel_data != bio_data(current_req->bio) &&
- CT(COMMAND) == FD_WRITE &&
+ CT(raw_cmd->cmd[COMMAND]) == FD_WRITE &&
(aligned_sector_t + (raw_cmd->length >> 9) > buffer_max ||
aligned_sector_t < buffer_min)) ||
- raw_cmd->length % (128 << SIZECODE) ||
+ raw_cmd->length % (128 << raw_cmd->cmd[SIZECODE]) ||
raw_cmd->length <= 0 || current_count_sectors <= 0) {
DPRINT("fractionary current count b=%lx s=%lx\n",
raw_cmd->length, current_count_sectors);
@@ -2748,9 +2786,10 @@ static int make_raw_rw_request(void)
current_count_sectors);
pr_info("st=%d ast=%d mse=%d msi=%d\n",
fsector_t, aligned_sector_t, max_sector, max_size);
- pr_info("ssize=%x SIZECODE=%d\n", ssize, SIZECODE);
+ pr_info("ssize=%x SIZECODE=%d\n", ssize, raw_cmd->cmd[SIZECODE]);
pr_info("command=%x SECTOR=%d HEAD=%d, TRACK=%d\n",
- COMMAND, SECTOR, HEAD, TRACK);
+ raw_cmd->cmd[COMMAND], raw_cmd->cmd[SECTOR],
+ raw_cmd->cmd[HEAD], raw_cmd->cmd[TRACK]);
pr_info("buffer drive=%d\n", buffer_drive);
pr_info("buffer track=%d\n", buffer_track);
pr_info("buffer_min=%d\n", buffer_min);
@@ -2769,9 +2808,9 @@ static int make_raw_rw_request(void)
fsector_t, buffer_min, raw_cmd->length >> 9);
pr_info("current_count_sectors=%ld\n",
current_count_sectors);
- if (CT(COMMAND) == FD_READ)
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_READ)
pr_info("read\n");
- if (CT(COMMAND) == FD_WRITE)
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE)
pr_info("write\n");
return 0;
}
@@ -2838,14 +2877,14 @@ static void redo_fd_request(void)
disk_change(current_drive);
if (test_bit(current_drive, &fake_change) ||
- test_bit(FD_DISK_CHANGED_BIT, &DRS->flags)) {
+ test_bit(FD_DISK_CHANGED_BIT, &drive_state[current_drive].flags)) {
DPRINT("disk absent or changed during operation\n");
request_done(0);
goto do_request;
}
if (!_floppy) { /* Autodetection */
if (!probing) {
- DRS->probed_format = 0;
+ drive_state[current_drive].probed_format = 0;
if (next_valid_format()) {
DPRINT("no autodetectable formats\n");
_floppy = NULL;
@@ -2854,7 +2893,7 @@ static void redo_fd_request(void)
}
}
probing = 1;
- _floppy = floppy_type + DP->autodetect[DRS->probed_format];
+ _floppy = floppy_type + drive_params[current_drive].autodetect[drive_state[current_drive].probed_format];
} else
probing = 0;
errors = &(current_req->error_count);
@@ -2864,7 +2903,7 @@ static void redo_fd_request(void)
goto do_request;
}
- if (test_bit(FD_NEED_TWADDLE_BIT, &DRS->flags))
+ if (test_bit(FD_NEED_TWADDLE_BIT, &drive_state[current_drive].flags))
twaddle();
schedule_bh(floppy_start);
debugt(__func__, "queue fd request");
@@ -2933,8 +2972,9 @@ static int poll_drive(bool interruptible, int flag)
raw_cmd->track = 0;
raw_cmd->cmd_count = 0;
cont = &poll_cont;
- debug_dcl(DP->flags, "setting NEWCHANGE in poll_drive\n");
- set_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags);
+ debug_dcl(drive_params[current_drive].flags,
+ "setting NEWCHANGE in poll_drive\n");
+ set_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[current_drive].flags);
return wait_til_done(floppy_ready, interruptible);
}
@@ -2964,8 +3004,8 @@ static int user_reset_fdc(int drive, int arg, bool interruptible)
return -EINTR;
if (arg == FD_RESET_ALWAYS)
- FDCS->reset = 1;
- if (FDCS->reset) {
+ fdc_state[current_fdc].reset = 1;
+ if (fdc_state[current_fdc].reset) {
cont = &reset_cont;
ret = wait_til_done(reset_fdc, interruptible);
if (ret == -EINTR)
@@ -2998,8 +3038,8 @@ static const char *drive_name(int type, int drive)
if (type)
floppy = floppy_type + type;
else {
- if (UDP->native_format)
- floppy = floppy_type + UDP->native_format;
+ if (drive_params[drive].native_format)
+ floppy = floppy_type + drive_params[drive].native_format;
else
return "(null)";
}
@@ -3176,23 +3216,23 @@ static int raw_cmd_ioctl(int cmd, void __user *param)
int ret2;
int ret;
- if (FDCS->rawcmd <= 1)
- FDCS->rawcmd = 1;
+ if (fdc_state[current_fdc].rawcmd <= 1)
+ fdc_state[current_fdc].rawcmd = 1;
for (drive = 0; drive < N_DRIVE; drive++) {
- if (FDC(drive) != fdc)
+ if (FDC(drive) != current_fdc)
continue;
if (drive == current_drive) {
- if (UDRS->fd_ref > 1) {
- FDCS->rawcmd = 2;
+ if (drive_state[drive].fd_ref > 1) {
+ fdc_state[current_fdc].rawcmd = 2;
break;
}
- } else if (UDRS->fd_ref) {
- FDCS->rawcmd = 2;
+ } else if (drive_state[drive].fd_ref) {
+ fdc_state[current_fdc].rawcmd = 2;
break;
}
}
- if (FDCS->reset)
+ if (fdc_state[current_fdc].reset)
return -EIO;
ret = raw_cmd_copyin(cmd, param, &my_raw_cmd);
@@ -3204,12 +3244,13 @@ static int raw_cmd_ioctl(int cmd, void __user *param)
raw_cmd = my_raw_cmd;
cont = &raw_cmd_cont;
ret = wait_til_done(floppy_start, true);
- debug_dcl(DP->flags, "calling disk change from raw_cmd ioctl\n");
+ debug_dcl(drive_params[current_drive].flags,
+ "calling disk change from raw_cmd ioctl\n");
- if (ret != -EINTR && FDCS->reset)
+ if (ret != -EINTR && fdc_state[current_fdc].reset)
ret = -EIO;
- DRS->track = NO_TRACK;
+ drive_state[current_drive].track = NO_TRACK;
ret2 = raw_cmd_copyout(cmd, param, my_raw_cmd);
if (!ret)
@@ -3237,9 +3278,9 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g,
(int)g->head <= 0 ||
/* check for overflow in max_sector */
(int)(g->sect * g->head) <= 0 ||
- /* check for zero in F_SECT_PER_TRACK */
+ /* check for zero in raw_cmd->cmd[F_SECT_PER_TRACK] */
(unsigned char)((g->sect << 2) >> FD_SIZECODE(g)) == 0 ||
- g->track <= 0 || g->track > UDP->tracks >> STRETCH(g) ||
+ g->track <= 0 || g->track > drive_params[drive].tracks >> STRETCH(g) ||
/* check if reserved bits are set */
(g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_SECTBASEMASK)) != 0)
return -EINVAL;
@@ -3282,16 +3323,16 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g,
current_type[drive] = &user_params[drive];
floppy_sizes[drive] = user_params[drive].size;
if (cmd == FDDEFPRM)
- DRS->keep_data = -1;
+ drive_state[current_drive].keep_data = -1;
else
- DRS->keep_data = 1;
+ drive_state[current_drive].keep_data = 1;
/* invalidation. Invalidate only when needed, i.e.
* when there are already sectors in the buffer cache
* whose number will change. This is useful, because
* mtools often changes the geometry of the disk after
* looking at the boot block */
- if (DRS->maxblock > user_params[drive].sect ||
- DRS->maxtrack ||
+ if (drive_state[current_drive].maxblock > user_params[drive].sect ||
+ drive_state[current_drive].maxtrack ||
((user_params[drive].sect ^ oldStretch) &
(FD_SWAPSIDES | FD_SECTBASEMASK)))
invalidate_drive(bdev);
@@ -3404,7 +3445,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
unsigned long param)
{
int drive = (long)bdev->bd_disk->private_data;
- int type = ITYPE(UDRS->fd_device);
+ int type = ITYPE(drive_state[drive].fd_device);
int i;
int ret;
int size;
@@ -3452,7 +3493,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
switch (cmd) {
case FDEJECT:
- if (UDRS->fd_ref != 1)
+ if (drive_state[drive].fd_ref != 1)
/* somebody else has this drive open */
return -EBUSY;
if (lock_fdc(drive))
@@ -3462,8 +3503,8 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
* non-Sparc architectures */
ret = fd_eject(UNIT(drive));
- set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags);
- set_bit(FD_VERIFY_BIT, &UDRS->flags);
+ set_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags);
+ set_bit(FD_VERIFY_BIT, &drive_state[drive].flags);
process_fd_request();
return ret;
case FDCLRPRM:
@@ -3471,7 +3512,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
return -EINTR;
current_type[drive] = NULL;
floppy_sizes[drive] = MAX_DISK_SIZE << 1;
- UDRS->keep_data = 0;
+ drive_state[drive].keep_data = 0;
return invalidate_drive(bdev);
case FDSETPRM:
case FDDEFPRM:
@@ -3486,17 +3527,17 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
outparam = &inparam.g;
break;
case FDMSGON:
- UDP->flags |= FTD_MSG;
+ drive_params[drive].flags |= FTD_MSG;
return 0;
case FDMSGOFF:
- UDP->flags &= ~FTD_MSG;
+ drive_params[drive].flags &= ~FTD_MSG;
return 0;
case FDFMTBEG:
if (lock_fdc(drive))
return -EINTR;
if (poll_drive(true, FD_RAW_NEED_DISK) == -EINTR)
return -EINTR;
- ret = UDRS->flags;
+ ret = drive_state[drive].flags;
process_fd_request();
if (ret & FD_VERIFY)
return -ENODEV;
@@ -3504,7 +3545,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
return -EROFS;
return 0;
case FDFMTTRK:
- if (UDRS->fd_ref != 1)
+ if (drive_state[drive].fd_ref != 1)
return -EBUSY;
return do_format(drive, &inparam.f);
case FDFMTEND:
@@ -3513,13 +3554,13 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
return -EINTR;
return invalidate_drive(bdev);
case FDSETEMSGTRESH:
- UDP->max_errors.reporting = (unsigned short)(param & 0x0f);
+ drive_params[drive].max_errors.reporting = (unsigned short)(param & 0x0f);
return 0;
case FDGETMAXERRS:
- outparam = &UDP->max_errors;
+ outparam = &drive_params[drive].max_errors;
break;
case FDSETMAXERRS:
- UDP->max_errors = inparam.max_errors;
+ drive_params[drive].max_errors = inparam.max_errors;
break;
case FDGETDRVTYP:
outparam = drive_name(type, drive);
@@ -3529,10 +3570,10 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
if (!valid_floppy_drive_params(inparam.dp.autodetect,
inparam.dp.native_format))
return -EINVAL;
- *UDP = inparam.dp;
+ drive_params[drive] = inparam.dp;
break;
case FDGETDRVPRM:
- outparam = UDP;
+ outparam = &drive_params[drive];
break;
case FDPOLLDRVSTAT:
if (lock_fdc(drive))
@@ -3542,18 +3583,18 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
process_fd_request();
/* fall through */
case FDGETDRVSTAT:
- outparam = UDRS;
+ outparam = &drive_state[drive];
break;
case FDRESET:
return user_reset_fdc(drive, (int)param, true);
case FDGETFDCSTAT:
- outparam = UFDCS;
+ outparam = &fdc_state[FDC(drive)];
break;
case FDWERRORCLR:
- memset(UDRWE, 0, sizeof(*UDRWE));
+ memset(&write_errors[drive], 0, sizeof(write_errors[drive]));
return 0;
case FDWERRORGET:
- outparam = UDRWE;
+ outparam = &write_errors[drive];
break;
case FDRAWCMD:
if (type)
@@ -3689,7 +3730,7 @@ static int compat_set_geometry(struct block_device *bdev, fmode_t mode, unsigned
mutex_lock(&floppy_mutex);
drive = (long)bdev->bd_disk->private_data;
- type = ITYPE(UDRS->fd_device);
+ type = ITYPE(drive_state[drive].fd_device);
err = set_geometry(cmd == FDSETPRM32 ? FDSETPRM : FDDEFPRM,
&v, drive, type, bdev);
mutex_unlock(&floppy_mutex);
@@ -3705,7 +3746,8 @@ static int compat_get_prm(int drive,
memset(&v, 0, sizeof(v));
mutex_lock(&floppy_mutex);
- err = get_floppy_geometry(drive, ITYPE(UDRS->fd_device), &p);
+ err = get_floppy_geometry(drive, ITYPE(drive_state[drive].fd_device),
+ &p);
if (err) {
mutex_unlock(&floppy_mutex);
return err;
@@ -3729,25 +3771,26 @@ static int compat_setdrvprm(int drive,
if (!valid_floppy_drive_params(v.autodetect, v.native_format))
return -EINVAL;
mutex_lock(&floppy_mutex);
- UDP->cmos = v.cmos;
- UDP->max_dtr = v.max_dtr;
- UDP->hlt = v.hlt;
- UDP->hut = v.hut;
- UDP->srt = v.srt;
- UDP->spinup = v.spinup;
- UDP->spindown = v.spindown;
- UDP->spindown_offset = v.spindown_offset;
- UDP->select_delay = v.select_delay;
- UDP->rps = v.rps;
- UDP->tracks = v.tracks;
- UDP->timeout = v.timeout;
- UDP->interleave_sect = v.interleave_sect;
- UDP->max_errors = v.max_errors;
- UDP->flags = v.flags;
- UDP->read_track = v.read_track;
- memcpy(UDP->autodetect, v.autodetect, sizeof(v.autodetect));
- UDP->checkfreq = v.checkfreq;
- UDP->native_format = v.native_format;
+ drive_params[drive].cmos = v.cmos;
+ drive_params[drive].max_dtr = v.max_dtr;
+ drive_params[drive].hlt = v.hlt;
+ drive_params[drive].hut = v.hut;
+ drive_params[drive].srt = v.srt;
+ drive_params[drive].spinup = v.spinup;
+ drive_params[drive].spindown = v.spindown;
+ drive_params[drive].spindown_offset = v.spindown_offset;
+ drive_params[drive].select_delay = v.select_delay;
+ drive_params[drive].rps = v.rps;
+ drive_params[drive].tracks = v.tracks;
+ drive_params[drive].timeout = v.timeout;
+ drive_params[drive].interleave_sect = v.interleave_sect;
+ drive_params[drive].max_errors = v.max_errors;
+ drive_params[drive].flags = v.flags;
+ drive_params[drive].read_track = v.read_track;
+ memcpy(drive_params[drive].autodetect, v.autodetect,
+ sizeof(v.autodetect));
+ drive_params[drive].checkfreq = v.checkfreq;
+ drive_params[drive].native_format = v.native_format;
mutex_unlock(&floppy_mutex);
return 0;
}
@@ -3759,25 +3802,26 @@ static int compat_getdrvprm(int drive,
memset(&v, 0, sizeof(struct compat_floppy_drive_params));
mutex_lock(&floppy_mutex);
- v.cmos = UDP->cmos;
- v.max_dtr = UDP->max_dtr;
- v.hlt = UDP->hlt;
- v.hut = UDP->hut;
- v.srt = UDP->srt;
- v.spinup = UDP->spinup;
- v.spindown = UDP->spindown;
- v.spindown_offset = UDP->spindown_offset;
- v.select_delay = UDP->select_delay;
- v.rps = UDP->rps;
- v.tracks = UDP->tracks;
- v.timeout = UDP->timeout;
- v.interleave_sect = UDP->interleave_sect;
- v.max_errors = UDP->max_errors;
- v.flags = UDP->flags;
- v.read_track = UDP->read_track;
- memcpy(v.autodetect, UDP->autodetect, sizeof(v.autodetect));
- v.checkfreq = UDP->checkfreq;
- v.native_format = UDP->native_format;
+ v.cmos = drive_params[drive].cmos;
+ v.max_dtr = drive_params[drive].max_dtr;
+ v.hlt = drive_params[drive].hlt;
+ v.hut = drive_params[drive].hut;
+ v.srt = drive_params[drive].srt;
+ v.spinup = drive_params[drive].spinup;
+ v.spindown = drive_params[drive].spindown;
+ v.spindown_offset = drive_params[drive].spindown_offset;
+ v.select_delay = drive_params[drive].select_delay;
+ v.rps = drive_params[drive].rps;
+ v.tracks = drive_params[drive].tracks;
+ v.timeout = drive_params[drive].timeout;
+ v.interleave_sect = drive_params[drive].interleave_sect;
+ v.max_errors = drive_params[drive].max_errors;
+ v.flags = drive_params[drive].flags;
+ v.read_track = drive_params[drive].read_track;
+ memcpy(v.autodetect, drive_params[drive].autodetect,
+ sizeof(v.autodetect));
+ v.checkfreq = drive_params[drive].checkfreq;
+ v.native_format = drive_params[drive].native_format;
mutex_unlock(&floppy_mutex);
if (copy_to_user(arg, &v, sizeof(struct compat_floppy_drive_params)))
@@ -3800,20 +3844,20 @@ static int compat_getdrvstat(int drive, bool poll,
goto Eintr;
process_fd_request();
}
- v.spinup_date = UDRS->spinup_date;
- v.select_date = UDRS->select_date;
- v.first_read_date = UDRS->first_read_date;
- v.probed_format = UDRS->probed_format;
- v.track = UDRS->track;
- v.maxblock = UDRS->maxblock;
- v.maxtrack = UDRS->maxtrack;
- v.generation = UDRS->generation;
- v.keep_data = UDRS->keep_data;
- v.fd_ref = UDRS->fd_ref;
- v.fd_device = UDRS->fd_device;
- v.last_checked = UDRS->last_checked;
- v.dmabuf = (uintptr_t)UDRS->dmabuf;
- v.bufblocks = UDRS->bufblocks;
+ v.spinup_date = drive_state[drive].spinup_date;
+ v.select_date = drive_state[drive].select_date;
+ v.first_read_date = drive_state[drive].first_read_date;
+ v.probed_format = drive_state[drive].probed_format;
+ v.track = drive_state[drive].track;
+ v.maxblock = drive_state[drive].maxblock;
+ v.maxtrack = drive_state[drive].maxtrack;
+ v.generation = drive_state[drive].generation;
+ v.keep_data = drive_state[drive].keep_data;
+ v.fd_ref = drive_state[drive].fd_ref;
+ v.fd_device = drive_state[drive].fd_device;
+ v.last_checked = drive_state[drive].last_checked;
+ v.dmabuf = (uintptr_t) drive_state[drive].dmabuf;
+ v.bufblocks = drive_state[drive].bufblocks;
mutex_unlock(&floppy_mutex);
if (copy_to_user(arg, &v, sizeof(struct compat_floppy_drive_struct)))
@@ -3831,7 +3875,7 @@ static int compat_getfdcstat(int drive,
struct floppy_fdc_state v;
mutex_lock(&floppy_mutex);
- v = *UFDCS;
+ v = fdc_state[FDC(drive)];
mutex_unlock(&floppy_mutex);
memset(&v32, 0, sizeof(struct compat_floppy_fdc_state));
@@ -3861,7 +3905,7 @@ static int compat_werrorget(int drive,
memset(&v32, 0, sizeof(struct compat_floppy_write_errors));
mutex_lock(&floppy_mutex);
- v = *UDRWE;
+ v = write_errors[drive];
mutex_unlock(&floppy_mutex);
v32.write_errors = v.write_errors;
v32.first_error_sector = v.first_error_sector;
@@ -3930,16 +3974,16 @@ static void __init config_types(void)
/* read drive info out of physical CMOS */
drive = 0;
- if (!UDP->cmos)
- UDP->cmos = FLOPPY0_TYPE;
+ if (!drive_params[drive].cmos)
+ drive_params[drive].cmos = FLOPPY0_TYPE;
drive = 1;
- if (!UDP->cmos)
- UDP->cmos = FLOPPY1_TYPE;
+ if (!drive_params[drive].cmos)
+ drive_params[drive].cmos = FLOPPY1_TYPE;
/* FIXME: additional physical CMOS drive detection should go here */
for (drive = 0; drive < N_DRIVE; drive++) {
- unsigned int type = UDP->cmos;
+ unsigned int type = drive_params[drive].cmos;
struct floppy_drive_params *params;
const char *name = NULL;
char temparea[32];
@@ -3969,7 +4013,7 @@ static void __init config_types(void)
pr_cont("%s fd%d is %s", prepend, drive, name);
}
- *UDP = *params;
+ drive_params[drive] = *params;
}
if (has_drive)
@@ -3982,11 +4026,11 @@ static void floppy_release(struct gendisk *disk, fmode_t mode)
mutex_lock(&floppy_mutex);
mutex_lock(&open_lock);
- if (!UDRS->fd_ref--) {
+ if (!drive_state[drive].fd_ref--) {
DPRINT("floppy_release with fd_ref == 0");
- UDRS->fd_ref = 0;
+ drive_state[drive].fd_ref = 0;
}
- if (!UDRS->fd_ref)
+ if (!drive_state[drive].fd_ref)
opened_bdev[drive] = NULL;
mutex_unlock(&open_lock);
mutex_unlock(&floppy_mutex);
@@ -4007,16 +4051,16 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
mutex_lock(&floppy_mutex);
mutex_lock(&open_lock);
- old_dev = UDRS->fd_device;
+ old_dev = drive_state[drive].fd_device;
if (opened_bdev[drive] && opened_bdev[drive] != bdev)
goto out2;
- if (!UDRS->fd_ref && (UDP->flags & FD_BROKEN_DCL)) {
- set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags);
- set_bit(FD_VERIFY_BIT, &UDRS->flags);
+ if (!drive_state[drive].fd_ref && (drive_params[drive].flags & FD_BROKEN_DCL)) {
+ set_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags);
+ set_bit(FD_VERIFY_BIT, &drive_state[drive].flags);
}
- UDRS->fd_ref++;
+ drive_state[drive].fd_ref++;
opened_bdev[drive] = bdev;
@@ -4025,7 +4069,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
if (!floppy_track_buffer) {
/* if opening an ED drive, reserve a big buffer,
* else reserve a small one */
- if ((UDP->cmos == 6) || (UDP->cmos == 5))
+ if ((drive_params[drive].cmos == 6) || (drive_params[drive].cmos == 5))
try = 64; /* Only 48 actually useful */
else
try = 32; /* Only 24 actually useful */
@@ -4053,38 +4097,39 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
}
new_dev = MINOR(bdev->bd_dev);
- UDRS->fd_device = new_dev;
+ drive_state[drive].fd_device = new_dev;
set_capacity(disks[drive], floppy_sizes[new_dev]);
if (old_dev != -1 && old_dev != new_dev) {
if (buffer_drive == drive)
buffer_track = -1;
}
- if (UFDCS->rawcmd == 1)
- UFDCS->rawcmd = 2;
+ if (fdc_state[FDC(drive)].rawcmd == 1)
+ fdc_state[FDC(drive)].rawcmd = 2;
if (!(mode & FMODE_NDELAY)) {
if (mode & (FMODE_READ|FMODE_WRITE)) {
- UDRS->last_checked = 0;
- clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
+ drive_state[drive].last_checked = 0;
+ clear_bit(FD_OPEN_SHOULD_FAIL_BIT,
+ &drive_state[drive].flags);
check_disk_change(bdev);
- if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
+ if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags))
goto out;
- if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
+ if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags))
goto out;
}
res = -EROFS;
if ((mode & FMODE_WRITE) &&
- !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags))
+ !test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags))
goto out;
}
mutex_unlock(&open_lock);
mutex_unlock(&floppy_mutex);
return 0;
out:
- UDRS->fd_ref--;
+ drive_state[drive].fd_ref--;
- if (!UDRS->fd_ref)
+ if (!drive_state[drive].fd_ref)
opened_bdev[drive] = NULL;
out2:
mutex_unlock(&open_lock);
@@ -4100,19 +4145,19 @@ static unsigned int floppy_check_events(struct gendisk *disk,
{
int drive = (long)disk->private_data;
- if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
- test_bit(FD_VERIFY_BIT, &UDRS->flags))
+ if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags) ||
+ test_bit(FD_VERIFY_BIT, &drive_state[drive].flags))
return DISK_EVENT_MEDIA_CHANGE;
- if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) {
+ if (time_after(jiffies, drive_state[drive].last_checked + drive_params[drive].checkfreq)) {
if (lock_fdc(drive))
return 0;
poll_drive(false, 0);
process_fd_request();
}
- if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
- test_bit(FD_VERIFY_BIT, &UDRS->flags) ||
+ if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags) ||
+ test_bit(FD_VERIFY_BIT, &drive_state[drive].flags) ||
test_bit(drive, &fake_change) ||
drive_no_geom(drive))
return DISK_EVENT_MEDIA_CHANGE;
@@ -4138,7 +4183,7 @@ static void floppy_rb0_cb(struct bio *bio)
if (bio->bi_status) {
pr_info("floppy: error %d while reading block 0\n",
bio->bi_status);
- set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
+ set_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags);
}
complete(&cbdata->complete);
}
@@ -4195,8 +4240,8 @@ static int floppy_revalidate(struct gendisk *disk)
int cf;
int res = 0;
- if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
- test_bit(FD_VERIFY_BIT, &UDRS->flags) ||
+ if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags) ||
+ test_bit(FD_VERIFY_BIT, &drive_state[drive].flags) ||
test_bit(drive, &fake_change) ||
drive_no_geom(drive)) {
if (WARN(atomic_read(&usage_count) == 0,
@@ -4206,20 +4251,20 @@ static int floppy_revalidate(struct gendisk *disk)
res = lock_fdc(drive);
if (res)
return res;
- cf = (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
- test_bit(FD_VERIFY_BIT, &UDRS->flags));
+ cf = (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags) ||
+ test_bit(FD_VERIFY_BIT, &drive_state[drive].flags));
if (!(cf || test_bit(drive, &fake_change) || drive_no_geom(drive))) {
process_fd_request(); /*already done by another thread */
return 0;
}
- UDRS->maxblock = 0;
- UDRS->maxtrack = 0;
+ drive_state[drive].maxblock = 0;
+ drive_state[drive].maxtrack = 0;
if (buffer_drive == drive)
buffer_track = -1;
clear_bit(drive, &fake_change);
- clear_bit(FD_DISK_CHANGED_BIT, &UDRS->flags);
+ clear_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags);
if (cf)
- UDRS->generation++;
+ drive_state[drive].generation++;
if (drive_no_geom(drive)) {
/* auto-sensing */
res = __floppy_read_block_0(opened_bdev[drive], drive);
@@ -4229,7 +4274,7 @@ static int floppy_revalidate(struct gendisk *disk)
process_fd_request();
}
}
- set_capacity(disk, floppy_sizes[UDRS->fd_device]);
+ set_capacity(disk, floppy_sizes[drive_state[drive].fd_device]);
return res;
}
@@ -4258,23 +4303,23 @@ static char __init get_fdc_version(void)
int r;
output_byte(FD_DUMPREGS); /* 82072 and better know DUMPREGS */
- if (FDCS->reset)
+ if (fdc_state[current_fdc].reset)
return FDC_NONE;
r = result();
if (r <= 0x00)
return FDC_NONE; /* No FDC present ??? */
if ((r == 1) && (reply_buffer[0] == 0x80)) {
- pr_info("FDC %d is an 8272A\n", fdc);
+ pr_info("FDC %d is an 8272A\n", current_fdc);
return FDC_8272A; /* 8272a/765 don't know DUMPREGS */
}
if (r != 10) {
pr_info("FDC %d init: DUMPREGS: unexpected return of %d bytes.\n",
- fdc, r);
+ current_fdc, r);
return FDC_UNKNOWN;
}
if (!fdc_configure()) {
- pr_info("FDC %d is an 82072\n", fdc);
+ pr_info("FDC %d is an 82072\n", current_fdc);
return FDC_82072; /* 82072 doesn't know CONFIGURE */
}
@@ -4282,50 +4327,50 @@ static char __init get_fdc_version(void)
if (need_more_output() == MORE_OUTPUT) {
output_byte(0);
} else {
- pr_info("FDC %d is an 82072A\n", fdc);
+ pr_info("FDC %d is an 82072A\n", current_fdc);
return FDC_82072A; /* 82072A as found on Sparcs. */
}
output_byte(FD_UNLOCK);
r = result();
if ((r == 1) && (reply_buffer[0] == 0x80)) {
- pr_info("FDC %d is a pre-1991 82077\n", fdc);
+ pr_info("FDC %d is a pre-1991 82077\n", current_fdc);
return FDC_82077_ORIG; /* Pre-1991 82077, doesn't know
* LOCK/UNLOCK */
}
if ((r != 1) || (reply_buffer[0] != 0x00)) {
pr_info("FDC %d init: UNLOCK: unexpected return of %d bytes.\n",
- fdc, r);
+ current_fdc, r);
return FDC_UNKNOWN;
}
output_byte(FD_PARTID);
r = result();
if (r != 1) {
pr_info("FDC %d init: PARTID: unexpected return of %d bytes.\n",
- fdc, r);
+ current_fdc, r);
return FDC_UNKNOWN;
}
if (reply_buffer[0] == 0x80) {
- pr_info("FDC %d is a post-1991 82077\n", fdc);
+ pr_info("FDC %d is a post-1991 82077\n", current_fdc);
return FDC_82077; /* Revised 82077AA passes all the tests */
}
switch (reply_buffer[0] >> 5) {
case 0x0:
/* Either a 82078-1 or a 82078SL running at 5Volt */
- pr_info("FDC %d is an 82078.\n", fdc);
+ pr_info("FDC %d is an 82078.\n", current_fdc);
return FDC_82078;
case 0x1:
- pr_info("FDC %d is a 44pin 82078\n", fdc);
+ pr_info("FDC %d is a 44pin 82078\n", current_fdc);
return FDC_82078;
case 0x2:
- pr_info("FDC %d is a S82078B\n", fdc);
+ pr_info("FDC %d is a S82078B\n", current_fdc);
return FDC_S82078B;
case 0x3:
- pr_info("FDC %d is a National Semiconductor PC87306\n", fdc);
+ pr_info("FDC %d is a National Semiconductor PC87306\n", current_fdc);
return FDC_87306;
default:
pr_info("FDC %d init: 82078 variant with unknown PARTID=%d.\n",
- fdc, reply_buffer[0] >> 5);
+ current_fdc, reply_buffer[0] >> 5);
return FDC_82078_UNKN;
}
} /* get_fdc_version */
@@ -4381,7 +4426,7 @@ static void __init set_cmos(int *ints, int dummy, int dummy2)
if (current_drive >= 4 && !FDC2)
FDC2 = 0x370;
#endif
- DP->cmos = ints[2];
+ drive_params[current_drive].cmos = ints[2];
DPRINT("setting CMOS code to %d\n", ints[2]);
}
@@ -4470,7 +4515,7 @@ static ssize_t floppy_cmos_show(struct device *dev,
int drive;
drive = p->id;
- return sprintf(buf, "%X\n", UDP->cmos);
+ return sprintf(buf, "%X\n", drive_params[drive].cmos);
}
static DEVICE_ATTR(cmos, 0444, floppy_cmos_show, NULL);
@@ -4491,7 +4536,7 @@ static int floppy_resume(struct device *dev)
int fdc;
for (fdc = 0; fdc < N_FDC; fdc++)
- if (FDCS->address != -1)
+ if (fdc_state[fdc].address != -1)
user_reset_fdc(-1, FD_RESET_ALWAYS, false);
return 0;
@@ -4601,16 +4646,16 @@ static int __init do_floppy_init(void)
config_types();
for (i = 0; i < N_FDC; i++) {
- fdc = i;
- memset(FDCS, 0, sizeof(*FDCS));
- FDCS->dtr = -1;
- FDCS->dor = 0x4;
+ current_fdc = i;
+ memset(&fdc_state[current_fdc], 0, sizeof(*fdc_state));
+ fdc_state[current_fdc].dtr = -1;
+ fdc_state[current_fdc].dor = 0x4;
#if defined(__sparc__) || defined(__mc68000__)
/*sparcs/sun3x don't have a DOR reset which we can fall back on to */
#ifdef __mc68000__
if (MACH_IS_SUN3X)
#endif
- FDCS->version = FDC_82072A;
+ fdc_state[current_fdc].version = FDC_82072A;
#endif
}
@@ -4625,7 +4670,7 @@ static int __init do_floppy_init(void)
fdc_state[1].address = FDC2;
#endif
- fdc = 0; /* reset fdc in case of unexpected interrupt */
+ current_fdc = 0; /* reset fdc in case of unexpected interrupt */
err = floppy_grab_irq_and_dma();
if (err) {
cancel_delayed_work(&fd_timeout);
@@ -4635,12 +4680,12 @@ static int __init do_floppy_init(void)
/* initialise drive state */
for (drive = 0; drive < N_DRIVE; drive++) {
- memset(UDRS, 0, sizeof(*UDRS));
- memset(UDRWE, 0, sizeof(*UDRWE));
- set_bit(FD_DISK_NEWCHANGE_BIT, &UDRS->flags);
- set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags);
- set_bit(FD_VERIFY_BIT, &UDRS->flags);
- UDRS->fd_device = -1;
+ memset(&drive_state[drive], 0, sizeof(drive_state[drive]));
+ memset(&write_errors[drive], 0, sizeof(write_errors[drive]));
+ set_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[drive].flags);
+ set_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags);
+ set_bit(FD_VERIFY_BIT, &drive_state[drive].flags);
+ drive_state[drive].fd_device = -1;
floppy_track_buffer = NULL;
max_buffer_sectors = 0;
}
@@ -4652,29 +4697,30 @@ static int __init do_floppy_init(void)
msleep(10);
for (i = 0; i < N_FDC; i++) {
- fdc = i;
- FDCS->driver_version = FD_DRIVER_VERSION;
+ current_fdc = i;
+ fdc_state[current_fdc].driver_version = FD_DRIVER_VERSION;
for (unit = 0; unit < 4; unit++)
- FDCS->track[unit] = 0;
- if (FDCS->address == -1)
+ fdc_state[current_fdc].track[unit] = 0;
+ if (fdc_state[current_fdc].address == -1)
continue;
- FDCS->rawcmd = 2;
+ fdc_state[current_fdc].rawcmd = 2;
if (user_reset_fdc(-1, FD_RESET_ALWAYS, false)) {
/* free ioports reserved by floppy_grab_irq_and_dma() */
- floppy_release_regions(fdc);
- FDCS->address = -1;
- FDCS->version = FDC_NONE;
+ floppy_release_regions(current_fdc);
+ fdc_state[current_fdc].address = -1;
+ fdc_state[current_fdc].version = FDC_NONE;
continue;
}
/* Try to determine the floppy controller type */
- FDCS->version = get_fdc_version();
- if (FDCS->version == FDC_NONE) {
+ fdc_state[current_fdc].version = get_fdc_version();
+ if (fdc_state[current_fdc].version == FDC_NONE) {
/* free ioports reserved by floppy_grab_irq_and_dma() */
- floppy_release_regions(fdc);
- FDCS->address = -1;
+ floppy_release_regions(current_fdc);
+ fdc_state[current_fdc].address = -1;
continue;
}
- if (can_use_virtual_dma == 2 && FDCS->version < FDC_82072A)
+ if (can_use_virtual_dma == 2 &&
+ fdc_state[current_fdc].version < FDC_82072A)
can_use_virtual_dma = 0;
have_no_fdc = 0;
@@ -4684,7 +4730,7 @@ static int __init do_floppy_init(void)
*/
user_reset_fdc(-1, FD_RESET_ALWAYS, false);
}
- fdc = 0;
+ current_fdc = 0;
cancel_delayed_work(&fd_timeout);
current_drive = 0;
initialized = true;
@@ -4780,7 +4826,7 @@ static void floppy_release_allocated_regions(int fdc, const struct io_region *p)
{
while (p != io_regions) {
p--;
- release_region(FDCS->address + p->offset, p->size);
+ release_region(fdc_state[fdc].address + p->offset, p->size);
}
}
@@ -4791,10 +4837,10 @@ static int floppy_request_regions(int fdc)
const struct io_region *p;
for (p = io_regions; p < ARRAY_END(io_regions); p++) {
- if (!request_region(FDCS->address + p->offset,
+ if (!request_region(fdc_state[fdc].address + p->offset,
p->size, "floppy")) {
DPRINT("Floppy io-port 0x%04lx in use\n",
- FDCS->address + p->offset);
+ fdc_state[fdc].address + p->offset);
floppy_release_allocated_regions(fdc, p);
return -EBUSY;
}
@@ -4836,36 +4882,36 @@ static int floppy_grab_irq_and_dma(void)
}
}
- for (fdc = 0; fdc < N_FDC; fdc++) {
- if (FDCS->address != -1) {
- if (floppy_request_regions(fdc))
+ for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) {
+ if (fdc_state[current_fdc].address != -1) {
+ if (floppy_request_regions(current_fdc))
goto cleanup;
}
}
- for (fdc = 0; fdc < N_FDC; fdc++) {
- if (FDCS->address != -1) {
+ for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) {
+ if (fdc_state[current_fdc].address != -1) {
reset_fdc_info(1);
- fd_outb(FDCS->dor, FD_DOR);
+ fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR);
}
}
- fdc = 0;
+ current_fdc = 0;
set_dor(0, ~0, 8); /* avoid immediate interrupt */
- for (fdc = 0; fdc < N_FDC; fdc++)
- if (FDCS->address != -1)
- fd_outb(FDCS->dor, FD_DOR);
+ for (current_fdc = 0; current_fdc < N_FDC; current_fdc++)
+ if (fdc_state[current_fdc].address != -1)
+ fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR);
/*
* The driver will try and free resources and relies on us
* to know if they were allocated or not.
*/
- fdc = 0;
+ current_fdc = 0;
irqdma_allocated = 1;
return 0;
cleanup:
fd_free_irq();
fd_free_dma();
- while (--fdc >= 0)
- floppy_release_regions(fdc);
+ while (--current_fdc >= 0)
+ floppy_release_regions(current_fdc);
atomic_dec(&usage_count);
return -1;
}
@@ -4913,11 +4959,11 @@ static void floppy_release_irq_and_dma(void)
pr_info("auxiliary floppy timer still active\n");
if (work_pending(&floppy_work))
pr_info("work still pending\n");
- old_fdc = fdc;
- for (fdc = 0; fdc < N_FDC; fdc++)
- if (FDCS->address != -1)
- floppy_release_regions(fdc);
- fdc = old_fdc;
+ old_fdc = current_fdc;
+ for (current_fdc = 0; current_fdc < N_FDC; current_fdc++)
+ if (fdc_state[current_fdc].address != -1)
+ floppy_release_regions(current_fdc);
+ current_fdc = old_fdc;
}
#ifdef MODULE
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 739b372..a42c49e 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -214,7 +214,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
* LO_FLAGS_READ_ONLY, both are set from kernel, and losetup
* will get updated by ioctl(LOOP_GET_STATUS)
*/
- blk_mq_freeze_queue(lo->lo_queue);
+ if (lo->lo_state == Lo_bound)
+ blk_mq_freeze_queue(lo->lo_queue);
lo->use_dio = use_dio;
if (use_dio) {
blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, lo->lo_queue);
@@ -223,7 +224,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue);
lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
}
- blk_mq_unfreeze_queue(lo->lo_queue);
+ if (lo->lo_state == Lo_bound)
+ blk_mq_unfreeze_queue(lo->lo_queue);
}
static int
@@ -1539,16 +1541,16 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
return -EINVAL;
- if (lo->lo_queue->limits.logical_block_size != arg) {
- sync_blockdev(lo->lo_device);
- kill_bdev(lo->lo_device);
- }
+ if (lo->lo_queue->limits.logical_block_size == arg)
+ return 0;
+
+ sync_blockdev(lo->lo_device);
+ kill_bdev(lo->lo_device);
blk_mq_freeze_queue(lo->lo_queue);
/* kill_bdev should have truncated all the pages */
- if (lo->lo_queue->limits.logical_block_size != arg &&
- lo->lo_device->bd_inode->i_mapping->nrpages) {
+ if (lo->lo_device->bd_inode->i_mapping->nrpages) {
err = -EAGAIN;
pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
__func__, lo->lo_number, lo->lo_file_name,
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 7818190..43cff01 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -395,16 +395,19 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
}
config = nbd->config;
- if (config->num_connections > 1) {
+ if (config->num_connections > 1 ||
+ (config->num_connections == 1 && nbd->tag_set.timeout)) {
dev_err_ratelimited(nbd_to_dev(nbd),
"Connection timed out, retrying (%d/%d alive)\n",
atomic_read(&config->live_connections),
config->num_connections);
/*
* Hooray we have more connections, requeue this IO, the submit
- * path will put it on a real connection.
+ * path will put it on a real connection. Or if only one
+ * connection is configured, the submit path will wait util
+ * a new connection is reconfigured or util dead timeout.
*/
- if (config->socks && config->num_connections > 1) {
+ if (config->socks) {
if (cmd->index < config->num_connections) {
struct nbd_sock *nsock =
config->socks[cmd->index];
@@ -431,12 +434,22 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
* Userspace sets timeout=0 to disable socket disconnection,
* so just warn and reset the timer.
*/
+ struct nbd_sock *nsock = config->socks[cmd->index];
cmd->retries++;
dev_info(nbd_to_dev(nbd), "Possible stuck request %p: control (%s@%llu,%uB). Runtime %u seconds\n",
req, nbdcmd_to_ascii(req_to_nbd_cmd_type(req)),
(unsigned long long)blk_rq_pos(req) << 9,
blk_rq_bytes(req), (req->timeout / HZ) * cmd->retries);
+ mutex_lock(&nsock->tx_lock);
+ if (cmd->cookie != nsock->cookie) {
+ nbd_requeue_cmd(cmd);
+ mutex_unlock(&nsock->tx_lock);
+ mutex_unlock(&cmd->lock);
+ nbd_config_put(nbd);
+ return BLK_EH_DONE;
+ }
+ mutex_unlock(&nsock->tx_lock);
mutex_unlock(&cmd->lock);
nbd_config_put(nbd);
return BLK_EH_RESET_TIMER;
@@ -741,14 +754,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
result);
/*
- * If we've disconnected or we only have 1
- * connection then we need to make sure we
+ * If we've disconnected, we need to make sure we
* complete this request, otherwise error out
* and let the timeout stuff handle resubmitting
* this request onto another connection.
*/
- if (nbd_disconnected(config) ||
- config->num_connections <= 1) {
+ if (nbd_disconnected(config)) {
cmd->status = BLK_STS_IOERR;
goto out;
}
@@ -825,7 +836,7 @@ static int find_fallback(struct nbd_device *nbd, int index)
if (config->num_connections <= 1) {
dev_err_ratelimited(disk_to_dev(nbd->disk),
- "Attempted send on invalid socket\n");
+ "Dead connection, failed to find a fallback\n");
return new_index;
}
diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index bc83786..62b6608 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -14,9 +14,6 @@
#include <linux/fault-inject.h>
struct nullb_cmd {
- struct list_head list;
- struct llist_node ll_list;
- struct __call_single_data csd;
struct request *rq;
struct bio *bio;
unsigned int tag;
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 1651079..4e1c071 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -23,6 +23,7 @@
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
static DECLARE_FAULT_ATTR(null_timeout_attr);
static DECLARE_FAULT_ATTR(null_requeue_attr);
+static DECLARE_FAULT_ATTR(null_init_hctx_attr);
#endif
static inline u64 mb_per_tick(int mbps)
@@ -96,11 +97,21 @@ module_param_named(home_node, g_home_node, int, 0444);
MODULE_PARM_DESC(home_node, "Home node for the device");
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+/*
+ * For more details about fault injection, please refer to
+ * Documentation/fault-injection/fault-injection.rst.
+ */
static char g_timeout_str[80];
module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444);
+MODULE_PARM_DESC(timeout, "Fault injection. timeout=<interval>,<probability>,<space>,<times>");
static char g_requeue_str[80];
module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444);
+MODULE_PARM_DESC(requeue, "Fault injection. requeue=<interval>,<probability>,<space>,<times>");
+
+static char g_init_hctx_str[80];
+module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444);
+MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=<interval>,<probability>,<space>,<times>");
#endif
static int g_queue_mode = NULL_Q_MQ;
@@ -276,7 +287,7 @@ nullb_device_##NAME##_store(struct config_item *item, const char *page, \
{ \
int (*apply_fn)(struct nullb_device *dev, TYPE new_value) = APPLY;\
struct nullb_device *dev = to_nullb_device(item); \
- TYPE uninitialized_var(new_value); \
+ TYPE new_value = 0; \
int ret; \
\
ret = nullb_device_##TYPE##_attr_store(&new_value, page, count);\
@@ -302,6 +313,12 @@ static int nullb_apply_submit_queues(struct nullb_device *dev,
if (!nullb)
return 0;
+ /*
+ * Make sure that null_init_hctx() does not access nullb->queues[] past
+ * the end of that array.
+ */
+ if (submit_queues > nr_cpu_ids)
+ return -EINVAL;
set = nullb->tag_set;
blk_mq_update_nr_hw_queues(set, submit_queues);
return set->nr_hw_queues == submit_queues ? 0 : -ENOMEM;
@@ -605,6 +622,7 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
if (tag != -1U) {
cmd = &nq->cmds[tag];
cmd->tag = tag;
+ cmd->error = BLK_STS_OK;
cmd->nq = nq;
if (nq->dev->irqmode == NULL_IRQ_TIMER) {
hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
@@ -1385,6 +1403,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
cmd->timer.function = null_cmd_timer_expired;
}
cmd->rq = bd->rq;
+ cmd->error = BLK_STS_OK;
cmd->nq = nq;
blk_mq_start_request(bd->rq);
@@ -1408,12 +1427,6 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
return null_handle_cmd(cmd, sector, nr_sectors, req_op(bd->rq));
}
-static const struct blk_mq_ops null_mq_ops = {
- .queue_rq = null_queue_rq,
- .complete = null_complete_rq,
- .timeout = null_timeout_rq,
-};
-
static void cleanup_queue(struct nullb_queue *nq)
{
kfree(nq->tag_map);
@@ -1430,9 +1443,56 @@ static void cleanup_queues(struct nullb *nullb)
kfree(nullb->queues);
}
+static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
+{
+ struct nullb_queue *nq = hctx->driver_data;
+ struct nullb *nullb = nq->dev->nullb;
+
+ nullb->nr_queues--;
+}
+
+static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
+{
+ init_waitqueue_head(&nq->wait);
+ nq->queue_depth = nullb->queue_depth;
+ nq->dev = nullb->dev;
+}
+
+static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
+ unsigned int hctx_idx)
+{
+ struct nullb *nullb = hctx->queue->queuedata;
+ struct nullb_queue *nq;
+
+#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+ if (g_init_hctx_str[0] && should_fail(&null_init_hctx_attr, 1))
+ return -EFAULT;
+#endif
+
+ nq = &nullb->queues[hctx_idx];
+ hctx->driver_data = nq;
+ null_init_queue(nullb, nq);
+ nullb->nr_queues++;
+
+ return 0;
+}
+
+static const struct blk_mq_ops null_mq_ops = {
+ .queue_rq = null_queue_rq,
+ .complete = null_complete_rq,
+ .timeout = null_timeout_rq,
+ .init_hctx = null_init_hctx,
+ .exit_hctx = null_exit_hctx,
+};
+
static void null_del_dev(struct nullb *nullb)
{
- struct nullb_device *dev = nullb->dev;
+ struct nullb_device *dev;
+
+ if (!nullb)
+ return;
+
+ dev = nullb->dev;
ida_simple_remove(&nullb_indexes, nullb->index);
@@ -1473,33 +1533,6 @@ static const struct block_device_operations null_ops = {
.report_zones = null_report_zones,
};
-static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
-{
- BUG_ON(!nullb);
- BUG_ON(!nq);
-
- init_waitqueue_head(&nq->wait);
- nq->queue_depth = nullb->queue_depth;
- nq->dev = nullb->dev;
-}
-
-static void null_init_queues(struct nullb *nullb)
-{
- struct request_queue *q = nullb->q;
- struct blk_mq_hw_ctx *hctx;
- struct nullb_queue *nq;
- int i;
-
- queue_for_each_hw_ctx(q, hctx, i) {
- if (!hctx->nr_ctx || !hctx->tags)
- continue;
- nq = &nullb->queues[i];
- hctx->driver_data = nq;
- null_init_queue(nullb, nq);
- nullb->nr_queues++;
- }
-}
-
static int setup_commands(struct nullb_queue *nq)
{
struct nullb_cmd *cmd;
@@ -1518,8 +1551,6 @@ static int setup_commands(struct nullb_queue *nq)
for (i = 0; i < nq->queue_depth; i++) {
cmd = &nq->cmds[i];
- INIT_LIST_HEAD(&cmd->list);
- cmd->ll_list.next = NULL;
cmd->tag = -1U;
}
@@ -1528,8 +1559,7 @@ static int setup_commands(struct nullb_queue *nq)
static int setup_queues(struct nullb *nullb)
{
- nullb->queues = kcalloc(nullb->dev->submit_queues,
- sizeof(struct nullb_queue),
+ nullb->queues = kcalloc(nr_cpu_ids, sizeof(struct nullb_queue),
GFP_KERNEL);
if (!nullb->queues)
return -ENOMEM;
@@ -1671,6 +1701,8 @@ static bool null_setup_fault(void)
return false;
if (!__null_setup_fault(&null_requeue_attr, g_requeue_str))
return false;
+ if (!__null_setup_fault(&null_init_hctx_attr, g_init_hctx_str))
+ return false;
#endif
return true;
}
@@ -1714,19 +1746,17 @@ static int null_add_dev(struct nullb_device *dev)
goto out_cleanup_queues;
nullb->tag_set->timeout = 5 * HZ;
- nullb->q = blk_mq_init_queue(nullb->tag_set);
+ nullb->q = blk_mq_init_queue_data(nullb->tag_set, nullb);
if (IS_ERR(nullb->q)) {
rv = -ENOMEM;
goto out_cleanup_tags;
}
- null_init_queues(nullb);
} else if (dev->queue_mode == NULL_Q_BIO) {
- nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node);
+ nullb->q = blk_alloc_queue(null_queue_bio, dev->home_node);
if (!nullb->q) {
rv = -ENOMEM;
goto out_cleanup_queues;
}
- blk_queue_make_request(nullb->q, null_queue_bio);
rv = init_driver_queues(nullb);
if (rv)
goto out_cleanup_blk_queue;
@@ -1790,6 +1820,7 @@ static int null_add_dev(struct nullb_device *dev)
cleanup_queues(nullb);
out_free_nullb:
kfree(nullb);
+ dev->nullb = NULL;
out:
return rv;
}
diff --git a/drivers/block/null_blk_trace.c b/drivers/block/null_blk_trace.c
new file mode 100644
index 0000000..f246e7b
--- /dev/null
+++ b/drivers/block/null_blk_trace.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * null_blk trace related helpers.
+ *
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+#include "null_blk_trace.h"
+
+/*
+ * Helper to use for all null_blk traces to extract disk name.
+ */
+const char *nullb_trace_disk_name(struct trace_seq *p, char *name)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ if (name && *name)
+ trace_seq_printf(p, "disk=%s, ", name);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
diff --git a/drivers/block/null_blk_trace.h b/drivers/block/null_blk_trace.h
new file mode 100644
index 0000000..4f83032
--- /dev/null
+++ b/drivers/block/null_blk_trace.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * null_blk device driver tracepoints.
+ *
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM nullb
+
+#if !defined(_TRACE_NULLB_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_NULLB_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "null_blk.h"
+
+const char *nullb_trace_disk_name(struct trace_seq *p, char *name);
+
+#define __print_disk_name(name) nullb_trace_disk_name(p, name)
+
+#ifndef TRACE_HEADER_MULTI_READ
+static inline void __assign_disk_name(char *name, struct gendisk *disk)
+{
+ if (disk)
+ memcpy(name, disk->disk_name, DISK_NAME_LEN);
+ else
+ memset(name, 0, DISK_NAME_LEN);
+}
+#endif
+
+TRACE_EVENT(nullb_zone_op,
+ TP_PROTO(struct nullb_cmd *cmd, unsigned int zone_no,
+ unsigned int zone_cond),
+ TP_ARGS(cmd, zone_no, zone_cond),
+ TP_STRUCT__entry(
+ __array(char, disk, DISK_NAME_LEN)
+ __field(enum req_opf, op)
+ __field(unsigned int, zone_no)
+ __field(unsigned int, zone_cond)
+ ),
+ TP_fast_assign(
+ __entry->op = req_op(cmd->rq);
+ __entry->zone_no = zone_no;
+ __entry->zone_cond = zone_cond;
+ __assign_disk_name(__entry->disk, cmd->rq->rq_disk);
+ ),
+ TP_printk("%s req=%-15s zone_no=%u zone_cond=%-10s",
+ __print_disk_name(__entry->disk),
+ blk_op_str(__entry->op),
+ __entry->zone_no,
+ blk_zone_cond_str(__entry->zone_cond))
+);
+
+TRACE_EVENT(nullb_report_zones,
+ TP_PROTO(struct nullb *nullb, unsigned int nr_zones),
+ TP_ARGS(nullb, nr_zones),
+ TP_STRUCT__entry(
+ __array(char, disk, DISK_NAME_LEN)
+ __field(unsigned int, nr_zones)
+ ),
+ TP_fast_assign(
+ __entry->nr_zones = nr_zones;
+ __assign_disk_name(__entry->disk, nullb->disk);
+ ),
+ TP_printk("%s nr_zones=%u",
+ __print_disk_name(__entry->disk), __entry->nr_zones)
+);
+
+#endif /* _TRACE_NULLB_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE null_blk_trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index ed34785..673618d 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -2,6 +2,9 @@
#include <linux/vmalloc.h>
#include "null_blk.h"
+#define CREATE_TRACE_POINTS
+#include "null_blk_trace.h"
+
/* zone_size in MBs to sectors. */
#define ZONE_SIZE_SHIFT 11
@@ -80,6 +83,8 @@ int null_report_zones(struct gendisk *disk, sector_t sector,
return 0;
nr_zones = min(nr_zones, dev->nr_zones - first_zone);
+ trace_nullb_report_zones(nullb, nr_zones);
+
for (i = 0; i < nr_zones; i++) {
/*
* Stacked DM target drivers will remap the zone information by
@@ -148,6 +153,8 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
/* Invalid zone condition */
return BLK_STS_IOERR;
}
+
+ trace_nullb_zone_op(cmd, zno, zone->cond);
return BLK_STS_OK;
}
@@ -155,7 +162,8 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
sector_t sector)
{
struct nullb_device *dev = cmd->nq->dev;
- struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)];
+ unsigned int zone_no = null_zone_no(dev, sector);
+ struct blk_zone *zone = &dev->zones[zone_no];
size_t i;
switch (op) {
@@ -203,6 +211,8 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
default:
return BLK_STS_NOTSUPP;
}
+
+ trace_nullb_zone_op(cmd, zone_no, zone->cond);
return BLK_STS_OK;
}
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 117cfc8..cda5cf9 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -276,7 +276,7 @@ static const struct block_device_operations pcd_bdops = {
.release = pcd_block_release,
.ioctl = pcd_block_ioctl,
#ifdef CONFIG_COMPAT
- .ioctl = blkdev_compat_ptr_ioctl,
+ .compat_ioctl = blkdev_compat_ptr_ioctl,
#endif
.check_events = pcd_block_check_events,
};
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 5f970a7..0b944ac 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2493,7 +2493,6 @@ static void pkt_init_queue(struct pktcdvd_device *pd)
{
struct request_queue *q = pd->disk->queue;
- blk_queue_make_request(q, pkt_make_request);
blk_queue_logical_block_size(q, CD_FRAMESIZE);
blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS);
q->queuedata = pd;
@@ -2679,6 +2678,11 @@ static unsigned int pkt_check_events(struct gendisk *disk,
return attached_disk->fops->check_events(attached_disk, clearing);
}
+static char *pkt_devnode(struct gendisk *disk, umode_t *mode)
+{
+ return kasprintf(GFP_KERNEL, "pktcdvd/%s", disk->disk_name);
+}
+
static const struct block_device_operations pktcdvd_ops = {
.owner = THIS_MODULE,
.open = pkt_open,
@@ -2686,13 +2690,9 @@ static const struct block_device_operations pktcdvd_ops = {
.ioctl = pkt_ioctl,
.compat_ioctl = blkdev_compat_ptr_ioctl,
.check_events = pkt_check_events,
+ .devnode = pkt_devnode,
};
-static char *pktcdvd_devnode(struct gendisk *gd, umode_t *mode)
-{
- return kasprintf(GFP_KERNEL, "pktcdvd/%s", gd->disk_name);
-}
-
/*
* Set up mapping from pktcdvd device to CD-ROM device.
*/
@@ -2748,9 +2748,8 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
disk->fops = &pktcdvd_ops;
disk->flags = GENHD_FL_REMOVABLE;
strcpy(disk->disk_name, pd->name);
- disk->devnode = pktcdvd_devnode;
disk->private_data = pd;
- disk->queue = blk_alloc_queue(GFP_KERNEL);
+ disk->queue = blk_alloc_queue(pkt_make_request, NUMA_NO_NODE);
if (!disk->queue)
goto out_mem2;
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 4628e1a..821d4d8 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -737,7 +737,7 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
ps3vram_proc_init(dev);
- queue = blk_alloc_queue(GFP_KERNEL);
+ queue = blk_alloc_queue(ps3vram_make_request, NUMA_NO_NODE);
if (!queue) {
dev_err(&dev->core, "blk_alloc_queue failed\n");
error = -ENOMEM;
@@ -746,7 +746,6 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
priv->queue = queue;
queue->queuedata = dev;
- blk_queue_make_request(queue, ps3vram_make_request);
blk_queue_max_segments(queue, BLK_MAX_SEGMENTS);
blk_queue_max_segment_size(queue, BLK_MAX_SEGMENT_SIZE);
blk_queue_max_hw_sectors(queue, BLK_SAFE_MAX_SECTORS);
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index c47d28b..8ffa826 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -248,7 +248,7 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
return -ENOMEM;
}
- card->queue = blk_alloc_queue(GFP_KERNEL);
+ card->queue = blk_alloc_queue(rsxx_make_request, NUMA_NO_NODE);
if (!card->queue) {
dev_err(CARD_TO_DEV(card), "Failed queue alloc\n");
unregister_blkdev(card->major, DRIVER_NAME);
@@ -269,7 +269,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
blk_queue_logical_block_size(card->queue, blk_size);
}
- blk_queue_make_request(card->queue, rsxx_make_request);
blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index 111eb65..1914f54 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -80,7 +80,7 @@ struct dma_tracker {
struct dma_tracker_list {
spinlock_t lock;
int head;
- struct dma_tracker list[0];
+ struct dma_tracker list[];
};
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 4eaf97d..d84e8a8 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -885,11 +885,9 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
card->biotail = &card->bio;
spin_lock_init(&card->lock);
- card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
+ card->queue = blk_alloc_queue(mm_make_request, NUMA_NO_NODE);
if (!card->queue)
goto failed_alloc;
-
- blk_queue_make_request(card->queue, mm_make_request);
card->queue->queuedata = card;
tasklet_init(&card->tasklet, process_page, (unsigned long)card);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 5415876..f9b1e70 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -245,13 +245,20 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
if (err) {
virtqueue_kick(vblk->vqs[qid].vq);
- blk_mq_stop_hw_queue(hctx);
+ /* Don't stop the queue if -ENOMEM: we may have failed to
+ * bounce the buffer due to global resource outage.
+ */
+ if (err == -ENOSPC)
+ blk_mq_stop_hw_queue(hctx);
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
- /* Out of mem doesn't actually happen, since we fall back
- * to direct descriptors */
- if (err == -ENOMEM || err == -ENOSPC)
+ switch (err) {
+ case -ENOSPC:
return BLK_STS_DEV_RESOURCE;
- return BLK_STS_IOERR;
+ case -ENOMEM:
+ return BLK_STS_RESOURCE;
+ default:
+ return BLK_STS_IOERR;
+ }
}
if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
@@ -381,18 +388,15 @@ static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize)
cap_str_10,
cap_str_2);
- set_capacity(vblk->disk, capacity);
+ set_capacity_revalidate_and_notify(vblk->disk, capacity, true);
}
static void virtblk_config_changed_work(struct work_struct *work)
{
struct virtio_blk *vblk =
container_of(work, struct virtio_blk, config_work);
- char *envp[] = { "RESIZE=1", NULL };
virtblk_update_capacity(vblk, true);
- revalidate_disk(vblk->disk);
- kobject_uevent_env(&disk_to_dev(vblk->disk)->kobj, KOBJ_CHANGE, envp);
}
static void virtblk_config_changed(struct virtio_device *vdev)
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index e2ad6bb..915cf5b 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -213,6 +213,7 @@ struct blkfront_info
struct blk_mq_tag_set tag_set;
struct blkfront_ring_info *rinfo;
unsigned int nr_rings;
+ unsigned int rinfo_size;
/* Save uncomplete reqs and bios for migration. */
struct list_head requests;
struct bio_list bio_list;
@@ -259,6 +260,18 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo);
static void blkfront_gather_backend_features(struct blkfront_info *info);
static int negotiate_mq(struct blkfront_info *info);
+#define for_each_rinfo(info, ptr, idx) \
+ for ((ptr) = (info)->rinfo, (idx) = 0; \
+ (idx) < (info)->nr_rings; \
+ (idx)++, (ptr) = (void *)(ptr) + (info)->rinfo_size)
+
+static inline struct blkfront_ring_info *
+get_rinfo(const struct blkfront_info *info, unsigned int i)
+{
+ BUG_ON(i >= info->nr_rings);
+ return (void *)info->rinfo + i * info->rinfo_size;
+}
+
static int get_id_from_freelist(struct blkfront_ring_info *rinfo)
{
unsigned long free = rinfo->shadow_free;
@@ -883,8 +896,7 @@ static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
struct blkfront_info *info = hctx->queue->queuedata;
struct blkfront_ring_info *rinfo = NULL;
- BUG_ON(info->nr_rings <= qid);
- rinfo = &info->rinfo[qid];
+ rinfo = get_rinfo(info, qid);
blk_mq_start_request(qd->rq);
spin_lock_irqsave(&rinfo->ring_lock, flags);
if (RING_FULL(&rinfo->ring))
@@ -1181,6 +1193,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
static void xlvbd_release_gendisk(struct blkfront_info *info)
{
unsigned int minor, nr_minors, i;
+ struct blkfront_ring_info *rinfo;
if (info->rq == NULL)
return;
@@ -1188,9 +1201,7 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
/* No more blkif_request(). */
blk_mq_stop_hw_queues(info->rq);
- for (i = 0; i < info->nr_rings; i++) {
- struct blkfront_ring_info *rinfo = &info->rinfo[i];
-
+ for_each_rinfo(info, rinfo, i) {
/* No more gnttab callback work. */
gnttab_cancel_free_callback(&rinfo->callback);
@@ -1339,6 +1350,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
static void blkif_free(struct blkfront_info *info, int suspend)
{
unsigned int i;
+ struct blkfront_ring_info *rinfo;
/* Prevent new requests being issued until we fix things up. */
info->connected = suspend ?
@@ -1347,8 +1359,8 @@ static void blkif_free(struct blkfront_info *info, int suspend)
if (info->rq)
blk_mq_stop_hw_queues(info->rq);
- for (i = 0; i < info->nr_rings; i++)
- blkif_free_ring(&info->rinfo[i]);
+ for_each_rinfo(info, rinfo, i)
+ blkif_free_ring(rinfo);
kvfree(info->rinfo);
info->rinfo = NULL;
@@ -1775,6 +1787,7 @@ static int talk_to_blkback(struct xenbus_device *dev,
int err;
unsigned int i, max_page_order;
unsigned int ring_page_order;
+ struct blkfront_ring_info *rinfo;
if (!info)
return -ENODEV;
@@ -1788,9 +1801,7 @@ static int talk_to_blkback(struct xenbus_device *dev,
if (err)
goto destroy_blkring;
- for (i = 0; i < info->nr_rings; i++) {
- struct blkfront_ring_info *rinfo = &info->rinfo[i];
-
+ for_each_rinfo(info, rinfo, i) {
/* Create shared ring, alloc event channel. */
err = setup_blkring(dev, rinfo);
if (err)
@@ -1815,7 +1826,7 @@ static int talk_to_blkback(struct xenbus_device *dev,
/* We already got the number of queues/rings in _probe */
if (info->nr_rings == 1) {
- err = write_per_ring_nodes(xbt, &info->rinfo[0], dev->nodename);
+ err = write_per_ring_nodes(xbt, info->rinfo, dev->nodename);
if (err)
goto destroy_blkring;
} else {
@@ -1837,10 +1848,10 @@ static int talk_to_blkback(struct xenbus_device *dev,
goto abort_transaction;
}
- for (i = 0; i < info->nr_rings; i++) {
+ for_each_rinfo(info, rinfo, i) {
memset(path, 0, pathsize);
snprintf(path, pathsize, "%s/queue-%u", dev->nodename, i);
- err = write_per_ring_nodes(xbt, &info->rinfo[i], path);
+ err = write_per_ring_nodes(xbt, rinfo, path);
if (err) {
kfree(path);
goto destroy_blkring;
@@ -1868,9 +1879,8 @@ static int talk_to_blkback(struct xenbus_device *dev,
goto destroy_blkring;
}
- for (i = 0; i < info->nr_rings; i++) {
+ for_each_rinfo(info, rinfo, i) {
unsigned int j;
- struct blkfront_ring_info *rinfo = &info->rinfo[i];
for (j = 0; j < BLK_RING_SIZE(info); j++)
rinfo->shadow[j].req.u.rw.id = j + 1;
@@ -1900,6 +1910,7 @@ static int negotiate_mq(struct blkfront_info *info)
{
unsigned int backend_max_queues;
unsigned int i;
+ struct blkfront_ring_info *rinfo;
BUG_ON(info->nr_rings);
@@ -1911,20 +1922,16 @@ static int negotiate_mq(struct blkfront_info *info)
if (!info->nr_rings)
info->nr_rings = 1;
- info->rinfo = kvcalloc(info->nr_rings,
- struct_size(info->rinfo, shadow,
- BLK_RING_SIZE(info)),
- GFP_KERNEL);
+ info->rinfo_size = struct_size(info->rinfo, shadow,
+ BLK_RING_SIZE(info));
+ info->rinfo = kvcalloc(info->nr_rings, info->rinfo_size, GFP_KERNEL);
if (!info->rinfo) {
xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure");
info->nr_rings = 0;
return -ENOMEM;
}
- for (i = 0; i < info->nr_rings; i++) {
- struct blkfront_ring_info *rinfo;
-
- rinfo = &info->rinfo[i];
+ for_each_rinfo(info, rinfo, i) {
INIT_LIST_HEAD(&rinfo->indirect_pages);
INIT_LIST_HEAD(&rinfo->grants);
rinfo->dev_info = info;
@@ -2017,6 +2024,7 @@ static int blkif_recover(struct blkfront_info *info)
int rc;
struct bio *bio;
unsigned int segs;
+ struct blkfront_ring_info *rinfo;
blkfront_gather_backend_features(info);
/* Reset limits changed by blk_mq_update_nr_hw_queues(). */
@@ -2024,9 +2032,7 @@ static int blkif_recover(struct blkfront_info *info)
segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG);
- for (r_index = 0; r_index < info->nr_rings; r_index++) {
- struct blkfront_ring_info *rinfo = &info->rinfo[r_index];
-
+ for_each_rinfo(info, rinfo, r_index) {
rc = blkfront_setup_indirect(rinfo);
if (rc)
return rc;
@@ -2036,10 +2042,7 @@ static int blkif_recover(struct blkfront_info *info)
/* Now safe for us to use the shared ring */
info->connected = BLKIF_STATE_CONNECTED;
- for (r_index = 0; r_index < info->nr_rings; r_index++) {
- struct blkfront_ring_info *rinfo;
-
- rinfo = &info->rinfo[r_index];
+ for_each_rinfo(info, rinfo, r_index) {
/* Kick any other new requests queued since we resumed */
kick_pending_request_queues(rinfo);
}
@@ -2072,13 +2075,13 @@ static int blkfront_resume(struct xenbus_device *dev)
struct blkfront_info *info = dev_get_drvdata(&dev->dev);
int err = 0;
unsigned int i, j;
+ struct blkfront_ring_info *rinfo;
dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
bio_list_init(&info->bio_list);
INIT_LIST_HEAD(&info->requests);
- for (i = 0; i < info->nr_rings; i++) {
- struct blkfront_ring_info *rinfo = &info->rinfo[i];
+ for_each_rinfo(info, rinfo, i) {
struct bio_list merge_bio;
struct blk_shadow *shadow = rinfo->shadow;
@@ -2335,8 +2338,8 @@ static void blkfront_connect(struct blkfront_info *info)
unsigned long sector_size;
unsigned int physical_sector_size;
unsigned int binfo;
- char *envp[] = { "RESIZE=1", NULL };
int err, i;
+ struct blkfront_ring_info *rinfo;
switch (info->connected) {
case BLKIF_STATE_CONNECTED:
@@ -2350,10 +2353,7 @@ static void blkfront_connect(struct blkfront_info *info)
return;
printk(KERN_INFO "Setting capacity to %Lu\n",
sectors);
- set_capacity(info->gd, sectors);
- revalidate_disk(info->gd);
- kobject_uevent_env(&disk_to_dev(info->gd)->kobj,
- KOBJ_CHANGE, envp);
+ set_capacity_revalidate_and_notify(info->gd, sectors, true);
return;
case BLKIF_STATE_SUSPENDED:
@@ -2394,8 +2394,8 @@ static void blkfront_connect(struct blkfront_info *info)
"physical-sector-size",
sector_size);
blkfront_gather_backend_features(info);
- for (i = 0; i < info->nr_rings; i++) {
- err = blkfront_setup_indirect(&info->rinfo[i]);
+ for_each_rinfo(info, rinfo, i) {
+ err = blkfront_setup_indirect(rinfo);
if (err) {
xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
info->xbdev->otherend);
@@ -2416,8 +2416,8 @@ static void blkfront_connect(struct blkfront_info *info)
/* Kick pending requests. */
info->connected = BLKIF_STATE_CONNECTED;
- for (i = 0; i < info->nr_rings; i++)
- kick_pending_request_queues(&info->rinfo[i]);
+ for_each_rinfo(info, rinfo, i)
+ kick_pending_request_queues(rinfo);
device_add_disk(&info->xbdev->dev, info->gd, NULL);
@@ -2652,9 +2652,9 @@ static void purge_persistent_grants(struct blkfront_info *info)
{
unsigned int i;
unsigned long flags;
+ struct blkfront_ring_info *rinfo;
- for (i = 0; i < info->nr_rings; i++) {
- struct blkfront_ring_info *rinfo = &info->rinfo[i];
+ for_each_rinfo(info, rinfo, i) {
struct grant *gnt_list_entry, *tmp;
spin_lock_irqsave(&rinfo->ring_lock, flags);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 1bdb579..ebb234f 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -33,6 +33,7 @@
#include <linux/sysfs.h>
#include <linux/debugfs.h>
#include <linux/cpuhotplug.h>
+#include <linux/part_stat.h>
#include "zram_drv.h"
@@ -1894,7 +1895,7 @@ static int zram_add(void)
#ifdef CONFIG_ZRAM_WRITEBACK
spin_lock_init(&zram->wb_limit_lock);
#endif
- queue = blk_alloc_queue(GFP_KERNEL);
+ queue = blk_alloc_queue(zram_make_request, NUMA_NO_NODE);
if (!queue) {
pr_err("Error allocating disk queue for device %d\n",
device_id);
@@ -1902,8 +1903,6 @@ static int zram_add(void)
goto out_free_idr;
}
- blk_queue_make_request(queue, zram_make_request);
-
/* gendisk structure */
zram->disk = alloc_disk(1);
if (!zram->disk) {
diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c
index 15fa293..b20fdcb 100644
--- a/drivers/bus/moxtet.c
+++ b/drivers/bus/moxtet.c
@@ -465,7 +465,7 @@ static ssize_t input_read(struct file *file, char __user *buf, size_t len,
{
struct moxtet *moxtet = file->private_data;
u8 bin[TURRIS_MOX_MAX_MODULES];
- u8 hex[sizeof(buf) * 2 + 1];
+ u8 hex[sizeof(bin) * 2 + 1];
int ret, n;
ret = moxtet_spi_read(moxtet, bin);
diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c
index be79d6c..1bb00a9 100644
--- a/drivers/bus/sunxi-rsb.c
+++ b/drivers/bus/sunxi-rsb.c
@@ -345,7 +345,7 @@ static int sunxi_rsb_read(struct sunxi_rsb *rsb, u8 rtaddr, u8 addr,
if (ret)
goto unlock;
- *buf = readl(rsb->regs + RSB_DATA);
+ *buf = readl(rsb->regs + RSB_DATA) & GENMASK(len * 8 - 1, 0);
unlock:
mutex_unlock(&rsb->lock);
diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index f702c85..4400196 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -1266,6 +1266,8 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
SYSC_QUIRK("gpu", 0x50000000, 0x14, -1, -1, 0x00010201, 0xffffffff, 0),
SYSC_QUIRK("gpu", 0x50000000, 0xfe00, 0xfe10, -1, 0x40000000 , 0xffffffff,
SYSC_MODULE_QUIRK_SGX),
+ SYSC_QUIRK("lcdc", 0, 0, 0x54, -1, 0x4f201000, 0xffffffff,
+ SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY),
SYSC_QUIRK("usb_otg_hs", 0, 0x400, 0x404, 0x408, 0x00000050,
0xffffffff, SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY),
SYSC_QUIRK("usb_otg_hs", 0, 0, 0x10, -1, 0x4ea2080d, 0xffffffff,
@@ -1294,7 +1296,6 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
SYSC_QUIRK("gpu", 0, 0xfe00, 0xfe10, -1, 0x40000000 , 0xffffffff, 0),
SYSC_QUIRK("hsi", 0, 0, 0x10, 0x14, 0x50043101, 0xffffffff, 0),
SYSC_QUIRK("iss", 0, 0, 0x10, -1, 0x40000101, 0xffffffff, 0),
- SYSC_QUIRK("lcdc", 0, 0, 0x54, -1, 0x4f201000, 0xffffffff, 0),
SYSC_QUIRK("mcasp", 0, 0, 0x4, -1, 0x44306302, 0xffffffff, 0),
SYSC_QUIRK("mcasp", 0, 0, 0x4, -1, 0x44307b02, 0xffffffff, 0),
SYSC_QUIRK("mcbsp", 0, -1, 0x8c, -1, 0, 0, 0),
@@ -1400,7 +1401,7 @@ static void sysc_init_revision_quirks(struct sysc *ddata)
}
/* 1-wire needs module's internal clocks enabled for reset */
-static void sysc_clk_enable_quirk_hdq1w(struct sysc *ddata)
+static void sysc_pre_reset_quirk_hdq1w(struct sysc *ddata)
{
int offset = 0x0c; /* HDQ_CTRL_STATUS */
u16 val;
@@ -1488,7 +1489,7 @@ static void sysc_init_module_quirks(struct sysc *ddata)
return;
if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_HDQ1W) {
- ddata->clk_enable_quirk = sysc_clk_enable_quirk_hdq1w;
+ ddata->clk_disable_quirk = sysc_pre_reset_quirk_hdq1w;
return;
}
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 886b263..c51292c 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -519,7 +519,7 @@ static const struct block_device_operations gdrom_bdops = {
.check_events = gdrom_bdops_check_events,
.ioctl = gdrom_bdops_ioctl,
#ifdef CONFIG_COMPAT
- .ioctl = blkdev_compat_ptr_ioctl,
+ .compat_ioctl = blkdev_compat_ptr_ioctl,
#endif
};
diff --git a/drivers/char/ipmi/ipmb_dev_int.c b/drivers/char/ipmi/ipmb_dev_int.c
index 1ff4fb1..382b28f 100644
--- a/drivers/char/ipmi/ipmb_dev_int.c
+++ b/drivers/char/ipmi/ipmb_dev_int.c
@@ -19,7 +19,7 @@
#include <linux/spinlock.h>
#include <linux/wait.h>
-#define MAX_MSG_LEN 128
+#define MAX_MSG_LEN 240
#define IPMB_REQUEST_LEN_MIN 7
#define NETFN_RSP_BIT_MASK 0x4
#define REQUEST_QUEUE_MAX_LEN 256
@@ -63,6 +63,7 @@ struct ipmb_dev {
spinlock_t lock;
wait_queue_head_t wait_queue;
struct mutex file_mutex;
+ bool is_i2c_protocol;
};
static inline struct ipmb_dev *to_ipmb_dev(struct file *file)
@@ -112,6 +113,25 @@ static ssize_t ipmb_read(struct file *file, char __user *buf, size_t count,
return ret < 0 ? ret : count;
}
+static int ipmb_i2c_write(struct i2c_client *client, u8 *msg, u8 addr)
+{
+ struct i2c_msg i2c_msg;
+
+ /*
+ * subtract 1 byte (rq_sa) from the length of the msg passed to
+ * raw i2c_transfer
+ */
+ i2c_msg.len = msg[IPMB_MSG_LEN_IDX] - 1;
+
+ /* Assign message to buffer except first 2 bytes (length and address) */
+ i2c_msg.buf = msg + 2;
+
+ i2c_msg.addr = addr;
+ i2c_msg.flags = client->flags & I2C_CLIENT_PEC;
+
+ return i2c_transfer(client->adapter, &i2c_msg, 1);
+}
+
static ssize_t ipmb_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
@@ -133,6 +153,12 @@ static ssize_t ipmb_write(struct file *file, const char __user *buf,
rq_sa = GET_7BIT_ADDR(msg[RQ_SA_8BIT_IDX]);
netf_rq_lun = msg[NETFN_LUN_IDX];
+ /* Check i2c block transfer vs smbus */
+ if (ipmb_dev->is_i2c_protocol) {
+ ret = ipmb_i2c_write(ipmb_dev->client, msg, rq_sa);
+ return (ret == 1) ? count : ret;
+ }
+
/*
* subtract rq_sa and netf_rq_lun from the length of the msg passed to
* i2c_smbus_xfer
@@ -253,7 +279,7 @@ static int ipmb_slave_cb(struct i2c_client *client,
break;
case I2C_SLAVE_WRITE_RECEIVED:
- if (ipmb_dev->msg_idx >= sizeof(struct ipmb_msg))
+ if (ipmb_dev->msg_idx >= sizeof(struct ipmb_msg) - 1)
break;
buf[++ipmb_dev->msg_idx] = *val;
@@ -302,6 +328,9 @@ static int ipmb_probe(struct i2c_client *client,
if (ret)
return ret;
+ ipmb_dev->is_i2c_protocol
+ = device_property_read_bool(&client->dev, "i2c-protocol");
+
ipmb_dev->client = client;
i2c_set_clientdata(client, ipmb_dev);
ret = i2c_slave_register(client, ipmb_slave_cb);
diff --git a/drivers/char/ipmi/ipmi_si_platform.c b/drivers/char/ipmi/ipmi_si_platform.c
index c78127c..638c693 100644
--- a/drivers/char/ipmi/ipmi_si_platform.c
+++ b/drivers/char/ipmi/ipmi_si_platform.c
@@ -194,7 +194,7 @@ static int platform_ipmi_probe(struct platform_device *pdev)
else
io.slave_addr = slave_addr;
- io.irq = platform_get_irq(pdev, 0);
+ io.irq = platform_get_irq_optional(pdev, 0);
if (io.irq > 0)
io.irq_setup = ipmi_std_irq_setup;
else
@@ -378,7 +378,7 @@ static int acpi_ipmi_probe(struct platform_device *pdev)
io.irq = tmp;
io.irq_setup = acpi_gpe_irq_setup;
} else {
- int irq = platform_get_irq(pdev, 0);
+ int irq = platform_get_irq_optional(pdev, 0);
if (irq > 0) {
io.irq = irq;
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 22c6a2e..8ac390c 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -775,10 +775,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
msg = ssif_info->curr_msg;
if (msg) {
+ if (data) {
+ if (len > IPMI_MAX_MSG_LENGTH)
+ len = IPMI_MAX_MSG_LENGTH;
+ memcpy(msg->rsp, data, len);
+ } else {
+ len = 0;
+ }
msg->rsp_size = len;
- if (msg->rsp_size > IPMI_MAX_MSG_LENGTH)
- msg->rsp_size = IPMI_MAX_MSG_LENGTH;
- memcpy(msg->rsp, data, msg->rsp_size);
ssif_info->curr_msg = NULL;
}
diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
index 5a0d99d..9567e51 100644
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -21,9 +21,11 @@
tpm-$(CONFIG_OF) += eventlog/of.o
obj-$(CONFIG_TCG_TIS_CORE) += tpm_tis_core.o
obj-$(CONFIG_TCG_TIS) += tpm_tis.o
-obj-$(CONFIG_TCG_TIS_SPI) += tpm_tis_spi_mod.o
-tpm_tis_spi_mod-y := tpm_tis_spi.o
-tpm_tis_spi_mod-$(CONFIG_TCG_TIS_SPI_CR50) += tpm_tis_spi_cr50.o
+
+obj-$(CONFIG_TCG_TIS_SPI) += tpm_tis_spi.o
+tpm_tis_spi-y := tpm_tis_spi_main.o
+tpm_tis_spi-$(CONFIG_TCG_TIS_SPI_CR50) += tpm_tis_spi_cr50.o
+
obj-$(CONFIG_TCG_TIS_I2C_ATMEL) += tpm_i2c_atmel.o
obj-$(CONFIG_TCG_TIS_I2C_INFINEON) += tpm_i2c_infineon.o
obj-$(CONFIG_TCG_TIS_I2C_NUVOTON) += tpm_i2c_nuvoton.o
diff --git a/drivers/char/tpm/eventlog/common.c b/drivers/char/tpm/eventlog/common.c
index 7a0fca6..7460f23 100644
--- a/drivers/char/tpm/eventlog/common.c
+++ b/drivers/char/tpm/eventlog/common.c
@@ -99,11 +99,8 @@ static int tpm_read_log(struct tpm_chip *chip)
*
* If an event log is found then the securityfs files are setup to
* export it to userspace, otherwise nothing is done.
- *
- * Returns -ENODEV if the firmware has no event log or securityfs is not
- * supported.
*/
-int tpm_bios_log_setup(struct tpm_chip *chip)
+void tpm_bios_log_setup(struct tpm_chip *chip)
{
const char *name = dev_name(&chip->dev);
unsigned int cnt;
@@ -112,7 +109,7 @@ int tpm_bios_log_setup(struct tpm_chip *chip)
rc = tpm_read_log(chip);
if (rc < 0)
- return rc;
+ return;
log_version = rc;
cnt = 0;
@@ -158,13 +155,12 @@ int tpm_bios_log_setup(struct tpm_chip *chip)
cnt++;
}
- return 0;
+ return;
err:
- rc = PTR_ERR(chip->bios_dir[cnt]);
chip->bios_dir[cnt] = NULL;
tpm_bios_log_teardown(chip);
- return rc;
+ return;
}
void tpm_bios_log_teardown(struct tpm_chip *chip)
diff --git a/drivers/char/tpm/eventlog/of.c b/drivers/char/tpm/eventlog/of.c
index af347c1..a9ce66d 100644
--- a/drivers/char/tpm/eventlog/of.c
+++ b/drivers/char/tpm/eventlog/of.c
@@ -51,7 +51,8 @@ int tpm_read_log_of(struct tpm_chip *chip)
* endian format. For this reason, vtpm doesn't need conversion
* but physical tpm needs the conversion.
*/
- if (of_property_match_string(np, "compatible", "IBM,vtpm") < 0) {
+ if (of_property_match_string(np, "compatible", "IBM,vtpm") < 0 &&
+ of_property_match_string(np, "compatible", "IBM,vtpm20") < 0) {
size = be32_to_cpup((__force __be32 *)sizep);
base = be64_to_cpup((__force __be64 *)basep);
} else {
diff --git a/drivers/char/tpm/eventlog/tpm1.c b/drivers/char/tpm/eventlog/tpm1.c
index 739b1d9..2c96977 100644
--- a/drivers/char/tpm/eventlog/tpm1.c
+++ b/drivers/char/tpm/eventlog/tpm1.c
@@ -115,6 +115,7 @@ static void *tpm1_bios_measurements_next(struct seq_file *m, void *v,
u32 converted_event_size;
u32 converted_event_type;
+ (*pos)++;
converted_event_size = do_endian_conversion(event->event_size);
v += sizeof(struct tcpa_event) + converted_event_size;
@@ -132,7 +133,6 @@ static void *tpm1_bios_measurements_next(struct seq_file *m, void *v,
((v + sizeof(struct tcpa_event) + converted_event_size) > limit))
return NULL;
- (*pos)++;
return v;
}
diff --git a/drivers/char/tpm/eventlog/tpm2.c b/drivers/char/tpm/eventlog/tpm2.c
index b9aeda1..e741b11 100644
--- a/drivers/char/tpm/eventlog/tpm2.c
+++ b/drivers/char/tpm/eventlog/tpm2.c
@@ -94,6 +94,7 @@ static void *tpm2_bios_measurements_next(struct seq_file *m, void *v,
size_t event_size;
void *marker;
+ (*pos)++;
event_header = log->bios_event_log;
if (v == SEQ_START_TOKEN) {
@@ -118,7 +119,6 @@ static void *tpm2_bios_measurements_next(struct seq_file *m, void *v,
if (((v + event_size) >= limit) || (event_size == 0))
return NULL;
- (*pos)++;
return v;
}
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index 3d6d394..5807383 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -596,9 +596,7 @@ int tpm_chip_register(struct tpm_chip *chip)
tpm_sysfs_add_device(chip);
- rc = tpm_bios_log_setup(chip);
- if (rc != 0 && rc != -ENODEV)
- return rc;
+ tpm_bios_log_setup(chip);
tpm_add_ppi(chip);
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 5620747d..0fbcede 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -226,6 +226,7 @@ int tpm2_auto_startup(struct tpm_chip *chip);
void tpm2_shutdown(struct tpm_chip *chip, u16 shutdown_type);
unsigned long tpm2_calc_ordinal_duration(struct tpm_chip *chip, u32 ordinal);
int tpm2_probe(struct tpm_chip *chip);
+int tpm2_get_cc_attrs_tbl(struct tpm_chip *chip);
int tpm2_find_cc(struct tpm_chip *chip, u32 cc);
int tpm2_init_space(struct tpm_space *space);
void tpm2_del_space(struct tpm_chip *chip, struct tpm_space *space);
@@ -235,7 +236,7 @@ int tpm2_prepare_space(struct tpm_chip *chip, struct tpm_space *space, u8 *cmd,
int tpm2_commit_space(struct tpm_chip *chip, struct tpm_space *space, void *buf,
size_t *bufsiz);
-int tpm_bios_log_setup(struct tpm_chip *chip);
+void tpm_bios_log_setup(struct tpm_chip *chip);
void tpm_bios_log_teardown(struct tpm_chip *chip);
int tpm_dev_common_init(void);
void tpm_dev_common_exit(void);
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index 13696de..76f67b1 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -525,6 +525,8 @@ static int tpm2_init_bank_info(struct tpm_chip *chip, u32 bank_index)
return 0;
}
+ bank->crypto_id = HASH_ALGO__LAST;
+
return tpm2_pcr_read(chip, 0, &digest, &bank->digest_size);
}
@@ -613,7 +615,7 @@ ssize_t tpm2_get_pcr_allocation(struct tpm_chip *chip)
return rc;
}
-static int tpm2_get_cc_attrs_tbl(struct tpm_chip *chip)
+int tpm2_get_cc_attrs_tbl(struct tpm_chip *chip)
{
struct tpm_buf buf;
u32 nr_commands;
diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
index 78cc526..1a49db9 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.c
+++ b/drivers/char/tpm/tpm_ibmvtpm.c
@@ -29,6 +29,7 @@ static const char tpm_ibmvtpm_driver_name[] = "tpm_ibmvtpm";
static const struct vio_device_id tpm_ibmvtpm_device_table[] = {
{ "IBM,vtpm", "IBM,vtpm"},
+ { "IBM,vtpm", "IBM,vtpm20"},
{ "", "" }
};
MODULE_DEVICE_TABLE(vio, tpm_ibmvtpm_device_table);
@@ -571,6 +572,7 @@ static irqreturn_t ibmvtpm_interrupt(int irq, void *vtpm_instance)
*/
while ((crq = ibmvtpm_crq_get_next(ibmvtpm)) != NULL) {
ibmvtpm_crq_process(crq, ibmvtpm);
+ wake_up_interruptible(&ibmvtpm->crq_queue.wq);
crq->valid = 0;
smp_wmb();
}
@@ -618,6 +620,7 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev,
}
crq_q->num_entry = CRQ_RES_BUF_SIZE / sizeof(*crq_q->crq_addr);
+ init_waitqueue_head(&crq_q->wq);
ibmvtpm->crq_dma_handle = dma_map_single(dev, crq_q->crq_addr,
CRQ_RES_BUF_SIZE,
DMA_BIDIRECTIONAL);
@@ -670,6 +673,20 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev,
if (rc)
goto init_irq_cleanup;
+ if (!strcmp(id->compat, "IBM,vtpm20")) {
+ chip->flags |= TPM_CHIP_FLAG_TPM2;
+ rc = tpm2_get_cc_attrs_tbl(chip);
+ if (rc)
+ goto init_irq_cleanup;
+ }
+
+ if (!wait_event_timeout(ibmvtpm->crq_queue.wq,
+ ibmvtpm->rtce_buf != NULL,
+ HZ)) {
+ dev_err(dev, "CRQ response timed out\n");
+ goto init_irq_cleanup;
+ }
+
return tpm_chip_register(chip);
init_irq_cleanup:
do {
diff --git a/drivers/char/tpm/tpm_ibmvtpm.h b/drivers/char/tpm/tpm_ibmvtpm.h
index 7983f1a..b92aa7d 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.h
+++ b/drivers/char/tpm/tpm_ibmvtpm.h
@@ -26,6 +26,7 @@ struct ibmvtpm_crq_queue {
struct ibmvtpm_crq *crq_addr;
u32 index;
u32 num_entry;
+ wait_queue_head_t wq;
};
struct ibmvtpm_dev {
diff --git a/drivers/char/tpm/tpm_tis_spi_cr50.c b/drivers/char/tpm/tpm_tis_spi_cr50.c
index 37d72e8..ea759af 100644
--- a/drivers/char/tpm/tpm_tis_spi_cr50.c
+++ b/drivers/char/tpm/tpm_tis_spi_cr50.c
@@ -132,7 +132,12 @@ static void cr50_wake_if_needed(struct cr50_spi_phy *cr50_phy)
if (cr50_needs_waking(cr50_phy)) {
/* Assert CS, wait 1 msec, deassert CS */
- struct spi_transfer spi_cs_wake = { .delay_usecs = 1000 };
+ struct spi_transfer spi_cs_wake = {
+ .delay = {
+ .value = 1000,
+ .unit = SPI_DELAY_UNIT_USECS
+ }
+ };
spi_sync_transfer(phy->spi_device, &spi_cs_wake, 1);
/* Wait for it to fully wake */
diff --git a/drivers/char/tpm/tpm_tis_spi.c b/drivers/char/tpm/tpm_tis_spi_main.c
similarity index 98%
rename from drivers/char/tpm/tpm_tis_spi.c
rename to drivers/char/tpm/tpm_tis_spi_main.c
index d1754fd..d967559 100644
--- a/drivers/char/tpm/tpm_tis_spi.c
+++ b/drivers/char/tpm/tpm_tis_spi_main.c
@@ -110,7 +110,8 @@ int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len,
spi_xfer.cs_change = 0;
spi_xfer.len = transfer_len;
- spi_xfer.delay_usecs = 5;
+ spi_xfer.delay.value = 5;
+ spi_xfer.delay.unit = SPI_DELAY_UNIT_USECS;
if (in) {
spi_xfer.tx_buf = NULL;
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index f0f2b59..95adf6c 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -4713,7 +4713,7 @@ EXPORT_SYMBOL(of_clk_get_by_name);
*
* Returns: The number of clocks that are possible parents of this node
*/
-unsigned int of_clk_get_parent_count(struct device_node *np)
+unsigned int of_clk_get_parent_count(const struct device_node *np)
{
int count;
@@ -4725,7 +4725,7 @@ unsigned int of_clk_get_parent_count(struct device_node *np)
}
EXPORT_SYMBOL_GPL(of_clk_get_parent_count);
-const char *of_clk_get_parent_name(struct device_node *np, int index)
+const char *of_clk_get_parent_name(const struct device_node *np, int index)
{
struct of_phandle_args clkspec;
struct property *prop;
diff --git a/drivers/clk/imx/clk-imx8mp.c b/drivers/clk/imx/clk-imx8mp.c
index f6c120c..cf19290 100644
--- a/drivers/clk/imx/clk-imx8mp.c
+++ b/drivers/clk/imx/clk-imx8mp.c
@@ -560,7 +560,7 @@ static int imx8mp_clocks_probe(struct platform_device *pdev)
hws[IMX8MP_CLK_MEDIA_AXI] = imx8m_clk_hw_composite("media_axi", imx8mp_media_axi_sels, ccm_base + 0x8a00);
hws[IMX8MP_CLK_MEDIA_APB] = imx8m_clk_hw_composite("media_apb", imx8mp_media_apb_sels, ccm_base + 0x8a80);
hws[IMX8MP_CLK_HDMI_APB] = imx8m_clk_hw_composite("hdmi_apb", imx8mp_media_apb_sels, ccm_base + 0x8b00);
- hws[IMX8MP_CLK_HDMI_AXI] = imx8m_clk_hw_composite("hdmi_axi", imx8mp_media_apb_sels, ccm_base + 0x8b80);
+ hws[IMX8MP_CLK_HDMI_AXI] = imx8m_clk_hw_composite("hdmi_axi", imx8mp_media_axi_sels, ccm_base + 0x8b80);
hws[IMX8MP_CLK_GPU_AXI] = imx8m_clk_hw_composite("gpu_axi", imx8mp_gpu_axi_sels, ccm_base + 0x8c00);
hws[IMX8MP_CLK_GPU_AHB] = imx8m_clk_hw_composite("gpu_ahb", imx8mp_gpu_ahb_sels, ccm_base + 0x8c80);
hws[IMX8MP_CLK_NOC] = imx8m_clk_hw_composite_critical("noc", imx8mp_noc_sels, ccm_base + 0x8d00);
@@ -686,7 +686,7 @@ static int imx8mp_clocks_probe(struct platform_device *pdev)
hws[IMX8MP_CLK_CAN1_ROOT] = imx_clk_hw_gate2("can1_root_clk", "can1", ccm_base + 0x4350, 0);
hws[IMX8MP_CLK_CAN2_ROOT] = imx_clk_hw_gate2("can2_root_clk", "can2", ccm_base + 0x4360, 0);
hws[IMX8MP_CLK_SDMA1_ROOT] = imx_clk_hw_gate4("sdma1_root_clk", "ipg_root", ccm_base + 0x43a0, 0);
- hws[IMX8MP_CLK_ENET_QOS_ROOT] = imx_clk_hw_gate4("enet_qos_root_clk", "enet_axi", ccm_base + 0x43b0, 0);
+ hws[IMX8MP_CLK_ENET_QOS_ROOT] = imx_clk_hw_gate4("enet_qos_root_clk", "sim_enet_root_clk", ccm_base + 0x43b0, 0);
hws[IMX8MP_CLK_SIM_ENET_ROOT] = imx_clk_hw_gate4("sim_enet_root_clk", "enet_axi", ccm_base + 0x4400, 0);
hws[IMX8MP_CLK_GPU2D_ROOT] = imx_clk_hw_gate4("gpu2d_root_clk", "gpu2d_div", ccm_base + 0x4450, 0);
hws[IMX8MP_CLK_GPU3D_ROOT] = imx_clk_hw_gate4("gpu3d_root_clk", "gpu3d_core_div", ccm_base + 0x4460, 0);
diff --git a/drivers/clk/imx/clk-scu.c b/drivers/clk/imx/clk-scu.c
index fbef740..b8b2072 100644
--- a/drivers/clk/imx/clk-scu.c
+++ b/drivers/clk/imx/clk-scu.c
@@ -43,12 +43,12 @@ struct imx_sc_msg_req_set_clock_rate {
__le32 rate;
__le16 resource;
u8 clk;
-} __packed;
+} __packed __aligned(4);
struct req_get_clock_rate {
__le16 resource;
u8 clk;
-} __packed;
+} __packed __aligned(4);
struct resp_get_clock_rate {
__le32 rate;
@@ -84,7 +84,7 @@ struct imx_sc_msg_get_clock_parent {
struct req_get_clock_parent {
__le16 resource;
u8 clk;
- } __packed req;
+ } __packed __aligned(4) req;
struct resp_get_clock_parent {
u8 parent;
} resp;
@@ -121,7 +121,7 @@ struct imx_sc_msg_req_clock_enable {
u8 clk;
u8 enable;
u8 autog;
-} __packed;
+} __packed __aligned(4);
static inline struct clk_scu *to_clk_scu(struct clk_hw *hw)
{
diff --git a/drivers/clk/qcom/dispcc-sc7180.c b/drivers/clk/qcom/dispcc-sc7180.c
index dd7af41..0a5d395 100644
--- a/drivers/clk/qcom/dispcc-sc7180.c
+++ b/drivers/clk/qcom/dispcc-sc7180.c
@@ -592,24 +592,6 @@ static struct clk_branch disp_cc_mdss_rot_clk = {
},
};
-static struct clk_branch disp_cc_mdss_rscc_ahb_clk = {
- .halt_reg = 0x400c,
- .halt_check = BRANCH_HALT,
- .clkr = {
- .enable_reg = 0x400c,
- .enable_mask = BIT(0),
- .hw.init = &(struct clk_init_data){
- .name = "disp_cc_mdss_rscc_ahb_clk",
- .parent_data = &(const struct clk_parent_data){
- .hw = &disp_cc_mdss_ahb_clk_src.clkr.hw,
- },
- .num_parents = 1,
- .flags = CLK_IS_CRITICAL | CLK_SET_RATE_PARENT,
- .ops = &clk_branch2_ops,
- },
- },
-};
-
static struct clk_branch disp_cc_mdss_rscc_vsync_clk = {
.halt_reg = 0x4008,
.halt_check = BRANCH_HALT,
@@ -687,7 +669,6 @@ static struct clk_regmap *disp_cc_sc7180_clocks[] = {
[DISP_CC_MDSS_PCLK0_CLK_SRC] = &disp_cc_mdss_pclk0_clk_src.clkr,
[DISP_CC_MDSS_ROT_CLK] = &disp_cc_mdss_rot_clk.clkr,
[DISP_CC_MDSS_ROT_CLK_SRC] = &disp_cc_mdss_rot_clk_src.clkr,
- [DISP_CC_MDSS_RSCC_AHB_CLK] = &disp_cc_mdss_rscc_ahb_clk.clkr,
[DISP_CC_MDSS_RSCC_VSYNC_CLK] = &disp_cc_mdss_rscc_vsync_clk.clkr,
[DISP_CC_MDSS_VSYNC_CLK] = &disp_cc_mdss_vsync_clk.clkr,
[DISP_CC_MDSS_VSYNC_CLK_SRC] = &disp_cc_mdss_vsync_clk_src.clkr,
diff --git a/drivers/clk/qcom/videocc-sc7180.c b/drivers/clk/qcom/videocc-sc7180.c
index c363c3c..276e5ec 100644
--- a/drivers/clk/qcom/videocc-sc7180.c
+++ b/drivers/clk/qcom/videocc-sc7180.c
@@ -97,7 +97,7 @@ static struct clk_branch video_cc_vcodec0_axi_clk = {
static struct clk_branch video_cc_vcodec0_core_clk = {
.halt_reg = 0x890,
- .halt_check = BRANCH_HALT,
+ .halt_check = BRANCH_HALT_VOTED,
.clkr = {
.enable_reg = 0x890,
.enable_mask = BIT(0),
diff --git a/drivers/clk/ti/clk-43xx.c b/drivers/clk/ti/clk-43xx.c
index af3e780..e5538d57 100644
--- a/drivers/clk/ti/clk-43xx.c
+++ b/drivers/clk/ti/clk-43xx.c
@@ -78,7 +78,7 @@ static const struct omap_clkctrl_reg_data am4_gfx_l3_clkctrl_regs[] __initconst
};
static const struct omap_clkctrl_reg_data am4_l4_rtc_clkctrl_regs[] __initconst = {
- { AM4_L4_RTC_RTC_CLKCTRL, NULL, CLKF_SW_SUP, "clk_32768_ck" },
+ { AM4_L4_RTC_RTC_CLKCTRL, NULL, CLKF_SW_SUP, "clkdiv32k_ick" },
{ 0 },
};
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index 9d808d5..eb0ba78 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -343,7 +343,8 @@ static u64 notrace read_hv_clock_tsc_cs(struct clocksource *arg)
static u64 read_hv_sched_clock_tsc(void)
{
- return read_hv_clock_tsc() - hv_sched_clock_offset;
+ return (read_hv_clock_tsc() - hv_sched_clock_offset) *
+ (NSEC_PER_SEC / HV_CLOCK_HZ);
}
static void suspend_hv_clock_tsc(struct clocksource *arg)
@@ -398,7 +399,8 @@ static u64 notrace read_hv_clock_msr_cs(struct clocksource *arg)
static u64 read_hv_sched_clock_msr(void)
{
- return read_hv_clock_msr() - hv_sched_clock_offset;
+ return (read_hv_clock_msr() - hv_sched_clock_offset) *
+ (NSEC_PER_SEC / HV_CLOCK_HZ);
}
static struct clocksource hyperv_cs_msr = {
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 4adac3a..808874b 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -105,6 +105,8 @@ bool have_governor_per_policy(void)
}
EXPORT_SYMBOL_GPL(have_governor_per_policy);
+static struct kobject *cpufreq_global_kobject;
+
struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy)
{
if (have_governor_per_policy())
@@ -1074,9 +1076,17 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
pol = policy->last_policy;
} else if (def_gov) {
pol = cpufreq_parse_policy(def_gov->name);
- } else {
- return -ENODATA;
+ /*
+ * In case the default governor is neiter "performance"
+ * nor "powersave", fall back to the initial policy
+ * value set by the driver.
+ */
+ if (pol == CPUFREQ_POLICY_UNKNOWN)
+ pol = policy->policy;
}
+ if (pol != CPUFREQ_POLICY_PERFORMANCE &&
+ pol != CPUFREQ_POLICY_POWERSAVE)
+ return -ENODATA;
}
return cpufreq_set_policy(policy, gov, pol);
@@ -2745,9 +2755,6 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
-struct kobject *cpufreq_global_kobject;
-EXPORT_SYMBOL(cpufreq_global_kobject);
-
static int __init cpufreq_core_init(void)
{
if (cpufreq_disabled())
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 26a654d..0aa4b6b 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -61,7 +61,7 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
{
if (!blk_queue_dax(bdev->bd_queue))
return NULL;
- return fs_dax_get_by_host(bdev->bd_disk->disk_name);
+ return dax_get_by_host(bdev->bd_disk->disk_name);
}
EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
#endif
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index cceee8b..7dcf209 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -738,7 +738,6 @@ struct devfreq *devfreq_add_device(struct device *dev,
{
struct devfreq *devfreq;
struct devfreq_governor *governor;
- static atomic_t devfreq_no = ATOMIC_INIT(-1);
int err = 0;
if (!dev || !profile || !governor_name) {
@@ -800,8 +799,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev);
atomic_set(&devfreq->suspend_count, 0);
- dev_set_name(&devfreq->dev, "devfreq%d",
- atomic_inc_return(&devfreq_no));
+ dev_set_name(&devfreq->dev, "%s", dev_name(dev));
err = device_register(&devfreq->dev);
if (err) {
mutex_unlock(&devfreq->lock);
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index d409785..c343c7c 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -108,6 +108,7 @@ static int dma_buf_release(struct inode *inode, struct file *file)
dma_resv_fini(dmabuf->resv);
module_put(dmabuf->owner);
+ kfree(dmabuf->name);
kfree(dmabuf);
return 0;
}
diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
index e51d836..1092d4c 100644
--- a/drivers/dma/coh901318.c
+++ b/drivers/dma/coh901318.c
@@ -1947,8 +1947,6 @@ static void dma_tc_handle(struct coh901318_chan *cohc)
return;
}
- spin_lock(&cohc->lock);
-
/*
* When we reach this point, at least one queue item
* should have been moved over from cohc->queue to
@@ -1969,8 +1967,6 @@ static void dma_tc_handle(struct coh901318_chan *cohc)
if (coh901318_queue_start(cohc) == NULL)
cohc->busy = 0;
- spin_unlock(&cohc->lock);
-
/*
* This tasklet will remove items from cohc->active
* and thus terminates them.
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index c3b1283..17909fd 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -1151,7 +1151,7 @@ int dma_async_device_register(struct dma_device *device)
}
if (!device->device_release)
- dev_warn(device->dev,
+ dev_dbg(device->dev,
"WARN: Device release is not defined so it is not safe to unbind this driver while in use\n");
kref_init(&device->ref);
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index 1d73478..989b7a2 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -81,9 +81,9 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
dev = &idxd->pdev->dev;
idxd_cdev = &wq->idxd_cdev;
- dev_dbg(dev, "%s called\n", __func__);
+ dev_dbg(dev, "%s called: %d\n", __func__, idxd_wq_refcount(wq));
- if (idxd_wq_refcount(wq) > 1 && wq_dedicated(wq))
+ if (idxd_wq_refcount(wq) > 0 && wq_dedicated(wq))
return -EBUSY;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -204,6 +204,7 @@ static int idxd_wq_cdev_dev_setup(struct idxd_wq *wq)
minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL);
if (minor < 0) {
rc = minor;
+ kfree(dev);
goto ida_err;
}
@@ -212,7 +213,6 @@ static int idxd_wq_cdev_dev_setup(struct idxd_wq *wq)
rc = device_register(dev);
if (rc < 0) {
dev_err(&idxd->pdev->dev, "device register failed\n");
- put_device(dev);
goto dev_reg_err;
}
idxd_cdev->minor = minor;
@@ -221,8 +221,8 @@ static int idxd_wq_cdev_dev_setup(struct idxd_wq *wq)
dev_reg_err:
ida_simple_remove(&cdev_ctx->minor_ida, MINOR(dev->devt));
+ put_device(dev);
ida_err:
- kfree(dev);
idxd_cdev->dev = NULL;
return rc;
}
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index 6d907fe..6ca6e52 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -124,6 +124,7 @@ static int idxd_config_bus_probe(struct device *dev)
rc = idxd_device_config(idxd);
if (rc < 0) {
spin_unlock_irqrestore(&idxd->dev_lock, flags);
+ module_put(THIS_MODULE);
dev_warn(dev, "Device config failed: %d\n", rc);
return rc;
}
@@ -132,6 +133,7 @@ static int idxd_config_bus_probe(struct device *dev)
rc = idxd_device_enable(idxd);
if (rc < 0) {
spin_unlock_irqrestore(&idxd->dev_lock, flags);
+ module_put(THIS_MODULE);
dev_warn(dev, "Device enable failed: %d\n", rc);
return rc;
}
@@ -142,6 +144,7 @@ static int idxd_config_bus_probe(struct device *dev)
rc = idxd_register_dma_device(idxd);
if (rc < 0) {
spin_unlock_irqrestore(&idxd->dev_lock, flags);
+ module_put(THIS_MODULE);
dev_dbg(dev, "Failed to register dmaengine device\n");
return rc;
}
@@ -516,7 +519,7 @@ static ssize_t group_tokens_reserved_store(struct device *dev,
if (val > idxd->max_tokens)
return -EINVAL;
- if (val > idxd->nr_tokens)
+ if (val > idxd->nr_tokens + group->tokens_reserved)
return -EINVAL;
group->tokens_reserved = val;
@@ -901,6 +904,20 @@ static ssize_t wq_size_show(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%u\n", wq->size);
}
+static int total_claimed_wq_size(struct idxd_device *idxd)
+{
+ int i;
+ int wq_size = 0;
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ struct idxd_wq *wq = &idxd->wqs[i];
+
+ wq_size += wq->size;
+ }
+
+ return wq_size;
+}
+
static ssize_t wq_size_store(struct device *dev,
struct device_attribute *attr, const char *buf,
size_t count)
@@ -920,7 +937,7 @@ static ssize_t wq_size_store(struct device *dev,
if (wq->state != IDXD_WQ_DISABLED)
return -EPERM;
- if (size > idxd->max_wq_size)
+ if (size + total_claimed_wq_size(idxd) - wq->size > idxd->max_wq_size)
return -EINVAL;
wq->size = size;
@@ -999,12 +1016,14 @@ static ssize_t wq_type_store(struct device *dev,
return -EPERM;
old_type = wq->type;
- if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_KERNEL]))
+ if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_NONE]))
+ wq->type = IDXD_WQT_NONE;
+ else if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_KERNEL]))
wq->type = IDXD_WQT_KERNEL;
else if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_USER]))
wq->type = IDXD_WQT_USER;
else
- wq->type = IDXD_WQT_NONE;
+ return -EINVAL;
/* If we are changing queue type, clear the name */
if (wq->type != old_type)
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 066b21a..4d4477d 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -1331,13 +1331,14 @@ static void sdma_free_chan_resources(struct dma_chan *chan)
sdma_channel_synchronize(chan);
- if (sdmac->event_id0)
+ if (sdmac->event_id0 >= 0)
sdma_event_disable(sdmac, sdmac->event_id0);
if (sdmac->event_id1)
sdma_event_disable(sdmac, sdmac->event_id1);
sdmac->event_id0 = 0;
sdmac->event_id1 = 0;
+ sdmac->context_loaded = false;
sdma_set_channel_priority(sdmac, 0);
@@ -1631,7 +1632,7 @@ static int sdma_config(struct dma_chan *chan,
memcpy(&sdmac->slave_config, dmaengine_cfg, sizeof(*dmaengine_cfg));
/* Set ENBLn earlier to make sure dma request triggered after that */
- if (sdmac->event_id0) {
+ if (sdmac->event_id0 >= 0) {
if (sdmac->event_id0 >= sdmac->sdma->drvdata->num_events)
return -EINVAL;
sdma_event_enable(sdmac, sdmac->event_id0);
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index 3a45079..4a750e2 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -281,7 +281,7 @@ static struct tegra_dma_desc *tegra_dma_desc_get(
/* Do not allocate if desc are waiting for ack */
list_for_each_entry(dma_desc, &tdc->free_dma_desc, node) {
- if (async_tx_test_ack(&dma_desc->txd)) {
+ if (async_tx_test_ack(&dma_desc->txd) && !dma_desc->cb_count) {
list_del(&dma_desc->node);
spin_unlock_irqrestore(&tdc->lock, flags);
dma_desc->txd.flags = 0;
@@ -756,10 +756,6 @@ static int tegra_dma_terminate_all(struct dma_chan *dc)
bool was_busy;
spin_lock_irqsave(&tdc->lock, flags);
- if (list_empty(&tdc->pending_sg_req)) {
- spin_unlock_irqrestore(&tdc->lock, flags);
- return 0;
- }
if (!tdc->busy)
goto skip_dma_stop;
diff --git a/drivers/dma/ti/k3-udma-glue.c b/drivers/dma/ti/k3-udma-glue.c
index c151129..4d7561a 100644
--- a/drivers/dma/ti/k3-udma-glue.c
+++ b/drivers/dma/ti/k3-udma-glue.c
@@ -564,12 +564,12 @@ static int k3_udma_glue_cfg_rx_flow(struct k3_udma_glue_rx_channel *rx_chn,
if (IS_ERR(flow->udma_rflow)) {
ret = PTR_ERR(flow->udma_rflow);
dev_err(dev, "UDMAX rflow get err %d\n", ret);
- goto err;
+ return ret;
}
if (flow->udma_rflow_id != xudma_rflow_get_id(flow->udma_rflow)) {
- xudma_rflow_put(rx_chn->common.udmax, flow->udma_rflow);
- return -ENODEV;
+ ret = -ENODEV;
+ goto err_rflow_put;
}
/* request and cfg rings */
@@ -578,7 +578,7 @@ static int k3_udma_glue_cfg_rx_flow(struct k3_udma_glue_rx_channel *rx_chn,
if (!flow->ringrx) {
ret = -ENODEV;
dev_err(dev, "Failed to get RX ring\n");
- goto err;
+ goto err_rflow_put;
}
flow->ringrxfdq = k3_ringacc_request_ring(rx_chn->common.ringacc,
@@ -586,19 +586,19 @@ static int k3_udma_glue_cfg_rx_flow(struct k3_udma_glue_rx_channel *rx_chn,
if (!flow->ringrxfdq) {
ret = -ENODEV;
dev_err(dev, "Failed to get RXFDQ ring\n");
- goto err;
+ goto err_ringrx_free;
}
ret = k3_ringacc_ring_cfg(flow->ringrx, &flow_cfg->rx_cfg);
if (ret) {
dev_err(dev, "Failed to cfg ringrx %d\n", ret);
- goto err;
+ goto err_ringrxfdq_free;
}
ret = k3_ringacc_ring_cfg(flow->ringrxfdq, &flow_cfg->rxfdq_cfg);
if (ret) {
dev_err(dev, "Failed to cfg ringrxfdq %d\n", ret);
- goto err;
+ goto err_ringrxfdq_free;
}
if (rx_chn->remote) {
@@ -648,7 +648,7 @@ static int k3_udma_glue_cfg_rx_flow(struct k3_udma_glue_rx_channel *rx_chn,
if (ret) {
dev_err(dev, "flow%d config failed: %d\n", flow->udma_rflow_id,
ret);
- goto err;
+ goto err_ringrxfdq_free;
}
rx_chn->flows_ready++;
@@ -656,8 +656,17 @@ static int k3_udma_glue_cfg_rx_flow(struct k3_udma_glue_rx_channel *rx_chn,
flow->udma_rflow_id, rx_chn->flows_ready);
return 0;
-err:
- k3_udma_glue_release_rx_flow(rx_chn, flow_idx);
+
+err_ringrxfdq_free:
+ k3_ringacc_ring_free(flow->ringrxfdq);
+
+err_ringrx_free:
+ k3_ringacc_ring_free(flow->ringrx);
+
+err_rflow_put:
+ xudma_rflow_put(rx_chn->common.udmax, flow->udma_rflow);
+ flow->udma_rflow = NULL;
+
return ret;
}
diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
index ea79c2d..0536866 100644
--- a/drivers/dma/ti/k3-udma.c
+++ b/drivers/dma/ti/k3-udma.c
@@ -5,6 +5,7 @@
*/
#include <linux/kernel.h>
+#include <linux/delay.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/dmapool.h>
@@ -96,6 +97,24 @@ struct udma_match_data {
u32 level_start_idx[];
};
+struct udma_hwdesc {
+ size_t cppi5_desc_size;
+ void *cppi5_desc_vaddr;
+ dma_addr_t cppi5_desc_paddr;
+
+ /* TR descriptor internal pointers */
+ void *tr_req_base;
+ struct cppi5_tr_resp_t *tr_resp_base;
+};
+
+struct udma_rx_flush {
+ struct udma_hwdesc hwdescs[2];
+
+ size_t buffer_size;
+ void *buffer_vaddr;
+ dma_addr_t buffer_paddr;
+};
+
struct udma_dev {
struct dma_device ddev;
struct device *dev;
@@ -112,6 +131,8 @@ struct udma_dev {
struct list_head desc_to_purge;
spinlock_t lock;
+ struct udma_rx_flush rx_flush;
+
int tchan_cnt;
int echan_cnt;
int rchan_cnt;
@@ -130,16 +151,6 @@ struct udma_dev {
u32 psil_base;
};
-struct udma_hwdesc {
- size_t cppi5_desc_size;
- void *cppi5_desc_vaddr;
- dma_addr_t cppi5_desc_paddr;
-
- /* TR descriptor internal pointers */
- void *tr_req_base;
- struct cppi5_tr_resp_t *tr_resp_base;
-};
-
struct udma_desc {
struct virt_dma_desc vd;
@@ -169,7 +180,7 @@ enum udma_chan_state {
struct udma_tx_drain {
struct delayed_work work;
- unsigned long jiffie;
+ ktime_t tstamp;
u32 residue;
};
@@ -502,7 +513,7 @@ static bool udma_is_chan_paused(struct udma_chan *uc)
{
u32 val, pause_mask;
- switch (uc->desc->dir) {
+ switch (uc->config.dir) {
case DMA_DEV_TO_MEM:
val = udma_rchanrt_read(uc->rchan,
UDMA_RCHAN_RT_PEER_RT_EN_REG);
@@ -551,12 +562,17 @@ static void udma_sync_for_device(struct udma_chan *uc, int idx)
}
}
+static inline dma_addr_t udma_get_rx_flush_hwdesc_paddr(struct udma_chan *uc)
+{
+ return uc->ud->rx_flush.hwdescs[uc->config.pkt_mode].cppi5_desc_paddr;
+}
+
static int udma_push_to_ring(struct udma_chan *uc, int idx)
{
struct udma_desc *d = uc->desc;
-
struct k3_ring *ring = NULL;
- int ret = -EINVAL;
+ dma_addr_t paddr;
+ int ret;
switch (uc->config.dir) {
case DMA_DEV_TO_MEM:
@@ -567,21 +583,37 @@ static int udma_push_to_ring(struct udma_chan *uc, int idx)
ring = uc->tchan->t_ring;
break;
default:
- break;
+ return -EINVAL;
}
- if (ring) {
- dma_addr_t desc_addr = udma_curr_cppi5_desc_paddr(d, idx);
+ /* RX flush packet: idx == -1 is only passed in case of DEV_TO_MEM */
+ if (idx == -1) {
+ paddr = udma_get_rx_flush_hwdesc_paddr(uc);
+ } else {
+ paddr = udma_curr_cppi5_desc_paddr(d, idx);
wmb(); /* Ensure that writes are not moved over this point */
udma_sync_for_device(uc, idx);
- ret = k3_ringacc_ring_push(ring, &desc_addr);
- uc->in_ring_cnt++;
}
+ ret = k3_ringacc_ring_push(ring, &paddr);
+ if (!ret)
+ uc->in_ring_cnt++;
+
return ret;
}
+static bool udma_desc_is_rx_flush(struct udma_chan *uc, dma_addr_t addr)
+{
+ if (uc->config.dir != DMA_DEV_TO_MEM)
+ return false;
+
+ if (addr == udma_get_rx_flush_hwdesc_paddr(uc))
+ return true;
+
+ return false;
+}
+
static int udma_pop_from_ring(struct udma_chan *uc, dma_addr_t *addr)
{
struct k3_ring *ring = NULL;
@@ -610,6 +642,10 @@ static int udma_pop_from_ring(struct udma_chan *uc, dma_addr_t *addr)
if (cppi5_desc_is_tdcm(*addr))
return ret;
+ /* Check for flush descriptor */
+ if (udma_desc_is_rx_flush(uc, *addr))
+ return -ENOENT;
+
d = udma_udma_desc_from_paddr(uc, *addr);
if (d)
@@ -890,6 +926,9 @@ static int udma_stop(struct udma_chan *uc)
switch (uc->config.dir) {
case DMA_DEV_TO_MEM:
+ if (!uc->cyclic && !uc->desc)
+ udma_push_to_ring(uc, -1);
+
udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PEER_RT_EN_REG,
UDMA_PEER_RT_EN_ENABLE |
UDMA_PEER_RT_EN_TEARDOWN);
@@ -946,9 +985,10 @@ static bool udma_is_desc_really_done(struct udma_chan *uc, struct udma_desc *d)
peer_bcnt = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_PEER_BCNT_REG);
bcnt = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_BCNT_REG);
+ /* Transfer is incomplete, store current residue and time stamp */
if (peer_bcnt < bcnt) {
uc->tx_drain.residue = bcnt - peer_bcnt;
- uc->tx_drain.jiffie = jiffies;
+ uc->tx_drain.tstamp = ktime_get();
return false;
}
@@ -961,35 +1001,59 @@ static void udma_check_tx_completion(struct work_struct *work)
tx_drain.work.work);
bool desc_done = true;
u32 residue_diff;
- unsigned long jiffie_diff, delay;
+ ktime_t time_diff;
+ unsigned long delay;
- if (uc->desc) {
- residue_diff = uc->tx_drain.residue;
- jiffie_diff = uc->tx_drain.jiffie;
- desc_done = udma_is_desc_really_done(uc, uc->desc);
- }
-
- if (!desc_done) {
- jiffie_diff = uc->tx_drain.jiffie - jiffie_diff;
- residue_diff -= uc->tx_drain.residue;
- if (residue_diff) {
- /* Try to guess when we should check next time */
- residue_diff /= jiffie_diff;
- delay = uc->tx_drain.residue / residue_diff / 3;
- if (jiffies_to_msecs(delay) < 5)
- delay = 0;
- } else {
- /* No progress, check again in 1 second */
- delay = HZ;
+ while (1) {
+ if (uc->desc) {
+ /* Get previous residue and time stamp */
+ residue_diff = uc->tx_drain.residue;
+ time_diff = uc->tx_drain.tstamp;
+ /*
+ * Get current residue and time stamp or see if
+ * transfer is complete
+ */
+ desc_done = udma_is_desc_really_done(uc, uc->desc);
}
- schedule_delayed_work(&uc->tx_drain.work, delay);
- } else if (uc->desc) {
- struct udma_desc *d = uc->desc;
+ if (!desc_done) {
+ /*
+ * Find the time delta and residue delta w.r.t
+ * previous poll
+ */
+ time_diff = ktime_sub(uc->tx_drain.tstamp,
+ time_diff) + 1;
+ residue_diff -= uc->tx_drain.residue;
+ if (residue_diff) {
+ /*
+ * Try to guess when we should check
+ * next time by calculating rate at
+ * which data is being drained at the
+ * peer device
+ */
+ delay = (time_diff / residue_diff) *
+ uc->tx_drain.residue;
+ } else {
+ /* No progress, check again in 1 second */
+ schedule_delayed_work(&uc->tx_drain.work, HZ);
+ break;
+ }
- uc->bcnt += d->residue;
- udma_start(uc);
- vchan_cookie_complete(&d->vd);
+ usleep_range(ktime_to_us(delay),
+ ktime_to_us(delay) + 10);
+ continue;
+ }
+
+ if (uc->desc) {
+ struct udma_desc *d = uc->desc;
+
+ uc->bcnt += d->residue;
+ udma_start(uc);
+ vchan_cookie_complete(&d->vd);
+ break;
+ }
+
+ break;
}
}
@@ -1033,29 +1097,27 @@ static irqreturn_t udma_ring_irq_handler(int irq, void *data)
goto out;
}
- if (uc->cyclic) {
- /* push the descriptor back to the ring */
- if (d == uc->desc) {
+ if (d == uc->desc) {
+ /* active descriptor */
+ if (uc->cyclic) {
udma_cyclic_packet_elapsed(uc);
vchan_cyclic_callback(&d->vd);
- }
- } else {
- bool desc_done = false;
-
- if (d == uc->desc) {
- desc_done = udma_is_desc_really_done(uc, d);
-
- if (desc_done) {
+ } else {
+ if (udma_is_desc_really_done(uc, d)) {
uc->bcnt += d->residue;
udma_start(uc);
+ vchan_cookie_complete(&d->vd);
} else {
schedule_delayed_work(&uc->tx_drain.work,
0);
}
}
-
- if (desc_done)
- vchan_cookie_complete(&d->vd);
+ } else {
+ /*
+ * terminated descriptor, mark the descriptor as
+ * completed to update the channel's cookie marker
+ */
+ dma_cookie_complete(&d->vd.tx);
}
}
out:
@@ -1965,36 +2027,81 @@ static struct udma_desc *udma_alloc_tr_desc(struct udma_chan *uc,
return d;
}
+/**
+ * udma_get_tr_counters - calculate TR counters for a given length
+ * @len: Length of the trasnfer
+ * @align_to: Preferred alignment
+ * @tr0_cnt0: First TR icnt0
+ * @tr0_cnt1: First TR icnt1
+ * @tr1_cnt0: Second (if used) TR icnt0
+ *
+ * For len < SZ_64K only one TR is enough, tr1_cnt0 is not updated
+ * For len >= SZ_64K two TRs are used in a simple way:
+ * First TR: SZ_64K-alignment blocks (tr0_cnt0, tr0_cnt1)
+ * Second TR: the remaining length (tr1_cnt0)
+ *
+ * Returns the number of TRs the length needs (1 or 2)
+ * -EINVAL if the length can not be supported
+ */
+static int udma_get_tr_counters(size_t len, unsigned long align_to,
+ u16 *tr0_cnt0, u16 *tr0_cnt1, u16 *tr1_cnt0)
+{
+ if (len < SZ_64K) {
+ *tr0_cnt0 = len;
+ *tr0_cnt1 = 1;
+
+ return 1;
+ }
+
+ if (align_to > 3)
+ align_to = 3;
+
+realign:
+ *tr0_cnt0 = SZ_64K - BIT(align_to);
+ if (len / *tr0_cnt0 >= SZ_64K) {
+ if (align_to) {
+ align_to--;
+ goto realign;
+ }
+ return -EINVAL;
+ }
+
+ *tr0_cnt1 = len / *tr0_cnt0;
+ *tr1_cnt0 = len % *tr0_cnt0;
+
+ return 2;
+}
+
static struct udma_desc *
udma_prep_slave_sg_tr(struct udma_chan *uc, struct scatterlist *sgl,
unsigned int sglen, enum dma_transfer_direction dir,
unsigned long tx_flags, void *context)
{
- enum dma_slave_buswidth dev_width;
struct scatterlist *sgent;
struct udma_desc *d;
- size_t tr_size;
struct cppi5_tr_type1_t *tr_req = NULL;
+ u16 tr0_cnt0, tr0_cnt1, tr1_cnt0;
unsigned int i;
- u32 burst;
+ size_t tr_size;
+ int num_tr = 0;
+ int tr_idx = 0;
- if (dir == DMA_DEV_TO_MEM) {
- dev_width = uc->cfg.src_addr_width;
- burst = uc->cfg.src_maxburst;
- } else if (dir == DMA_MEM_TO_DEV) {
- dev_width = uc->cfg.dst_addr_width;
- burst = uc->cfg.dst_maxburst;
- } else {
- dev_err(uc->ud->dev, "%s: bad direction?\n", __func__);
+ if (!is_slave_direction(dir)) {
+ dev_err(uc->ud->dev, "Only slave cyclic is supported\n");
return NULL;
}
- if (!burst)
- burst = 1;
+ /* estimate the number of TRs we will need */
+ for_each_sg(sgl, sgent, sglen, i) {
+ if (sg_dma_len(sgent) < SZ_64K)
+ num_tr++;
+ else
+ num_tr += 2;
+ }
/* Now allocate and setup the descriptor. */
tr_size = sizeof(struct cppi5_tr_type1_t);
- d = udma_alloc_tr_desc(uc, tr_size, sglen, dir);
+ d = udma_alloc_tr_desc(uc, tr_size, num_tr, dir);
if (!d)
return NULL;
@@ -2002,19 +2109,46 @@ udma_prep_slave_sg_tr(struct udma_chan *uc, struct scatterlist *sgl,
tr_req = d->hwdesc[0].tr_req_base;
for_each_sg(sgl, sgent, sglen, i) {
- d->residue += sg_dma_len(sgent);
+ dma_addr_t sg_addr = sg_dma_address(sgent);
+
+ num_tr = udma_get_tr_counters(sg_dma_len(sgent), __ffs(sg_addr),
+ &tr0_cnt0, &tr0_cnt1, &tr1_cnt0);
+ if (num_tr < 0) {
+ dev_err(uc->ud->dev, "size %u is not supported\n",
+ sg_dma_len(sgent));
+ udma_free_hwdesc(uc, d);
+ kfree(d);
+ return NULL;
+ }
cppi5_tr_init(&tr_req[i].flags, CPPI5_TR_TYPE1, false, false,
CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
cppi5_tr_csf_set(&tr_req[i].flags, CPPI5_TR_CSF_SUPR_EVT);
- tr_req[i].addr = sg_dma_address(sgent);
- tr_req[i].icnt0 = burst * dev_width;
- tr_req[i].dim1 = burst * dev_width;
- tr_req[i].icnt1 = sg_dma_len(sgent) / tr_req[i].icnt0;
+ tr_req[tr_idx].addr = sg_addr;
+ tr_req[tr_idx].icnt0 = tr0_cnt0;
+ tr_req[tr_idx].icnt1 = tr0_cnt1;
+ tr_req[tr_idx].dim1 = tr0_cnt0;
+ tr_idx++;
+
+ if (num_tr == 2) {
+ cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1,
+ false, false,
+ CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+ cppi5_tr_csf_set(&tr_req[tr_idx].flags,
+ CPPI5_TR_CSF_SUPR_EVT);
+
+ tr_req[tr_idx].addr = sg_addr + tr0_cnt1 * tr0_cnt0;
+ tr_req[tr_idx].icnt0 = tr1_cnt0;
+ tr_req[tr_idx].icnt1 = 1;
+ tr_req[tr_idx].dim1 = tr1_cnt0;
+ tr_idx++;
+ }
+
+ d->residue += sg_dma_len(sgent);
}
- cppi5_tr_csf_set(&tr_req[i - 1].flags, CPPI5_TR_CSF_EOP);
+ cppi5_tr_csf_set(&tr_req[tr_idx - 1].flags, CPPI5_TR_CSF_EOP);
return d;
}
@@ -2319,47 +2453,66 @@ udma_prep_dma_cyclic_tr(struct udma_chan *uc, dma_addr_t buf_addr,
size_t buf_len, size_t period_len,
enum dma_transfer_direction dir, unsigned long flags)
{
- enum dma_slave_buswidth dev_width;
struct udma_desc *d;
- size_t tr_size;
+ size_t tr_size, period_addr;
struct cppi5_tr_type1_t *tr_req;
- unsigned int i;
unsigned int periods = buf_len / period_len;
- u32 burst;
+ u16 tr0_cnt0, tr0_cnt1, tr1_cnt0;
+ unsigned int i;
+ int num_tr;
- if (dir == DMA_DEV_TO_MEM) {
- dev_width = uc->cfg.src_addr_width;
- burst = uc->cfg.src_maxburst;
- } else if (dir == DMA_MEM_TO_DEV) {
- dev_width = uc->cfg.dst_addr_width;
- burst = uc->cfg.dst_maxburst;
- } else {
- dev_err(uc->ud->dev, "%s: bad direction?\n", __func__);
+ if (!is_slave_direction(dir)) {
+ dev_err(uc->ud->dev, "Only slave cyclic is supported\n");
return NULL;
}
- if (!burst)
- burst = 1;
+ num_tr = udma_get_tr_counters(period_len, __ffs(buf_addr), &tr0_cnt0,
+ &tr0_cnt1, &tr1_cnt0);
+ if (num_tr < 0) {
+ dev_err(uc->ud->dev, "size %zu is not supported\n",
+ period_len);
+ return NULL;
+ }
/* Now allocate and setup the descriptor. */
tr_size = sizeof(struct cppi5_tr_type1_t);
- d = udma_alloc_tr_desc(uc, tr_size, periods, dir);
+ d = udma_alloc_tr_desc(uc, tr_size, periods * num_tr, dir);
if (!d)
return NULL;
tr_req = d->hwdesc[0].tr_req_base;
+ period_addr = buf_addr;
for (i = 0; i < periods; i++) {
- cppi5_tr_init(&tr_req[i].flags, CPPI5_TR_TYPE1, false, false,
- CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+ int tr_idx = i * num_tr;
- tr_req[i].addr = buf_addr + period_len * i;
- tr_req[i].icnt0 = dev_width;
- tr_req[i].icnt1 = period_len / dev_width;
- tr_req[i].dim1 = dev_width;
+ cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1, false,
+ false, CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+
+ tr_req[tr_idx].addr = period_addr;
+ tr_req[tr_idx].icnt0 = tr0_cnt0;
+ tr_req[tr_idx].icnt1 = tr0_cnt1;
+ tr_req[tr_idx].dim1 = tr0_cnt0;
+
+ if (num_tr == 2) {
+ cppi5_tr_csf_set(&tr_req[tr_idx].flags,
+ CPPI5_TR_CSF_SUPR_EVT);
+ tr_idx++;
+
+ cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1,
+ false, false,
+ CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+
+ tr_req[tr_idx].addr = period_addr + tr0_cnt1 * tr0_cnt0;
+ tr_req[tr_idx].icnt0 = tr1_cnt0;
+ tr_req[tr_idx].icnt1 = 1;
+ tr_req[tr_idx].dim1 = tr1_cnt0;
+ }
if (!(flags & DMA_PREP_INTERRUPT))
- cppi5_tr_csf_set(&tr_req[i].flags,
+ cppi5_tr_csf_set(&tr_req[tr_idx].flags,
CPPI5_TR_CSF_SUPR_EVT);
+
+ period_addr += period_len;
}
return d;
@@ -2517,29 +2670,12 @@ udma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
return NULL;
}
- if (len < SZ_64K) {
- num_tr = 1;
- tr0_cnt0 = len;
- tr0_cnt1 = 1;
- } else {
- unsigned long align_to = __ffs(src | dest);
-
- if (align_to > 3)
- align_to = 3;
- /*
- * Keep simple: tr0: SZ_64K-alignment blocks,
- * tr1: the remaining
- */
- num_tr = 2;
- tr0_cnt0 = (SZ_64K - BIT(align_to));
- if (len / tr0_cnt0 >= SZ_64K) {
- dev_err(uc->ud->dev, "size %zu is not supported\n",
- len);
- return NULL;
- }
-
- tr0_cnt1 = len / tr0_cnt0;
- tr1_cnt0 = len % tr0_cnt0;
+ num_tr = udma_get_tr_counters(len, __ffs(src | dest), &tr0_cnt0,
+ &tr0_cnt1, &tr1_cnt0);
+ if (num_tr < 0) {
+ dev_err(uc->ud->dev, "size %zu is not supported\n",
+ len);
+ return NULL;
}
d = udma_alloc_tr_desc(uc, tr_size, num_tr, DMA_MEM_TO_MEM);
@@ -2631,6 +2767,9 @@ static enum dma_status udma_tx_status(struct dma_chan *chan,
ret = dma_cookie_status(chan, cookie, txstate);
+ if (!udma_is_chan_running(uc))
+ ret = DMA_COMPLETE;
+
if (ret == DMA_IN_PROGRESS && udma_is_chan_paused(uc))
ret = DMA_PAUSED;
@@ -2697,11 +2836,8 @@ static int udma_pause(struct dma_chan *chan)
{
struct udma_chan *uc = to_udma_chan(chan);
- if (!uc->desc)
- return -EINVAL;
-
/* pause the channel */
- switch (uc->desc->dir) {
+ switch (uc->config.dir) {
case DMA_DEV_TO_MEM:
udma_rchanrt_update_bits(uc->rchan,
UDMA_RCHAN_RT_PEER_RT_EN_REG,
@@ -2730,11 +2866,8 @@ static int udma_resume(struct dma_chan *chan)
{
struct udma_chan *uc = to_udma_chan(chan);
- if (!uc->desc)
- return -EINVAL;
-
/* resume the channel */
- switch (uc->desc->dir) {
+ switch (uc->config.dir) {
case DMA_DEV_TO_MEM:
udma_rchanrt_update_bits(uc->rchan,
UDMA_RCHAN_RT_PEER_RT_EN_REG,
@@ -3248,6 +3381,98 @@ static int udma_setup_resources(struct udma_dev *ud)
return ch_count;
}
+static int udma_setup_rx_flush(struct udma_dev *ud)
+{
+ struct udma_rx_flush *rx_flush = &ud->rx_flush;
+ struct cppi5_desc_hdr_t *tr_desc;
+ struct cppi5_tr_type1_t *tr_req;
+ struct cppi5_host_desc_t *desc;
+ struct device *dev = ud->dev;
+ struct udma_hwdesc *hwdesc;
+ size_t tr_size;
+
+ /* Allocate 1K buffer for discarded data on RX channel teardown */
+ rx_flush->buffer_size = SZ_1K;
+ rx_flush->buffer_vaddr = devm_kzalloc(dev, rx_flush->buffer_size,
+ GFP_KERNEL);
+ if (!rx_flush->buffer_vaddr)
+ return -ENOMEM;
+
+ rx_flush->buffer_paddr = dma_map_single(dev, rx_flush->buffer_vaddr,
+ rx_flush->buffer_size,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, rx_flush->buffer_paddr))
+ return -ENOMEM;
+
+ /* Set up descriptor to be used for TR mode */
+ hwdesc = &rx_flush->hwdescs[0];
+ tr_size = sizeof(struct cppi5_tr_type1_t);
+ hwdesc->cppi5_desc_size = cppi5_trdesc_calc_size(tr_size, 1);
+ hwdesc->cppi5_desc_size = ALIGN(hwdesc->cppi5_desc_size,
+ ud->desc_align);
+
+ hwdesc->cppi5_desc_vaddr = devm_kzalloc(dev, hwdesc->cppi5_desc_size,
+ GFP_KERNEL);
+ if (!hwdesc->cppi5_desc_vaddr)
+ return -ENOMEM;
+
+ hwdesc->cppi5_desc_paddr = dma_map_single(dev, hwdesc->cppi5_desc_vaddr,
+ hwdesc->cppi5_desc_size,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, hwdesc->cppi5_desc_paddr))
+ return -ENOMEM;
+
+ /* Start of the TR req records */
+ hwdesc->tr_req_base = hwdesc->cppi5_desc_vaddr + tr_size;
+ /* Start address of the TR response array */
+ hwdesc->tr_resp_base = hwdesc->tr_req_base + tr_size;
+
+ tr_desc = hwdesc->cppi5_desc_vaddr;
+ cppi5_trdesc_init(tr_desc, 1, tr_size, 0, 0);
+ cppi5_desc_set_pktids(tr_desc, 0, CPPI5_INFO1_DESC_FLOWID_DEFAULT);
+ cppi5_desc_set_retpolicy(tr_desc, 0, 0);
+
+ tr_req = hwdesc->tr_req_base;
+ cppi5_tr_init(&tr_req->flags, CPPI5_TR_TYPE1, false, false,
+ CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+ cppi5_tr_csf_set(&tr_req->flags, CPPI5_TR_CSF_SUPR_EVT);
+
+ tr_req->addr = rx_flush->buffer_paddr;
+ tr_req->icnt0 = rx_flush->buffer_size;
+ tr_req->icnt1 = 1;
+
+ /* Set up descriptor to be used for packet mode */
+ hwdesc = &rx_flush->hwdescs[1];
+ hwdesc->cppi5_desc_size = ALIGN(sizeof(struct cppi5_host_desc_t) +
+ CPPI5_INFO0_HDESC_EPIB_SIZE +
+ CPPI5_INFO0_HDESC_PSDATA_MAX_SIZE,
+ ud->desc_align);
+
+ hwdesc->cppi5_desc_vaddr = devm_kzalloc(dev, hwdesc->cppi5_desc_size,
+ GFP_KERNEL);
+ if (!hwdesc->cppi5_desc_vaddr)
+ return -ENOMEM;
+
+ hwdesc->cppi5_desc_paddr = dma_map_single(dev, hwdesc->cppi5_desc_vaddr,
+ hwdesc->cppi5_desc_size,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, hwdesc->cppi5_desc_paddr))
+ return -ENOMEM;
+
+ desc = hwdesc->cppi5_desc_vaddr;
+ cppi5_hdesc_init(desc, 0, 0);
+ cppi5_desc_set_pktids(&desc->hdr, 0, CPPI5_INFO1_DESC_FLOWID_DEFAULT);
+ cppi5_desc_set_retpolicy(&desc->hdr, 0, 0);
+
+ cppi5_hdesc_attach_buf(desc,
+ rx_flush->buffer_paddr, rx_flush->buffer_size,
+ rx_flush->buffer_paddr, rx_flush->buffer_size);
+
+ dma_sync_single_for_device(dev, hwdesc->cppi5_desc_paddr,
+ hwdesc->cppi5_desc_size, DMA_TO_DEVICE);
+ return 0;
+}
+
#define TI_UDMAC_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
@@ -3361,6 +3586,10 @@ static int udma_probe(struct platform_device *pdev)
if (ud->desc_align < dma_get_cache_alignment())
ud->desc_align = dma_get_cache_alignment();
+ ret = udma_setup_rx_flush(ud);
+ if (ret)
+ return ret;
+
for (i = 0; i < ud->tchan_cnt; i++) {
struct udma_tchan *tchan = &ud->tchans[i];
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index b3c99bb..fe2eb89 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -523,4 +523,11 @@
Support for error detection and correction on the
Mellanox BlueField SoCs.
+config EDAC_DMC520
+ tristate "ARM DMC-520 ECC"
+ depends on ARM64
+ help
+ Support for error detection and correction on the
+ SoCs with ARM DMC-520 DRAM controller.
+
endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index d77200c..269e151 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -87,3 +87,4 @@
obj-$(CONFIG_EDAC_QCOM) += qcom_edac.o
obj-$(CONFIG_EDAC_ASPEED) += aspeed_edac.o
obj-$(CONFIG_EDAC_BLUEFIELD) += bluefield_edac.o
+obj-$(CONFIG_EDAC_DMC520) += dmc520_edac.o
diff --git a/drivers/edac/armada_xp_edac.c b/drivers/edac/armada_xp_edac.c
index 7f227bd..a7502eb 100644
--- a/drivers/edac/armada_xp_edac.c
+++ b/drivers/edac/armada_xp_edac.c
@@ -429,26 +429,26 @@ static void aurora_l2_check(struct edac_device_ctl_info *dci)
src = (attr_cap & AURORA_ERR_ATTR_SRC_MSK) >> AURORA_ERR_ATTR_SRC_OFF;
if (src <= 3)
- len += snprintf(msg+len, size-len, "src=CPU%d ", src);
+ len += scnprintf(msg+len, size-len, "src=CPU%d ", src);
else
- len += snprintf(msg+len, size-len, "src=IO ");
+ len += scnprintf(msg+len, size-len, "src=IO ");
txn = (attr_cap & AURORA_ERR_ATTR_TXN_MSK) >> AURORA_ERR_ATTR_TXN_OFF;
switch (txn) {
case 0:
- len += snprintf(msg+len, size-len, "txn=Data-Read ");
+ len += scnprintf(msg+len, size-len, "txn=Data-Read ");
break;
case 1:
- len += snprintf(msg+len, size-len, "txn=Isn-Read ");
+ len += scnprintf(msg+len, size-len, "txn=Isn-Read ");
break;
case 2:
- len += snprintf(msg+len, size-len, "txn=Clean-Flush ");
+ len += scnprintf(msg+len, size-len, "txn=Clean-Flush ");
break;
case 3:
- len += snprintf(msg+len, size-len, "txn=Eviction ");
+ len += scnprintf(msg+len, size-len, "txn=Eviction ");
break;
case 4:
- len += snprintf(msg+len, size-len,
+ len += scnprintf(msg+len, size-len,
"txn=Read-Modify-Write ");
break;
}
@@ -456,19 +456,19 @@ static void aurora_l2_check(struct edac_device_ctl_info *dci)
err = (attr_cap & AURORA_ERR_ATTR_ERR_MSK) >> AURORA_ERR_ATTR_ERR_OFF;
switch (err) {
case 0:
- len += snprintf(msg+len, size-len, "err=CorrECC ");
+ len += scnprintf(msg+len, size-len, "err=CorrECC ");
break;
case 1:
- len += snprintf(msg+len, size-len, "err=UnCorrECC ");
+ len += scnprintf(msg+len, size-len, "err=UnCorrECC ");
break;
case 2:
- len += snprintf(msg+len, size-len, "err=TagParity ");
+ len += scnprintf(msg+len, size-len, "err=TagParity ");
break;
}
- len += snprintf(msg+len, size-len, "addr=0x%x ", addr_cap & AURORA_ERR_ADDR_CAP_ADDR_MASK);
- len += snprintf(msg+len, size-len, "index=0x%x ", (way_cap & AURORA_ERR_WAY_IDX_MSK) >> AURORA_ERR_WAY_IDX_OFF);
- len += snprintf(msg+len, size-len, "way=0x%x", (way_cap & AURORA_ERR_WAY_CAP_WAY_MASK) >> AURORA_ERR_WAY_CAP_WAY_OFFSET);
+ len += scnprintf(msg+len, size-len, "addr=0x%x ", addr_cap & AURORA_ERR_ADDR_CAP_ADDR_MASK);
+ len += scnprintf(msg+len, size-len, "index=0x%x ", (way_cap & AURORA_ERR_WAY_IDX_MSK) >> AURORA_ERR_WAY_IDX_OFF);
+ len += scnprintf(msg+len, size-len, "way=0x%x", (way_cap & AURORA_ERR_WAY_CAP_WAY_MASK) >> AURORA_ERR_WAY_CAP_WAY_OFFSET);
/* clear error capture registers */
writel(AURORA_ERR_ATTR_CAP_VALID, drvdata->base + AURORA_ERR_ATTR_CAP_REG);
diff --git a/drivers/edac/dmc520_edac.c b/drivers/edac/dmc520_edac.c
new file mode 100644
index 0000000..fc1153a
--- /dev/null
+++ b/drivers/edac/dmc520_edac.c
@@ -0,0 +1,656 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * EDAC driver for DMC-520 memory controller.
+ *
+ * The driver supports 10 interrupt lines,
+ * though only dram_ecc_errc and dram_ecc_errd are currently handled.
+ *
+ * Authors: Rui Zhao <ruizhao@microsoft.com>
+ * Lei Wang <lewan@microsoft.com>
+ * Shiping Ji <shji@microsoft.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/edac.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include "edac_mc.h"
+
+/* DMC-520 registers */
+#define REG_OFFSET_FEATURE_CONFIG 0x130
+#define REG_OFFSET_ECC_ERRC_COUNT_31_00 0x158
+#define REG_OFFSET_ECC_ERRC_COUNT_63_32 0x15C
+#define REG_OFFSET_ECC_ERRD_COUNT_31_00 0x160
+#define REG_OFFSET_ECC_ERRD_COUNT_63_32 0x164
+#define REG_OFFSET_INTERRUPT_CONTROL 0x500
+#define REG_OFFSET_INTERRUPT_CLR 0x508
+#define REG_OFFSET_INTERRUPT_STATUS 0x510
+#define REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_31_00 0x528
+#define REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_63_32 0x52C
+#define REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_31_00 0x530
+#define REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_63_32 0x534
+#define REG_OFFSET_ADDRESS_CONTROL_NOW 0x1010
+#define REG_OFFSET_MEMORY_TYPE_NOW 0x1128
+#define REG_OFFSET_SCRUB_CONTROL0_NOW 0x1170
+#define REG_OFFSET_FORMAT_CONTROL 0x18
+
+/* DMC-520 types, masks and bitfields */
+#define RAM_ECC_INT_CE_BIT BIT(0)
+#define RAM_ECC_INT_UE_BIT BIT(1)
+#define DRAM_ECC_INT_CE_BIT BIT(2)
+#define DRAM_ECC_INT_UE_BIT BIT(3)
+#define FAILED_ACCESS_INT_BIT BIT(4)
+#define FAILED_PROG_INT_BIT BIT(5)
+#define LINK_ERR_INT_BIT BIT(6)
+#define TEMPERATURE_EVENT_INT_BIT BIT(7)
+#define ARCH_FSM_INT_BIT BIT(8)
+#define PHY_REQUEST_INT_BIT BIT(9)
+#define MEMORY_WIDTH_MASK GENMASK(1, 0)
+#define SCRUB_TRIGGER0_NEXT_MASK GENMASK(1, 0)
+#define REG_FIELD_DRAM_ECC_ENABLED GENMASK(1, 0)
+#define REG_FIELD_MEMORY_TYPE GENMASK(2, 0)
+#define REG_FIELD_DEVICE_WIDTH GENMASK(9, 8)
+#define REG_FIELD_ADDRESS_CONTROL_COL GENMASK(2, 0)
+#define REG_FIELD_ADDRESS_CONTROL_ROW GENMASK(10, 8)
+#define REG_FIELD_ADDRESS_CONTROL_BANK GENMASK(18, 16)
+#define REG_FIELD_ADDRESS_CONTROL_RANK GENMASK(25, 24)
+#define REG_FIELD_ERR_INFO_LOW_VALID BIT(0)
+#define REG_FIELD_ERR_INFO_LOW_COL GENMASK(10, 1)
+#define REG_FIELD_ERR_INFO_LOW_ROW GENMASK(28, 11)
+#define REG_FIELD_ERR_INFO_LOW_RANK GENMASK(31, 29)
+#define REG_FIELD_ERR_INFO_HIGH_BANK GENMASK(3, 0)
+#define REG_FIELD_ERR_INFO_HIGH_VALID BIT(31)
+
+#define DRAM_ADDRESS_CONTROL_MIN_COL_BITS 8
+#define DRAM_ADDRESS_CONTROL_MIN_ROW_BITS 11
+
+#define DMC520_SCRUB_TRIGGER_ERR_DETECT 2
+#define DMC520_SCRUB_TRIGGER_IDLE 3
+
+/* Driver settings */
+/*
+ * The max-length message would be: "rank:7 bank:15 row:262143 col:1023".
+ * Max length is 34. Using a 40-size buffer is enough.
+ */
+#define DMC520_MSG_BUF_SIZE 40
+#define EDAC_MOD_NAME "dmc520-edac"
+#define EDAC_CTL_NAME "dmc520"
+
+/* the data bus width for the attached memory chips. */
+enum dmc520_mem_width {
+ MEM_WIDTH_X32 = 2,
+ MEM_WIDTH_X64 = 3
+};
+
+/* memory type */
+enum dmc520_mem_type {
+ MEM_TYPE_DDR3 = 1,
+ MEM_TYPE_DDR4 = 2
+};
+
+/* memory device width */
+enum dmc520_dev_width {
+ DEV_WIDTH_X4 = 0,
+ DEV_WIDTH_X8 = 1,
+ DEV_WIDTH_X16 = 2
+};
+
+struct ecc_error_info {
+ u32 col;
+ u32 row;
+ u32 bank;
+ u32 rank;
+};
+
+/* The interrupt config */
+struct dmc520_irq_config {
+ char *name;
+ int mask;
+};
+
+/* The interrupt mappings */
+static struct dmc520_irq_config dmc520_irq_configs[] = {
+ {
+ .name = "ram_ecc_errc",
+ .mask = RAM_ECC_INT_CE_BIT
+ },
+ {
+ .name = "ram_ecc_errd",
+ .mask = RAM_ECC_INT_UE_BIT
+ },
+ {
+ .name = "dram_ecc_errc",
+ .mask = DRAM_ECC_INT_CE_BIT
+ },
+ {
+ .name = "dram_ecc_errd",
+ .mask = DRAM_ECC_INT_UE_BIT
+ },
+ {
+ .name = "failed_access",
+ .mask = FAILED_ACCESS_INT_BIT
+ },
+ {
+ .name = "failed_prog",
+ .mask = FAILED_PROG_INT_BIT
+ },
+ {
+ .name = "link_err",
+ .mask = LINK_ERR_INT_BIT
+ },
+ {
+ .name = "temperature_event",
+ .mask = TEMPERATURE_EVENT_INT_BIT
+ },
+ {
+ .name = "arch_fsm",
+ .mask = ARCH_FSM_INT_BIT
+ },
+ {
+ .name = "phy_request",
+ .mask = PHY_REQUEST_INT_BIT
+ }
+};
+
+#define NUMBER_OF_IRQS ARRAY_SIZE(dmc520_irq_configs)
+
+/*
+ * The EDAC driver private data.
+ * error_lock is to protect concurrent writes to the mci->error_desc through
+ * edac_mc_handle_error().
+ */
+struct dmc520_edac {
+ void __iomem *reg_base;
+ spinlock_t error_lock;
+ u32 mem_width_in_bytes;
+ int irqs[NUMBER_OF_IRQS];
+ int masks[NUMBER_OF_IRQS];
+};
+
+static int dmc520_mc_idx;
+
+static u32 dmc520_read_reg(struct dmc520_edac *pvt, u32 offset)
+{
+ return readl(pvt->reg_base + offset);
+}
+
+static void dmc520_write_reg(struct dmc520_edac *pvt, u32 val, u32 offset)
+{
+ writel(val, pvt->reg_base + offset);
+}
+
+static u32 dmc520_calc_dram_ecc_error(u32 value)
+{
+ u32 total = 0;
+
+ /* Each rank's error counter takes one byte. */
+ while (value > 0) {
+ total += (value & 0xFF);
+ value >>= 8;
+ }
+ return total;
+}
+
+static u32 dmc520_get_dram_ecc_error_count(struct dmc520_edac *pvt,
+ bool is_ce)
+{
+ u32 reg_offset_low, reg_offset_high;
+ u32 err_low, err_high;
+ u32 err_count;
+
+ reg_offset_low = is_ce ? REG_OFFSET_ECC_ERRC_COUNT_31_00 :
+ REG_OFFSET_ECC_ERRD_COUNT_31_00;
+ reg_offset_high = is_ce ? REG_OFFSET_ECC_ERRC_COUNT_63_32 :
+ REG_OFFSET_ECC_ERRD_COUNT_63_32;
+
+ err_low = dmc520_read_reg(pvt, reg_offset_low);
+ err_high = dmc520_read_reg(pvt, reg_offset_high);
+ /* Reset error counters */
+ dmc520_write_reg(pvt, 0, reg_offset_low);
+ dmc520_write_reg(pvt, 0, reg_offset_high);
+
+ err_count = dmc520_calc_dram_ecc_error(err_low) +
+ dmc520_calc_dram_ecc_error(err_high);
+
+ return err_count;
+}
+
+static void dmc520_get_dram_ecc_error_info(struct dmc520_edac *pvt,
+ bool is_ce,
+ struct ecc_error_info *info)
+{
+ u32 reg_offset_low, reg_offset_high;
+ u32 reg_val_low, reg_val_high;
+ bool valid;
+
+ reg_offset_low = is_ce ? REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_31_00 :
+ REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_31_00;
+ reg_offset_high = is_ce ? REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_63_32 :
+ REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_63_32;
+
+ reg_val_low = dmc520_read_reg(pvt, reg_offset_low);
+ reg_val_high = dmc520_read_reg(pvt, reg_offset_high);
+
+ valid = (FIELD_GET(REG_FIELD_ERR_INFO_LOW_VALID, reg_val_low) != 0) &&
+ (FIELD_GET(REG_FIELD_ERR_INFO_HIGH_VALID, reg_val_high) != 0);
+
+ if (valid) {
+ info->col = FIELD_GET(REG_FIELD_ERR_INFO_LOW_COL, reg_val_low);
+ info->row = FIELD_GET(REG_FIELD_ERR_INFO_LOW_ROW, reg_val_low);
+ info->rank = FIELD_GET(REG_FIELD_ERR_INFO_LOW_RANK, reg_val_low);
+ info->bank = FIELD_GET(REG_FIELD_ERR_INFO_HIGH_BANK, reg_val_high);
+ } else {
+ memset(info, 0, sizeof(*info));
+ }
+}
+
+static bool dmc520_is_ecc_enabled(void __iomem *reg_base)
+{
+ u32 reg_val = readl(reg_base + REG_OFFSET_FEATURE_CONFIG);
+
+ return FIELD_GET(REG_FIELD_DRAM_ECC_ENABLED, reg_val);
+}
+
+static enum scrub_type dmc520_get_scrub_type(struct dmc520_edac *pvt)
+{
+ enum scrub_type type = SCRUB_NONE;
+ u32 reg_val, scrub_cfg;
+
+ reg_val = dmc520_read_reg(pvt, REG_OFFSET_SCRUB_CONTROL0_NOW);
+ scrub_cfg = FIELD_GET(SCRUB_TRIGGER0_NEXT_MASK, reg_val);
+
+ if (scrub_cfg == DMC520_SCRUB_TRIGGER_ERR_DETECT ||
+ scrub_cfg == DMC520_SCRUB_TRIGGER_IDLE)
+ type = SCRUB_HW_PROG;
+
+ return type;
+}
+
+/* Get the memory data bus width, in number of bytes. */
+static u32 dmc520_get_memory_width(struct dmc520_edac *pvt)
+{
+ enum dmc520_mem_width mem_width_field;
+ u32 mem_width_in_bytes = 0;
+ u32 reg_val;
+
+ reg_val = dmc520_read_reg(pvt, REG_OFFSET_FORMAT_CONTROL);
+ mem_width_field = FIELD_GET(MEMORY_WIDTH_MASK, reg_val);
+
+ if (mem_width_field == MEM_WIDTH_X32)
+ mem_width_in_bytes = 4;
+ else if (mem_width_field == MEM_WIDTH_X64)
+ mem_width_in_bytes = 8;
+ return mem_width_in_bytes;
+}
+
+static enum mem_type dmc520_get_mtype(struct dmc520_edac *pvt)
+{
+ enum mem_type mt = MEM_UNKNOWN;
+ enum dmc520_mem_type type;
+ u32 reg_val;
+
+ reg_val = dmc520_read_reg(pvt, REG_OFFSET_MEMORY_TYPE_NOW);
+ type = FIELD_GET(REG_FIELD_MEMORY_TYPE, reg_val);
+
+ switch (type) {
+ case MEM_TYPE_DDR3:
+ mt = MEM_DDR3;
+ break;
+
+ case MEM_TYPE_DDR4:
+ mt = MEM_DDR4;
+ break;
+ }
+
+ return mt;
+}
+
+static enum dev_type dmc520_get_dtype(struct dmc520_edac *pvt)
+{
+ enum dmc520_dev_width device_width;
+ enum dev_type dt = DEV_UNKNOWN;
+ u32 reg_val;
+
+ reg_val = dmc520_read_reg(pvt, REG_OFFSET_MEMORY_TYPE_NOW);
+ device_width = FIELD_GET(REG_FIELD_DEVICE_WIDTH, reg_val);
+
+ switch (device_width) {
+ case DEV_WIDTH_X4:
+ dt = DEV_X4;
+ break;
+
+ case DEV_WIDTH_X8:
+ dt = DEV_X8;
+ break;
+
+ case DEV_WIDTH_X16:
+ dt = DEV_X16;
+ break;
+ }
+
+ return dt;
+}
+
+static u32 dmc520_get_rank_count(void __iomem *reg_base)
+{
+ u32 reg_val, rank_bits;
+
+ reg_val = readl(reg_base + REG_OFFSET_ADDRESS_CONTROL_NOW);
+ rank_bits = FIELD_GET(REG_FIELD_ADDRESS_CONTROL_RANK, reg_val);
+
+ return BIT(rank_bits);
+}
+
+static u64 dmc520_get_rank_size(struct dmc520_edac *pvt)
+{
+ u32 reg_val, col_bits, row_bits, bank_bits;
+
+ reg_val = dmc520_read_reg(pvt, REG_OFFSET_ADDRESS_CONTROL_NOW);
+
+ col_bits = FIELD_GET(REG_FIELD_ADDRESS_CONTROL_COL, reg_val) +
+ DRAM_ADDRESS_CONTROL_MIN_COL_BITS;
+ row_bits = FIELD_GET(REG_FIELD_ADDRESS_CONTROL_ROW, reg_val) +
+ DRAM_ADDRESS_CONTROL_MIN_ROW_BITS;
+ bank_bits = FIELD_GET(REG_FIELD_ADDRESS_CONTROL_BANK, reg_val);
+
+ return (u64)pvt->mem_width_in_bytes << (col_bits + row_bits + bank_bits);
+}
+
+static void dmc520_handle_dram_ecc_errors(struct mem_ctl_info *mci,
+ bool is_ce)
+{
+ struct dmc520_edac *pvt = mci->pvt_info;
+ char message[DMC520_MSG_BUF_SIZE];
+ struct ecc_error_info info;
+ u32 cnt;
+
+ dmc520_get_dram_ecc_error_info(pvt, is_ce, &info);
+
+ cnt = dmc520_get_dram_ecc_error_count(pvt, is_ce);
+ if (!cnt)
+ return;
+
+ snprintf(message, ARRAY_SIZE(message),
+ "rank:%d bank:%d row:%d col:%d",
+ info.rank, info.bank,
+ info.row, info.col);
+
+ spin_lock(&pvt->error_lock);
+ edac_mc_handle_error((is_ce ? HW_EVENT_ERR_CORRECTED :
+ HW_EVENT_ERR_UNCORRECTED),
+ mci, cnt, 0, 0, 0, info.rank, -1, -1,
+ message, "");
+ spin_unlock(&pvt->error_lock);
+}
+
+static irqreturn_t dmc520_edac_dram_ecc_isr(int irq, struct mem_ctl_info *mci,
+ bool is_ce)
+{
+ struct dmc520_edac *pvt = mci->pvt_info;
+ u32 i_mask;
+
+ i_mask = is_ce ? DRAM_ECC_INT_CE_BIT : DRAM_ECC_INT_UE_BIT;
+
+ dmc520_handle_dram_ecc_errors(mci, is_ce);
+
+ dmc520_write_reg(pvt, i_mask, REG_OFFSET_INTERRUPT_CLR);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t dmc520_edac_dram_all_isr(int irq, struct mem_ctl_info *mci,
+ u32 irq_mask)
+{
+ struct dmc520_edac *pvt = mci->pvt_info;
+ irqreturn_t irq_ret = IRQ_NONE;
+ u32 status;
+
+ status = dmc520_read_reg(pvt, REG_OFFSET_INTERRUPT_STATUS);
+
+ if ((irq_mask & DRAM_ECC_INT_CE_BIT) &&
+ (status & DRAM_ECC_INT_CE_BIT))
+ irq_ret = dmc520_edac_dram_ecc_isr(irq, mci, true);
+
+ if ((irq_mask & DRAM_ECC_INT_UE_BIT) &&
+ (status & DRAM_ECC_INT_UE_BIT))
+ irq_ret = dmc520_edac_dram_ecc_isr(irq, mci, false);
+
+ return irq_ret;
+}
+
+static irqreturn_t dmc520_isr(int irq, void *data)
+{
+ struct mem_ctl_info *mci = data;
+ struct dmc520_edac *pvt = mci->pvt_info;
+ u32 mask = 0;
+ int idx;
+
+ for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
+ if (pvt->irqs[idx] == irq) {
+ mask = pvt->masks[idx];
+ break;
+ }
+ }
+ return dmc520_edac_dram_all_isr(irq, mci, mask);
+}
+
+static void dmc520_init_csrow(struct mem_ctl_info *mci)
+{
+ struct dmc520_edac *pvt = mci->pvt_info;
+ struct csrow_info *csi;
+ struct dimm_info *dimm;
+ u32 pages_per_rank;
+ enum dev_type dt;
+ enum mem_type mt;
+ int row, ch;
+ u64 rs;
+
+ dt = dmc520_get_dtype(pvt);
+ mt = dmc520_get_mtype(pvt);
+ rs = dmc520_get_rank_size(pvt);
+ pages_per_rank = rs >> PAGE_SHIFT;
+
+ for (row = 0; row < mci->nr_csrows; row++) {
+ csi = mci->csrows[row];
+
+ for (ch = 0; ch < csi->nr_channels; ch++) {
+ dimm = csi->channels[ch]->dimm;
+ dimm->grain = pvt->mem_width_in_bytes;
+ dimm->dtype = dt;
+ dimm->mtype = mt;
+ dimm->edac_mode = EDAC_FLAG_SECDED;
+ dimm->nr_pages = pages_per_rank / csi->nr_channels;
+ }
+ }
+}
+
+static int dmc520_edac_probe(struct platform_device *pdev)
+{
+ bool registered[NUMBER_OF_IRQS] = { false };
+ int irqs[NUMBER_OF_IRQS] = { -ENXIO };
+ int masks[NUMBER_OF_IRQS] = { 0 };
+ struct edac_mc_layer layers[1];
+ struct dmc520_edac *pvt = NULL;
+ struct mem_ctl_info *mci;
+ void __iomem *reg_base;
+ u32 irq_mask_all = 0;
+ struct resource *res;
+ struct device *dev;
+ int ret, idx, irq;
+ u32 reg_val;
+
+ /* Parse the device node */
+ dev = &pdev->dev;
+
+ for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
+ irq = platform_get_irq_byname(pdev, dmc520_irq_configs[idx].name);
+ irqs[idx] = irq;
+ masks[idx] = dmc520_irq_configs[idx].mask;
+ if (irq >= 0) {
+ irq_mask_all |= dmc520_irq_configs[idx].mask;
+ edac_dbg(0, "Discovered %s, irq: %d.\n", dmc520_irq_configs[idx].name, irq);
+ }
+ }
+
+ if (!irq_mask_all) {
+ edac_printk(KERN_ERR, EDAC_MOD_NAME,
+ "At least one valid interrupt line is expected.\n");
+ return -EINVAL;
+ }
+
+ /* Initialize dmc520 edac */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ reg_base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(reg_base))
+ return PTR_ERR(reg_base);
+
+ if (!dmc520_is_ecc_enabled(reg_base))
+ return -ENXIO;
+
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = dmc520_get_rank_count(reg_base);
+ layers[0].is_virt_csrow = true;
+
+ mci = edac_mc_alloc(dmc520_mc_idx++, ARRAY_SIZE(layers), layers, sizeof(*pvt));
+ if (!mci) {
+ edac_printk(KERN_ERR, EDAC_MOD_NAME,
+ "Failed to allocate memory for mc instance\n");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ pvt = mci->pvt_info;
+
+ pvt->reg_base = reg_base;
+ spin_lock_init(&pvt->error_lock);
+ memcpy(pvt->irqs, irqs, sizeof(irqs));
+ memcpy(pvt->masks, masks, sizeof(masks));
+
+ platform_set_drvdata(pdev, mci);
+
+ mci->pdev = dev;
+ mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4;
+ mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
+ mci->edac_cap = EDAC_FLAG_SECDED;
+ mci->scrub_cap = SCRUB_FLAG_HW_SRC;
+ mci->scrub_mode = dmc520_get_scrub_type(pvt);
+ mci->ctl_name = EDAC_CTL_NAME;
+ mci->dev_name = dev_name(mci->pdev);
+ mci->mod_name = EDAC_MOD_NAME;
+
+ edac_op_state = EDAC_OPSTATE_INT;
+
+ pvt->mem_width_in_bytes = dmc520_get_memory_width(pvt);
+
+ dmc520_init_csrow(mci);
+
+ /* Clear interrupts, not affecting other unrelated interrupts */
+ reg_val = dmc520_read_reg(pvt, REG_OFFSET_INTERRUPT_CONTROL);
+ dmc520_write_reg(pvt, reg_val & (~irq_mask_all),
+ REG_OFFSET_INTERRUPT_CONTROL);
+ dmc520_write_reg(pvt, irq_mask_all, REG_OFFSET_INTERRUPT_CLR);
+
+ for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
+ irq = irqs[idx];
+ if (irq >= 0) {
+ ret = devm_request_irq(&pdev->dev, irq,
+ dmc520_isr, IRQF_SHARED,
+ dev_name(&pdev->dev), mci);
+ if (ret < 0) {
+ edac_printk(KERN_ERR, EDAC_MC,
+ "Failed to request irq %d\n", irq);
+ goto err;
+ }
+ registered[idx] = true;
+ }
+ }
+
+ /* Reset DRAM CE/UE counters */
+ if (irq_mask_all & DRAM_ECC_INT_CE_BIT)
+ dmc520_get_dram_ecc_error_count(pvt, true);
+
+ if (irq_mask_all & DRAM_ECC_INT_UE_BIT)
+ dmc520_get_dram_ecc_error_count(pvt, false);
+
+ ret = edac_mc_add_mc(mci);
+ if (ret) {
+ edac_printk(KERN_ERR, EDAC_MOD_NAME,
+ "Failed to register with EDAC core\n");
+ goto err;
+ }
+
+ /* Enable interrupts, not affecting other unrelated interrupts */
+ dmc520_write_reg(pvt, reg_val | irq_mask_all,
+ REG_OFFSET_INTERRUPT_CONTROL);
+
+ return 0;
+
+err:
+ for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
+ if (registered[idx])
+ devm_free_irq(&pdev->dev, pvt->irqs[idx], mci);
+ }
+ if (mci)
+ edac_mc_free(mci);
+
+ return ret;
+}
+
+static int dmc520_edac_remove(struct platform_device *pdev)
+{
+ u32 reg_val, idx, irq_mask_all = 0;
+ struct mem_ctl_info *mci;
+ struct dmc520_edac *pvt;
+
+ mci = platform_get_drvdata(pdev);
+ pvt = mci->pvt_info;
+
+ /* Disable interrupts */
+ reg_val = dmc520_read_reg(pvt, REG_OFFSET_INTERRUPT_CONTROL);
+ dmc520_write_reg(pvt, reg_val & (~irq_mask_all),
+ REG_OFFSET_INTERRUPT_CONTROL);
+
+ /* free irq's */
+ for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
+ if (pvt->irqs[idx] >= 0) {
+ irq_mask_all |= pvt->masks[idx];
+ devm_free_irq(&pdev->dev, pvt->irqs[idx], mci);
+ }
+ }
+
+ edac_mc_del_mc(&pdev->dev);
+ edac_mc_free(mci);
+
+ return 0;
+}
+
+static const struct of_device_id dmc520_edac_driver_id[] = {
+ { .compatible = "arm,dmc-520", },
+ { /* end of table */ }
+};
+
+MODULE_DEVICE_TABLE(of, dmc520_edac_driver_id);
+
+static struct platform_driver dmc520_edac_driver = {
+ .driver = {
+ .name = "dmc520",
+ .of_match_table = dmc520_edac_driver_id,
+ },
+
+ .probe = dmc520_edac_probe,
+ .remove = dmc520_edac_remove
+};
+
+module_platform_driver(dmc520_edac_driver);
+
+MODULE_AUTHOR("Rui Zhao <ruizhao@microsoft.com>");
+MODULE_AUTHOR("Lei Wang <lewan@microsoft.com>");
+MODULE_AUTHOR("Shiping Ji <shji@microsoft.com>");
+MODULE_DESCRIPTION("DMC-520 ECC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 7243b88..75ede27 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -55,6 +55,11 @@ static LIST_HEAD(mc_devices);
*/
static const char *edac_mc_owner;
+static struct mem_ctl_info *error_desc_to_mci(struct edac_raw_error_desc *e)
+{
+ return container_of(e, struct mem_ctl_info, error_desc);
+}
+
int edac_get_report_status(void)
{
return edac_report;
@@ -278,6 +283,12 @@ void *edac_align_ptr(void **p, unsigned int size, int n_elems)
static void _edac_mc_free(struct mem_ctl_info *mci)
{
+ put_device(&mci->dev);
+}
+
+static void mci_release(struct device *dev)
+{
+ struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
struct csrow_info *csr;
int i, chn, row;
@@ -305,6 +316,134 @@ static void _edac_mc_free(struct mem_ctl_info *mci)
kfree(mci);
}
+static int edac_mc_alloc_csrows(struct mem_ctl_info *mci)
+{
+ unsigned int tot_channels = mci->num_cschannel;
+ unsigned int tot_csrows = mci->nr_csrows;
+ unsigned int row, chn;
+
+ /*
+ * Alocate and fill the csrow/channels structs
+ */
+ mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
+ if (!mci->csrows)
+ return -ENOMEM;
+
+ for (row = 0; row < tot_csrows; row++) {
+ struct csrow_info *csr;
+
+ csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
+ if (!csr)
+ return -ENOMEM;
+
+ mci->csrows[row] = csr;
+ csr->csrow_idx = row;
+ csr->mci = mci;
+ csr->nr_channels = tot_channels;
+ csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
+ GFP_KERNEL);
+ if (!csr->channels)
+ return -ENOMEM;
+
+ for (chn = 0; chn < tot_channels; chn++) {
+ struct rank_info *chan;
+
+ chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
+ if (!chan)
+ return -ENOMEM;
+
+ csr->channels[chn] = chan;
+ chan->chan_idx = chn;
+ chan->csrow = csr;
+ }
+ }
+
+ return 0;
+}
+
+static int edac_mc_alloc_dimms(struct mem_ctl_info *mci)
+{
+ unsigned int pos[EDAC_MAX_LAYERS];
+ unsigned int row, chn, idx;
+ int layer;
+ void *p;
+
+ /*
+ * Allocate and fill the dimm structs
+ */
+ mci->dimms = kcalloc(mci->tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
+ if (!mci->dimms)
+ return -ENOMEM;
+
+ memset(&pos, 0, sizeof(pos));
+ row = 0;
+ chn = 0;
+ for (idx = 0; idx < mci->tot_dimms; idx++) {
+ struct dimm_info *dimm;
+ struct rank_info *chan;
+ int n, len;
+
+ chan = mci->csrows[row]->channels[chn];
+
+ dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
+ if (!dimm)
+ return -ENOMEM;
+ mci->dimms[idx] = dimm;
+ dimm->mci = mci;
+ dimm->idx = idx;
+
+ /*
+ * Copy DIMM location and initialize it.
+ */
+ len = sizeof(dimm->label);
+ p = dimm->label;
+ n = snprintf(p, len, "mc#%u", mci->mc_idx);
+ p += n;
+ len -= n;
+ for (layer = 0; layer < mci->n_layers; layer++) {
+ n = snprintf(p, len, "%s#%u",
+ edac_layer_name[mci->layers[layer].type],
+ pos[layer]);
+ p += n;
+ len -= n;
+ dimm->location[layer] = pos[layer];
+
+ if (len <= 0)
+ break;
+ }
+
+ /* Link it to the csrows old API data */
+ chan->dimm = dimm;
+ dimm->csrow = row;
+ dimm->cschannel = chn;
+
+ /* Increment csrow location */
+ if (mci->layers[0].is_virt_csrow) {
+ chn++;
+ if (chn == mci->num_cschannel) {
+ chn = 0;
+ row++;
+ }
+ } else {
+ row++;
+ if (row == mci->nr_csrows) {
+ row = 0;
+ chn++;
+ }
+ }
+
+ /* Increment dimm location */
+ for (layer = mci->n_layers - 1; layer >= 0; layer--) {
+ pos[layer]++;
+ if (pos[layer] < mci->layers[layer].size)
+ break;
+ pos[layer] = 0;
+ }
+ }
+
+ return 0;
+}
+
struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
unsigned int n_layers,
struct edac_mc_layer *layers,
@@ -312,15 +451,9 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
{
struct mem_ctl_info *mci;
struct edac_mc_layer *layer;
- struct csrow_info *csr;
- struct rank_info *chan;
- struct dimm_info *dimm;
- u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
- unsigned int pos[EDAC_MAX_LAYERS];
- unsigned int idx, size, tot_dimms = 1, count = 1;
- unsigned int tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
- void *pvt, *p, *ptr = NULL;
- int i, j, row, chn, n, len;
+ unsigned int idx, size, tot_dimms = 1;
+ unsigned int tot_csrows = 1, tot_channels = 1;
+ void *pvt, *ptr = NULL;
bool per_rank = false;
if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0))
@@ -347,19 +480,10 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
* stringent as what the compiler would provide if we could simply
* hardcode everything into a single struct.
*/
- mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
- layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
- for (i = 0; i < n_layers; i++) {
- count *= layers[i].size;
- edac_dbg(4, "errcount layer %d size %d\n", i, count);
- ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
- ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
- tot_errcount += 2 * count;
- }
-
- edac_dbg(4, "allocating %d error counters\n", tot_errcount);
- pvt = edac_align_ptr(&ptr, sz_pvt, 1);
- size = ((unsigned long)pvt) + sz_pvt;
+ mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
+ layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
+ pvt = edac_align_ptr(&ptr, sz_pvt, 1);
+ size = ((unsigned long)pvt) + sz_pvt;
edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
size,
@@ -371,14 +495,13 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
if (mci == NULL)
return NULL;
+ mci->dev.release = mci_release;
+ device_initialize(&mci->dev);
+
/* Adjust pointers so they point within the memory we just allocated
* rather than an imaginary chunk of memory located at address 0.
*/
layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
- for (i = 0; i < n_layers; i++) {
- mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
- mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
- }
pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
/* setup index and various internal pointers */
@@ -392,103 +515,11 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
mci->num_cschannel = tot_channels;
mci->csbased = per_rank;
- /*
- * Alocate and fill the csrow/channels structs
- */
- mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
- if (!mci->csrows)
- goto error;
- for (row = 0; row < tot_csrows; row++) {
- csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
- if (!csr)
- goto error;
- mci->csrows[row] = csr;
- csr->csrow_idx = row;
- csr->mci = mci;
- csr->nr_channels = tot_channels;
- csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
- GFP_KERNEL);
- if (!csr->channels)
- goto error;
-
- for (chn = 0; chn < tot_channels; chn++) {
- chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
- if (!chan)
- goto error;
- csr->channels[chn] = chan;
- chan->chan_idx = chn;
- chan->csrow = csr;
- }
- }
-
- /*
- * Allocate and fill the dimm structs
- */
- mci->dimms = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
- if (!mci->dimms)
+ if (edac_mc_alloc_csrows(mci))
goto error;
- memset(&pos, 0, sizeof(pos));
- row = 0;
- chn = 0;
- for (idx = 0; idx < tot_dimms; idx++) {
- chan = mci->csrows[row]->channels[chn];
-
- dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
- if (!dimm)
- goto error;
- mci->dimms[idx] = dimm;
- dimm->mci = mci;
- dimm->idx = idx;
-
- /*
- * Copy DIMM location and initialize it.
- */
- len = sizeof(dimm->label);
- p = dimm->label;
- n = snprintf(p, len, "mc#%u", mc_num);
- p += n;
- len -= n;
- for (j = 0; j < n_layers; j++) {
- n = snprintf(p, len, "%s#%u",
- edac_layer_name[layers[j].type],
- pos[j]);
- p += n;
- len -= n;
- dimm->location[j] = pos[j];
-
- if (len <= 0)
- break;
- }
-
- /* Link it to the csrows old API data */
- chan->dimm = dimm;
- dimm->csrow = row;
- dimm->cschannel = chn;
-
- /* Increment csrow location */
- if (layers[0].is_virt_csrow) {
- chn++;
- if (chn == tot_channels) {
- chn = 0;
- row++;
- }
- } else {
- row++;
- if (row == tot_csrows) {
- row = 0;
- chn++;
- }
- }
-
- /* Increment dimm location */
- for (j = n_layers - 1; j >= 0; j--) {
- pos[j]++;
- if (pos[j] < layers[j].size)
- break;
- pos[j] = 0;
- }
- }
+ if (edac_mc_alloc_dimms(mci))
+ goto error;
mci->op_state = OP_ALLOC;
@@ -505,16 +536,7 @@ void edac_mc_free(struct mem_ctl_info *mci)
{
edac_dbg(1, "\n");
- /* If we're not yet registered with sysfs free only what was allocated
- * in edac_mc_alloc().
- */
- if (!device_is_registered(&mci->dev)) {
- _edac_mc_free(mci);
- return;
- }
-
- /* the mci instance is freed here, when the sysfs object is dropped */
- edac_unregister_sysfs(mci);
+ _edac_mc_free(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
@@ -908,88 +930,51 @@ const char *edac_layer_name[] = {
};
EXPORT_SYMBOL_GPL(edac_layer_name);
-static void edac_inc_ce_error(struct mem_ctl_info *mci,
- bool enable_per_layer_report,
- const int pos[EDAC_MAX_LAYERS],
- const u16 count)
+static void edac_inc_ce_error(struct edac_raw_error_desc *e)
{
- int i, index = 0;
+ int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
+ struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);
- mci->ce_mc += count;
+ mci->ce_mc += e->error_count;
- if (!enable_per_layer_report) {
- mci->ce_noinfo_count += count;
- return;
- }
-
- for (i = 0; i < mci->n_layers; i++) {
- if (pos[i] < 0)
- break;
- index += pos[i];
- mci->ce_per_layer[i][index] += count;
-
- if (i < mci->n_layers - 1)
- index *= mci->layers[i + 1].size;
- }
+ if (dimm)
+ dimm->ce_count += e->error_count;
+ else
+ mci->ce_noinfo_count += e->error_count;
}
-static void edac_inc_ue_error(struct mem_ctl_info *mci,
- bool enable_per_layer_report,
- const int pos[EDAC_MAX_LAYERS],
- const u16 count)
+static void edac_inc_ue_error(struct edac_raw_error_desc *e)
{
- int i, index = 0;
+ int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
+ struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);
- mci->ue_mc += count;
+ mci->ue_mc += e->error_count;
- if (!enable_per_layer_report) {
- mci->ue_noinfo_count += count;
- return;
- }
-
- for (i = 0; i < mci->n_layers; i++) {
- if (pos[i] < 0)
- break;
- index += pos[i];
- mci->ue_per_layer[i][index] += count;
-
- if (i < mci->n_layers - 1)
- index *= mci->layers[i + 1].size;
- }
+ if (dimm)
+ dimm->ue_count += e->error_count;
+ else
+ mci->ue_noinfo_count += e->error_count;
}
-static void edac_ce_error(struct mem_ctl_info *mci,
- const u16 error_count,
- const int pos[EDAC_MAX_LAYERS],
- const char *msg,
- const char *location,
- const char *label,
- const char *detail,
- const char *other_detail,
- const bool enable_per_layer_report,
- const unsigned long page_frame_number,
- const unsigned long offset_in_page,
- long grain)
+static void edac_ce_error(struct edac_raw_error_desc *e)
{
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
unsigned long remapped_page;
- char *msg_aux = "";
-
- if (*msg)
- msg_aux = " ";
if (edac_mc_get_log_ce()) {
- if (other_detail && *other_detail)
- edac_mc_printk(mci, KERN_WARNING,
- "%d CE %s%son %s (%s %s - %s)\n",
- error_count, msg, msg_aux, label,
- location, detail, other_detail);
- else
- edac_mc_printk(mci, KERN_WARNING,
- "%d CE %s%son %s (%s %s)\n",
- error_count, msg, msg_aux, label,
- location, detail);
+ edac_mc_printk(mci, KERN_WARNING,
+ "%d CE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx%s%s)\n",
+ e->error_count, e->msg,
+ *e->msg ? " " : "",
+ e->label, e->location, e->page_frame_number, e->offset_in_page,
+ e->grain, e->syndrome,
+ *e->other_detail ? " - " : "",
+ e->other_detail);
}
- edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);
+
+ edac_inc_ce_error(e);
if (mci->scrub_mode == SCRUB_SW_SRC) {
/*
@@ -1004,60 +989,64 @@ static void edac_ce_error(struct mem_ctl_info *mci,
* be scrubbed.
*/
remapped_page = mci->ctl_page_to_phys ?
- mci->ctl_page_to_phys(mci, page_frame_number) :
- page_frame_number;
+ mci->ctl_page_to_phys(mci, e->page_frame_number) :
+ e->page_frame_number;
- edac_mc_scrub_block(remapped_page,
- offset_in_page, grain);
+ edac_mc_scrub_block(remapped_page, e->offset_in_page, e->grain);
}
}
-static void edac_ue_error(struct mem_ctl_info *mci,
- const u16 error_count,
- const int pos[EDAC_MAX_LAYERS],
- const char *msg,
- const char *location,
- const char *label,
- const char *detail,
- const char *other_detail,
- const bool enable_per_layer_report)
+static void edac_ue_error(struct edac_raw_error_desc *e)
{
- char *msg_aux = "";
-
- if (*msg)
- msg_aux = " ";
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
if (edac_mc_get_log_ue()) {
- if (other_detail && *other_detail)
- edac_mc_printk(mci, KERN_WARNING,
- "%d UE %s%son %s (%s %s - %s)\n",
- error_count, msg, msg_aux, label,
- location, detail, other_detail);
- else
- edac_mc_printk(mci, KERN_WARNING,
- "%d UE %s%son %s (%s %s)\n",
- error_count, msg, msg_aux, label,
- location, detail);
+ edac_mc_printk(mci, KERN_WARNING,
+ "%d UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n",
+ e->error_count, e->msg,
+ *e->msg ? " " : "",
+ e->label, e->location, e->page_frame_number, e->offset_in_page,
+ e->grain,
+ *e->other_detail ? " - " : "",
+ e->other_detail);
}
if (edac_mc_get_panic_on_ue()) {
- if (other_detail && *other_detail)
- panic("UE %s%son %s (%s%s - %s)\n",
- msg, msg_aux, label, location, detail, other_detail);
- else
- panic("UE %s%son %s (%s%s)\n",
- msg, msg_aux, label, location, detail);
+ panic("UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n",
+ e->msg,
+ *e->msg ? " " : "",
+ e->label, e->location, e->page_frame_number, e->offset_in_page,
+ e->grain,
+ *e->other_detail ? " - " : "",
+ e->other_detail);
}
- edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
+ edac_inc_ue_error(e);
}
-void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
- struct mem_ctl_info *mci,
- struct edac_raw_error_desc *e)
+static void edac_inc_csrow(struct edac_raw_error_desc *e, int row, int chan)
{
- char detail[80];
- int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
+ enum hw_event_mc_err_type type = e->type;
+ u16 count = e->error_count;
+
+ if (row < 0)
+ return;
+
+ edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
+
+ if (type == HW_EVENT_ERR_CORRECTED) {
+ mci->csrows[row]->ce_count += count;
+ if (chan >= 0)
+ mci->csrows[row]->channels[chan]->ce_count += count;
+ } else {
+ mci->csrows[row]->ue_count += count;
+ }
+}
+
+void edac_raw_mc_handle_error(struct edac_raw_error_desc *e)
+{
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
u8 grain_bits;
/* Sanity-check driver-supplied grain value. */
@@ -1068,31 +1057,16 @@ void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
/* Report the error via the trace interface */
if (IS_ENABLED(CONFIG_RAS))
- trace_mc_event(type, e->msg, e->label, e->error_count,
+ trace_mc_event(e->type, e->msg, e->label, e->error_count,
mci->mc_idx, e->top_layer, e->mid_layer,
e->low_layer,
(e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
grain_bits, e->syndrome, e->other_detail);
- /* Memory type dependent details about the error */
- if (type == HW_EVENT_ERR_CORRECTED) {
- snprintf(detail, sizeof(detail),
- "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
- e->page_frame_number, e->offset_in_page,
- e->grain, e->syndrome);
- edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
- detail, e->other_detail, e->enable_per_layer_report,
- e->page_frame_number, e->offset_in_page, e->grain);
- } else {
- snprintf(detail, sizeof(detail),
- "page:0x%lx offset:0x%lx grain:%ld",
- e->page_frame_number, e->offset_in_page, e->grain);
-
- edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
- detail, e->other_detail, e->enable_per_layer_report);
- }
-
-
+ if (e->type == HW_EVENT_ERR_CORRECTED)
+ edac_ce_error(e);
+ else
+ edac_ue_error(e);
}
EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);
@@ -1114,25 +1088,27 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
int i, n_labels = 0;
struct edac_raw_error_desc *e = &mci->error_desc;
+ bool any_memory = true;
edac_dbg(3, "MC%d\n", mci->mc_idx);
/* Fills the error report buffer */
memset(e, 0, sizeof (*e));
e->error_count = error_count;
+ e->type = type;
e->top_layer = top_layer;
e->mid_layer = mid_layer;
e->low_layer = low_layer;
e->page_frame_number = page_frame_number;
e->offset_in_page = offset_in_page;
e->syndrome = syndrome;
- e->msg = msg;
- e->other_detail = other_detail;
+ /* need valid strings here for both: */
+ e->msg = msg ?: "";
+ e->other_detail = other_detail ?: "";
/*
- * Check if the event report is consistent and if the memory
- * location is known. If it is known, enable_per_layer_report will be
- * true, the DIMM(s) label info will be filled and the per-layer
+ * Check if the event report is consistent and if the memory location is
+ * known. If it is, the DIMM(s) label info will be filled and the DIMM's
* error counters will be incremented.
*/
for (i = 0; i < mci->n_layers; i++) {
@@ -1151,7 +1127,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
pos[i] = -1;
}
if (pos[i] >= 0)
- e->enable_per_layer_report = true;
+ any_memory = false;
}
/*
@@ -1182,24 +1158,25 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
/*
* If the error is memory-controller wide, there's no need to
- * seek for the affected DIMMs because the whole
- * channel/memory controller/... may be affected.
- * Also, don't show errors for empty DIMM slots.
+ * seek for the affected DIMMs because the whole channel/memory
+ * controller/... may be affected. Also, don't show errors for
+ * empty DIMM slots.
*/
- if (!e->enable_per_layer_report || !dimm->nr_pages)
+ if (!dimm->nr_pages)
continue;
- if (n_labels >= EDAC_MAX_LABELS) {
- e->enable_per_layer_report = false;
- break;
- }
n_labels++;
- if (p != e->label) {
- strcpy(p, OTHER_LABEL);
- p += strlen(OTHER_LABEL);
+ if (n_labels > EDAC_MAX_LABELS) {
+ p = e->label;
+ *p = '\0';
+ } else {
+ if (p != e->label) {
+ strcpy(p, OTHER_LABEL);
+ p += strlen(OTHER_LABEL);
+ }
+ strcpy(p, dimm->label);
+ p += strlen(p);
}
- strcpy(p, dimm->label);
- p += strlen(p);
/*
* get csrow/channel of the DIMM, in order to allow
@@ -1219,22 +1196,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
chan = -2;
}
- if (!e->enable_per_layer_report) {
+ if (any_memory)
strcpy(e->label, "any memory");
- } else {
- edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
- if (p == e->label)
- strcpy(e->label, "unknown memory");
- if (type == HW_EVENT_ERR_CORRECTED) {
- if (row >= 0) {
- mci->csrows[row]->ce_count += error_count;
- if (chan >= 0)
- mci->csrows[row]->channels[chan]->ce_count += error_count;
- }
- } else
- if (row >= 0)
- mci->csrows[row]->ue_count += error_count;
- }
+ else if (!*e->label)
+ strcpy(e->label, "unknown memory");
+
+ edac_inc_csrow(e, row, chan);
/* Fill the RAM location data */
p = e->location;
@@ -1250,6 +1217,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
if (p > e->location)
*(p - 1) = '\0';
- edac_raw_mc_handle_error(type, mci, e);
+ edac_raw_mc_handle_error(e);
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
diff --git a/drivers/edac/edac_mc.h b/drivers/edac/edac_mc.h
index 02aac5c..881b00e 100644
--- a/drivers/edac/edac_mc.h
+++ b/drivers/edac/edac_mc.h
@@ -212,17 +212,13 @@ extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
* edac_raw_mc_handle_error() - Reports a memory event to userspace without
* doing anything to discover the error location.
*
- * @type: severity of the error (CE/UE/Fatal)
- * @mci: a struct mem_ctl_info pointer
* @e: error description
*
* This raw function is used internally by edac_mc_handle_error(). It should
* only be called directly when the hardware error come directly from BIOS,
* like in the case of APEI GHES driver.
*/
-void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
- struct mem_ctl_info *mci,
- struct edac_raw_error_desc *e);
+void edac_raw_mc_handle_error(struct edac_raw_error_desc *e);
/**
* edac_mc_handle_error() - Reports a memory event to userspace.
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 0367554..4e6aca5 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -274,17 +274,8 @@ static const struct attribute_group *csrow_attr_groups[] = {
NULL
};
-static void csrow_attr_release(struct device *dev)
-{
- struct csrow_info *csrow = container_of(dev, struct csrow_info, dev);
-
- edac_dbg(1, "device %s released\n", dev_name(dev));
- kfree(csrow);
-}
-
static const struct device_type csrow_attr_type = {
.groups = csrow_attr_groups,
- .release = csrow_attr_release,
};
/*
@@ -390,6 +381,14 @@ static const struct attribute_group *csrow_dev_groups[] = {
NULL
};
+static void csrow_release(struct device *dev)
+{
+ /*
+ * Nothing to do, just unregister sysfs here. The mci
+ * device owns the data and will also release it.
+ */
+}
+
static inline int nr_pages_per_csrow(struct csrow_info *csrow)
{
int chan, nr_pages = 0;
@@ -408,6 +407,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci,
csrow->dev.type = &csrow_attr_type;
csrow->dev.groups = csrow_dev_groups;
+ csrow->dev.release = csrow_release;
device_initialize(&csrow->dev);
csrow->dev.parent = &mci->dev;
csrow->mci = mci;
@@ -444,11 +444,8 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci)
error:
for (--i; i >= 0; i--) {
- csrow = mci->csrows[i];
- if (!nr_pages_per_csrow(csrow))
- continue;
-
- device_del(&mci->csrows[i]->dev);
+ if (device_is_registered(&mci->csrows[i]->dev))
+ device_unregister(&mci->csrows[i]->dev);
}
return err;
@@ -457,15 +454,13 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci)
static void edac_delete_csrow_objects(struct mem_ctl_info *mci)
{
int i;
- struct csrow_info *csrow;
- for (i = mci->nr_csrows - 1; i >= 0; i--) {
- csrow = mci->csrows[i];
- if (!nr_pages_per_csrow(csrow))
- continue;
- device_unregister(&mci->csrows[i]->dev);
+ for (i = 0; i < mci->nr_csrows; i++) {
+ if (device_is_registered(&mci->csrows[i]->dev))
+ device_unregister(&mci->csrows[i]->dev);
}
}
+
#endif
/*
@@ -556,10 +551,8 @@ static ssize_t dimmdev_ce_count_show(struct device *dev,
char *data)
{
struct dimm_info *dimm = to_dimm(dev);
- u32 count;
- count = dimm->mci->ce_per_layer[dimm->mci->n_layers-1][dimm->idx];
- return sprintf(data, "%u\n", count);
+ return sprintf(data, "%u\n", dimm->ce_count);
}
static ssize_t dimmdev_ue_count_show(struct device *dev,
@@ -567,10 +560,8 @@ static ssize_t dimmdev_ue_count_show(struct device *dev,
char *data)
{
struct dimm_info *dimm = to_dimm(dev);
- u32 count;
- count = dimm->mci->ue_per_layer[dimm->mci->n_layers-1][dimm->idx];
- return sprintf(data, "%u\n", count);
+ return sprintf(data, "%u\n", dimm->ue_count);
}
/* dimm/rank attribute files */
@@ -606,19 +597,18 @@ static const struct attribute_group *dimm_attr_groups[] = {
NULL
};
-static void dimm_attr_release(struct device *dev)
-{
- struct dimm_info *dimm = container_of(dev, struct dimm_info, dev);
-
- edac_dbg(1, "device %s released\n", dev_name(dev));
- kfree(dimm);
-}
-
static const struct device_type dimm_attr_type = {
.groups = dimm_attr_groups,
- .release = dimm_attr_release,
};
+static void dimm_release(struct device *dev)
+{
+ /*
+ * Nothing to do, just unregister sysfs here. The mci
+ * device owns the data and will also release it.
+ */
+}
+
/* Create a DIMM object under specifed memory controller device */
static int edac_create_dimm_object(struct mem_ctl_info *mci,
struct dimm_info *dimm)
@@ -627,6 +617,7 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci,
dimm->mci = mci;
dimm->dev.type = &dimm_attr_type;
+ dimm->dev.release = dimm_release;
device_initialize(&dimm->dev);
dimm->dev.parent = &mci->dev;
@@ -666,7 +657,9 @@ static ssize_t mci_reset_counters_store(struct device *dev,
const char *data, size_t count)
{
struct mem_ctl_info *mci = to_mci(dev);
- int cnt, row, chan, i;
+ struct dimm_info *dimm;
+ int row, chan;
+
mci->ue_mc = 0;
mci->ce_mc = 0;
mci->ue_noinfo_count = 0;
@@ -682,11 +675,9 @@ static ssize_t mci_reset_counters_store(struct device *dev,
ri->channels[chan]->ce_count = 0;
}
- cnt = 1;
- for (i = 0; i < mci->n_layers; i++) {
- cnt *= mci->layers[i].size;
- memset(mci->ce_per_layer[i], 0, cnt * sizeof(u32));
- memset(mci->ue_per_layer[i], 0, cnt * sizeof(u32));
+ mci_for_each_dimm(mci, dimm) {
+ dimm->ue_count = 0;
+ dimm->ce_count = 0;
}
mci->start_time = jiffies;
@@ -891,17 +882,8 @@ static const struct attribute_group *mci_attr_groups[] = {
NULL
};
-static void mci_attr_release(struct device *dev)
-{
- struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
-
- edac_dbg(1, "device %s released\n", dev_name(dev));
- kfree(mci);
-}
-
static const struct device_type mci_attr_type = {
.groups = mci_attr_groups,
- .release = mci_attr_release,
};
/*
@@ -920,8 +902,6 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
/* get the /sys/devices/system/edac subsys reference */
mci->dev.type = &mci_attr_type;
- device_initialize(&mci->dev);
-
mci->dev.parent = mci_pdev;
mci->dev.groups = groups;
dev_set_name(&mci->dev, "mc%d", mci->mc_idx);
@@ -931,7 +911,7 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
err = device_add(&mci->dev);
if (err < 0) {
edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev));
- put_device(&mci->dev);
+ /* no put_device() here, free mci with _edac_mc_free() */
return err;
}
@@ -947,24 +927,20 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
err = edac_create_dimm_object(mci, dimm);
if (err)
- goto fail_unregister_dimm;
+ goto fail;
}
#ifdef CONFIG_EDAC_LEGACY_SYSFS
err = edac_create_csrow_objects(mci);
if (err < 0)
- goto fail_unregister_dimm;
+ goto fail;
#endif
edac_create_debugfs_nodes(mci);
return 0;
-fail_unregister_dimm:
- mci_for_each_dimm(mci, dimm) {
- if (device_is_registered(&dimm->dev))
- device_unregister(&dimm->dev);
- }
- device_unregister(&mci->dev);
+fail:
+ edac_remove_sysfs_mci_device(mci);
return err;
}
@@ -976,6 +952,9 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
{
struct dimm_info *dimm;
+ if (!device_is_registered(&mci->dev))
+ return;
+
edac_dbg(0, "\n");
#ifdef CONFIG_EDAC_DEBUG
@@ -986,17 +965,14 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
#endif
mci_for_each_dimm(mci, dimm) {
- if (dimm->nr_pages == 0)
+ if (!device_is_registered(&dimm->dev))
continue;
edac_dbg(1, "unregistering device %s\n", dev_name(&dimm->dev));
device_unregister(&dimm->dev);
}
-}
-void edac_unregister_sysfs(struct mem_ctl_info *mci)
-{
- edac_dbg(1, "unregistering device %s\n", dev_name(&mci->dev));
- device_unregister(&mci->dev);
+ /* only remove the device, but keep mci */
+ device_del(&mci->dev);
}
static void mc_attr_release(struct device *dev)
@@ -1010,9 +986,6 @@ static void mc_attr_release(struct device *dev)
kfree(dev);
}
-static const struct device_type mc_attr_type = {
- .release = mc_attr_release,
-};
/*
* Init/exit code for the module. Basically, creates/removes /sys/class/rc
*/
@@ -1025,11 +998,10 @@ int __init edac_mc_sysfs_init(void)
return -ENOMEM;
mci_pdev->bus = edac_get_sysfs_subsys();
- mci_pdev->type = &mc_attr_type;
- device_initialize(mci_pdev);
- dev_set_name(mci_pdev, "mc");
+ mci_pdev->release = mc_attr_release;
+ mci_pdev->init_name = "mc";
- err = device_add(mci_pdev);
+ err = device_register(mci_pdev);
if (err < 0) {
edac_dbg(1, "failure: create device %s\n", dev_name(mci_pdev));
put_device(mci_pdev);
diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h
index 388427d..aa1f916 100644
--- a/drivers/edac/edac_module.h
+++ b/drivers/edac/edac_module.h
@@ -28,7 +28,6 @@ void edac_mc_sysfs_exit(void);
extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
const struct attribute_group **groups);
extern void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci);
-void edac_unregister_sysfs(struct mem_ctl_info *mci);
extern int edac_get_log_ue(void);
extern int edac_get_log_ce(void);
extern int edac_get_panic_on_ue(void);
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index b99080d..cb3dab5 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -201,7 +201,6 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
{
- enum hw_event_mc_err_type type;
struct edac_raw_error_desc *e;
struct mem_ctl_info *mci;
struct ghes_edac_pvt *pvt;
@@ -240,17 +239,17 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
switch (sev) {
case GHES_SEV_CORRECTED:
- type = HW_EVENT_ERR_CORRECTED;
+ e->type = HW_EVENT_ERR_CORRECTED;
break;
case GHES_SEV_RECOVERABLE:
- type = HW_EVENT_ERR_UNCORRECTED;
+ e->type = HW_EVENT_ERR_UNCORRECTED;
break;
case GHES_SEV_PANIC:
- type = HW_EVENT_ERR_FATAL;
+ e->type = HW_EVENT_ERR_FATAL;
break;
default:
case GHES_SEV_NO:
- type = HW_EVENT_ERR_INFO;
+ e->type = HW_EVENT_ERR_INFO;
}
edac_dbg(1, "error validation_bits: 0x%08llx\n",
@@ -356,11 +355,8 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
mem_err->mem_dev_handle);
index = get_dimm_smbios_index(mci, mem_err->mem_dev_handle);
- if (index >= 0) {
+ if (index >= 0)
e->top_layer = index;
- e->enable_per_layer_report = true;
- }
-
}
if (p > e->location)
*(p - 1) = '\0';
@@ -442,7 +438,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
if (p > pvt->other_detail)
*(p - 1) = '\0';
- edac_raw_mc_handle_error(type, mci, e);
+ edac_raw_mc_handle_error(e);
unlock:
spin_unlock_irqrestore(&ghes_lock, flags);
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index ea980c5..8874b77 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1239,7 +1239,7 @@ static int __init mce_amd_init(void)
case 0x17:
case 0x18:
- pr_warn("Decoding supported only on Scalable MCA processors.\n");
+ pr_warn_once("Decoding supported only on Scalable MCA processors.\n");
return -EINVAL;
default:
diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
index 2d26338..12211dc 100644
--- a/drivers/edac/synopsys_edac.c
+++ b/drivers/edac/synopsys_edac.c
@@ -477,22 +477,16 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)
if (p->ce_cnt) {
pinf = &p->ceinfo;
- if (!priv->p_data->quirks) {
+ if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT) {
snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
- "DDR ECC error type:%s Row %d Bank %d Col %d ",
- "CE", pinf->row, pinf->bank, pinf->col);
- snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
- "Bit Position: %d Data: 0x%08x\n",
+ "DDR ECC error type:%s Row %d Bank %d BankGroup Number %d Block Number %d Bit Position: %d Data: 0x%08x",
+ "CE", pinf->row, pinf->bank,
+ pinf->bankgrpnr, pinf->blknr,
pinf->bitpos, pinf->data);
} else {
snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
- "DDR ECC error type:%s Row %d Bank %d Col %d ",
- "CE", pinf->row, pinf->bank, pinf->col);
- snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
- "BankGroup Number %d Block Number %d ",
- pinf->bankgrpnr, pinf->blknr);
- snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
- "Bit Position: %d Data: 0x%08x\n",
+ "DDR ECC error type:%s Row %d Bank %d Col %d Bit Position: %d Data: 0x%08x",
+ "CE", pinf->row, pinf->bank, pinf->col,
pinf->bitpos, pinf->data);
}
@@ -503,17 +497,15 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)
if (p->ue_cnt) {
pinf = &p->ueinfo;
- if (!priv->p_data->quirks) {
+ if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT) {
snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
- "DDR ECC error type :%s Row %d Bank %d Col %d ",
- "UE", pinf->row, pinf->bank, pinf->col);
+ "DDR ECC error type :%s Row %d Bank %d BankGroup Number %d Block Number %d",
+ "UE", pinf->row, pinf->bank,
+ pinf->bankgrpnr, pinf->blknr);
} else {
snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
"DDR ECC error type :%s Row %d Bank %d Col %d ",
"UE", pinf->row, pinf->bank, pinf->col);
- snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
- "BankGroup Number %d Block Number %d",
- pinf->bankgrpnr, pinf->blknr);
}
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 621220a..21ea99f6 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -552,7 +552,7 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz,
seed = early_memremap(efi.rng_seed, sizeof(*seed));
if (seed != NULL) {
- size = seed->size;
+ size = READ_ONCE(seed->size);
early_memunmap(seed, sizeof(*seed));
} else {
pr_err("Could not map UEFI random seed!\n");
@@ -562,7 +562,7 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz,
sizeof(*seed) + size);
if (seed != NULL) {
pr_notice("seeding entropy pool\n");
- add_bootloader_randomness(seed->bits, seed->size);
+ add_bootloader_randomness(seed->bits, size);
early_memunmap(seed, sizeof(*seed) + size);
} else {
pr_err("Could not map UEFI random seed!\n");
diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
index 7576450..aff3dfb 100644
--- a/drivers/firmware/efi/efivars.c
+++ b/drivers/firmware/efi/efivars.c
@@ -83,13 +83,16 @@ static ssize_t
efivar_attr_read(struct efivar_entry *entry, char *buf)
{
struct efi_variable *var = &entry->var;
+ unsigned long size = sizeof(var->Data);
char *str = buf;
+ int ret;
if (!entry || !buf)
return -EINVAL;
- var->DataSize = 1024;
- if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data))
+ ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data);
+ var->DataSize = size;
+ if (ret)
return -EIO;
if (var->Attributes & EFI_VARIABLE_NON_VOLATILE)
@@ -116,13 +119,16 @@ static ssize_t
efivar_size_read(struct efivar_entry *entry, char *buf)
{
struct efi_variable *var = &entry->var;
+ unsigned long size = sizeof(var->Data);
char *str = buf;
+ int ret;
if (!entry || !buf)
return -EINVAL;
- var->DataSize = 1024;
- if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data))
+ ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data);
+ var->DataSize = size;
+ if (ret)
return -EIO;
str += sprintf(str, "0x%lx\n", var->DataSize);
@@ -133,12 +139,15 @@ static ssize_t
efivar_data_read(struct efivar_entry *entry, char *buf)
{
struct efi_variable *var = &entry->var;
+ unsigned long size = sizeof(var->Data);
+ int ret;
if (!entry || !buf)
return -EINVAL;
- var->DataSize = 1024;
- if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data))
+ ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data);
+ var->DataSize = size;
+ if (ret)
return -EIO;
memcpy(buf, var->Data, var->DataSize);
@@ -199,6 +208,9 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count)
u8 *data;
int err;
+ if (!entry || !buf)
+ return -EINVAL;
+
if (in_compat_syscall()) {
struct compat_efi_variable *compat;
@@ -250,14 +262,16 @@ efivar_show_raw(struct efivar_entry *entry, char *buf)
{
struct efi_variable *var = &entry->var;
struct compat_efi_variable *compat;
+ unsigned long datasize = sizeof(var->Data);
size_t size;
+ int ret;
if (!entry || !buf)
return 0;
- var->DataSize = 1024;
- if (efivar_entry_get(entry, &entry->var.Attributes,
- &entry->var.DataSize, entry->var.Data))
+ ret = efivar_entry_get(entry, &var->Attributes, &datasize, var->Data);
+ var->DataSize = datasize;
+ if (ret)
return -EIO;
if (in_compat_syscall()) {
diff --git a/drivers/firmware/imx/imx-scu.c b/drivers/firmware/imx/imx-scu.c
index 03b43b7..f71eaa5 100644
--- a/drivers/firmware/imx/imx-scu.c
+++ b/drivers/firmware/imx/imx-scu.c
@@ -29,6 +29,7 @@ struct imx_sc_chan {
struct mbox_client cl;
struct mbox_chan *ch;
int idx;
+ struct completion tx_done;
};
struct imx_sc_ipc {
@@ -100,6 +101,14 @@ int imx_scu_get_handle(struct imx_sc_ipc **ipc)
}
EXPORT_SYMBOL(imx_scu_get_handle);
+/* Callback called when the word of a message is ack-ed, eg read by SCU */
+static void imx_scu_tx_done(struct mbox_client *cl, void *mssg, int r)
+{
+ struct imx_sc_chan *sc_chan = container_of(cl, struct imx_sc_chan, cl);
+
+ complete(&sc_chan->tx_done);
+}
+
static void imx_scu_rx_callback(struct mbox_client *c, void *msg)
{
struct imx_sc_chan *sc_chan = container_of(c, struct imx_sc_chan, cl);
@@ -149,6 +158,19 @@ static int imx_scu_ipc_write(struct imx_sc_ipc *sc_ipc, void *msg)
for (i = 0; i < hdr->size; i++) {
sc_chan = &sc_ipc->chans[i % 4];
+
+ /*
+ * SCU requires that all messages words are written
+ * sequentially but linux MU driver implements multiple
+ * independent channels for each register so ordering between
+ * different channels must be ensured by SCU API interface.
+ *
+ * Wait for tx_done before every send to ensure that no
+ * queueing happens at the mailbox channel level.
+ */
+ wait_for_completion(&sc_chan->tx_done);
+ reinit_completion(&sc_chan->tx_done);
+
ret = mbox_send_message(sc_chan->ch, &data[i]);
if (ret < 0)
return ret;
@@ -247,6 +269,11 @@ static int imx_scu_probe(struct platform_device *pdev)
cl->knows_txdone = true;
cl->rx_callback = imx_scu_rx_callback;
+ /* Initial tx_done completion as "done" */
+ cl->tx_done = imx_scu_tx_done;
+ init_completion(&sc_chan->tx_done);
+ complete(&sc_chan->tx_done);
+
sc_chan->sc_ipc = sc_ipc;
sc_chan->idx = i % 4;
sc_chan->ch = mbox_request_channel_byname(cl, chan_name);
diff --git a/drivers/firmware/imx/misc.c b/drivers/firmware/imx/misc.c
index 4b56a58..d073cb3 100644
--- a/drivers/firmware/imx/misc.c
+++ b/drivers/firmware/imx/misc.c
@@ -16,7 +16,7 @@ struct imx_sc_msg_req_misc_set_ctrl {
u32 ctrl;
u32 val;
u16 resource;
-} __packed;
+} __packed __aligned(4);
struct imx_sc_msg_req_cpu_start {
struct imx_sc_rpc_msg hdr;
@@ -24,18 +24,18 @@ struct imx_sc_msg_req_cpu_start {
u32 address_lo;
u16 resource;
u8 enable;
-} __packed;
+} __packed __aligned(4);
struct imx_sc_msg_req_misc_get_ctrl {
struct imx_sc_rpc_msg hdr;
u32 ctrl;
u16 resource;
-} __packed;
+} __packed __aligned(4);
struct imx_sc_msg_resp_misc_get_ctrl {
struct imx_sc_rpc_msg hdr;
u32 val;
-} __packed;
+} __packed __aligned(4);
/*
* This function sets a miscellaneous control value.
diff --git a/drivers/firmware/imx/scu-pd.c b/drivers/firmware/imx/scu-pd.c
index b556612..af3ae00 100644
--- a/drivers/firmware/imx/scu-pd.c
+++ b/drivers/firmware/imx/scu-pd.c
@@ -61,7 +61,7 @@ struct imx_sc_msg_req_set_resource_power_mode {
struct imx_sc_rpc_msg hdr;
u16 resource;
u8 mode;
-} __packed;
+} __packed __aligned(4);
#define IMX_SCU_PD_NAME_SIZE 20
struct imx_sc_pm_domain {
diff --git a/drivers/fsi/Kconfig b/drivers/fsi/Kconfig
index 92ce6d8..4cc0e63 100644
--- a/drivers/fsi/Kconfig
+++ b/drivers/fsi/Kconfig
@@ -55,6 +55,7 @@
config FSI_MASTER_ASPEED
tristate "FSI ASPEED master"
+ depends on HAS_IOMEM
help
This option enables a FSI master that is present behind an OPB bridge
in the AST2600.
diff --git a/drivers/gpio/gpio-bd71828.c b/drivers/gpio/gpio-bd71828.c
index 04aade9..3dbbc63 100644
--- a/drivers/gpio/gpio-bd71828.c
+++ b/drivers/gpio/gpio-bd71828.c
@@ -10,16 +10,6 @@
#define GPIO_OUT_REG(off) (BD71828_REG_GPIO_CTRL1 + (off))
#define HALL_GPIO_OFFSET 3
-/*
- * These defines can be removed when
- * "gpio: Add definition for GPIO direction"
- * (9208b1e77d6e8e9776f34f46ef4079ecac9c3c25 in GPIO tree) gets merged,
- */
-#ifndef GPIO_LINE_DIRECTION_IN
- #define GPIO_LINE_DIRECTION_IN 1
- #define GPIO_LINE_DIRECTION_OUT 0
-#endif
-
struct bd71828_gpio {
struct rohm_regmap_dev chip;
struct gpio_chip gpio;
diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c
index 147a1bd..c54dd08 100644
--- a/drivers/gpio/gpio-sifive.c
+++ b/drivers/gpio/gpio-sifive.c
@@ -35,7 +35,7 @@ struct sifive_gpio {
void __iomem *base;
struct gpio_chip gc;
struct regmap *regs;
- u32 irq_state;
+ unsigned long irq_state;
unsigned int trigger[SIFIVE_GPIO_MAX];
unsigned int irq_parent[SIFIVE_GPIO_MAX];
};
@@ -94,7 +94,7 @@ static void sifive_gpio_irq_enable(struct irq_data *d)
spin_unlock_irqrestore(&gc->bgpio_lock, flags);
/* Enable interrupts */
- assign_bit(offset, (unsigned long *)&chip->irq_state, 1);
+ assign_bit(offset, &chip->irq_state, 1);
sifive_gpio_set_ie(chip, offset);
}
@@ -104,7 +104,7 @@ static void sifive_gpio_irq_disable(struct irq_data *d)
struct sifive_gpio *chip = gpiochip_get_data(gc);
int offset = irqd_to_hwirq(d) % SIFIVE_GPIO_MAX;
- assign_bit(offset, (unsigned long *)&chip->irq_state, 0);
+ assign_bit(offset, &chip->irq_state, 0);
sifive_gpio_set_ie(chip, offset);
irq_chip_disable_parent(d);
}
diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c
index a9748b5..67f9f82 100644
--- a/drivers/gpio/gpio-xilinx.c
+++ b/drivers/gpio/gpio-xilinx.c
@@ -147,9 +147,10 @@ static void xgpio_set_multiple(struct gpio_chip *gc, unsigned long *mask,
for (i = 0; i < gc->ngpio; i++) {
if (*mask == 0)
break;
+ /* Once finished with an index write it out to the register */
if (index != xgpio_index(chip, i)) {
xgpio_writereg(chip->regs + XGPIO_DATA_OFFSET +
- xgpio_regoffset(chip, i),
+ index * XGPIO_CHANNEL_OFFSET,
chip->gpio_state[index]);
spin_unlock_irqrestore(&chip->gpio_lock[index], flags);
index = xgpio_index(chip, i);
@@ -165,7 +166,7 @@ static void xgpio_set_multiple(struct gpio_chip *gc, unsigned long *mask,
}
xgpio_writereg(chip->regs + XGPIO_DATA_OFFSET +
- xgpio_regoffset(chip, i), chip->gpio_state[index]);
+ index * XGPIO_CHANNEL_OFFSET, chip->gpio_state[index]);
spin_unlock_irqrestore(&chip->gpio_lock[index], flags);
}
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 31fee5e..0017367 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -21,18 +21,21 @@
#include "gpiolib.h"
#include "gpiolib-acpi.h"
-#define QUIRK_NO_EDGE_EVENTS_ON_BOOT 0x01l
-#define QUIRK_NO_WAKEUP 0x02l
-
static int run_edge_events_on_boot = -1;
module_param(run_edge_events_on_boot, int, 0444);
MODULE_PARM_DESC(run_edge_events_on_boot,
"Run edge _AEI event-handlers at boot: 0=no, 1=yes, -1=auto");
-static int honor_wakeup = -1;
-module_param(honor_wakeup, int, 0444);
-MODULE_PARM_DESC(honor_wakeup,
- "Honor the ACPI wake-capable flag: 0=no, 1=yes, -1=auto");
+static char *ignore_wake;
+module_param(ignore_wake, charp, 0444);
+MODULE_PARM_DESC(ignore_wake,
+ "controller@pin combos on which to ignore the ACPI wake flag "
+ "ignore_wake=controller@pin[,controller@pin[,...]]");
+
+struct acpi_gpiolib_dmi_quirk {
+ bool no_edge_events_on_boot;
+ char *ignore_wake;
+};
/**
* struct acpi_gpio_event - ACPI GPIO event handler data
@@ -202,6 +205,57 @@ static void acpi_gpiochip_request_irqs(struct acpi_gpio_chip *acpi_gpio)
acpi_gpiochip_request_irq(acpi_gpio, event);
}
+static bool acpi_gpio_in_ignore_list(const char *controller_in, int pin_in)
+{
+ const char *controller, *pin_str;
+ int len, pin;
+ char *endp;
+
+ controller = ignore_wake;
+ while (controller) {
+ pin_str = strchr(controller, '@');
+ if (!pin_str)
+ goto err;
+
+ len = pin_str - controller;
+ if (len == strlen(controller_in) &&
+ strncmp(controller, controller_in, len) == 0) {
+ pin = simple_strtoul(pin_str + 1, &endp, 10);
+ if (*endp != 0 && *endp != ',')
+ goto err;
+
+ if (pin == pin_in)
+ return true;
+ }
+
+ controller = strchr(controller, ',');
+ if (controller)
+ controller++;
+ }
+
+ return false;
+err:
+ pr_err_once("Error invalid value for gpiolib_acpi.ignore_wake: %s\n",
+ ignore_wake);
+ return false;
+}
+
+static bool acpi_gpio_irq_is_wake(struct device *parent,
+ struct acpi_resource_gpio *agpio)
+{
+ int pin = agpio->pin_table[0];
+
+ if (agpio->wake_capable != ACPI_WAKE_CAPABLE)
+ return false;
+
+ if (acpi_gpio_in_ignore_list(dev_name(parent), pin)) {
+ dev_info(parent, "Ignoring wakeup on pin %d\n", pin);
+ return false;
+ }
+
+ return true;
+}
+
/* Always returns AE_OK so that we keep looping over the resources */
static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares,
void *context)
@@ -289,7 +343,7 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares,
event->handle = evt_handle;
event->handler = handler;
event->irq = irq;
- event->irq_is_wake = honor_wakeup && agpio->wake_capable == ACPI_WAKE_CAPABLE;
+ event->irq_is_wake = acpi_gpio_irq_is_wake(chip->parent, agpio);
event->pin = pin;
event->desc = desc;
@@ -1328,7 +1382,9 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = {
DMI_MATCH(DMI_SYS_VENDOR, "MINIX"),
DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"),
},
- .driver_data = (void *)QUIRK_NO_EDGE_EVENTS_ON_BOOT,
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .no_edge_events_on_boot = true,
+ },
},
{
/*
@@ -1341,16 +1397,20 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = {
DMI_MATCH(DMI_SYS_VENDOR, "Wortmann_AG"),
DMI_MATCH(DMI_PRODUCT_NAME, "TERRA_PAD_1061"),
},
- .driver_data = (void *)QUIRK_NO_EDGE_EVENTS_ON_BOOT,
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .no_edge_events_on_boot = true,
+ },
},
{
/*
- * Various HP X2 10 Cherry Trail models use an external
- * embedded-controller connected via I2C + an ACPI GPIO
- * event handler. The embedded controller generates various
- * spurious wakeup events when suspended. So disable wakeup
- * for its handler (it uses the only ACPI GPIO event handler).
- * This breaks wakeup when opening the lid, the user needs
+ * HP X2 10 models with Cherry Trail SoC + TI PMIC use an
+ * external embedded-controller connected via I2C + an ACPI GPIO
+ * event handler on INT33FF:01 pin 0, causing spurious wakeups.
+ * When suspending by closing the LID, the power to the USB
+ * keyboard is turned off, causing INT0002 ACPI events to
+ * trigger once the XHCI controller notices the keyboard is
+ * gone. So INT0002 events cause spurious wakeups too. Ignoring
+ * EC wakes breaks wakeup when opening the lid, the user needs
* to press the power-button to wakeup the system. The
* alternative is suspend simply not working, which is worse.
*/
@@ -1358,33 +1418,61 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = {
DMI_MATCH(DMI_SYS_VENDOR, "HP"),
DMI_MATCH(DMI_PRODUCT_NAME, "HP x2 Detachable 10-p0XX"),
},
- .driver_data = (void *)QUIRK_NO_WAKEUP,
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_wake = "INT33FF:01@0,INT0002:00@2",
+ },
+ },
+ {
+ /*
+ * HP X2 10 models with Bay Trail SoC + AXP288 PMIC use an
+ * external embedded-controller connected via I2C + an ACPI GPIO
+ * event handler on INT33FC:02 pin 28, causing spurious wakeups.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"),
+ DMI_MATCH(DMI_BOARD_NAME, "815D"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_wake = "INT33FC:02@28",
+ },
+ },
+ {
+ /*
+ * HP X2 10 models with Cherry Trail SoC + AXP288 PMIC use an
+ * external embedded-controller connected via I2C + an ACPI GPIO
+ * event handler on INT33FF:01 pin 0, causing spurious wakeups.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"),
+ DMI_MATCH(DMI_BOARD_NAME, "813E"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_wake = "INT33FF:01@0",
+ },
},
{} /* Terminating entry */
};
static int acpi_gpio_setup_params(void)
{
+ const struct acpi_gpiolib_dmi_quirk *quirk = NULL;
const struct dmi_system_id *id;
- long quirks = 0;
id = dmi_first_match(gpiolib_acpi_quirks);
if (id)
- quirks = (long)id->driver_data;
+ quirk = id->driver_data;
if (run_edge_events_on_boot < 0) {
- if (quirks & QUIRK_NO_EDGE_EVENTS_ON_BOOT)
+ if (quirk && quirk->no_edge_events_on_boot)
run_edge_events_on_boot = 0;
else
run_edge_events_on_boot = 1;
}
- if (honor_wakeup < 0) {
- if (quirks & QUIRK_NO_WAKEUP)
- honor_wakeup = 0;
- else
- honor_wakeup = 1;
- }
+ if (ignore_wake == NULL && quirk && quirk->ignore_wake)
+ ignore_wake = quirk->ignore_wake;
return 0;
}
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 7532834..00fb91f 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2306,9 +2306,16 @@ static void gpiochip_irq_disable(struct irq_data *d)
{
struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
+ /*
+ * Since we override .irq_disable() we need to mimic the
+ * behaviour of __irq_disable() in irq/chip.c.
+ * First call .irq_disable() if it exists, else mimic the
+ * behaviour of mask_irq() which calls .irq_mask() if
+ * it exists.
+ */
if (chip->irq.irq_disable)
chip->irq.irq_disable(d);
- else
+ else if (chip->irq.chip->irq_mask)
chip->irq.chip->irq_mask(d);
gpiochip_disable_irq(chip, d->hwirq);
}
@@ -3035,13 +3042,33 @@ EXPORT_SYMBOL_GPL(gpiochip_free_own_desc);
* rely on gpio_request() having been called beforehand.
*/
-static int gpio_set_config(struct gpio_chip *gc, unsigned int offset,
- enum pin_config_param mode)
+static int gpio_do_set_config(struct gpio_chip *gc, unsigned int offset,
+ unsigned long config)
{
if (!gc->set_config)
return -ENOTSUPP;
- return gc->set_config(gc, offset, mode);
+ return gc->set_config(gc, offset, config);
+}
+
+static int gpio_set_config(struct gpio_chip *gc, unsigned int offset,
+ enum pin_config_param mode)
+{
+ unsigned long config;
+ unsigned arg;
+
+ switch (mode) {
+ case PIN_CONFIG_BIAS_PULL_DOWN:
+ case PIN_CONFIG_BIAS_PULL_UP:
+ arg = 1;
+ break;
+
+ default:
+ arg = 0;
+ }
+
+ config = PIN_CONF_PACKED(mode, arg);
+ return gpio_do_set_config(gc, offset, config);
}
static int gpio_set_bias(struct gpio_chip *chip, struct gpio_desc *desc)
@@ -3277,7 +3304,7 @@ int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce)
chip = desc->gdev->chip;
config = pinconf_to_config_packed(PIN_CONFIG_INPUT_DEBOUNCE, debounce);
- return gpio_set_config(chip, gpio_chip_hwgpio(desc), config);
+ return gpio_do_set_config(chip, gpio_chip_hwgpio(desc), config);
}
EXPORT_SYMBOL_GPL(gpiod_set_debounce);
@@ -3311,7 +3338,7 @@ int gpiod_set_transitory(struct gpio_desc *desc, bool transitory)
packed = pinconf_to_config_packed(PIN_CONFIG_PERSIST_STATE,
!transitory);
gpio = gpio_chip_hwgpio(desc);
- rc = gpio_set_config(chip, gpio, packed);
+ rc = gpio_do_set_config(chip, gpio, packed);
if (rc == -ENOTSUPP) {
dev_dbg(&desc->gdev->dev, "Persistence not supported for GPIO %d\n",
gpio);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index f24ed9a..337d7cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -781,11 +781,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
ssize_t result = 0;
uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;
- if (size & 3 || *pos & 3)
+ if (size > 4096 || size & 3 || *pos & 3)
return -EINVAL;
/* decode offset */
- offset = *pos & GENMASK_ULL(11, 0);
+ offset = (*pos & GENMASK_ULL(11, 0)) >> 2;
se = (*pos & GENMASK_ULL(19, 12)) >> 12;
sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
@@ -823,7 +823,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
while (size) {
uint32_t value;
- value = data[offset++];
+ value = data[result >> 2];
r = put_user(value, (uint32_t *)buf);
if (r) {
result = r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 39cd545..b897585 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3913,6 +3913,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
if (r)
goto out;
+ amdgpu_fbdev_set_suspend(tmp_adev, 0);
+
/* must succeed. */
amdgpu_ras_resume(tmp_adev);
@@ -4086,6 +4088,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
*/
amdgpu_unregister_gpu_instance(tmp_adev);
+ amdgpu_fbdev_set_suspend(adev, 1);
+
/* disable ras on ALL IPs */
if (!(in_ras_intr && !use_baco) &&
amdgpu_device_ip_need_full_reset(tmp_adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 94e2fd7..42f4feb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1389,7 +1389,7 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
static struct drm_driver kms_driver = {
.driver_features =
- DRIVER_USE_AGP | DRIVER_ATOMIC |
+ DRIVER_ATOMIC |
DRIVER_GEM |
DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ |
DRIVER_SYNCOBJ_TIMELINE,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index d3c27a3..7546da0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -195,6 +195,7 @@ struct amdgpu_gmc {
uint32_t srbm_soft_reset;
bool prt_warning;
uint64_t stolen_size;
+ uint32_t sdpif_register;
/* apertures */
u64 shared_aperture_start;
u64 shared_aperture_end;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
index 07914e34..1311d6ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
@@ -52,7 +52,7 @@ static int amdgpu_perf_event_init(struct perf_event *event)
return -ENOENT;
/* update the hw_perf_event struct with config data */
- hwc->conf = event->attr.config;
+ hwc->config = event->attr.config;
return 0;
}
@@ -74,9 +74,9 @@ static void amdgpu_perf_start(struct perf_event *event, int flags)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
if (!(flags & PERF_EF_RELOAD))
- pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1);
+ pe->adev->df.funcs->pmc_start(pe->adev, hwc->config, 1);
- pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 0);
+ pe->adev->df.funcs->pmc_start(pe->adev, hwc->config, 0);
break;
default:
break;
@@ -101,7 +101,7 @@ static void amdgpu_perf_read(struct perf_event *event)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
- pe->adev->df.funcs->pmc_get_count(pe->adev, hwc->conf,
+ pe->adev->df.funcs->pmc_get_count(pe->adev, hwc->config,
&count);
break;
default:
@@ -126,7 +126,7 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
- pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 0);
+ pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, 0);
break;
default:
break;
@@ -156,7 +156,8 @@ static int amdgpu_perf_add(struct perf_event *event, int flags)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
- retval = pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1);
+ retval = pe->adev->df.funcs->pmc_start(pe->adev,
+ hwc->config, 1);
break;
default:
return 0;
@@ -184,7 +185,7 @@ static void amdgpu_perf_del(struct perf_event *event, int flags)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
- pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 1);
+ pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, 1);
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 3a1570d..146f966 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1013,6 +1013,30 @@ static int psp_dtm_initialize(struct psp_context *psp)
return 0;
}
+static int psp_dtm_unload(struct psp_context *psp)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ /*
+ * TODO: bypass the unloading in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ psp_prep_ta_unload_cmd_buf(cmd, psp->dtm_context.session_id);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ kfree(cmd);
+
+ return ret;
+}
+
int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
{
/*
@@ -1037,7 +1061,7 @@ static int psp_dtm_terminate(struct psp_context *psp)
if (!psp->dtm_context.dtm_initialized)
return 0;
- ret = psp_hdcp_unload(psp);
+ ret = psp_dtm_unload(psp);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index dee4462..c6e9885 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -974,7 +974,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
/* Map SG to device */
r = -ENOMEM;
nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
- if (nents != ttm->sg->nents)
+ if (nents == 0)
goto release_sg;
/* convert SG to linear array of pages and dma addresses */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index d6deb0e..6fe0573 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -179,6 +179,7 @@ struct amdgpu_vcn_inst {
struct amdgpu_irq_src irq;
struct amdgpu_vcn_reg external;
struct amdgpu_bo *dpg_sram_bo;
+ struct dpg_pause_state pause_state;
void *dpg_sram_cpu_addr;
uint64_t dpg_sram_gpu_addr;
uint32_t *dpg_sram_curr_addr;
@@ -190,8 +191,6 @@ struct amdgpu_vcn {
const struct firmware *fw; /* VCN firmware */
unsigned num_enc_rings;
enum amd_powergating_state cur_state;
- struct dpg_pause_state pause_state;
-
bool indirect_sram;
uint8_t num_vcn_inst;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 1785fda..0270259 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -52,7 +52,7 @@
* 1. Primary ring
* 2. Async ring
*/
-#define GFX10_NUM_GFX_RINGS 2
+#define GFX10_NUM_GFX_RINGS_NV1X 1
#define GFX10_MEC_HPD_SIZE 2048
#define F32_CE_PROGRAM_RAM_SIZE 65536
@@ -1304,7 +1304,7 @@ static int gfx_v10_0_sw_init(void *handle)
case CHIP_NAVI14:
case CHIP_NAVI12:
adev->gfx.me.num_me = 1;
- adev->gfx.me.num_pipe_per_me = 2;
+ adev->gfx.me.num_pipe_per_me = 1;
adev->gfx.me.num_queue_per_pipe = 1;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
@@ -2710,18 +2710,20 @@ static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
amdgpu_ring_commit(ring);
/* submit cs packet to copy state 0 to next available state */
- ring = &adev->gfx.gfx_ring[1];
- r = amdgpu_ring_alloc(ring, 2);
- if (r) {
- DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
- return r;
+ if (adev->gfx.num_gfx_rings > 1) {
+ /* maximum supported gfx ring is 2 */
+ ring = &adev->gfx.gfx_ring[1];
+ r = amdgpu_ring_alloc(ring, 2);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_commit(ring);
}
-
- amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
- amdgpu_ring_write(ring, 0);
-
- amdgpu_ring_commit(ring);
-
return 0;
}
@@ -2818,39 +2820,41 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
/* Init gfx ring 1 for pipe 1 */
- mutex_lock(&adev->srbm_mutex);
- gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
- ring = &adev->gfx.gfx_ring[1];
- rb_bufsz = order_base_2(ring->ring_size / 8);
- tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
- tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
- WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
- /* Initialize the ring buffer's write pointers */
- ring->wptr = 0;
- WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
- /* Set the wb address wether it's enabled or not */
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
- WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
- WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
- CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
- WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
- lower_32_bits(wptr_gpu_addr));
- WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
- upper_32_bits(wptr_gpu_addr));
+ if (adev->gfx.num_gfx_rings > 1) {
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
+ /* maximum supported gfx ring is 2 */
+ ring = &adev->gfx.gfx_ring[1];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
+ WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
+ /* Set the wb address wether it's enabled or not */
+ rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+ CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
- mdelay(1);
- WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
+ mdelay(1);
+ WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
- rb_addr = ring->gpu_addr >> 8;
- WREG32_SOC15(GC, 0, mmCP_RB1_BASE, rb_addr);
- WREG32_SOC15(GC, 0, mmCP_RB1_BASE_HI, upper_32_bits(rb_addr));
- WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1);
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, mmCP_RB1_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, mmCP_RB1_BASE_HI, upper_32_bits(rb_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1);
- gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
- mutex_unlock(&adev->srbm_mutex);
-
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
+ }
/* Switch to pipe 0 */
mutex_lock(&adev->srbm_mutex);
gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
@@ -3513,6 +3517,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
/* reset ring buffer */
ring->wptr = 0;
+ atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
amdgpu_ring_clear_ring(ring);
} else {
amdgpu_ring_clear_ring(ring);
@@ -3923,11 +3928,13 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
uint64_t clock;
+ amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->gfx.gpu_clock_mutex);
WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
mutex_unlock(&adev->gfx.gpu_clock_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
return clock;
}
@@ -3964,7 +3971,8 @@ static int gfx_v10_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS;
+ adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X;
+
adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
gfx_v10_0_set_kiq_pm4_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 90f64b8..889154a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1193,6 +1193,14 @@ static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
return false;
}
+static bool is_raven_kicker(struct amdgpu_device *adev)
+{
+ if (adev->pm.fw_version >= 0x41e2b)
+ return true;
+ else
+ return false;
+}
+
static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
@@ -1205,9 +1213,8 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
break;
case CHIP_RAVEN:
if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) &&
- ((adev->gfx.rlc_fw_version != 106 &&
+ ((!is_raven_kicker(adev) &&
adev->gfx.rlc_fw_version < 531) ||
- (adev->gfx.rlc_fw_version == 53815) ||
(adev->gfx.rlc_feature_version < 1) ||
!adev->gfx.rlc.is_rlc_v2_1))
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
@@ -3656,6 +3663,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
/* reset ring buffer */
ring->wptr = 0;
+ atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
amdgpu_ring_clear_ring(ring);
} else {
amdgpu_ring_clear_ring(ring);
@@ -3959,6 +3967,7 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
uint64_t clock;
+ amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->gfx.gpu_clock_mutex);
if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
uint32_t tmp, lsb, msb, i = 0;
@@ -3977,6 +3986,7 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
}
mutex_unlock(&adev->gfx.gpu_clock_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
return clock;
}
@@ -4374,9 +4384,17 @@ static int gfx_v9_0_ecc_late_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = gfx_v9_0_do_edc_gds_workarounds(adev);
- if (r)
- return r;
+ /*
+ * Temp workaround to fix the issue that CP firmware fails to
+ * update read pointer when CPDMA is writing clearing operation
+ * to GDS in suspend/resume sequence on several cards. So just
+ * limit this operation in cold boot sequence.
+ */
+ if (!adev->in_suspend) {
+ r = gfx_v9_0_do_edc_gds_workarounds(adev);
+ if (r)
+ return r;
+ }
/* requires IBs so do in late init after IB pool is initialized */
r = gfx_v9_0_do_edc_gpr_workarounds(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 90216ab..cc0c273 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1272,6 +1272,19 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
}
/**
+ * gmc_v9_0_restore_registers - restores regs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This restores register values, saved at suspend.
+ */
+static void gmc_v9_0_restore_registers(struct amdgpu_device *adev)
+{
+ if (adev->asic_type == CHIP_RAVEN)
+ WREG32(mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register);
+}
+
+/**
* gmc_v9_0_gart_enable - gart enable
*
* @adev: amdgpu_device pointer
@@ -1377,6 +1390,20 @@ static int gmc_v9_0_hw_init(void *handle)
}
/**
+ * gmc_v9_0_save_registers - saves regs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This saves potential register values that should be
+ * restored upon resume
+ */
+static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
+{
+ if (adev->asic_type == CHIP_RAVEN)
+ adev->gmc.sdpif_register = RREG32(mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
+}
+
+/**
* gmc_v9_0_gart_disable - gart disable
*
* @adev: amdgpu_device pointer
@@ -1412,9 +1439,16 @@ static int gmc_v9_0_hw_fini(void *handle)
static int gmc_v9_0_suspend(void *handle)
{
+ int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- return gmc_v9_0_hw_fini(adev);
+ r = gmc_v9_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ gmc_v9_0_save_registers(adev);
+
+ return 0;
}
static int gmc_v9_0_resume(void *handle)
@@ -1422,6 +1456,7 @@ static int gmc_v9_0_resume(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ gmc_v9_0_restore_registers(adev);
r = gmc_v9_0_hw_init(adev);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index ff2e6e1..6173951 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -693,7 +693,7 @@ static int jpeg_v2_0_set_clockgating_state(void *handle,
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
- if (jpeg_v2_0_is_idle(handle))
+ if (!jpeg_v2_0_is_idle(handle))
return -EBUSY;
jpeg_v2_0_enable_clock_gating(adev);
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index c6d046d..c04c207 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -477,7 +477,7 @@ static int jpeg_v2_5_set_clockgating_state(void *handle,
continue;
if (enable) {
- if (jpeg_v2_5_is_idle(handle))
+ if (!jpeg_v2_5_is_idle(handle))
return -EBUSY;
jpeg_v2_5_enable_clock_gating(adev, i);
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 15f3424..d8945c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -89,6 +89,13 @@
#define HDP_MEM_POWER_CTRL__RC_MEM_POWER_CTRL_EN_MASK 0x00010000L
#define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK 0x00020000L
#define mmHDP_MEM_POWER_CTRL_BASE_IDX 0
+
+/* for Vega20/arcturus regiter offset change */
+#define mmROM_INDEX_VG20 0x00e4
+#define mmROM_INDEX_VG20_BASE_IDX 0
+#define mmROM_DATA_VG20 0x00e5
+#define mmROM_DATA_VG20_BASE_IDX 0
+
/*
* Indirect registers accessor
*/
@@ -272,7 +279,12 @@ static u32 soc15_get_config_memsize(struct amdgpu_device *adev)
static u32 soc15_get_xclk(struct amdgpu_device *adev)
{
- return adev->clock.spll.reference_freq;
+ u32 reference_clock = adev->clock.spll.reference_freq;
+
+ if (adev->asic_type == CHIP_RAVEN)
+ return reference_clock / 4;
+
+ return reference_clock;
}
@@ -304,6 +316,8 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device *adev,
{
u32 *dw_ptr;
u32 i, length_dw;
+ uint32_t rom_index_offset;
+ uint32_t rom_data_offset;
if (bios == NULL)
return false;
@@ -316,11 +330,23 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device *adev,
dw_ptr = (u32 *)bios;
length_dw = ALIGN(length_bytes, 4) / 4;
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ rom_index_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX_VG20);
+ rom_data_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA_VG20);
+ break;
+ default:
+ rom_index_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX);
+ rom_data_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA);
+ break;
+ }
+
/* set rom index to 0 */
- WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX), 0);
+ WREG32(rom_index_offset, 0);
/* read out the rom data */
for (i = 0; i < length_dw; i++)
- dw_ptr[i] = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA));
+ dw_ptr[i] = RREG32(rom_data_offset);
return true;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 1a24fad..09b0572 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -1207,9 +1207,10 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
struct amdgpu_ring *ring;
/* pause/unpause if state is changed */
- if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
+ if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
- adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
+ adev->vcn.inst[inst_idx].pause_state.fw_based,
+ adev->vcn.inst[inst_idx].pause_state.jpeg,
new_state->fw_based, new_state->jpeg);
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
@@ -1258,13 +1259,14 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
}
- adev->vcn.pause_state.fw_based = new_state->fw_based;
+ adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
}
/* pause/unpause if state is changed */
- if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
+ if (adev->vcn.inst[inst_idx].pause_state.jpeg != new_state->jpeg) {
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
- adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
+ adev->vcn.inst[inst_idx].pause_state.fw_based,
+ adev->vcn.inst[inst_idx].pause_state.jpeg,
new_state->fw_based, new_state->jpeg);
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
@@ -1318,7 +1320,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
}
- adev->vcn.pause_state.jpeg = new_state->jpeg;
+ adev->vcn.inst[inst_idx].pause_state.jpeg = new_state->jpeg;
}
return 0;
@@ -1350,7 +1352,7 @@ static int vcn_v1_0_set_clockgating_state(void *handle,
if (enable) {
/* wait for STATUS to clear */
- if (vcn_v1_0_is_idle(handle))
+ if (!vcn_v1_0_is_idle(handle))
return -EBUSY;
vcn_v1_0_enable_clock_gating(adev);
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index 4f72167..b7f1734 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -1137,9 +1137,9 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
int ret_code;
/* pause/unpause if state is changed */
- if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
+ if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
DRM_DEBUG("dpg pause state changed %d -> %d",
- adev->vcn.pause_state.fw_based, new_state->fw_based);
+ adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
@@ -1185,7 +1185,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
}
- adev->vcn.pause_state.fw_based = new_state->fw_based;
+ adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
}
return 0;
@@ -1217,7 +1217,7 @@ static int vcn_v2_0_set_clockgating_state(void *handle,
if (enable) {
/* wait for STATUS to clear */
- if (vcn_v2_0_is_idle(handle))
+ if (!vcn_v2_0_is_idle(handle))
return -EBUSY;
vcn_v2_0_enable_clock_gating(adev);
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 70fae79..678253d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -1367,9 +1367,9 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
int ret_code;
/* pause/unpause if state is changed */
- if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
+ if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
DRM_DEBUG("dpg pause state changed %d -> %d",
- adev->vcn.pause_state.fw_based, new_state->fw_based);
+ adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
reg_data = RREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE) &
(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
@@ -1407,14 +1407,14 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
RREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF);
SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS,
- 0x0, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
}
} else {
/* unpause dpg, no need to wait */
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data);
}
- adev->vcn.pause_state.fw_based = new_state->fw_based;
+ adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
}
return 0;
@@ -1672,7 +1672,7 @@ static int vcn_v2_5_set_clockgating_state(void *handle,
return 0;
if (enable) {
- if (vcn_v2_5_is_idle(handle))
+ if (!vcn_v2_5_is_idle(handle))
return -EBUSY;
vcn_v2_5_enable_clock_gating(adev);
} else {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 2795415..6240259 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -522,8 +522,9 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params)
acrtc_state = to_dm_crtc_state(acrtc->base.state);
- DRM_DEBUG_DRIVER("crtc:%d, vupdate-vrr:%d\n", acrtc->crtc_id,
- amdgpu_dm_vrr_active(acrtc_state));
+ DRM_DEBUG_DRIVER("crtc:%d, vupdate-vrr:%d, planes:%d\n", acrtc->crtc_id,
+ amdgpu_dm_vrr_active(acrtc_state),
+ acrtc_state->active_planes);
amdgpu_dm_crtc_handle_crc_irq(&acrtc->base);
drm_crtc_handle_vblank(&acrtc->base);
@@ -543,7 +544,18 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params)
&acrtc_state->vrr_params.adjust);
}
- if (acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED) {
+ /*
+ * If there aren't any active_planes then DCH HUBP may be clock-gated.
+ * In that case, pageflip completion interrupts won't fire and pageflip
+ * completion events won't get delivered. Prevent this by sending
+ * pending pageflip events from here if a flip is still pending.
+ *
+ * If any planes are enabled, use dm_pflip_high_irq() instead, to
+ * avoid race conditions between flip programming and completion,
+ * which could cause too early flip completion events.
+ */
+ if (acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED &&
+ acrtc_state->active_planes == 0) {
if (acrtc->event) {
drm_crtc_send_vblank_event(&acrtc->base, acrtc->event);
acrtc->event = NULL;
@@ -1422,6 +1434,73 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend)
drm_kms_helper_hotplug_event(dev);
}
+static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev)
+{
+ struct smu_context *smu = &adev->smu;
+ int ret = 0;
+
+ if (!is_support_sw_smu(adev))
+ return 0;
+
+ /* This interface is for dGPU Navi1x.Linux dc-pplib interface depends
+ * on window driver dc implementation.
+ * For Navi1x, clock settings of dcn watermarks are fixed. the settings
+ * should be passed to smu during boot up and resume from s3.
+ * boot up: dc calculate dcn watermark clock settings within dc_create,
+ * dcn20_resource_construct
+ * then call pplib functions below to pass the settings to smu:
+ * smu_set_watermarks_for_clock_ranges
+ * smu_set_watermarks_table
+ * navi10_set_watermarks_table
+ * smu_write_watermarks_table
+ *
+ * For Renoir, clock settings of dcn watermark are also fixed values.
+ * dc has implemented different flow for window driver:
+ * dc_hardware_init / dc_set_power_state
+ * dcn10_init_hw
+ * notify_wm_ranges
+ * set_wm_ranges
+ * -- Linux
+ * smu_set_watermarks_for_clock_ranges
+ * renoir_set_watermarks_table
+ * smu_write_watermarks_table
+ *
+ * For Linux,
+ * dc_hardware_init -> amdgpu_dm_init
+ * dc_set_power_state --> dm_resume
+ *
+ * therefore, this function apply to navi10/12/14 but not Renoir
+ * *
+ */
+ switch(adev->asic_type) {
+ case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
+ break;
+ default:
+ return 0;
+ }
+
+ mutex_lock(&smu->mutex);
+
+ /* pass data to smu controller */
+ if ((smu->watermarks_bitmap & WATERMARKS_EXIST) &&
+ !(smu->watermarks_bitmap & WATERMARKS_LOADED)) {
+ ret = smu_write_watermarks_table(smu);
+
+ if (ret) {
+ mutex_unlock(&smu->mutex);
+ DRM_ERROR("Failed to update WMTABLE!\n");
+ return ret;
+ }
+ smu->watermarks_bitmap |= WATERMARKS_LOADED;
+ }
+
+ mutex_unlock(&smu->mutex);
+
+ return 0;
+}
+
/**
* dm_hw_init() - Initialize DC device
* @handle: The base driver device containing the amdgpu_dm device.
@@ -1700,6 +1779,8 @@ static int dm_resume(void *handle)
amdgpu_dm_irq_resume_late(adev);
+ amdgpu_dm_smu_write_watermarks_table(adev);
+
return 0;
}
@@ -1911,7 +1992,7 @@ static void handle_hpd_irq(void *param)
mutex_lock(&aconnector->hpd_lock);
#ifdef CONFIG_DRM_AMD_DC_HDCP
- if (adev->asic_type >= CHIP_RAVEN)
+ if (adev->dm.hdcp_workqueue)
hdcp_reset_display(adev->dm.hdcp_workqueue, aconnector->dc_link->link_index);
#endif
if (aconnector->fake_enable)
@@ -2088,8 +2169,10 @@ static void handle_hpd_rx_irq(void *param)
}
}
#ifdef CONFIG_DRM_AMD_DC_HDCP
- if (hpd_irq_data.bytes.device_service_irq.bits.CP_IRQ)
- hdcp_handle_cpirq(adev->dm.hdcp_workqueue, aconnector->base.index);
+ if (hpd_irq_data.bytes.device_service_irq.bits.CP_IRQ) {
+ if (adev->dm.hdcp_workqueue)
+ hdcp_handle_cpirq(adev->dm.hdcp_workqueue, aconnector->base.index);
+ }
#endif
if ((dc_link->cur_link_settings.lane_count != LANE_COUNT_UNKNOWN) ||
(dc_link->type == dc_connection_mst_branch))
@@ -5702,7 +5785,7 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
drm_connector_attach_vrr_capable_property(
&aconnector->base);
#ifdef CONFIG_DRM_AMD_DC_HDCP
- if (adev->asic_type >= CHIP_RAVEN)
+ if (adev->dm.hdcp_workqueue)
drm_connector_attach_content_protection_property(&aconnector->base, true);
#endif
}
@@ -8408,7 +8491,6 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
/* Calculate number of static frames before generating interrupt to
* enter PSR.
*/
- unsigned int frame_time_microsec = 1000000 / vsync_rate_hz;
// Init fail safe of 2 frames static
unsigned int num_frames_static = 2;
@@ -8423,8 +8505,10 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
* Calculate number of frames such that at least 30 ms of time has
* passed.
*/
- if (vsync_rate_hz != 0)
+ if (vsync_rate_hz != 0) {
+ unsigned int frame_time_microsec = 1000000 / vsync_rate_hz;
num_frames_static = (30000 / frame_time_microsec) + 1;
+ }
params.triggers.cursor_update = true;
params.triggers.overlay_update = true;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 5672f77..da73161 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -451,6 +451,7 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
aconnector->dc_sink);
dc_sink_release(aconnector->dc_sink);
aconnector->dc_sink = NULL;
+ aconnector->dc_link->cur_link_settings.lane_count = 0;
}
drm_connector_unregister(connector);
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
index 629a07a..c4ba6e8 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
@@ -711,10 +711,6 @@ static void enable_disp_power_gating_dmcub(
power_gating.header.sub_type = DMUB_CMD__VBIOS_ENABLE_DISP_POWER_GATING;
power_gating.power_gating.pwr = *pwr;
- /* ATOM_ENABLE is old API in DMUB */
- if (power_gating.power_gating.pwr.enable == ATOM_ENABLE)
- power_gating.power_gating.pwr.enable = ATOM_INIT;
-
dc_dmub_srv_cmd_queue(dmcub, &power_gating.header);
dc_dmub_srv_cmd_execute(dmcub);
dc_dmub_srv_wait_idle(dmcub);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
index 3cd28319..c0f6a8c 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
@@ -87,6 +87,12 @@
###############################################################################
CLK_MGR_DCN21 = rn_clk_mgr.o rn_clk_mgr_vbios_smu.o
+# prevent build errors regarding soft-float vs hard-float FP ABI tags
+# this code is currently unused on ppc64, as it applies to Renoir APUs only
+ifdef CONFIG_PPC64
+CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := $(call cc-option,-mno-gnu-attribute)
+endif
+
AMD_DAL_CLK_MGR_DCN21 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn21/,$(CLK_MGR_DCN21))
AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
index 495f01e..49ce46b 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
@@ -117,7 +117,7 @@ void dcn20_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
prev_dppclk_khz = clk_mgr->base.ctx->dc->current_state->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz;
- if (safe_to_lower || prev_dppclk_khz < dppclk_khz) {
+ if ((prev_dppclk_khz > dppclk_khz && safe_to_lower) || prev_dppclk_khz < dppclk_khz) {
clk_mgr->dccg->funcs->update_dpp_dto(
clk_mgr->dccg, dpp_inst, dppclk_khz);
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
index 7ae4c06..9ef3f7b 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
@@ -151,6 +151,12 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base,
rn_vbios_smu_set_min_deep_sleep_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_deep_sleep_khz);
}
+ // workaround: Limit dppclk to 100Mhz to avoid lower eDP panel switch to plus 4K monitor underflow.
+ if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
+ if (new_clocks->dppclk_khz < 100000)
+ new_clocks->dppclk_khz = 100000;
+ }
+
if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) {
if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz)
dpp_clock_lowered = true;
@@ -412,19 +418,19 @@ void build_watermark_ranges(struct clk_bw_params *bw_params, struct pp_smu_wm_ra
ranges->reader_wm_sets[num_valid_sets].wm_inst = bw_params->wm_table.entries[i].wm_inst;
ranges->reader_wm_sets[num_valid_sets].wm_type = bw_params->wm_table.entries[i].wm_type;
- /* We will not select WM based on dcfclk, so leave it as unconstrained */
- ranges->reader_wm_sets[num_valid_sets].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN;
- ranges->reader_wm_sets[num_valid_sets].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
- /* fclk wil be used to select WM*/
+ /* We will not select WM based on fclk, so leave it as unconstrained */
+ ranges->reader_wm_sets[num_valid_sets].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN;
+ ranges->reader_wm_sets[num_valid_sets].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
+ /* dcfclk wil be used to select WM*/
if (ranges->reader_wm_sets[num_valid_sets].wm_type == WM_TYPE_PSTATE_CHG) {
if (i == 0)
- ranges->reader_wm_sets[num_valid_sets].min_fill_clk_mhz = 0;
+ ranges->reader_wm_sets[num_valid_sets].min_drain_clk_mhz = 0;
else {
/* add 1 to make it non-overlapping with next lvl */
- ranges->reader_wm_sets[num_valid_sets].min_fill_clk_mhz = bw_params->clk_table.entries[i - 1].fclk_mhz + 1;
+ ranges->reader_wm_sets[num_valid_sets].min_drain_clk_mhz = bw_params->clk_table.entries[i - 1].dcfclk_mhz + 1;
}
- ranges->reader_wm_sets[num_valid_sets].max_fill_clk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
+ ranges->reader_wm_sets[num_valid_sets].max_drain_clk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
} else {
/* unconstrained for memory retraining */
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index cb731c1..fd9e696 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -3401,6 +3401,17 @@ static bool retrieve_link_cap(struct dc_link *link)
sink_id.ieee_device_id,
sizeof(sink_id.ieee_device_id));
+ /* Quirk Apple MBP 2017 15" Retina panel: Wrong DP_MAX_LINK_RATE */
+ {
+ uint8_t str_mbp_2017[] = { 101, 68, 21, 101, 98, 97 };
+
+ if ((link->dpcd_caps.sink_dev_id == 0x0010fa) &&
+ !memcmp(link->dpcd_caps.sink_dev_id_str, str_mbp_2017,
+ sizeof(str_mbp_2017))) {
+ link->reported_link_cap.link_rate = 0x0c;
+ }
+ }
+
core_link_read_dpcd(
link,
DP_SINK_HW_REVISION_START,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
index f1a5d2c..68c4049 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
@@ -400,7 +400,7 @@ static bool acquire(
{
enum gpio_result result;
- if (!is_engine_available(engine))
+ if ((engine == NULL) || !is_engine_available(engine))
return false;
result = dal_ddc_open(ddc, GPIO_MODE_HARDWARE,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
index f36a0d8..446ba0a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
@@ -840,8 +840,8 @@ static void hubbub1_det_request_size(
hubbub1_get_blk256_size(&blk256_width, &blk256_height, bpe);
- swath_bytes_horz_wc = height * blk256_height * bpe;
- swath_bytes_vert_wc = width * blk256_width * bpe;
+ swath_bytes_horz_wc = width * blk256_height * bpe;
+ swath_bytes_vert_wc = height * blk256_width * bpe;
*req128_horz_wc = (2 * swath_bytes_horz_wc <= detile_buf_size) ?
false : /* full 256B request */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index cfbbaff..a444fed 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -572,7 +572,6 @@ void dcn20_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
dpp->funcs->dpp_dppclk_control(dpp, false, false);
hubp->power_gated = true;
- dc->optimized_required = false; /* We're powering off, no need to optimize */
hws->funcs.plane_atomic_power_down(dc,
pipe_ctx->plane_res.dpp,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c
index d51e02f..5e640f1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c
@@ -108,7 +108,6 @@ static const struct hwseq_private_funcs dcn20_private_funcs = {
.enable_power_gating_plane = dcn20_enable_power_gating_plane,
.dpp_pg_control = dcn20_dpp_pg_control,
.hubp_pg_control = dcn20_hubp_pg_control,
- .dsc_pg_control = NULL,
.update_odm = dcn20_update_odm,
.dsc_pg_control = dcn20_dsc_pg_control,
.get_surface_visual_confirm_color = dcn10_get_surface_visual_confirm_color,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 85f90f3..e310d67 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -335,6 +335,117 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc = {
.use_urgent_burst_bw = 0
};
+struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv14_soc = {
+ .clock_limits = {
+ {
+ .state = 0,
+ .dcfclk_mhz = 560.0,
+ .fabricclk_mhz = 560.0,
+ .dispclk_mhz = 513.0,
+ .dppclk_mhz = 513.0,
+ .phyclk_mhz = 540.0,
+ .socclk_mhz = 560.0,
+ .dscclk_mhz = 171.0,
+ .dram_speed_mts = 8960.0,
+ },
+ {
+ .state = 1,
+ .dcfclk_mhz = 694.0,
+ .fabricclk_mhz = 694.0,
+ .dispclk_mhz = 642.0,
+ .dppclk_mhz = 642.0,
+ .phyclk_mhz = 600.0,
+ .socclk_mhz = 694.0,
+ .dscclk_mhz = 214.0,
+ .dram_speed_mts = 11104.0,
+ },
+ {
+ .state = 2,
+ .dcfclk_mhz = 875.0,
+ .fabricclk_mhz = 875.0,
+ .dispclk_mhz = 734.0,
+ .dppclk_mhz = 734.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 875.0,
+ .dscclk_mhz = 245.0,
+ .dram_speed_mts = 14000.0,
+ },
+ {
+ .state = 3,
+ .dcfclk_mhz = 1000.0,
+ .fabricclk_mhz = 1000.0,
+ .dispclk_mhz = 1100.0,
+ .dppclk_mhz = 1100.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1000.0,
+ .dscclk_mhz = 367.0,
+ .dram_speed_mts = 16000.0,
+ },
+ {
+ .state = 4,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 1200.0,
+ .dispclk_mhz = 1284.0,
+ .dppclk_mhz = 1284.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 428.0,
+ .dram_speed_mts = 16000.0,
+ },
+ /*Extra state, no dispclk ramping*/
+ {
+ .state = 5,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 1200.0,
+ .dispclk_mhz = 1284.0,
+ .dppclk_mhz = 1284.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 428.0,
+ .dram_speed_mts = 16000.0,
+ },
+ },
+ .num_states = 5,
+ .sr_exit_time_us = 8.6,
+ .sr_enter_plus_exit_time_us = 10.9,
+ .urgent_latency_us = 4.0,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+ .max_avg_sdp_bw_use_normal_percent = 40.0,
+ .max_avg_dram_bw_use_normal_percent = 40.0,
+ .writeback_latency_us = 12.0,
+ .ideal_dram_bw_after_urgent_percent = 40.0,
+ .max_request_size_bytes = 256,
+ .dram_channel_width_bytes = 2,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .dcn_downspread_percent = 0.5,
+ .downspread_percent = 0.38,
+ .dram_page_open_time_ns = 50.0,
+ .dram_rw_turnaround_time_ns = 17.5,
+ .dram_return_buffer_per_channel_bytes = 8192,
+ .round_trip_ping_latency_dcfclk_cycles = 131,
+ .urgent_out_of_order_return_per_channel_bytes = 256,
+ .channel_interleave_bytes = 256,
+ .num_banks = 8,
+ .num_chans = 8,
+ .vmm_page_size_bytes = 4096,
+ .dram_clock_change_latency_us = 404.0,
+ .dummy_pstate_latency_us = 5.0,
+ .writeback_dram_clock_change_latency_us = 23.0,
+ .return_bus_width_bytes = 64,
+ .dispclk_dppclk_vco_speed_mhz = 3850,
+ .xfc_bus_transport_time_us = 20,
+ .xfc_xbuf_latency_tolerance_us = 4,
+ .use_urgent_burst_bw = 0
+};
+
struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = { 0 };
#ifndef mmDP0_DP_DPHY_INTERNAL_CTRL
@@ -3291,6 +3402,9 @@ void dcn20_patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st
static struct _vcs_dpi_soc_bounding_box_st *get_asic_rev_soc_bb(
uint32_t hw_internal_rev)
{
+ if (ASICREV_IS_NAVI14_M(hw_internal_rev))
+ return &dcn2_0_nv14_soc;
+
if (ASICREV_IS_NAVI12_P(hw_internal_rev))
return &dcn2_0_nv12_soc;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c
index 4861aa5..fddbd59 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c
@@ -116,7 +116,6 @@ static const struct hwseq_private_funcs dcn21_private_funcs = {
.enable_power_gating_plane = dcn20_enable_power_gating_plane,
.dpp_pg_control = dcn20_dpp_pg_control,
.hubp_pg_control = dcn20_hubp_pg_control,
- .dsc_pg_control = NULL,
.update_odm = dcn20_update_odm,
.dsc_pg_control = dcn20_dsc_pg_control,
.get_surface_visual_confirm_color = dcn10_get_surface_visual_confirm_color,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
index 0d506d3..33d0a17 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
@@ -60,6 +60,7 @@
#include "dcn20/dcn20_dccg.h"
#include "dcn21_hubbub.h"
#include "dcn10/dcn10_resource.h"
+#include "dce110/dce110_resource.h"
#include "dcn20/dcn20_dwb.h"
#include "dcn20/dcn20_mmhubbub.h"
@@ -856,6 +857,7 @@ static const struct dc_debug_options debug_defaults_diags = {
enum dcn20_clk_src_array_id {
DCN20_CLK_SRC_PLL0,
DCN20_CLK_SRC_PLL1,
+ DCN20_CLK_SRC_PLL2,
DCN20_CLK_SRC_TOTAL_DCN21
};
@@ -1718,6 +1720,10 @@ static bool dcn21_resource_construct(
dcn21_clock_source_create(ctx, ctx->dc_bios,
CLOCK_SOURCE_COMBO_PHY_PLL1,
&clk_src_regs[1], false);
+ pool->base.clock_sources[DCN20_CLK_SRC_PLL2] =
+ dcn21_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_COMBO_PHY_PLL2,
+ &clk_src_regs[2], false);
pool->base.clk_src_count = DCN20_CLK_SRC_TOTAL_DCN21;
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
index f730b94..5524671 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
@@ -46,8 +46,8 @@ static inline enum mod_hdcp_status check_hdcp2_capable(struct mod_hdcp *hdcp)
enum mod_hdcp_status status;
if (is_dp_hdcp(hdcp))
- status = (hdcp->auth.msg.hdcp2.rxcaps_dp[2] & HDCP_2_2_RX_CAPS_VERSION_VAL) &&
- HDCP_2_2_DP_HDCP_CAPABLE(hdcp->auth.msg.hdcp2.rxcaps_dp[0]) ?
+ status = (hdcp->auth.msg.hdcp2.rxcaps_dp[0] == HDCP_2_2_RX_CAPS_VERSION_VAL) &&
+ HDCP_2_2_DP_HDCP_CAPABLE(hdcp->auth.msg.hdcp2.rxcaps_dp[2]) ?
MOD_HDCP_STATUS_SUCCESS :
MOD_HDCP_STATUS_HDCP2_NOT_CAPABLE;
else
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h
index b6f74bf..27bb8c1 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h
@@ -7376,6 +7376,8 @@
#define mmCRTC4_CRTC_DRR_CONTROL 0x0f3e
#define mmCRTC4_CRTC_DRR_CONTROL_BASE_IDX 2
+#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0 0x395d
+#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX 2
// addressBlock: dce_dc_fmt4_dispdec
// base address: 0x2000
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 99ad4dd..96e81c7 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -222,7 +222,7 @@ int smu_set_soft_freq_range(struct smu_context *smu, enum smu_clk_type clk_type,
{
int ret = 0;
- if (min <= 0 && max <= 0)
+ if (min < 0 && max < 0)
return -EINVAL;
if (!smu_clk_dpm_is_enabled(smu, clk_type))
@@ -2006,8 +2006,11 @@ int smu_set_watermarks_for_clock_ranges(struct smu_context *smu,
smu_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT) &&
smu_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) {
smu_set_watermarks_table(smu, table, clock_ranges);
- smu->watermarks_bitmap |= WATERMARKS_EXIST;
- smu->watermarks_bitmap &= ~WATERMARKS_LOADED;
+
+ if (!(smu->watermarks_bitmap & WATERMARKS_EXIST)) {
+ smu->watermarks_bitmap |= WATERMARKS_EXIST;
+ smu->watermarks_bitmap &= ~WATERMARKS_LOADED;
+ }
}
mutex_unlock(&smu->mutex);
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h
index b2f96a1..7a63cf8 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h
@@ -39,21 +39,39 @@
#define SMU_11_0_PP_OVERDRIVE_VERSION 0x0800
#define SMU_11_0_PP_POWERSAVINGCLOCK_VERSION 0x0100
+enum SMU_11_0_ODFEATURE_CAP {
+ SMU_11_0_ODCAP_GFXCLK_LIMITS = 0,
+ SMU_11_0_ODCAP_GFXCLK_CURVE,
+ SMU_11_0_ODCAP_UCLK_MAX,
+ SMU_11_0_ODCAP_POWER_LIMIT,
+ SMU_11_0_ODCAP_FAN_ACOUSTIC_LIMIT,
+ SMU_11_0_ODCAP_FAN_SPEED_MIN,
+ SMU_11_0_ODCAP_TEMPERATURE_FAN,
+ SMU_11_0_ODCAP_TEMPERATURE_SYSTEM,
+ SMU_11_0_ODCAP_MEMORY_TIMING_TUNE,
+ SMU_11_0_ODCAP_FAN_ZERO_RPM_CONTROL,
+ SMU_11_0_ODCAP_AUTO_UV_ENGINE,
+ SMU_11_0_ODCAP_AUTO_OC_ENGINE,
+ SMU_11_0_ODCAP_AUTO_OC_MEMORY,
+ SMU_11_0_ODCAP_FAN_CURVE,
+ SMU_11_0_ODCAP_COUNT,
+};
+
enum SMU_11_0_ODFEATURE_ID {
- SMU_11_0_ODFEATURE_GFXCLK_LIMITS = 1 << 0, //GFXCLK Limit feature
- SMU_11_0_ODFEATURE_GFXCLK_CURVE = 1 << 1, //GFXCLK Curve feature
- SMU_11_0_ODFEATURE_UCLK_MAX = 1 << 2, //UCLK Limit feature
- SMU_11_0_ODFEATURE_POWER_LIMIT = 1 << 3, //Power Limit feature
- SMU_11_0_ODFEATURE_FAN_ACOUSTIC_LIMIT = 1 << 4, //Fan Acoustic RPM feature
- SMU_11_0_ODFEATURE_FAN_SPEED_MIN = 1 << 5, //Minimum Fan Speed feature
- SMU_11_0_ODFEATURE_TEMPERATURE_FAN = 1 << 6, //Fan Target Temperature Limit feature
- SMU_11_0_ODFEATURE_TEMPERATURE_SYSTEM = 1 << 7, //Operating Temperature Limit feature
- SMU_11_0_ODFEATURE_MEMORY_TIMING_TUNE = 1 << 8, //AC Timing Tuning feature
- SMU_11_0_ODFEATURE_FAN_ZERO_RPM_CONTROL = 1 << 9, //Zero RPM feature
- SMU_11_0_ODFEATURE_AUTO_UV_ENGINE = 1 << 10, //Auto Under Volt GFXCLK feature
- SMU_11_0_ODFEATURE_AUTO_OC_ENGINE = 1 << 11, //Auto Over Clock GFXCLK feature
- SMU_11_0_ODFEATURE_AUTO_OC_MEMORY = 1 << 12, //Auto Over Clock MCLK feature
- SMU_11_0_ODFEATURE_FAN_CURVE = 1 << 13, //VICTOR TODO
+ SMU_11_0_ODFEATURE_GFXCLK_LIMITS = 1 << SMU_11_0_ODCAP_GFXCLK_LIMITS, //GFXCLK Limit feature
+ SMU_11_0_ODFEATURE_GFXCLK_CURVE = 1 << SMU_11_0_ODCAP_GFXCLK_CURVE, //GFXCLK Curve feature
+ SMU_11_0_ODFEATURE_UCLK_MAX = 1 << SMU_11_0_ODCAP_UCLK_MAX, //UCLK Limit feature
+ SMU_11_0_ODFEATURE_POWER_LIMIT = 1 << SMU_11_0_ODCAP_POWER_LIMIT, //Power Limit feature
+ SMU_11_0_ODFEATURE_FAN_ACOUSTIC_LIMIT = 1 << SMU_11_0_ODCAP_FAN_ACOUSTIC_LIMIT, //Fan Acoustic RPM feature
+ SMU_11_0_ODFEATURE_FAN_SPEED_MIN = 1 << SMU_11_0_ODCAP_FAN_SPEED_MIN, //Minimum Fan Speed feature
+ SMU_11_0_ODFEATURE_TEMPERATURE_FAN = 1 << SMU_11_0_ODCAP_TEMPERATURE_FAN, //Fan Target Temperature Limit feature
+ SMU_11_0_ODFEATURE_TEMPERATURE_SYSTEM = 1 << SMU_11_0_ODCAP_TEMPERATURE_SYSTEM, //Operating Temperature Limit feature
+ SMU_11_0_ODFEATURE_MEMORY_TIMING_TUNE = 1 << SMU_11_0_ODCAP_MEMORY_TIMING_TUNE, //AC Timing Tuning feature
+ SMU_11_0_ODFEATURE_FAN_ZERO_RPM_CONTROL = 1 << SMU_11_0_ODCAP_FAN_ZERO_RPM_CONTROL, //Zero RPM feature
+ SMU_11_0_ODFEATURE_AUTO_UV_ENGINE = 1 << SMU_11_0_ODCAP_AUTO_UV_ENGINE, //Auto Under Volt GFXCLK feature
+ SMU_11_0_ODFEATURE_AUTO_OC_ENGINE = 1 << SMU_11_0_ODCAP_AUTO_OC_ENGINE, //Auto Over Clock GFXCLK feature
+ SMU_11_0_ODFEATURE_AUTO_OC_MEMORY = 1 << SMU_11_0_ODCAP_AUTO_OC_MEMORY, //Auto Over Clock MCLK feature
+ SMU_11_0_ODFEATURE_FAN_CURVE = 1 << SMU_11_0_ODCAP_FAN_CURVE, //Fan Curve feature
SMU_11_0_ODFEATURE_COUNT = 14,
};
#define SMU_11_0_MAX_ODFEATURE 32 //Maximum Number of OD Features
diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
index 19a9846..aed4d6e 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
@@ -736,9 +736,9 @@ static bool navi10_is_support_fine_grained_dpm(struct smu_context *smu, enum smu
return dpm_desc->SnapToDiscrete == 0 ? true : false;
}
-static inline bool navi10_od_feature_is_supported(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODFEATURE_ID feature)
+static inline bool navi10_od_feature_is_supported(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODFEATURE_CAP cap)
{
- return od_table->cap[feature];
+ return od_table->cap[cap];
}
static void navi10_od_setting_get_range(struct smu_11_0_overdrive_table *od_table,
@@ -846,7 +846,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
case SMU_OD_SCLK:
if (!smu->od_enabled || !od_table || !od_settings)
break;
- if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS))
+ if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_LIMITS))
break;
size += sprintf(buf + size, "OD_SCLK:\n");
size += sprintf(buf + size, "0: %uMhz\n1: %uMhz\n", od_table->GfxclkFmin, od_table->GfxclkFmax);
@@ -854,7 +854,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
case SMU_OD_MCLK:
if (!smu->od_enabled || !od_table || !od_settings)
break;
- if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX))
+ if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_UCLK_MAX))
break;
size += sprintf(buf + size, "OD_MCLK:\n");
size += sprintf(buf + size, "1: %uMHz\n", od_table->UclkFmax);
@@ -862,7 +862,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
case SMU_OD_VDDC_CURVE:
if (!smu->od_enabled || !od_table || !od_settings)
break;
- if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE))
+ if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_CURVE))
break;
size += sprintf(buf + size, "OD_VDDC_CURVE:\n");
for (i = 0; i < 3; i++) {
@@ -887,7 +887,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
break;
size = sprintf(buf, "%s:\n", "OD_RANGE");
- if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) {
+ if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_LIMITS)) {
navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_GFXCLKFMIN,
&min_value, NULL);
navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_GFXCLKFMAX,
@@ -896,14 +896,14 @@ static int navi10_print_clk_levels(struct smu_context *smu,
min_value, max_value);
}
- if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) {
+ if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_UCLK_MAX)) {
navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_UCLKFMAX,
&min_value, &max_value);
size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n",
min_value, max_value);
}
- if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) {
+ if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_CURVE)) {
navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P1,
&min_value, &max_value);
size += sprintf(buf + size, "VDDC_CURVE_SCLK[0]: %7uMhz %10uMhz\n",
@@ -1063,15 +1063,6 @@ static int navi10_display_config_changed(struct smu_context *smu)
int ret = 0;
if ((smu->watermarks_bitmap & WATERMARKS_EXIST) &&
- !(smu->watermarks_bitmap & WATERMARKS_LOADED)) {
- ret = smu_write_watermarks_table(smu);
- if (ret)
- return ret;
-
- smu->watermarks_bitmap |= WATERMARKS_LOADED;
- }
-
- if ((smu->watermarks_bitmap & WATERMARKS_EXIST) &&
smu_feature_is_supported(smu, SMU_FEATURE_DPM_DCEFCLK_BIT) &&
smu_feature_is_supported(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) {
ret = smu_send_smc_msg_with_param(smu, SMU_MSG_NumOfDisplays,
@@ -1493,6 +1484,7 @@ static int navi10_set_watermarks_table(struct smu_context *smu,
*clock_ranges)
{
int i;
+ int ret = 0;
Watermarks_t *table = watermarks;
if (!table || !clock_ranges)
@@ -1544,6 +1536,18 @@ static int navi10_set_watermarks_table(struct smu_context *smu,
clock_ranges->wm_mcif_clocks_ranges[i].wm_set_id;
}
+ smu->watermarks_bitmap |= WATERMARKS_EXIST;
+
+ /* pass data to smu controller */
+ if (!(smu->watermarks_bitmap & WATERMARKS_LOADED)) {
+ ret = smu_write_watermarks_table(smu);
+ if (ret) {
+ pr_err("Failed to update WMTABLE!");
+ return ret;
+ }
+ smu->watermarks_bitmap |= WATERMARKS_LOADED;
+ }
+
return 0;
}
@@ -2056,7 +2060,7 @@ static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABL
switch (type) {
case PP_OD_EDIT_SCLK_VDDC_TABLE:
- if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) {
+ if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_LIMITS)) {
pr_warn("GFXCLK_LIMITS not supported!\n");
return -ENOTSUPP;
}
@@ -2102,7 +2106,7 @@ static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABL
}
break;
case PP_OD_EDIT_MCLK_VDDC_TABLE:
- if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) {
+ if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_UCLK_MAX)) {
pr_warn("UCLK_MAX not supported!\n");
return -ENOTSUPP;
}
@@ -2143,7 +2147,7 @@ static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABL
}
break;
case PP_OD_EDIT_VDDC_CURVE:
- if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) {
+ if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_CURVE)) {
pr_warn("GFXCLK_CURVE not supported!\n");
return -ENOTSUPP;
}
diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
index 861e641..3ad0f4a 100644
--- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
@@ -111,8 +111,8 @@ static struct smu_12_0_cmn2aisc_mapping renoir_clk_map[SMU_CLK_COUNT] = {
CLK_MAP(GFXCLK, CLOCK_GFXCLK),
CLK_MAP(SCLK, CLOCK_GFXCLK),
CLK_MAP(SOCCLK, CLOCK_SOCCLK),
- CLK_MAP(UCLK, CLOCK_UMCCLK),
- CLK_MAP(MCLK, CLOCK_UMCCLK),
+ CLK_MAP(UCLK, CLOCK_FCLK),
+ CLK_MAP(MCLK, CLOCK_FCLK),
};
static struct smu_12_0_cmn2aisc_mapping renoir_table_map[SMU_TABLE_COUNT] = {
@@ -280,7 +280,7 @@ static int renoir_print_clk_levels(struct smu_context *smu,
break;
case SMU_MCLK:
count = NUM_MEMCLK_DPM_LEVELS;
- cur_value = metrics.ClockFrequency[CLOCK_UMCCLK];
+ cur_value = metrics.ClockFrequency[CLOCK_FCLK];
break;
case SMU_DCEFCLK:
count = NUM_DCFCLK_DPM_LEVELS;
@@ -806,9 +806,10 @@ static int renoir_set_watermarks_table(
clock_ranges->wm_mcif_clocks_ranges[i].wm_set_id;
}
+ smu->watermarks_bitmap |= WATERMARKS_EXIST;
+
/* pass data to smu controller */
- if ((smu->watermarks_bitmap & WATERMARKS_EXIST) &&
- !(smu->watermarks_bitmap & WATERMARKS_LOADED)) {
+ if (!(smu->watermarks_bitmap & WATERMARKS_LOADED)) {
ret = smu_write_watermarks_table(smu);
if (ret) {
pr_err("Failed to update WMTABLE!");
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index 0dc4947..c9e5ce1 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -898,6 +898,9 @@ int smu_v11_0_system_features_control(struct smu_context *smu,
if (ret)
return ret;
+ bitmap_zero(feature->enabled, feature->feature_num);
+ bitmap_zero(feature->supported, feature->feature_num);
+
if (en) {
ret = smu_feature_get_enabled_mask(smu, feature_mask, 2);
if (ret)
@@ -907,9 +910,6 @@ int smu_v11_0_system_features_control(struct smu_context *smu,
feature->feature_num);
bitmap_copy(feature->supported, (unsigned long *)&feature_mask,
feature->feature_num);
- } else {
- bitmap_zero(feature->enabled, feature->feature_num);
- bitmap_zero(feature->supported, feature->feature_num);
}
return ret;
@@ -978,8 +978,12 @@ int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu)
struct smu_11_0_max_sustainable_clocks *max_sustainable_clocks;
int ret = 0;
- max_sustainable_clocks = kzalloc(sizeof(struct smu_11_0_max_sustainable_clocks),
+ if (!smu->smu_table.max_sustainable_clocks)
+ max_sustainable_clocks = kzalloc(sizeof(struct smu_11_0_max_sustainable_clocks),
GFP_KERNEL);
+ else
+ max_sustainable_clocks = smu->smu_table.max_sustainable_clocks;
+
smu->smu_table.max_sustainable_clocks = (void *)max_sustainable_clocks;
max_sustainable_clocks->uclock = smu->smu_table.boot_values.uclk / 100;
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c
index 870e6db..518e659 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c
@@ -458,9 +458,6 @@ int smu_v12_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_
{
int ret = 0;
- if (max < min)
- return -EINVAL;
-
switch (clk_type) {
case SMU_GFXCLK:
case SMU_SCLK:
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_drv.c b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c
index ea5cd1e..e793393 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_drv.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c
@@ -146,14 +146,14 @@ static const struct of_device_id komeda_of_match[] = {
MODULE_DEVICE_TABLE(of, komeda_of_match);
-static int komeda_rt_pm_suspend(struct device *dev)
+static int __maybe_unused komeda_rt_pm_suspend(struct device *dev)
{
struct komeda_drv *mdrv = dev_get_drvdata(dev);
return komeda_dev_suspend(mdrv->mdev);
}
-static int komeda_rt_pm_resume(struct device *dev)
+static int __maybe_unused komeda_rt_pm_resume(struct device *dev)
{
struct komeda_drv *mdrv = dev_get_drvdata(dev);
diff --git a/drivers/gpu/drm/bochs/bochs_hw.c b/drivers/gpu/drm/bochs/bochs_hw.c
index b615b7d..a4fc4e6 100644
--- a/drivers/gpu/drm/bochs/bochs_hw.c
+++ b/drivers/gpu/drm/bochs/bochs_hw.c
@@ -156,10 +156,8 @@ int bochs_hw_init(struct drm_device *dev)
size = min(size, mem);
}
- if (pci_request_region(pdev, 0, "bochs-drm") != 0) {
- DRM_ERROR("Cannot request framebuffer\n");
- return -EBUSY;
- }
+ if (pci_request_region(pdev, 0, "bochs-drm") != 0)
+ DRM_WARN("Cannot request framebuffer, boot fb still active?\n");
bochs->fb_map = ioremap(addr, size);
if (bochs->fb_map == NULL) {
diff --git a/drivers/gpu/drm/bridge/analogix/analogix-anx6345.c b/drivers/gpu/drm/bridge/analogix/analogix-anx6345.c
index 56f55c5..2dfa2fd 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix-anx6345.c
+++ b/drivers/gpu/drm/bridge/analogix/analogix-anx6345.c
@@ -210,8 +210,7 @@ static int anx6345_dp_link_training(struct anx6345 *anx6345)
if (err)
return err;
- dpcd[0] = drm_dp_max_link_rate(anx6345->dpcd);
- dpcd[0] = drm_dp_link_rate_to_bw_code(dpcd[0]);
+ dpcd[0] = dp_bw;
err = regmap_write(anx6345->map[I2C_IDX_DPTX],
SP_DP_MAIN_LINK_BW_SET_REG, dpcd[0]);
if (err)
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
index 67fca43..24965e5 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
@@ -1624,28 +1624,34 @@ static void hdmi_config_AVI(struct dw_hdmi *hdmi, struct drm_display_mode *mode)
frame.colorspace = HDMI_COLORSPACE_RGB;
/* Set up colorimetry */
- switch (hdmi->hdmi_data.enc_out_encoding) {
- case V4L2_YCBCR_ENC_601:
- if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV601)
- frame.colorimetry = HDMI_COLORIMETRY_EXTENDED;
- else
+ if (!hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_out_bus_format)) {
+ switch (hdmi->hdmi_data.enc_out_encoding) {
+ case V4L2_YCBCR_ENC_601:
+ if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV601)
+ frame.colorimetry = HDMI_COLORIMETRY_EXTENDED;
+ else
+ frame.colorimetry = HDMI_COLORIMETRY_ITU_601;
+ frame.extended_colorimetry =
+ HDMI_EXTENDED_COLORIMETRY_XV_YCC_601;
+ break;
+ case V4L2_YCBCR_ENC_709:
+ if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV709)
+ frame.colorimetry = HDMI_COLORIMETRY_EXTENDED;
+ else
+ frame.colorimetry = HDMI_COLORIMETRY_ITU_709;
+ frame.extended_colorimetry =
+ HDMI_EXTENDED_COLORIMETRY_XV_YCC_709;
+ break;
+ default: /* Carries no data */
frame.colorimetry = HDMI_COLORIMETRY_ITU_601;
+ frame.extended_colorimetry =
+ HDMI_EXTENDED_COLORIMETRY_XV_YCC_601;
+ break;
+ }
+ } else {
+ frame.colorimetry = HDMI_COLORIMETRY_NONE;
frame.extended_colorimetry =
- HDMI_EXTENDED_COLORIMETRY_XV_YCC_601;
- break;
- case V4L2_YCBCR_ENC_709:
- if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV709)
- frame.colorimetry = HDMI_COLORIMETRY_EXTENDED;
- else
- frame.colorimetry = HDMI_COLORIMETRY_ITU_709;
- frame.extended_colorimetry =
- HDMI_EXTENDED_COLORIMETRY_XV_YCC_709;
- break;
- default: /* Carries no data */
- frame.colorimetry = HDMI_COLORIMETRY_ITU_601;
- frame.extended_colorimetry =
- HDMI_EXTENDED_COLORIMETRY_XV_YCC_601;
- break;
+ HDMI_EXTENDED_COLORIMETRY_XV_YCC_601;
}
frame.scan_mode = HDMI_SCAN_MODE_NONE;
diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c
index 3709e5a..fbdb42d 100644
--- a/drivers/gpu/drm/bridge/tc358767.c
+++ b/drivers/gpu/drm/bridge/tc358767.c
@@ -297,7 +297,7 @@ static inline int tc_poll_timeout(struct tc_data *tc, unsigned int addr,
static int tc_aux_wait_busy(struct tc_data *tc)
{
- return tc_poll_timeout(tc, DP0_AUXSTATUS, AUX_BUSY, 0, 1000, 100000);
+ return tc_poll_timeout(tc, DP0_AUXSTATUS, AUX_BUSY, 0, 100, 100000);
}
static int tc_aux_write_data(struct tc_data *tc, const void *data,
@@ -640,7 +640,7 @@ static int tc_aux_link_setup(struct tc_data *tc)
if (ret)
goto err;
- ret = tc_poll_timeout(tc, DP_PHY_CTRL, PHY_RDY, PHY_RDY, 1, 1000);
+ ret = tc_poll_timeout(tc, DP_PHY_CTRL, PHY_RDY, PHY_RDY, 100, 100000);
if (ret == -ETIMEDOUT) {
dev_err(tc->dev, "Timeout waiting for PHY to become ready");
return ret;
@@ -876,7 +876,7 @@ static int tc_wait_link_training(struct tc_data *tc)
int ret;
ret = tc_poll_timeout(tc, DP0_LTSTAT, LT_LOOPDONE,
- LT_LOOPDONE, 1, 1000);
+ LT_LOOPDONE, 500, 100000);
if (ret) {
dev_err(tc->dev, "Link training timeout waiting for LT_LOOPDONE!\n");
return ret;
@@ -949,7 +949,7 @@ static int tc_main_link_enable(struct tc_data *tc)
dp_phy_ctrl &= ~(DP_PHY_RST | PHY_M1_RST | PHY_M0_RST);
ret = regmap_write(tc->regmap, DP_PHY_CTRL, dp_phy_ctrl);
- ret = tc_poll_timeout(tc, DP_PHY_CTRL, PHY_RDY, PHY_RDY, 1, 1000);
+ ret = tc_poll_timeout(tc, DP_PHY_CTRL, PHY_RDY, PHY_RDY, 500, 100000);
if (ret) {
dev_err(dev, "timeout waiting for phy become ready");
return ret;
diff --git a/drivers/gpu/drm/bridge/ti-tfp410.c b/drivers/gpu/drm/bridge/ti-tfp410.c
index 6f6d6d1..f195a47 100644
--- a/drivers/gpu/drm/bridge/ti-tfp410.c
+++ b/drivers/gpu/drm/bridge/ti-tfp410.c
@@ -140,7 +140,8 @@ static int tfp410_attach(struct drm_bridge *bridge)
dvi->connector_type,
dvi->ddc);
if (ret) {
- dev_err(dvi->dev, "drm_connector_init() failed: %d\n", ret);
+ dev_err(dvi->dev, "drm_connector_init_with_ddc() failed: %d\n",
+ ret);
return ret;
}
diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c
index 6d4a29e..3035584 100644
--- a/drivers/gpu/drm/drm_client_modeset.c
+++ b/drivers/gpu/drm/drm_client_modeset.c
@@ -951,7 +951,8 @@ bool drm_client_rotation(struct drm_mode_set *modeset, unsigned int *rotation)
* depending on the hardware this may require the framebuffer
* to be in a specific tiling format.
*/
- if ((*rotation & DRM_MODE_ROTATE_MASK) != DRM_MODE_ROTATE_180 ||
+ if (((*rotation & DRM_MODE_ROTATE_MASK) != DRM_MODE_ROTATE_0 &&
+ (*rotation & DRM_MODE_ROTATE_MASK) != DRM_MODE_ROTATE_180) ||
!plane->rotation_property)
return false;
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 20cdaf3..ed0fea2 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1935,7 +1935,7 @@ static u8 drm_dp_calculate_rad(struct drm_dp_mst_port *port,
return parent_lct + 1;
}
-static bool drm_dp_mst_is_dp_mst_end_device(u8 pdt, bool mcs)
+static bool drm_dp_mst_is_end_device(u8 pdt, bool mcs)
{
switch (pdt) {
case DP_PEER_DEVICE_DP_LEGACY_CONV:
@@ -1965,13 +1965,13 @@ drm_dp_port_set_pdt(struct drm_dp_mst_port *port, u8 new_pdt,
/* Teardown the old pdt, if there is one */
if (port->pdt != DP_PEER_DEVICE_NONE) {
- if (drm_dp_mst_is_dp_mst_end_device(port->pdt, port->mcs)) {
+ if (drm_dp_mst_is_end_device(port->pdt, port->mcs)) {
/*
* If the new PDT would also have an i2c bus,
* don't bother with reregistering it
*/
if (new_pdt != DP_PEER_DEVICE_NONE &&
- drm_dp_mst_is_dp_mst_end_device(new_pdt, new_mcs)) {
+ drm_dp_mst_is_end_device(new_pdt, new_mcs)) {
port->pdt = new_pdt;
port->mcs = new_mcs;
return 0;
@@ -1991,7 +1991,7 @@ drm_dp_port_set_pdt(struct drm_dp_mst_port *port, u8 new_pdt,
port->mcs = new_mcs;
if (port->pdt != DP_PEER_DEVICE_NONE) {
- if (drm_dp_mst_is_dp_mst_end_device(port->pdt, port->mcs)) {
+ if (drm_dp_mst_is_end_device(port->pdt, port->mcs)) {
/* add i2c over sideband */
ret = drm_dp_mst_register_i2c_bus(&port->aux);
} else {
@@ -2172,7 +2172,7 @@ drm_dp_mst_port_add_connector(struct drm_dp_mst_branch *mstb,
}
if (port->pdt != DP_PEER_DEVICE_NONE &&
- drm_dp_mst_is_dp_mst_end_device(port->pdt, port->mcs)) {
+ drm_dp_mst_is_end_device(port->pdt, port->mcs)) {
port->cached_edid = drm_get_edid(port->connector,
&port->aux.ddc);
drm_connector_set_tile_property(port->connector);
@@ -2302,14 +2302,18 @@ drm_dp_mst_handle_link_address_port(struct drm_dp_mst_branch *mstb,
mutex_unlock(&mgr->lock);
}
- if (old_ddps != port->ddps) {
- if (port->ddps) {
- if (!port->input) {
- drm_dp_send_enum_path_resources(mgr, mstb,
- port);
- }
+ /*
+ * Reprobe PBN caps on both hotplug, and when re-probing the link
+ * for our parent mstb
+ */
+ if (old_ddps != port->ddps || !created) {
+ if (port->ddps && !port->input) {
+ ret = drm_dp_send_enum_path_resources(mgr, mstb,
+ port);
+ if (ret == 1)
+ changed = true;
} else {
- port->available_pbn = 0;
+ port->full_pbn = 0;
}
}
@@ -2401,11 +2405,10 @@ drm_dp_mst_handle_conn_stat(struct drm_dp_mst_branch *mstb,
port->ddps = conn_stat->displayport_device_plug_status;
if (old_ddps != port->ddps) {
- if (port->ddps) {
- dowork = true;
- } else {
- port->available_pbn = 0;
- }
+ if (port->ddps && !port->input)
+ drm_dp_send_enum_path_resources(mgr, mstb, port);
+ else
+ port->full_pbn = 0;
}
new_pdt = port->input ? DP_PEER_DEVICE_NONE : conn_stat->peer_device_type;
@@ -2556,13 +2559,6 @@ static int drm_dp_check_and_send_link_address(struct drm_dp_mst_topology_mgr *mg
if (port->input || !port->ddps)
continue;
- if (!port->available_pbn) {
- drm_modeset_lock(&mgr->base.lock, NULL);
- drm_dp_send_enum_path_resources(mgr, mstb, port);
- drm_modeset_unlock(&mgr->base.lock);
- changed = true;
- }
-
if (port->mstb)
mstb_child = drm_dp_mst_topology_get_mstb_validated(
mgr, port->mstb);
@@ -2990,6 +2986,7 @@ drm_dp_send_enum_path_resources(struct drm_dp_mst_topology_mgr *mgr,
ret = drm_dp_mst_wait_tx_reply(mstb, txmsg);
if (ret > 0) {
+ ret = 0;
path_res = &txmsg->reply.u.path_resources;
if (txmsg->reply.reply_type == DP_SIDEBAND_REPLY_NAK) {
@@ -3002,14 +2999,22 @@ drm_dp_send_enum_path_resources(struct drm_dp_mst_topology_mgr *mgr,
path_res->port_number,
path_res->full_payload_bw_number,
path_res->avail_payload_bw_number);
- port->available_pbn =
- path_res->avail_payload_bw_number;
+
+ /*
+ * If something changed, make sure we send a
+ * hotplug
+ */
+ if (port->full_pbn != path_res->full_payload_bw_number ||
+ port->fec_capable != path_res->fec_capable)
+ ret = 1;
+
+ port->full_pbn = path_res->full_payload_bw_number;
port->fec_capable = path_res->fec_capable;
}
}
kfree(txmsg);
- return 0;
+ return ret;
}
static struct drm_dp_mst_port *drm_dp_get_last_connected_port_to_mstb(struct drm_dp_mst_branch *mstb)
@@ -3596,13 +3601,9 @@ drm_dp_mst_topology_mgr_invalidate_mstb(struct drm_dp_mst_branch *mstb)
/* The link address will need to be re-sent on resume */
mstb->link_address_sent = false;
- list_for_each_entry(port, &mstb->ports, next) {
- /* The PBN for each port will also need to be re-probed */
- port->available_pbn = 0;
-
+ list_for_each_entry(port, &mstb->ports, next)
if (port->mstb)
drm_dp_mst_topology_mgr_invalidate_mstb(port->mstb);
- }
}
/**
@@ -3838,7 +3839,8 @@ drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr *mgr,
else if (msg->req_type == DP_RESOURCE_STATUS_NOTIFY)
guid = msg->u.resource_stat.guid;
- mstb = drm_dp_get_mst_branch_device_by_guid(mgr, guid);
+ if (guid)
+ mstb = drm_dp_get_mst_branch_device_by_guid(mgr, guid);
} else {
mstb = drm_dp_get_mst_branch_device(mgr, hdr->lct, hdr->rad);
}
@@ -4828,41 +4830,102 @@ static bool drm_dp_mst_port_downstream_of_branch(struct drm_dp_mst_port *port,
return false;
}
-static inline
-int drm_dp_mst_atomic_check_bw_limit(struct drm_dp_mst_branch *branch,
- struct drm_dp_mst_topology_state *mst_state)
+static int
+drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port,
+ struct drm_dp_mst_topology_state *state);
+
+static int
+drm_dp_mst_atomic_check_mstb_bw_limit(struct drm_dp_mst_branch *mstb,
+ struct drm_dp_mst_topology_state *state)
{
- struct drm_dp_mst_port *port;
struct drm_dp_vcpi_allocation *vcpi;
- int pbn_limit = 0, pbn_used = 0;
+ struct drm_dp_mst_port *port;
+ int pbn_used = 0, ret;
+ bool found = false;
- list_for_each_entry(port, &branch->ports, next) {
- if (port->mstb)
- if (drm_dp_mst_atomic_check_bw_limit(port->mstb, mst_state))
- return -ENOSPC;
-
- if (port->available_pbn > 0)
- pbn_limit = port->available_pbn;
- }
- DRM_DEBUG_ATOMIC("[MST BRANCH:%p] branch has %d PBN available\n",
- branch, pbn_limit);
-
- list_for_each_entry(vcpi, &mst_state->vcpis, next) {
- if (!vcpi->pbn)
+ /* Check that we have at least one port in our state that's downstream
+ * of this branch, otherwise we can skip this branch
+ */
+ list_for_each_entry(vcpi, &state->vcpis, next) {
+ if (!vcpi->pbn ||
+ !drm_dp_mst_port_downstream_of_branch(vcpi->port, mstb))
continue;
- if (drm_dp_mst_port_downstream_of_branch(vcpi->port, branch))
- pbn_used += vcpi->pbn;
+ found = true;
+ break;
}
- DRM_DEBUG_ATOMIC("[MST BRANCH:%p] branch used %d PBN\n",
- branch, pbn_used);
+ if (!found)
+ return 0;
- if (pbn_used > pbn_limit) {
- DRM_DEBUG_ATOMIC("[MST BRANCH:%p] No available bandwidth\n",
- branch);
+ if (mstb->port_parent)
+ DRM_DEBUG_ATOMIC("[MSTB:%p] [MST PORT:%p] Checking bandwidth limits on [MSTB:%p]\n",
+ mstb->port_parent->parent, mstb->port_parent,
+ mstb);
+ else
+ DRM_DEBUG_ATOMIC("[MSTB:%p] Checking bandwidth limits\n",
+ mstb);
+
+ list_for_each_entry(port, &mstb->ports, next) {
+ ret = drm_dp_mst_atomic_check_port_bw_limit(port, state);
+ if (ret < 0)
+ return ret;
+
+ pbn_used += ret;
+ }
+
+ return pbn_used;
+}
+
+static int
+drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port,
+ struct drm_dp_mst_topology_state *state)
+{
+ struct drm_dp_vcpi_allocation *vcpi;
+ int pbn_used = 0;
+
+ if (port->pdt == DP_PEER_DEVICE_NONE)
+ return 0;
+
+ if (drm_dp_mst_is_end_device(port->pdt, port->mcs)) {
+ bool found = false;
+
+ list_for_each_entry(vcpi, &state->vcpis, next) {
+ if (vcpi->port != port)
+ continue;
+ if (!vcpi->pbn)
+ return 0;
+
+ found = true;
+ break;
+ }
+ if (!found)
+ return 0;
+
+ /* This should never happen, as it means we tried to
+ * set a mode before querying the full_pbn
+ */
+ if (WARN_ON(!port->full_pbn))
+ return -EINVAL;
+
+ pbn_used = vcpi->pbn;
+ } else {
+ pbn_used = drm_dp_mst_atomic_check_mstb_bw_limit(port->mstb,
+ state);
+ if (pbn_used <= 0)
+ return pbn_used;
+ }
+
+ if (pbn_used > port->full_pbn) {
+ DRM_DEBUG_ATOMIC("[MSTB:%p] [MST PORT:%p] required PBN of %d exceeds port limit of %d\n",
+ port->parent, port, pbn_used,
+ port->full_pbn);
return -ENOSPC;
}
- return 0;
+
+ DRM_DEBUG_ATOMIC("[MSTB:%p] [MST PORT:%p] uses %d out of %d PBN\n",
+ port->parent, port, pbn_used, port->full_pbn);
+
+ return pbn_used;
}
static inline int
@@ -5060,9 +5123,15 @@ int drm_dp_mst_atomic_check(struct drm_atomic_state *state)
ret = drm_dp_mst_atomic_check_vcpi_alloc_limit(mgr, mst_state);
if (ret)
break;
- ret = drm_dp_mst_atomic_check_bw_limit(mgr->mst_primary, mst_state);
- if (ret)
+
+ mutex_lock(&mgr->lock);
+ ret = drm_dp_mst_atomic_check_mstb_bw_limit(mgr->mst_primary,
+ mst_state);
+ mutex_unlock(&mgr->lock);
+ if (ret < 0)
break;
+ else
+ ret = 0;
}
return ret;
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 99769d6..805fb00 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -3211,7 +3211,7 @@ static u8 *drm_find_cea_extension(const struct edid *edid)
return cea;
}
-static const struct drm_display_mode *cea_mode_for_vic(u8 vic)
+static __always_inline const struct drm_display_mode *cea_mode_for_vic(u8 vic)
{
BUILD_BUG_ON(1 + ARRAY_SIZE(edid_cea_modes_1) - 1 != 127);
BUILD_BUG_ON(193 + ARRAY_SIZE(edid_cea_modes_193) - 1 != 219);
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index a421a2e..df31e57 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -254,11 +254,16 @@ static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem)
if (ret)
goto err_zero_use;
- if (obj->import_attach)
+ if (obj->import_attach) {
shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf);
- else
+ } else {
+ pgprot_t prot = PAGE_KERNEL;
+
+ if (!shmem->map_cached)
+ prot = pgprot_writecombine(prot);
shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
- VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+ VM_MAP, prot);
+ }
if (!shmem->vaddr) {
DRM_DEBUG_KMS("Failed to vmap pages\n");
@@ -540,8 +545,9 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
}
vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
- vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
- vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+ if (!shmem->map_cached)
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
vma->vm_ops = &drm_gem_shmem_vm_ops;
return 0;
diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c
index b481caf..825abe3 100644
--- a/drivers/gpu/drm/drm_lease.c
+++ b/drivers/gpu/drm/drm_lease.c
@@ -542,10 +542,12 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev,
}
DRM_DEBUG_LEASE("Creating lease\n");
+ /* lessee will take the ownership of leases */
lessee = drm_lease_create(lessor, &leases);
if (IS_ERR(lessee)) {
ret = PTR_ERR(lessee);
+ idr_destroy(&leases);
goto out_leases;
}
@@ -580,7 +582,6 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev,
out_leases:
put_unused_fd(fd);
- idr_destroy(&leases);
DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl failed: %d\n", ret);
return ret;
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index 10336b1..d4d6451 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -1698,6 +1698,13 @@ static int drm_mode_parse_cmdline_options(const char *str,
if (rotation && freestanding)
return -EINVAL;
+ if (!(rotation & DRM_MODE_ROTATE_MASK))
+ rotation |= DRM_MODE_ROTATE_0;
+
+ /* Make sure there is exactly one rotation defined */
+ if (!is_power_of_2(rotation & DRM_MODE_ROTATE_MASK))
+ return -EINVAL;
+
mode->rotation_reflection = rotation;
return 0;
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 86d9b0e..1de2cde 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -967,7 +967,7 @@ int drm_prime_sg_to_page_addr_arrays(struct sg_table *sgt, struct page **pages,
index = 0;
for_each_sg(sgt->sgl, sg, sgt->nents, count) {
- len = sg->length;
+ len = sg_dma_len(sg);
page = sg_page(sg);
addr = sg_dma_address(sg);
diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 8428ae1..1f79bc2 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -55,6 +55,7 @@ static const char * const decon_clks_name[] = {
struct decon_context {
struct device *dev;
struct drm_device *drm_dev;
+ void *dma_priv;
struct exynos_drm_crtc *crtc;
struct exynos_drm_plane planes[WINDOWS_NR];
struct exynos_drm_plane_config configs[WINDOWS_NR];
@@ -644,7 +645,7 @@ static int decon_bind(struct device *dev, struct device *master, void *data)
decon_clear_channels(ctx->crtc);
- return exynos_drm_register_dma(drm_dev, dev);
+ return exynos_drm_register_dma(drm_dev, dev, &ctx->dma_priv);
}
static void decon_unbind(struct device *dev, struct device *master, void *data)
@@ -654,7 +655,7 @@ static void decon_unbind(struct device *dev, struct device *master, void *data)
decon_atomic_disable(ctx->crtc);
/* detach this sub driver from iommu mapping if supported. */
- exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev);
+ exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev, &ctx->dma_priv);
}
static const struct component_ops decon_component_ops = {
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index ff59c64..1eed332 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -40,6 +40,7 @@
struct decon_context {
struct device *dev;
struct drm_device *drm_dev;
+ void *dma_priv;
struct exynos_drm_crtc *crtc;
struct exynos_drm_plane planes[WINDOWS_NR];
struct exynos_drm_plane_config configs[WINDOWS_NR];
@@ -127,13 +128,13 @@ static int decon_ctx_initialize(struct decon_context *ctx,
decon_clear_channels(ctx->crtc);
- return exynos_drm_register_dma(drm_dev, ctx->dev);
+ return exynos_drm_register_dma(drm_dev, ctx->dev, &ctx->dma_priv);
}
static void decon_ctx_remove(struct decon_context *ctx)
{
/* detach this sub driver from iommu mapping if supported. */
- exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev);
+ exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev, &ctx->dma_priv);
}
static u32 decon_calc_clkdiv(struct decon_context *ctx,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dma.c b/drivers/gpu/drm/exynos/exynos_drm_dma.c
index 9ebc027..619f814 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dma.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dma.c
@@ -58,7 +58,7 @@ static inline void clear_dma_max_seg_size(struct device *dev)
* mapping.
*/
static int drm_iommu_attach_device(struct drm_device *drm_dev,
- struct device *subdrv_dev)
+ struct device *subdrv_dev, void **dma_priv)
{
struct exynos_drm_private *priv = drm_dev->dev_private;
int ret;
@@ -74,7 +74,14 @@ static int drm_iommu_attach_device(struct drm_device *drm_dev,
return ret;
if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) {
- if (to_dma_iommu_mapping(subdrv_dev))
+ /*
+ * Keep the original DMA mapping of the sub-device and
+ * restore it on Exynos DRM detach, otherwise the DMA
+ * framework considers it as IOMMU-less during the next
+ * probe (in case of deferred probe or modular build)
+ */
+ *dma_priv = to_dma_iommu_mapping(subdrv_dev);
+ if (*dma_priv)
arm_iommu_detach_device(subdrv_dev);
ret = arm_iommu_attach_device(subdrv_dev, priv->mapping);
@@ -98,19 +105,21 @@ static int drm_iommu_attach_device(struct drm_device *drm_dev,
* mapping
*/
static void drm_iommu_detach_device(struct drm_device *drm_dev,
- struct device *subdrv_dev)
+ struct device *subdrv_dev, void **dma_priv)
{
struct exynos_drm_private *priv = drm_dev->dev_private;
- if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU))
+ if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) {
arm_iommu_detach_device(subdrv_dev);
- else if (IS_ENABLED(CONFIG_IOMMU_DMA))
+ arm_iommu_attach_device(subdrv_dev, *dma_priv);
+ } else if (IS_ENABLED(CONFIG_IOMMU_DMA))
iommu_detach_device(priv->mapping, subdrv_dev);
clear_dma_max_seg_size(subdrv_dev);
}
-int exynos_drm_register_dma(struct drm_device *drm, struct device *dev)
+int exynos_drm_register_dma(struct drm_device *drm, struct device *dev,
+ void **dma_priv)
{
struct exynos_drm_private *priv = drm->dev_private;
@@ -137,13 +146,14 @@ int exynos_drm_register_dma(struct drm_device *drm, struct device *dev)
priv->mapping = mapping;
}
- return drm_iommu_attach_device(drm, dev);
+ return drm_iommu_attach_device(drm, dev, dma_priv);
}
-void exynos_drm_unregister_dma(struct drm_device *drm, struct device *dev)
+void exynos_drm_unregister_dma(struct drm_device *drm, struct device *dev,
+ void **dma_priv)
{
if (IS_ENABLED(CONFIG_EXYNOS_IOMMU))
- drm_iommu_detach_device(drm, dev);
+ drm_iommu_detach_device(drm, dev, dma_priv);
}
void exynos_drm_cleanup_dma(struct drm_device *drm)
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index d4d21d8..6ae9056 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -223,8 +223,10 @@ static inline bool is_drm_iommu_supported(struct drm_device *drm_dev)
return priv->mapping ? true : false;
}
-int exynos_drm_register_dma(struct drm_device *drm, struct device *dev);
-void exynos_drm_unregister_dma(struct drm_device *drm, struct device *dev);
+int exynos_drm_register_dma(struct drm_device *drm, struct device *dev,
+ void **dma_priv);
+void exynos_drm_unregister_dma(struct drm_device *drm, struct device *dev,
+ void **dma_priv);
void exynos_drm_cleanup_dma(struct drm_device *drm);
#ifdef CONFIG_DRM_EXYNOS_DPI
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index 33628d8..a85365c 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -1773,8 +1773,9 @@ static int exynos_dsi_probe(struct platform_device *pdev)
ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(dsi->supplies),
dsi->supplies);
if (ret) {
- dev_info(dev, "failed to get regulators: %d\n", ret);
- return -EPROBE_DEFER;
+ if (ret != -EPROBE_DEFER)
+ dev_info(dev, "failed to get regulators: %d\n", ret);
+ return ret;
}
dsi->clks = devm_kcalloc(dev,
@@ -1787,9 +1788,10 @@ static int exynos_dsi_probe(struct platform_device *pdev)
dsi->clks[i] = devm_clk_get(dev, clk_names[i]);
if (IS_ERR(dsi->clks[i])) {
if (strcmp(clk_names[i], "sclk_mipi") == 0) {
- strcpy(clk_names[i], OLD_SCLK_MIPI_CLK_NAME);
- i--;
- continue;
+ dsi->clks[i] = devm_clk_get(dev,
+ OLD_SCLK_MIPI_CLK_NAME);
+ if (!IS_ERR(dsi->clks[i]))
+ continue;
}
dev_info(dev, "failed to get the clock: %s\n",
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
index 8ea2e1d..29ab8be 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
@@ -97,6 +97,7 @@ struct fimc_scaler {
struct fimc_context {
struct exynos_drm_ipp ipp;
struct drm_device *drm_dev;
+ void *dma_priv;
struct device *dev;
struct exynos_drm_ipp_task *task;
struct exynos_drm_ipp_formats *formats;
@@ -1133,7 +1134,7 @@ static int fimc_bind(struct device *dev, struct device *master, void *data)
ctx->drm_dev = drm_dev;
ipp->drm_dev = drm_dev;
- exynos_drm_register_dma(drm_dev, dev);
+ exynos_drm_register_dma(drm_dev, dev, &ctx->dma_priv);
exynos_drm_ipp_register(dev, ipp, &ipp_funcs,
DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE |
@@ -1153,7 +1154,7 @@ static void fimc_unbind(struct device *dev, struct device *master,
struct exynos_drm_ipp *ipp = &ctx->ipp;
exynos_drm_ipp_unregister(dev, ipp);
- exynos_drm_unregister_dma(drm_dev, dev);
+ exynos_drm_unregister_dma(drm_dev, dev, &ctx->dma_priv);
}
static const struct component_ops fimc_component_ops = {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 21aec38..bb67cad 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -167,6 +167,7 @@ static struct fimd_driver_data exynos5420_fimd_driver_data = {
struct fimd_context {
struct device *dev;
struct drm_device *drm_dev;
+ void *dma_priv;
struct exynos_drm_crtc *crtc;
struct exynos_drm_plane planes[WINDOWS_NR];
struct exynos_drm_plane_config configs[WINDOWS_NR];
@@ -1090,7 +1091,7 @@ static int fimd_bind(struct device *dev, struct device *master, void *data)
if (is_drm_iommu_supported(drm_dev))
fimd_clear_channels(ctx->crtc);
- return exynos_drm_register_dma(drm_dev, dev);
+ return exynos_drm_register_dma(drm_dev, dev, &ctx->dma_priv);
}
static void fimd_unbind(struct device *dev, struct device *master,
@@ -1100,7 +1101,7 @@ static void fimd_unbind(struct device *dev, struct device *master,
fimd_atomic_disable(ctx->crtc);
- exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev);
+ exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev, &ctx->dma_priv);
if (ctx->encoder)
exynos_dpi_remove(ctx->encoder);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 2a3382d..fcee33a 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -232,6 +232,7 @@ struct g2d_runqueue_node {
struct g2d_data {
struct device *dev;
+ void *dma_priv;
struct clk *gate_clk;
void __iomem *regs;
int irq;
@@ -1409,7 +1410,7 @@ static int g2d_bind(struct device *dev, struct device *master, void *data)
return ret;
}
- ret = exynos_drm_register_dma(drm_dev, dev);
+ ret = exynos_drm_register_dma(drm_dev, dev, &g2d->dma_priv);
if (ret < 0) {
dev_err(dev, "failed to enable iommu.\n");
g2d_fini_cmdlist(g2d);
@@ -1434,7 +1435,7 @@ static void g2d_unbind(struct device *dev, struct device *master, void *data)
priv->g2d_dev = NULL;
cancel_work_sync(&g2d->runqueue_work);
- exynos_drm_unregister_dma(g2d->drm_dev, dev);
+ exynos_drm_unregister_dma(g2d->drm_dev, dev, &g2d->dma_priv);
}
static const struct component_ops g2d_component_ops = {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 88b6fca..45e9aee 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -97,6 +97,7 @@ struct gsc_scaler {
struct gsc_context {
struct exynos_drm_ipp ipp;
struct drm_device *drm_dev;
+ void *dma_priv;
struct device *dev;
struct exynos_drm_ipp_task *task;
struct exynos_drm_ipp_formats *formats;
@@ -1169,7 +1170,7 @@ static int gsc_bind(struct device *dev, struct device *master, void *data)
ctx->drm_dev = drm_dev;
ctx->drm_dev = drm_dev;
- exynos_drm_register_dma(drm_dev, dev);
+ exynos_drm_register_dma(drm_dev, dev, &ctx->dma_priv);
exynos_drm_ipp_register(dev, ipp, &ipp_funcs,
DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE |
@@ -1189,7 +1190,7 @@ static void gsc_unbind(struct device *dev, struct device *master,
struct exynos_drm_ipp *ipp = &ctx->ipp;
exynos_drm_ipp_unregister(dev, ipp);
- exynos_drm_unregister_dma(drm_dev, dev);
+ exynos_drm_unregister_dma(drm_dev, dev, &ctx->dma_priv);
}
static const struct component_ops gsc_component_ops = {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
index b984829..dafa87b 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -56,6 +56,7 @@ struct rot_variant {
struct rot_context {
struct exynos_drm_ipp ipp;
struct drm_device *drm_dev;
+ void *dma_priv;
struct device *dev;
void __iomem *regs;
struct clk *clock;
@@ -243,7 +244,7 @@ static int rotator_bind(struct device *dev, struct device *master, void *data)
rot->drm_dev = drm_dev;
ipp->drm_dev = drm_dev;
- exynos_drm_register_dma(drm_dev, dev);
+ exynos_drm_register_dma(drm_dev, dev, &rot->dma_priv);
exynos_drm_ipp_register(dev, ipp, &ipp_funcs,
DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE,
@@ -261,7 +262,7 @@ static void rotator_unbind(struct device *dev, struct device *master,
struct exynos_drm_ipp *ipp = &rot->ipp;
exynos_drm_ipp_unregister(dev, ipp);
- exynos_drm_unregister_dma(rot->drm_dev, rot->dev);
+ exynos_drm_unregister_dma(rot->drm_dev, rot->dev, &rot->dma_priv);
}
static const struct component_ops rotator_component_ops = {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
index 497973e..93c43c8 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
@@ -39,6 +39,7 @@ struct scaler_data {
struct scaler_context {
struct exynos_drm_ipp ipp;
struct drm_device *drm_dev;
+ void *dma_priv;
struct device *dev;
void __iomem *regs;
struct clk *clock[SCALER_MAX_CLK];
@@ -450,7 +451,7 @@ static int scaler_bind(struct device *dev, struct device *master, void *data)
scaler->drm_dev = drm_dev;
ipp->drm_dev = drm_dev;
- exynos_drm_register_dma(drm_dev, dev);
+ exynos_drm_register_dma(drm_dev, dev, &scaler->dma_priv);
exynos_drm_ipp_register(dev, ipp, &ipp_funcs,
DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE |
@@ -470,7 +471,8 @@ static void scaler_unbind(struct device *dev, struct device *master,
struct exynos_drm_ipp *ipp = &scaler->ipp;
exynos_drm_ipp_unregister(dev, ipp);
- exynos_drm_unregister_dma(scaler->drm_dev, scaler->dev);
+ exynos_drm_unregister_dma(scaler->drm_dev, scaler->dev,
+ &scaler->dma_priv);
}
static const struct component_ops scaler_component_ops = {
diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index 9ff921f..f141916 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -1805,18 +1805,10 @@ static int hdmi_resources_init(struct hdmi_context *hdata)
hdata->reg_hdmi_en = devm_regulator_get_optional(dev, "hdmi-en");
- if (PTR_ERR(hdata->reg_hdmi_en) != -ENODEV) {
+ if (PTR_ERR(hdata->reg_hdmi_en) != -ENODEV)
if (IS_ERR(hdata->reg_hdmi_en))
return PTR_ERR(hdata->reg_hdmi_en);
- ret = regulator_enable(hdata->reg_hdmi_en);
- if (ret) {
- DRM_DEV_ERROR(dev,
- "failed to enable hdmi-en regulator\n");
- return ret;
- }
- }
-
return hdmi_bridge_init(hdata);
}
@@ -2023,6 +2015,15 @@ static int hdmi_probe(struct platform_device *pdev)
}
}
+ if (!IS_ERR(hdata->reg_hdmi_en)) {
+ ret = regulator_enable(hdata->reg_hdmi_en);
+ if (ret) {
+ DRM_DEV_ERROR(dev,
+ "failed to enable hdmi-en regulator\n");
+ goto err_hdmiphy;
+ }
+ }
+
pm_runtime_enable(dev);
audio_infoframe = &hdata->audio.infoframe;
@@ -2047,7 +2048,8 @@ static int hdmi_probe(struct platform_device *pdev)
err_rpm_disable:
pm_runtime_disable(dev);
-
+ if (!IS_ERR(hdata->reg_hdmi_en))
+ regulator_disable(hdata->reg_hdmi_en);
err_hdmiphy:
if (hdata->hdmiphy_port)
put_device(&hdata->hdmiphy_port->dev);
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index 38ae9c3..21b726b 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -94,6 +94,7 @@ struct mixer_context {
struct platform_device *pdev;
struct device *dev;
struct drm_device *drm_dev;
+ void *dma_priv;
struct exynos_drm_crtc *crtc;
struct exynos_drm_plane planes[MIXER_WIN_NR];
unsigned long flags;
@@ -894,12 +895,14 @@ static int mixer_initialize(struct mixer_context *mixer_ctx,
}
}
- return exynos_drm_register_dma(drm_dev, mixer_ctx->dev);
+ return exynos_drm_register_dma(drm_dev, mixer_ctx->dev,
+ &mixer_ctx->dma_priv);
}
static void mixer_ctx_remove(struct mixer_context *mixer_ctx)
{
- exynos_drm_unregister_dma(mixer_ctx->drm_dev, mixer_ctx->dev);
+ exynos_drm_unregister_dma(mixer_ctx->drm_dev, mixer_ctx->dev,
+ &mixer_ctx->dma_priv);
}
static int mixer_enable_vblank(struct exynos_drm_crtc *crtc)
diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h b/drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h
index 0da8602..e2ac098 100644
--- a/drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h
+++ b/drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h
@@ -83,7 +83,6 @@
#define VSIZE_OFST 20
#define LDI_INT_EN 0x741C
#define FRAME_END_INT_EN_OFST 1
-#define UNDERFLOW_INT_EN_OFST 2
#define LDI_CTRL 0x7420
#define BPP_OFST 3
#define DATA_GATE_EN BIT(2)
diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
index 73cd28a..8600012 100644
--- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
+++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
@@ -46,7 +46,6 @@ struct ade_hw_ctx {
struct clk *media_noc_clk;
struct clk *ade_pix_clk;
struct reset_control *reset;
- struct work_struct display_reset_wq;
bool power_on;
int irq;
@@ -136,7 +135,6 @@ static void ade_init(struct ade_hw_ctx *ctx)
*/
ade_update_bits(base + ADE_CTRL, FRM_END_START_OFST,
FRM_END_START_MASK, REG_EFFECTIVE_IN_ADEEN_FRMEND);
- ade_update_bits(base + LDI_INT_EN, UNDERFLOW_INT_EN_OFST, MASK(1), 1);
}
static bool ade_crtc_mode_fixup(struct drm_crtc *crtc,
@@ -304,17 +302,6 @@ static void ade_crtc_disable_vblank(struct drm_crtc *crtc)
MASK(1), 0);
}
-static void drm_underflow_wq(struct work_struct *work)
-{
- struct ade_hw_ctx *ctx = container_of(work, struct ade_hw_ctx,
- display_reset_wq);
- struct drm_device *drm_dev = ctx->crtc->dev;
- struct drm_atomic_state *state;
-
- state = drm_atomic_helper_suspend(drm_dev);
- drm_atomic_helper_resume(drm_dev, state);
-}
-
static irqreturn_t ade_irq_handler(int irq, void *data)
{
struct ade_hw_ctx *ctx = data;
@@ -331,12 +318,6 @@ static irqreturn_t ade_irq_handler(int irq, void *data)
MASK(1), 1);
drm_crtc_handle_vblank(crtc);
}
- if (status & BIT(UNDERFLOW_INT_EN_OFST)) {
- ade_update_bits(base + LDI_INT_CLR, UNDERFLOW_INT_EN_OFST,
- MASK(1), 1);
- DRM_ERROR("LDI underflow!");
- schedule_work(&ctx->display_reset_wq);
- }
return IRQ_HANDLED;
}
@@ -919,7 +900,6 @@ static void *ade_hw_ctx_alloc(struct platform_device *pdev,
if (ret)
return ERR_PTR(-EIO);
- INIT_WORK(&ctx->display_reset_wq, drm_underflow_wq);
ctx->crtc = crtc;
return ctx;
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index ba95959..907c447 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -75,9 +75,8 @@
help
This option enables capturing the GPU state when a hang is detected.
This information is vital for triaging hangs and assists in debugging.
- Please report any hang to
- https://bugs.freedesktop.org/enter_bug.cgi?product=DRI
- for triaging.
+ Please report any hang for triaging according to:
+ https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs
If in doubt, say "Y".
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index b8c5f89..a1f2411 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -294,7 +294,7 @@
$(shell cd $(srctree)/$(src) && find * -name '*.h')))
quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
- cmd_hdrtest = $(CC) $(c_flags) -S -o /dev/null -x c /dev/null -include $<; touch $@
+ cmd_hdrtest = $(CC) $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; touch $@
$(obj)/%.hdrtest: $(src)/%.h FORCE
$(call if_changed_dep,hdrtest)
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 8beac06..ef4017a 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -357,14 +357,16 @@ parse_generic_dtd(struct drm_i915_private *dev_priv,
panel_fixed_mode->hdisplay + dtd->hfront_porch;
panel_fixed_mode->hsync_end =
panel_fixed_mode->hsync_start + dtd->hsync;
- panel_fixed_mode->htotal = panel_fixed_mode->hsync_end;
+ panel_fixed_mode->htotal =
+ panel_fixed_mode->hdisplay + dtd->hblank;
panel_fixed_mode->vdisplay = dtd->vactive;
panel_fixed_mode->vsync_start =
panel_fixed_mode->vdisplay + dtd->vfront_porch;
panel_fixed_mode->vsync_end =
panel_fixed_mode->vsync_start + dtd->vsync;
- panel_fixed_mode->vtotal = panel_fixed_mode->vsync_end;
+ panel_fixed_mode->vtotal =
+ panel_fixed_mode->vdisplay + dtd->vblank;
panel_fixed_mode->clock = dtd->pixel_clock;
panel_fixed_mode->width_mm = dtd->width_mm;
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 33f1dc3..d9a61f3 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -4251,7 +4251,9 @@ static bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv,
void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv,
struct intel_crtc_state *crtc_state)
{
- if (INTEL_GEN(dev_priv) >= 11 && crtc_state->port_clock > 594000)
+ if (IS_ELKHARTLAKE(dev_priv) && crtc_state->port_clock > 594000)
+ crtc_state->min_voltage_level = 3;
+ else if (INTEL_GEN(dev_priv) >= 11 && crtc_state->port_clock > 594000)
crtc_state->min_voltage_level = 1;
else if (IS_CANNONLAKE(dev_priv) && crtc_state->port_clock > 594000)
crtc_state->min_voltage_level = 2;
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 19ea842..aa45395 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -11087,7 +11087,7 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state)
u32 base;
if (INTEL_INFO(dev_priv)->display.cursor_needs_physical)
- base = obj->phys_handle->busaddr;
+ base = sg_dma_address(obj->mm.pages->sgl);
else
base = intel_plane_ggtt_offset(plane_state);
@@ -12366,6 +12366,7 @@ static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state)
/* Copy parameters to slave plane */
linked_state->ctl = plane_state->ctl | PLANE_CTL_YUV420_Y_PLANE;
linked_state->color_ctl = plane_state->color_ctl;
+ linked_state->view = plane_state->view;
memcpy(linked_state->color_plane, plane_state->color_plane,
sizeof(linked_state->color_plane));
@@ -14476,37 +14477,23 @@ static int intel_atomic_check_crtcs(struct intel_atomic_state *state)
return 0;
}
-static bool intel_cpu_transcoder_needs_modeset(struct intel_atomic_state *state,
- enum transcoder transcoder)
+static bool intel_cpu_transcoders_need_modeset(struct intel_atomic_state *state,
+ u8 transcoders)
{
- struct intel_crtc_state *new_crtc_state;
+ const struct intel_crtc_state *new_crtc_state;
struct intel_crtc *crtc;
int i;
- for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i)
- if (new_crtc_state->cpu_transcoder == transcoder)
- return needs_modeset(new_crtc_state);
+ for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
+ if (new_crtc_state->hw.enable &&
+ transcoders & BIT(new_crtc_state->cpu_transcoder) &&
+ needs_modeset(new_crtc_state))
+ return true;
+ }
return false;
}
-static void
-intel_modeset_synced_crtcs(struct intel_atomic_state *state,
- u8 transcoders)
-{
- struct intel_crtc_state *new_crtc_state;
- struct intel_crtc *crtc;
- int i;
-
- for_each_new_intel_crtc_in_state(state, crtc,
- new_crtc_state, i) {
- if (transcoders & BIT(new_crtc_state->cpu_transcoder)) {
- new_crtc_state->uapi.mode_changed = true;
- new_crtc_state->update_pipe = false;
- }
- }
-}
-
static int
intel_modeset_all_tiles(struct intel_atomic_state *state, int tile_grp_id)
{
@@ -14662,15 +14649,20 @@ static int intel_atomic_check(struct drm_device *dev,
if (intel_dp_mst_is_slave_trans(new_crtc_state)) {
enum transcoder master = new_crtc_state->mst_master_transcoder;
- if (intel_cpu_transcoder_needs_modeset(state, master)) {
+ if (intel_cpu_transcoders_need_modeset(state, BIT(master))) {
new_crtc_state->uapi.mode_changed = true;
new_crtc_state->update_pipe = false;
}
- } else if (is_trans_port_sync_mode(new_crtc_state)) {
+ }
+
+ if (is_trans_port_sync_mode(new_crtc_state)) {
u8 trans = new_crtc_state->sync_mode_slaves_mask |
BIT(new_crtc_state->master_transcoder);
- intel_modeset_synced_crtcs(state, trans);
+ if (intel_cpu_transcoders_need_modeset(state, trans)) {
+ new_crtc_state->uapi.mode_changed = true;
+ new_crtc_state->update_pipe = false;
+ }
}
}
@@ -17441,6 +17433,24 @@ static int intel_initial_commit(struct drm_device *dev)
* have readout for pipe gamma enable.
*/
crtc_state->uapi.color_mgmt_changed = true;
+
+ /*
+ * FIXME hack to force full modeset when DSC is being
+ * used.
+ *
+ * As long as we do not have full state readout and
+ * config comparison of crtc_state->dsc, we have no way
+ * to ensure reliable fastset. Remove once we have
+ * readout for DSC.
+ */
+ if (crtc_state->dsc.compression_enable) {
+ ret = drm_atomic_add_affected_connectors(state,
+ &crtc->base);
+ if (ret)
+ goto out;
+ crtc_state->uapi.mode_changed = true;
+ drm_dbg_kms(dev, "Force full modeset for DSC\n");
+ }
}
}
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index 21561ac..46c40db 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -4466,13 +4466,19 @@ static void icl_dbuf_disable(struct drm_i915_private *dev_priv)
static void icl_mbus_init(struct drm_i915_private *dev_priv)
{
- u32 val;
+ u32 mask, val;
- val = MBUS_ABOX_BT_CREDIT_POOL1(16) |
- MBUS_ABOX_BT_CREDIT_POOL2(16) |
- MBUS_ABOX_B_CREDIT(1) |
- MBUS_ABOX_BW_CREDIT(1);
+ mask = MBUS_ABOX_BT_CREDIT_POOL1_MASK |
+ MBUS_ABOX_BT_CREDIT_POOL2_MASK |
+ MBUS_ABOX_B_CREDIT_MASK |
+ MBUS_ABOX_BW_CREDIT_MASK;
+ val = I915_READ(MBUS_ABOX_CTL);
+ val &= ~mask;
+ val |= MBUS_ABOX_BT_CREDIT_POOL1(16) |
+ MBUS_ABOX_BT_CREDIT_POOL2(16) |
+ MBUS_ABOX_B_CREDIT(1) |
+ MBUS_ABOX_BW_CREDIT(1);
I915_WRITE(MBUS_ABOX_CTL, val);
}
@@ -4968,8 +4974,21 @@ static void tgl_bw_buddy_init(struct drm_i915_private *dev_priv)
I915_WRITE(BW_BUDDY1_CTL, BW_BUDDY_DISABLE);
I915_WRITE(BW_BUDDY2_CTL, BW_BUDDY_DISABLE);
} else {
+ u32 val;
+
I915_WRITE(BW_BUDDY1_PAGE_MASK, table[i].page_mask);
I915_WRITE(BW_BUDDY2_PAGE_MASK, table[i].page_mask);
+
+ /* Wa_22010178259:tgl */
+ val = I915_READ(BW_BUDDY1_CTL);
+ val &= ~BW_BUDDY_TLB_REQ_TIMER_MASK;
+ val |= REG_FIELD_PREP(BW_BUDDY_TLB_REQ_TIMER_MASK, 0x8);
+ I915_WRITE(BW_BUDDY1_CTL, val);
+
+ val = I915_READ(BW_BUDDY2_CTL);
+ val &= ~BW_BUDDY_TLB_REQ_TIMER_MASK;
+ val |= REG_FIELD_PREP(BW_BUDDY_TLB_REQ_TIMER_MASK, 0x8);
+ I915_WRITE(BW_BUDDY2_CTL, val);
}
}
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 89fb0d9..04f953b 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -384,6 +384,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
return data;
}
+#ifdef CONFIG_ACPI
static int i2c_adapter_lookup(struct acpi_resource *ares, void *data)
{
struct i2c_adapter_lookup *lookup = data;
@@ -393,8 +394,7 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, void *data)
acpi_handle adapter_handle;
acpi_status status;
- if (intel_dsi->i2c_bus_num >= 0 ||
- !i2c_acpi_get_i2c_resource(ares, &sb))
+ if (!i2c_acpi_get_i2c_resource(ares, &sb))
return 1;
if (lookup->slave_addr != sb->slave_address)
@@ -413,14 +413,41 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, void *data)
return 1;
}
+static void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi,
+ const u16 slave_addr)
+{
+ struct drm_device *drm_dev = intel_dsi->base.base.dev;
+ struct device *dev = &drm_dev->pdev->dev;
+ struct acpi_device *acpi_dev;
+ struct list_head resource_list;
+ struct i2c_adapter_lookup lookup;
+
+ acpi_dev = ACPI_COMPANION(dev);
+ if (acpi_dev) {
+ memset(&lookup, 0, sizeof(lookup));
+ lookup.slave_addr = slave_addr;
+ lookup.intel_dsi = intel_dsi;
+ lookup.dev_handle = acpi_device_handle(acpi_dev);
+
+ INIT_LIST_HEAD(&resource_list);
+ acpi_dev_get_resources(acpi_dev, &resource_list,
+ i2c_adapter_lookup,
+ &lookup);
+ acpi_dev_free_resource_list(&resource_list);
+ }
+}
+#else
+static inline void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi,
+ const u16 slave_addr)
+{
+}
+#endif
+
static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
{
struct drm_device *drm_dev = intel_dsi->base.base.dev;
struct device *dev = &drm_dev->pdev->dev;
struct i2c_adapter *adapter;
- struct acpi_device *acpi_dev;
- struct list_head resource_list;
- struct i2c_adapter_lookup lookup;
struct i2c_msg msg;
int ret;
u8 vbt_i2c_bus_num = *(data + 2);
@@ -431,20 +458,7 @@ static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
if (intel_dsi->i2c_bus_num < 0) {
intel_dsi->i2c_bus_num = vbt_i2c_bus_num;
-
- acpi_dev = ACPI_COMPANION(dev);
- if (acpi_dev) {
- memset(&lookup, 0, sizeof(lookup));
- lookup.slave_addr = slave_addr;
- lookup.intel_dsi = intel_dsi;
- lookup.dev_handle = acpi_device_handle(acpi_dev);
-
- INIT_LIST_HEAD(&resource_list);
- acpi_dev_get_resources(acpi_dev, &resource_list,
- i2c_adapter_lookup,
- &lookup);
- acpi_dev_free_resource_list(&resource_list);
- }
+ i2c_acpi_find_adapter(intel_dsi, slave_addr);
}
adapter = i2c_get_adapter(intel_dsi->i2c_bus_num);
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 89c9cf5..8302505 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -852,10 +852,12 @@ void intel_psr_enable(struct intel_dp *intel_dp,
{
struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
- if (!crtc_state->has_psr)
+ if (!CAN_PSR(dev_priv) || dev_priv->psr.dp != intel_dp)
return;
- if (WARN_ON(!CAN_PSR(dev_priv)))
+ dev_priv->psr.force_mode_changed = false;
+
+ if (!crtc_state->has_psr)
return;
WARN_ON(dev_priv->drrs.dp);
@@ -1009,6 +1011,8 @@ void intel_psr_update(struct intel_dp *intel_dp,
if (!CAN_PSR(dev_priv) || READ_ONCE(psr->dp) != intel_dp)
return;
+ dev_priv->psr.force_mode_changed = false;
+
mutex_lock(&dev_priv->psr.lock);
enable = crtc_state->has_psr && psr_global_enabled(psr->debug);
@@ -1534,7 +1538,7 @@ void intel_psr_atomic_check(struct drm_connector *connector,
struct drm_crtc_state *crtc_state;
if (!CAN_PSR(dev_priv) || !new_state->crtc ||
- dev_priv->psr.initially_probed)
+ !dev_priv->psr.force_mode_changed)
return;
intel_connector = to_intel_connector(connector);
@@ -1545,5 +1549,18 @@ void intel_psr_atomic_check(struct drm_connector *connector,
crtc_state = drm_atomic_get_new_crtc_state(new_state->state,
new_state->crtc);
crtc_state->mode_changed = true;
- dev_priv->psr.initially_probed = true;
+}
+
+void intel_psr_set_force_mode_changed(struct intel_dp *intel_dp)
+{
+ struct drm_i915_private *dev_priv;
+
+ if (!intel_dp)
+ return;
+
+ dev_priv = dp_to_i915(intel_dp);
+ if (!CAN_PSR(dev_priv) || intel_dp != dev_priv->psr.dp)
+ return;
+
+ dev_priv->psr.force_mode_changed = true;
}
diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h
index c58a1d4..274fc6b 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.h
+++ b/drivers/gpu/drm/i915/display/intel_psr.h
@@ -40,5 +40,6 @@ bool intel_psr_enabled(struct intel_dp *intel_dp);
void intel_psr_atomic_check(struct drm_connector *connector,
struct drm_connector_state *old_state,
struct drm_connector_state *new_state);
+void intel_psr_set_force_mode_changed(struct intel_dp *intel_dp);
#endif /* __INTEL_PSR_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index a2e57e62..151a1e8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -565,6 +565,22 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state)
if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
return -ENODEV;
+ /*
+ * If the cancel fails, we then need to reset, cleanly!
+ *
+ * If the per-engine reset fails, all hope is lost! We resort
+ * to a full GPU reset in that unlikely case, but realistically
+ * if the engine could not reset, the full reset does not fare
+ * much better. The damage has been done.
+ *
+ * However, if we cannot reset an engine by itself, we cannot
+ * cleanup a hanging persistent context without causing
+ * colateral damage, and we should not pretend we can by
+ * exposing the interface.
+ */
+ if (!intel_has_reset_engine(&ctx->i915->gt))
+ return -ENODEV;
+
i915_gem_context_clear_persistence(ctx);
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index d5a0f5a..7643a30 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -423,7 +423,8 @@ eb_validate_vma(struct i915_execbuffer *eb,
if (unlikely(entry->flags & eb->invalid_flags))
return -EINVAL;
- if (unlikely(entry->alignment && !is_power_of_2(entry->alignment)))
+ if (unlikely(entry->alignment &&
+ !is_power_of_2_u64(entry->alignment)))
return -EINVAL;
/*
@@ -1981,9 +1982,20 @@ static int __eb_parse(struct dma_fence_work *work)
pw->trampoline);
}
+static void __eb_parse_release(struct dma_fence_work *work)
+{
+ struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
+
+ if (pw->trampoline)
+ i915_active_release(&pw->trampoline->active);
+ i915_active_release(&pw->shadow->active);
+ i915_active_release(&pw->batch->active);
+}
+
static const struct dma_fence_work_ops eb_parse_ops = {
.name = "eb_parse",
.work = __eb_parse,
+ .release = __eb_parse_release,
};
static int eb_parse_pipeline(struct i915_execbuffer *eb,
@@ -1997,6 +2009,20 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
if (!pw)
return -ENOMEM;
+ err = i915_active_acquire(&eb->batch->active);
+ if (err)
+ goto err_free;
+
+ err = i915_active_acquire(&shadow->active);
+ if (err)
+ goto err_batch;
+
+ if (trampoline) {
+ err = i915_active_acquire(&trampoline->active);
+ if (err)
+ goto err_shadow;
+ }
+
dma_fence_work_init(&pw->base, &eb_parse_ops);
pw->engine = eb->engine;
@@ -2006,7 +2032,9 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
pw->shadow = shadow;
pw->trampoline = trampoline;
- dma_resv_lock(pw->batch->resv, NULL);
+ err = dma_resv_lock_interruptible(pw->batch->resv, NULL);
+ if (err)
+ goto err_trampoline;
err = dma_resv_reserve_shared(pw->batch->resv, 1);
if (err)
@@ -2034,6 +2062,14 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
err_batch_unlock:
dma_resv_unlock(pw->batch->resv);
+err_trampoline:
+ if (trampoline)
+ i915_active_release(&trampoline->active);
+err_shadow:
+ i915_active_release(&shadow->active);
+err_batch:
+ i915_active_release(&eb->batch->active);
+err_free:
kfree(pw);
return err;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index b9fdac2..0b6a442 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -455,10 +455,11 @@ static void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
{
- struct i915_mmap_offset *mmo;
+ struct i915_mmap_offset *mmo, *mn;
spin_lock(&obj->mmo.lock);
- list_for_each_entry(mmo, &obj->mmo.offsets, offset) {
+ rbtree_postorder_for_each_entry_safe(mmo, mn,
+ &obj->mmo.offsets, offset) {
/*
* vma_node_unmap for GTT mmaps handled already in
* __i915_gem_object_release_mmap_gtt
@@ -488,6 +489,67 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
}
static struct i915_mmap_offset *
+lookup_mmo(struct drm_i915_gem_object *obj,
+ enum i915_mmap_type mmap_type)
+{
+ struct rb_node *rb;
+
+ spin_lock(&obj->mmo.lock);
+ rb = obj->mmo.offsets.rb_node;
+ while (rb) {
+ struct i915_mmap_offset *mmo =
+ rb_entry(rb, typeof(*mmo), offset);
+
+ if (mmo->mmap_type == mmap_type) {
+ spin_unlock(&obj->mmo.lock);
+ return mmo;
+ }
+
+ if (mmo->mmap_type < mmap_type)
+ rb = rb->rb_right;
+ else
+ rb = rb->rb_left;
+ }
+ spin_unlock(&obj->mmo.lock);
+
+ return NULL;
+}
+
+static struct i915_mmap_offset *
+insert_mmo(struct drm_i915_gem_object *obj, struct i915_mmap_offset *mmo)
+{
+ struct rb_node *rb, **p;
+
+ spin_lock(&obj->mmo.lock);
+ rb = NULL;
+ p = &obj->mmo.offsets.rb_node;
+ while (*p) {
+ struct i915_mmap_offset *pos;
+
+ rb = *p;
+ pos = rb_entry(rb, typeof(*pos), offset);
+
+ if (pos->mmap_type == mmo->mmap_type) {
+ spin_unlock(&obj->mmo.lock);
+ drm_vma_offset_remove(obj->base.dev->vma_offset_manager,
+ &mmo->vma_node);
+ kfree(mmo);
+ return pos;
+ }
+
+ if (pos->mmap_type < mmo->mmap_type)
+ p = &rb->rb_right;
+ else
+ p = &rb->rb_left;
+ }
+ rb_link_node(&mmo->offset, rb, p);
+ rb_insert_color(&mmo->offset, &obj->mmo.offsets);
+ spin_unlock(&obj->mmo.lock);
+
+ return mmo;
+}
+
+static struct i915_mmap_offset *
mmap_offset_attach(struct drm_i915_gem_object *obj,
enum i915_mmap_type mmap_type,
struct drm_file *file)
@@ -496,20 +558,22 @@ mmap_offset_attach(struct drm_i915_gem_object *obj,
struct i915_mmap_offset *mmo;
int err;
+ mmo = lookup_mmo(obj, mmap_type);
+ if (mmo)
+ goto out;
+
mmo = kmalloc(sizeof(*mmo), GFP_KERNEL);
if (!mmo)
return ERR_PTR(-ENOMEM);
mmo->obj = obj;
- mmo->dev = obj->base.dev;
- mmo->file = file;
mmo->mmap_type = mmap_type;
drm_vma_node_reset(&mmo->vma_node);
- err = drm_vma_offset_add(mmo->dev->vma_offset_manager, &mmo->vma_node,
- obj->base.size / PAGE_SIZE);
+ err = drm_vma_offset_add(obj->base.dev->vma_offset_manager,
+ &mmo->vma_node, obj->base.size / PAGE_SIZE);
if (likely(!err))
- goto out;
+ goto insert;
/* Attempt to reap some mmap space from dead objects */
err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT);
@@ -517,19 +581,17 @@ mmap_offset_attach(struct drm_i915_gem_object *obj,
goto err;
i915_gem_drain_freed_objects(i915);
- err = drm_vma_offset_add(mmo->dev->vma_offset_manager, &mmo->vma_node,
- obj->base.size / PAGE_SIZE);
+ err = drm_vma_offset_add(obj->base.dev->vma_offset_manager,
+ &mmo->vma_node, obj->base.size / PAGE_SIZE);
if (err)
goto err;
+insert:
+ mmo = insert_mmo(obj, mmo);
+ GEM_BUG_ON(lookup_mmo(obj, mmap_type) != mmo);
out:
if (file)
drm_vma_node_allow(&mmo->vma_node, file);
-
- spin_lock(&obj->mmo.lock);
- list_add(&mmo->offset, &obj->mmo.offsets);
- spin_unlock(&obj->mmo.lock);
-
return mmo;
err:
@@ -745,60 +807,43 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
struct drm_vma_offset_node *node;
struct drm_file *priv = filp->private_data;
struct drm_device *dev = priv->minor->dev;
+ struct drm_i915_gem_object *obj = NULL;
struct i915_mmap_offset *mmo = NULL;
- struct drm_gem_object *obj = NULL;
struct file *anon;
if (drm_dev_is_unplugged(dev))
return -ENODEV;
+ rcu_read_lock();
drm_vma_offset_lock_lookup(dev->vma_offset_manager);
node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager,
vma->vm_pgoff,
vma_pages(vma));
- if (likely(node)) {
- mmo = container_of(node, struct i915_mmap_offset,
- vma_node);
- /*
- * In our dependency chain, the drm_vma_offset_node
- * depends on the validity of the mmo, which depends on
- * the gem object. However the only reference we have
- * at this point is the mmo (as the parent of the node).
- * Try to check if the gem object was at least cleared.
- */
- if (!mmo || !mmo->obj) {
- drm_vma_offset_unlock_lookup(dev->vma_offset_manager);
- return -EINVAL;
- }
+ if (node && drm_vma_node_is_allowed(node, priv)) {
/*
* Skip 0-refcnted objects as it is in the process of being
* destroyed and will be invalid when the vma manager lock
* is released.
*/
- obj = &mmo->obj->base;
- if (!kref_get_unless_zero(&obj->refcount))
- obj = NULL;
+ mmo = container_of(node, struct i915_mmap_offset, vma_node);
+ obj = i915_gem_object_get_rcu(mmo->obj);
}
drm_vma_offset_unlock_lookup(dev->vma_offset_manager);
+ rcu_read_unlock();
if (!obj)
- return -EINVAL;
+ return node ? -EACCES : -EINVAL;
- if (!drm_vma_node_is_allowed(node, priv)) {
- drm_gem_object_put_unlocked(obj);
- return -EACCES;
- }
-
- if (i915_gem_object_is_readonly(to_intel_bo(obj))) {
+ if (i915_gem_object_is_readonly(obj)) {
if (vma->vm_flags & VM_WRITE) {
- drm_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return -EINVAL;
}
vma->vm_flags &= ~VM_MAYWRITE;
}
- anon = mmap_singleton(to_i915(obj->dev));
+ anon = mmap_singleton(to_i915(dev));
if (IS_ERR(anon)) {
- drm_gem_object_put_unlocked(obj);
+ i915_gem_object_put(obj);
return PTR_ERR(anon);
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 46bacc8..5da9f9e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -63,7 +63,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
INIT_LIST_HEAD(&obj->lut_list);
spin_lock_init(&obj->mmo.lock);
- INIT_LIST_HEAD(&obj->mmo.offsets);
+ obj->mmo.offsets = RB_ROOT;
init_rcu_head(&obj->rcu);
@@ -100,8 +100,8 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
{
struct drm_i915_gem_object *obj = to_intel_bo(gem);
struct drm_i915_file_private *fpriv = file->driver_priv;
+ struct i915_mmap_offset *mmo, *mn;
struct i915_lut_handle *lut, *ln;
- struct i915_mmap_offset *mmo;
LIST_HEAD(close);
i915_gem_object_lock(obj);
@@ -117,14 +117,8 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
i915_gem_object_unlock(obj);
spin_lock(&obj->mmo.lock);
- list_for_each_entry(mmo, &obj->mmo.offsets, offset) {
- if (mmo->file != file)
- continue;
-
- spin_unlock(&obj->mmo.lock);
+ rbtree_postorder_for_each_entry_safe(mmo, mn, &obj->mmo.offsets, offset)
drm_vma_node_revoke(&mmo->vma_node, file);
- spin_lock(&obj->mmo.lock);
- }
spin_unlock(&obj->mmo.lock);
list_for_each_entry_safe(lut, ln, &close, obj_link) {
@@ -203,12 +197,14 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
i915_gem_object_release_mmap(obj);
- list_for_each_entry_safe(mmo, mn, &obj->mmo.offsets, offset) {
+ rbtree_postorder_for_each_entry_safe(mmo, mn,
+ &obj->mmo.offsets,
+ offset) {
drm_vma_offset_remove(obj->base.dev->vma_offset_manager,
&mmo->vma_node);
kfree(mmo);
}
- INIT_LIST_HEAD(&obj->mmo.offsets);
+ obj->mmo.offsets = RB_ROOT;
GEM_BUG_ON(atomic_read(&obj->bind_count));
GEM_BUG_ON(obj->userfault_count);
@@ -229,6 +225,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
/* But keep the pointer alive for RCU-protected lookups */
call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
+ cond_resched();
}
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index db70a33..9c86f2d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -70,14 +70,22 @@ i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle)
}
static inline struct drm_i915_gem_object *
+i915_gem_object_get_rcu(struct drm_i915_gem_object *obj)
+{
+ if (obj && !kref_get_unless_zero(&obj->base.refcount))
+ obj = NULL;
+
+ return obj;
+}
+
+static inline struct drm_i915_gem_object *
i915_gem_object_lookup(struct drm_file *file, u32 handle)
{
struct drm_i915_gem_object *obj;
rcu_read_lock();
obj = i915_gem_object_lookup_rcu(file, handle);
- if (obj && !kref_get_unless_zero(&obj->base.refcount))
- obj = NULL;
+ obj = i915_gem_object_get_rcu(obj);
rcu_read_unlock();
return obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 88e2686..c2174da 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -71,13 +71,11 @@ enum i915_mmap_type {
};
struct i915_mmap_offset {
- struct drm_device *dev;
struct drm_vma_offset_node vma_node;
struct drm_i915_gem_object *obj;
- struct drm_file *file;
enum i915_mmap_type mmap_type;
- struct list_head offset;
+ struct rb_node offset;
};
struct drm_i915_gem_object {
@@ -137,7 +135,7 @@ struct drm_i915_gem_object {
struct {
spinlock_t lock; /* Protects access to mmo offsets */
- struct list_head offsets;
+ struct rb_root offsets;
} mmo;
I915_SELFTEST_DECLARE(struct list_head st_link);
@@ -287,9 +285,6 @@ struct drm_i915_gem_object {
void *gvt_info;
};
-
- /** for phys allocated objects */
- struct drm_dma_handle *phys_handle;
};
static inline struct drm_i915_gem_object *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index b1b7c1b3..b07bb40 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -22,88 +22,87 @@
static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
struct address_space *mapping = obj->base.filp->f_mapping;
- struct drm_dma_handle *phys;
- struct sg_table *st;
struct scatterlist *sg;
- char *vaddr;
+ struct sg_table *st;
+ dma_addr_t dma;
+ void *vaddr;
+ void *dst;
int i;
- int err;
if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
return -EINVAL;
- /* Always aligning to the object size, allows a single allocation
+ /*
+ * Always aligning to the object size, allows a single allocation
* to handle all possible callers, and given typical object sizes,
* the alignment of the buddy allocation will naturally match.
*/
- phys = drm_pci_alloc(obj->base.dev,
- roundup_pow_of_two(obj->base.size),
- roundup_pow_of_two(obj->base.size));
- if (!phys)
+ vaddr = dma_alloc_coherent(&obj->base.dev->pdev->dev,
+ roundup_pow_of_two(obj->base.size),
+ &dma, GFP_KERNEL);
+ if (!vaddr)
return -ENOMEM;
- vaddr = phys->vaddr;
- for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
- struct page *page;
- char *src;
-
- page = shmem_read_mapping_page(mapping, i);
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
- goto err_phys;
- }
-
- src = kmap_atomic(page);
- memcpy(vaddr, src, PAGE_SIZE);
- drm_clflush_virt_range(vaddr, PAGE_SIZE);
- kunmap_atomic(src);
-
- put_page(page);
- vaddr += PAGE_SIZE;
- }
-
- intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);
-
st = kmalloc(sizeof(*st), GFP_KERNEL);
- if (!st) {
- err = -ENOMEM;
- goto err_phys;
- }
+ if (!st)
+ goto err_pci;
- if (sg_alloc_table(st, 1, GFP_KERNEL)) {
- kfree(st);
- err = -ENOMEM;
- goto err_phys;
- }
+ if (sg_alloc_table(st, 1, GFP_KERNEL))
+ goto err_st;
sg = st->sgl;
sg->offset = 0;
sg->length = obj->base.size;
- sg_dma_address(sg) = phys->busaddr;
+ sg_assign_page(sg, (struct page *)vaddr);
+ sg_dma_address(sg) = dma;
sg_dma_len(sg) = obj->base.size;
- obj->phys_handle = phys;
+ dst = vaddr;
+ for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+ struct page *page;
+ void *src;
+
+ page = shmem_read_mapping_page(mapping, i);
+ if (IS_ERR(page))
+ goto err_st;
+
+ src = kmap_atomic(page);
+ memcpy(dst, src, PAGE_SIZE);
+ drm_clflush_virt_range(dst, PAGE_SIZE);
+ kunmap_atomic(src);
+
+ put_page(page);
+ dst += PAGE_SIZE;
+ }
+
+ intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);
__i915_gem_object_set_pages(obj, st, sg->length);
return 0;
-err_phys:
- drm_pci_free(obj->base.dev, phys);
-
- return err;
+err_st:
+ kfree(st);
+err_pci:
+ dma_free_coherent(&obj->base.dev->pdev->dev,
+ roundup_pow_of_two(obj->base.size),
+ vaddr, dma);
+ return -ENOMEM;
}
static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
struct sg_table *pages)
{
+ dma_addr_t dma = sg_dma_address(pages->sgl);
+ void *vaddr = sg_page(pages->sgl);
+
__i915_gem_object_release_shmem(obj, pages, false);
if (obj->mm.dirty) {
struct address_space *mapping = obj->base.filp->f_mapping;
- char *vaddr = obj->phys_handle->vaddr;
+ void *src = vaddr;
int i;
for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
@@ -115,15 +114,16 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
continue;
dst = kmap_atomic(page);
- drm_clflush_virt_range(vaddr, PAGE_SIZE);
- memcpy(dst, vaddr, PAGE_SIZE);
+ drm_clflush_virt_range(src, PAGE_SIZE);
+ memcpy(dst, src, PAGE_SIZE);
kunmap_atomic(dst);
set_page_dirty(page);
if (obj->mm.madv == I915_MADV_WILLNEED)
mark_page_accessed(page);
put_page(page);
- vaddr += PAGE_SIZE;
+
+ src += PAGE_SIZE;
}
obj->mm.dirty = false;
}
@@ -131,7 +131,9 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
sg_free_table(pages);
kfree(pages);
- drm_pci_free(obj->base.dev, obj->phys_handle);
+ dma_free_coherent(&obj->base.dev->pdev->dev,
+ roundup_pow_of_two(obj->base.size),
+ vaddr, dma);
}
static void phys_release(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index f7e4b39..59b387a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -256,8 +256,7 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
freed = i915_gem_shrink(i915, -1UL, NULL,
I915_SHRINK_BOUND |
- I915_SHRINK_UNBOUND |
- I915_SHRINK_ACTIVE);
+ I915_SHRINK_UNBOUND);
}
return freed;
@@ -336,7 +335,6 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
freed_pages = 0;
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
freed_pages += i915_gem_shrink(i915, -1UL, NULL,
- I915_SHRINK_ACTIVE |
I915_SHRINK_BOUND |
I915_SHRINK_UNBOUND |
I915_SHRINK_WRITEBACK);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index ef7c74c..43912e9 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -570,7 +570,7 @@ static bool assert_mmap_offset(struct drm_i915_private *i915,
obj = i915_gem_object_create_internal(i915, size);
if (IS_ERR(obj))
- return PTR_ERR(obj);
+ return false;
mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL);
i915_gem_object_put(obj);
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 0ba524a..cbad7fe 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -136,6 +136,9 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
struct intel_engine_cs *engine =
container_of(b, struct intel_engine_cs, breadcrumbs);
+ if (unlikely(intel_engine_is_virtual(engine)))
+ engine = intel_virtual_engine_get_sibling(engine, 0);
+
intel_engine_add_retire(engine, tl);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 23137b2..57e8a05 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -67,21 +67,18 @@ static int intel_context_active_acquire(struct intel_context *ce)
{
int err;
- err = i915_active_acquire(&ce->active);
- if (err)
- return err;
+ __i915_active_acquire(&ce->active);
+
+ if (intel_context_is_barrier(ce))
+ return 0;
/* Preallocate tracking nodes */
- if (!intel_context_is_barrier(ce)) {
- err = i915_active_acquire_preallocate_barrier(&ce->active,
- ce->engine);
- if (err) {
- i915_active_release(&ce->active);
- return err;
- }
- }
+ err = i915_active_acquire_preallocate_barrier(&ce->active,
+ ce->engine);
+ if (err)
+ i915_active_release(&ce->active);
- return 0;
+ return err;
}
static void intel_context_active_release(struct intel_context *ce)
@@ -101,13 +98,19 @@ int __intel_context_do_pin(struct intel_context *ce)
return err;
}
- if (mutex_lock_interruptible(&ce->pin_mutex))
- return -EINTR;
+ err = i915_active_acquire(&ce->active);
+ if (err)
+ return err;
- if (likely(!atomic_read(&ce->pin_count))) {
+ if (mutex_lock_interruptible(&ce->pin_mutex)) {
+ err = -EINTR;
+ goto out_release;
+ }
+
+ if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
err = intel_context_active_acquire(ce);
if (unlikely(err))
- goto err;
+ goto out_unlock;
err = ce->ops->pin(ce);
if (unlikely(err))
@@ -117,18 +120,19 @@ int __intel_context_do_pin(struct intel_context *ce)
ce->ring->head, ce->ring->tail);
smp_mb__before_atomic(); /* flush pin before it is visible */
+ atomic_inc(&ce->pin_count);
}
- atomic_inc(&ce->pin_count);
GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
-
- mutex_unlock(&ce->pin_mutex);
- return 0;
+ GEM_BUG_ON(i915_active_is_idle(&ce->active));
+ goto out_unlock;
err_active:
intel_context_active_release(ce);
-err:
+out_unlock:
mutex_unlock(&ce->pin_mutex);
+out_release:
+ i915_active_release(&ce->active);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f451ef3..06ff769 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -671,6 +671,7 @@ void
intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
{
INIT_LIST_HEAD(&engine->active.requests);
+ INIT_LIST_HEAD(&engine->active.hold);
spin_lock_init(&engine->active.lock);
lockdep_set_subclass(&engine->active.lock, subclass);
@@ -1422,6 +1423,17 @@ static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
}
}
+static unsigned long list_count(struct list_head *list)
+{
+ struct list_head *pos;
+ unsigned long count = 0;
+
+ list_for_each(pos, list)
+ count++;
+
+ return count;
+}
+
void intel_engine_dump(struct intel_engine_cs *engine,
struct drm_printer *m,
const char *header, ...)
@@ -1491,6 +1503,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE);
}
}
+ drm_printf(m, "\tOn hold?: %lu\n", list_count(&engine->active.hold));
spin_unlock_irqrestore(&engine->active.lock, flags);
drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 350da59..92be41a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -295,6 +295,7 @@ struct intel_engine_cs {
struct {
spinlock_t lock;
struct list_head requests;
+ struct list_head hold; /* ready requests, but on hold */
} active;
struct llist_head barrier_tasks;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index 7ef1d37..24c99d0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -99,6 +99,9 @@ static bool add_retire(struct intel_engine_cs *engine,
void intel_engine_add_retire(struct intel_engine_cs *engine,
struct intel_timeline *tl)
{
+ /* We don't deal well with the engine disappearing beneath us */
+ GEM_BUG_ON(intel_engine_is_virtual(engine));
+
if (add_retire(engine, tl))
schedule_work(&engine->retire_work);
}
@@ -144,24 +147,32 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
fence = i915_active_fence_get(&tl->last_request);
if (fence) {
+ mutex_unlock(&tl->mutex);
+
timeout = dma_fence_wait_timeout(fence,
interruptible,
timeout);
dma_fence_put(fence);
+
+ /* Retirement is best effort */
+ if (!mutex_trylock(&tl->mutex)) {
+ active_count++;
+ goto out_active;
+ }
}
}
if (!retire_requests(tl) || flush_submission(gt))
active_count++;
+ mutex_unlock(&tl->mutex);
- spin_lock(&timelines->lock);
+out_active: spin_lock(&timelines->lock);
- /* Resume iteration after dropping lock */
+ /* Resume list iteration after reacquiring spinlock */
list_safe_reset_next(tl, tn, link);
if (atomic_dec_and_test(&tl->active_count))
list_del(&tl->link);
- mutex_unlock(&tl->mutex);
/* Defer the final release to after the spinlock */
if (refcount_dec_and_test(&tl->kref.refcount)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 0cf0f6f..31455ec 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -237,7 +237,8 @@ static void execlists_init_reg_state(u32 *reg_state,
bool close);
static void
__execlists_update_reg_state(const struct intel_context *ce,
- const struct intel_engine_cs *engine);
+ const struct intel_engine_cs *engine,
+ u32 head);
static void mark_eio(struct i915_request *rq)
{
@@ -985,6 +986,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
list_move(&rq->sched.link, pl);
+ set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+
active = rq;
} else {
struct intel_engine_cs *owner = rq->context->engine;
@@ -1184,12 +1187,11 @@ static void reset_active(struct i915_request *rq,
head = rq->tail;
else
head = active_request(ce->timeline, rq)->head;
- ce->ring->head = intel_ring_wrap(ce->ring, head);
- intel_ring_update_space(ce->ring);
+ head = intel_ring_wrap(ce->ring, head);
/* Scrub the context image to prevent replaying the previous batch */
restore_default_state(ce, engine);
- __execlists_update_reg_state(ce, engine);
+ __execlists_update_reg_state(ce, engine, head);
/* We've switched away, so this should be a no-op, but intent matters */
ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
@@ -1319,7 +1321,7 @@ static u64 execlists_update_context(struct i915_request *rq)
{
struct intel_context *ce = rq->context;
u64 desc = ce->lrc_desc;
- u32 tail;
+ u32 tail, prev;
/*
* WaIdleLiteRestore:bdw,skl
@@ -1332,9 +1334,15 @@ static u64 execlists_update_context(struct i915_request *rq)
* subsequent resubmissions (for lite restore). Should that fail us,
* and we try and submit the same tail again, force the context
* reload.
+ *
+ * If we need to return to a preempted context, we need to skip the
+ * lite-restore and force it to reload the RING_TAIL. Otherwise, the
+ * HW has a tendency to ignore us rewinding the TAIL to the end of
+ * an earlier request.
*/
tail = intel_ring_set_tail(rq->ring, rq->tail);
- if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail))
+ prev = ce->lrc_reg_state[CTX_RING_TAIL];
+ if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
desc |= CTX_DESC_FORCE_RESTORE;
ce->lrc_reg_state[CTX_RING_TAIL] = tail;
rq->tail = rq->wa_tail;
@@ -1535,7 +1543,8 @@ static bool can_merge_rq(const struct i915_request *prev,
return true;
if (unlikely((prev->fence.flags ^ next->fence.flags) &
- (I915_FENCE_FLAG_NOPREEMPT | I915_FENCE_FLAG_SENTINEL)))
+ (BIT(I915_FENCE_FLAG_NOPREEMPT) |
+ BIT(I915_FENCE_FLAG_SENTINEL))))
return false;
if (!can_merge_ctx(prev->context, next->context))
@@ -1591,16 +1600,10 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
spin_unlock(&old->breadcrumbs.irq_lock);
}
-static struct i915_request *
-last_active(const struct intel_engine_execlists *execlists)
-{
- struct i915_request * const *last = READ_ONCE(execlists->active);
-
- while (*last && i915_request_completed(*last))
- last++;
-
- return *last;
-}
+#define for_each_waiter(p__, rq__) \
+ list_for_each_entry_lockless(p__, \
+ &(rq__)->sched.waiters_list, \
+ wait_link)
static void defer_request(struct i915_request *rq, struct list_head * const pl)
{
@@ -1619,7 +1622,7 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl)
GEM_BUG_ON(i915_request_is_active(rq));
list_move_tail(&rq->sched.link, pl);
- list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+ for_each_waiter(p, rq) {
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
@@ -1632,8 +1635,8 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl)
!i915_request_completed(rq));
GEM_BUG_ON(i915_request_is_active(w));
- if (list_empty(&w->sched.link))
- continue; /* Not yet submitted; unready */
+ if (!i915_request_is_ready(w))
+ continue;
if (rq_prio(w) < rq_prio(rq))
continue;
@@ -1665,11 +1668,9 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
if (!intel_engine_has_timeslices(engine))
return false;
- if (list_is_last(&rq->sched.link, &engine->active.requests))
- return false;
-
- hint = max(rq_prio(list_next_entry(rq, sched.link)),
- engine->execlists.queue_priority_hint);
+ hint = engine->execlists.queue_priority_hint;
+ if (!list_is_last(&rq->sched.link, &engine->active.requests))
+ hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
return hint >= effective_prio(rq);
}
@@ -1711,16 +1712,26 @@ static void set_timeslice(struct intel_engine_cs *engine)
set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
}
+static void start_timeslice(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists *execlists = &engine->execlists;
+
+ execlists->switch_priority_hint = execlists->queue_priority_hint;
+
+ if (timer_pending(&execlists->timer))
+ return;
+
+ set_timer_ms(&execlists->timer, timeslice(engine));
+}
+
static void record_preemption(struct intel_engine_execlists *execlists)
{
(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
}
-static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
+static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
+ const struct i915_request *rq)
{
- struct i915_request *rq;
-
- rq = last_active(&engine->execlists);
if (!rq)
return 0;
@@ -1731,13 +1742,14 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
return READ_ONCE(engine->props.preempt_timeout_ms);
}
-static void set_preempt_timeout(struct intel_engine_cs *engine)
+static void set_preempt_timeout(struct intel_engine_cs *engine,
+ const struct i915_request *rq)
{
if (!intel_engine_has_preempt_reset(engine))
return;
set_timer_ms(&engine->execlists.preempt,
- active_preempt_timeout(engine));
+ active_preempt_timeout(engine, rq));
}
static inline void clear_ports(struct i915_request **ports, int count)
@@ -1750,6 +1762,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_request **port = execlists->pending;
struct i915_request ** const last_port = port + execlists->port_mask;
+ struct i915_request * const *active;
struct i915_request *last;
struct rb_node *rb;
bool submit = false;
@@ -1804,7 +1817,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* i.e. we will retrigger preemption following the ack in case
* of trouble.
*/
- last = last_active(execlists);
+ active = READ_ONCE(execlists->active);
+ while ((last = *active) && i915_request_completed(last))
+ active++;
+
if (last) {
if (need_preempt(engine, last, rb)) {
ENGINE_TRACE(engine,
@@ -1831,14 +1847,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
*/
__unwind_incomplete_requests(engine);
- /*
- * If we need to return to the preempted context, we
- * need to skip the lite-restore and force it to
- * reload the RING_TAIL. Otherwise, the HW has a
- * tendency to ignore us rewinding the TAIL to the
- * end of an earlier request.
- */
- last->context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
last = NULL;
} else if (need_timeslice(engine, last) &&
timer_expired(&engine->execlists.timer)) {
@@ -1882,11 +1890,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* Even if ELSP[1] is occupied and not worthy
* of timeslices, our queue might be.
*/
- if (!execlists->timer.expires &&
- need_timeslice(engine, last))
- set_timer_ms(&execlists->timer,
- timeslice(engine));
-
+ start_timeslice(engine);
return;
}
}
@@ -1921,7 +1925,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.active.lock);
- return; /* leave this for another */
+ start_timeslice(engine);
+ return; /* leave this for another sibling */
}
ENGINE_TRACE(engine,
@@ -2097,7 +2102,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* Skip if we ended up with exactly the same set of requests,
* e.g. trying to timeslice a pair of ordered contexts
*/
- if (!memcmp(execlists->active, execlists->pending,
+ if (!memcmp(active, execlists->pending,
(port - execlists->pending + 1) * sizeof(*port))) {
do
execlists_schedule_out(fetch_and_zero(port));
@@ -2108,7 +2113,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
clear_ports(port + 1, last_port - port);
execlists_submit_ports(engine);
- set_preempt_timeout(engine);
+ set_preempt_timeout(engine, *active);
} else {
skip_submit:
ring_set_paused(engine, 0);
@@ -2351,6 +2356,310 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
}
}
+static void __execlists_hold(struct i915_request *rq)
+{
+ LIST_HEAD(list);
+
+ do {
+ struct i915_dependency *p;
+
+ if (i915_request_is_active(rq))
+ __i915_request_unsubmit(rq);
+
+ RQ_TRACE(rq, "on hold\n");
+ clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+ list_move_tail(&rq->sched.link, &rq->engine->active.hold);
+ i915_request_set_hold(rq);
+
+ list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+ struct i915_request *w =
+ container_of(p->waiter, typeof(*w), sched);
+
+ /* Leave semaphores spinning on the other engines */
+ if (w->engine != rq->engine)
+ continue;
+
+ if (!i915_request_is_ready(w))
+ continue;
+
+ if (i915_request_completed(w))
+ continue;
+
+ if (i915_request_on_hold(rq))
+ continue;
+
+ list_move_tail(&w->sched.link, &list);
+ }
+
+ rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+ } while (rq);
+}
+
+static bool execlists_hold(struct intel_engine_cs *engine,
+ struct i915_request *rq)
+{
+ spin_lock_irq(&engine->active.lock);
+
+ if (i915_request_completed(rq)) { /* too late! */
+ rq = NULL;
+ goto unlock;
+ }
+
+ if (rq->engine != engine) { /* preempted virtual engine */
+ struct virtual_engine *ve = to_virtual_engine(rq->engine);
+
+ /*
+ * intel_context_inflight() is only protected by virtue
+ * of process_csb() being called only by the tasklet (or
+ * directly from inside reset while the tasklet is suspended).
+ * Assert that neither of those are allowed to run while we
+ * poke at the request queues.
+ */
+ GEM_BUG_ON(!reset_in_progress(&engine->execlists));
+
+ /*
+ * An unsubmitted request along a virtual engine will
+ * remain on the active (this) engine until we are able
+ * to process the context switch away (and so mark the
+ * context as no longer in flight). That cannot have happened
+ * yet, otherwise we would not be hanging!
+ */
+ spin_lock(&ve->base.active.lock);
+ GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
+ GEM_BUG_ON(ve->request != rq);
+ ve->request = NULL;
+ spin_unlock(&ve->base.active.lock);
+ i915_request_put(rq);
+
+ rq->engine = engine;
+ }
+
+ /*
+ * Transfer this request onto the hold queue to prevent it
+ * being resumbitted to HW (and potentially completed) before we have
+ * released it. Since we may have already submitted following
+ * requests, we need to remove those as well.
+ */
+ GEM_BUG_ON(i915_request_on_hold(rq));
+ GEM_BUG_ON(rq->engine != engine);
+ __execlists_hold(rq);
+
+unlock:
+ spin_unlock_irq(&engine->active.lock);
+ return rq;
+}
+
+static bool hold_request(const struct i915_request *rq)
+{
+ struct i915_dependency *p;
+
+ /*
+ * If one of our ancestors is on hold, we must also be on hold,
+ * otherwise we will bypass it and execute before it.
+ */
+ list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
+ const struct i915_request *s =
+ container_of(p->signaler, typeof(*s), sched);
+
+ if (s->engine != rq->engine)
+ continue;
+
+ if (i915_request_on_hold(s))
+ return true;
+ }
+
+ return false;
+}
+
+static void __execlists_unhold(struct i915_request *rq)
+{
+ LIST_HEAD(list);
+
+ do {
+ struct i915_dependency *p;
+
+ GEM_BUG_ON(!i915_request_on_hold(rq));
+ GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+
+ i915_request_clear_hold(rq);
+ list_move_tail(&rq->sched.link,
+ i915_sched_lookup_priolist(rq->engine,
+ rq_prio(rq)));
+ set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+ RQ_TRACE(rq, "hold release\n");
+
+ /* Also release any children on this engine that are ready */
+ list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+ struct i915_request *w =
+ container_of(p->waiter, typeof(*w), sched);
+
+ if (w->engine != rq->engine)
+ continue;
+
+ if (!i915_request_on_hold(rq))
+ continue;
+
+ /* Check that no other parents are also on hold */
+ if (hold_request(rq))
+ continue;
+
+ list_move_tail(&w->sched.link, &list);
+ }
+
+ rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+ } while (rq);
+}
+
+static void execlists_unhold(struct intel_engine_cs *engine,
+ struct i915_request *rq)
+{
+ spin_lock_irq(&engine->active.lock);
+
+ /*
+ * Move this request back to the priority queue, and all of its
+ * children and grandchildren that were suspended along with it.
+ */
+ __execlists_unhold(rq);
+
+ if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
+ engine->execlists.queue_priority_hint = rq_prio(rq);
+ tasklet_hi_schedule(&engine->execlists.tasklet);
+ }
+
+ spin_unlock_irq(&engine->active.lock);
+}
+
+struct execlists_capture {
+ struct work_struct work;
+ struct i915_request *rq;
+ struct i915_gpu_coredump *error;
+};
+
+static void execlists_capture_work(struct work_struct *work)
+{
+ struct execlists_capture *cap = container_of(work, typeof(*cap), work);
+ const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
+ struct intel_engine_cs *engine = cap->rq->engine;
+ struct intel_gt_coredump *gt = cap->error->gt;
+ struct intel_engine_capture_vma *vma;
+
+ /* Compress all the objects attached to the request, slow! */
+ vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
+ if (vma) {
+ struct i915_vma_compress *compress =
+ i915_vma_capture_prepare(gt);
+
+ intel_engine_coredump_add_vma(gt->engine, vma, compress);
+ i915_vma_capture_finish(gt, compress);
+ }
+
+ gt->simulated = gt->engine->simulated;
+ cap->error->simulated = gt->simulated;
+
+ /* Publish the error state, and announce it to the world */
+ i915_error_state_store(cap->error);
+ i915_gpu_coredump_put(cap->error);
+
+ /* Return this request and all that depend upon it for signaling */
+ execlists_unhold(engine, cap->rq);
+ i915_request_put(cap->rq);
+
+ kfree(cap);
+}
+
+static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
+{
+ const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
+ struct execlists_capture *cap;
+
+ cap = kmalloc(sizeof(*cap), gfp);
+ if (!cap)
+ return NULL;
+
+ cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
+ if (!cap->error)
+ goto err_cap;
+
+ cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
+ if (!cap->error->gt)
+ goto err_gpu;
+
+ cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
+ if (!cap->error->gt->engine)
+ goto err_gt;
+
+ return cap;
+
+err_gt:
+ kfree(cap->error->gt);
+err_gpu:
+ kfree(cap->error);
+err_cap:
+ kfree(cap);
+ return NULL;
+}
+
+static bool execlists_capture(struct intel_engine_cs *engine)
+{
+ struct execlists_capture *cap;
+
+ if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
+ return true;
+
+ /*
+ * We need to _quickly_ capture the engine state before we reset.
+ * We are inside an atomic section (softirq) here and we are delaying
+ * the forced preemption event.
+ */
+ cap = capture_regs(engine);
+ if (!cap)
+ return true;
+
+ cap->rq = execlists_active(&engine->execlists);
+ GEM_BUG_ON(!cap->rq);
+
+ rcu_read_lock();
+ cap->rq = active_request(cap->rq->context->timeline, cap->rq);
+ cap->rq = i915_request_get_rcu(cap->rq);
+ rcu_read_unlock();
+ if (!cap->rq)
+ goto err_free;
+
+ /*
+ * Remove the request from the execlists queue, and take ownership
+ * of the request. We pass it to our worker who will _slowly_ compress
+ * all the pages the _user_ requested for debugging their batch, after
+ * which we return it to the queue for signaling.
+ *
+ * By removing them from the execlists queue, we also remove the
+ * requests from being processed by __unwind_incomplete_requests()
+ * during the intel_engine_reset(), and so they will *not* be replayed
+ * afterwards.
+ *
+ * Note that because we have not yet reset the engine at this point,
+ * it is possible for the request that we have identified as being
+ * guilty, did in fact complete and we will then hit an arbitration
+ * point allowing the outstanding preemption to succeed. The likelihood
+ * of that is very low (as capturing of the engine registers should be
+ * fast enough to run inside an irq-off atomic section!), so we will
+ * simply hold that request accountable for being non-preemptible
+ * long enough to force the reset.
+ */
+ if (!execlists_hold(engine, cap->rq))
+ goto err_rq;
+
+ INIT_WORK(&cap->work, execlists_capture_work);
+ schedule_work(&cap->work);
+ return true;
+
+err_rq:
+ i915_request_put(cap->rq);
+err_free:
+ i915_gpu_coredump_put(cap->error);
+ kfree(cap);
+ return false;
+}
+
static noinline void preempt_reset(struct intel_engine_cs *engine)
{
const unsigned int bit = I915_RESET_ENGINE + engine->id;
@@ -2368,7 +2677,12 @@ static noinline void preempt_reset(struct intel_engine_cs *engine)
ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
READ_ONCE(engine->props.preempt_timeout_ms),
jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
- intel_engine_reset(engine, "preemption time out");
+
+ ring_set_paused(engine, 1); /* Freeze the current request in place */
+ if (execlists_capture(engine))
+ intel_engine_reset(engine, "preemption time out");
+ else
+ ring_set_paused(engine, 0);
tasklet_enable(&engine->execlists.tasklet);
clear_and_wake_up_bit(bit, lock);
@@ -2430,11 +2744,12 @@ static void execlists_preempt(struct timer_list *timer)
}
static void queue_request(struct intel_engine_cs *engine,
- struct i915_sched_node *node,
- int prio)
+ struct i915_request *rq)
{
- GEM_BUG_ON(!list_empty(&node->link));
- list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio));
+ GEM_BUG_ON(!list_empty(&rq->sched.link));
+ list_add_tail(&rq->sched.link,
+ i915_sched_lookup_priolist(engine, rq_prio(rq)));
+ set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
}
static void __submit_queue_imm(struct intel_engine_cs *engine)
@@ -2462,6 +2777,13 @@ static void submit_queue(struct intel_engine_cs *engine,
__submit_queue_imm(engine);
}
+static bool ancestor_on_hold(const struct intel_engine_cs *engine,
+ const struct i915_request *rq)
+{
+ GEM_BUG_ON(i915_request_on_hold(rq));
+ return !list_empty(&engine->active.hold) && hold_request(rq);
+}
+
static void execlists_submit_request(struct i915_request *request)
{
struct intel_engine_cs *engine = request->engine;
@@ -2470,12 +2792,17 @@ static void execlists_submit_request(struct i915_request *request)
/* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&engine->active.lock, flags);
- queue_request(engine, &request->sched, rq_prio(request));
+ if (unlikely(ancestor_on_hold(engine, request))) {
+ list_add_tail(&request->sched.link, &engine->active.hold);
+ i915_request_set_hold(request);
+ } else {
+ queue_request(engine, request);
- GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
- GEM_BUG_ON(list_empty(&request->sched.link));
+ GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+ GEM_BUG_ON(list_empty(&request->sched.link));
- submit_queue(engine, request);
+ submit_queue(engine, request);
+ }
spin_unlock_irqrestore(&engine->active.lock, flags);
}
@@ -2531,21 +2858,21 @@ static void execlists_context_unpin(struct intel_context *ce)
ce->engine);
i915_gem_object_unpin_map(ce->state->obj);
- intel_ring_reset(ce->ring, ce->ring->tail);
}
static void
__execlists_update_reg_state(const struct intel_context *ce,
- const struct intel_engine_cs *engine)
+ const struct intel_engine_cs *engine,
+ u32 head)
{
struct intel_ring *ring = ce->ring;
u32 *regs = ce->lrc_reg_state;
- GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
+ GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
- regs[CTX_RING_HEAD] = ring->head;
+ regs[CTX_RING_HEAD] = head;
regs[CTX_RING_TAIL] = ring->tail;
/* RPCS */
@@ -2574,7 +2901,7 @@ __execlists_context_pin(struct intel_context *ce,
ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
- __execlists_update_reg_state(ce, engine);
+ __execlists_update_reg_state(ce, engine, ce->ring->tail);
return 0;
}
@@ -2615,7 +2942,7 @@ static void execlists_context_reset(struct intel_context *ce)
/* Scrub away the garbage */
execlists_init_reg_state(ce->lrc_reg_state,
ce, ce->engine, ce->ring, true);
- __execlists_update_reg_state(ce, ce->engine);
+ __execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
}
@@ -3170,6 +3497,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
struct intel_engine_execlists * const execlists = &engine->execlists;
struct intel_context *ce;
struct i915_request *rq;
+ u32 head;
mb(); /* paranoia: read the CSB pointers from after the reset */
clflush(execlists->csb_write);
@@ -3197,15 +3525,15 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
if (i915_request_completed(rq)) {
/* Idle context; tidy up the ring so we can restart afresh */
- ce->ring->head = intel_ring_wrap(ce->ring, rq->tail);
+ head = intel_ring_wrap(ce->ring, rq->tail);
goto out_replay;
}
/* Context has requests still in-flight; it should not be idle! */
GEM_BUG_ON(i915_active_is_idle(&ce->active));
rq = active_request(ce->timeline, rq);
- ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
- GEM_BUG_ON(ce->ring->head == ce->ring->tail);
+ head = intel_ring_wrap(ce->ring, rq->head);
+ GEM_BUG_ON(head == ce->ring->tail);
/*
* If this request hasn't started yet, e.g. it is waiting on a
@@ -3250,10 +3578,9 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
out_replay:
ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
- ce->ring->head, ce->ring->tail);
- intel_ring_update_space(ce->ring);
+ head, ce->ring->tail);
__execlists_reset_reg_state(ce, engine);
- __execlists_update_reg_state(ce, engine);
+ __execlists_update_reg_state(ce, engine, head);
ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
unwind:
@@ -3325,6 +3652,10 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine)
i915_priolist_free(p);
}
+ /* On-hold requests will be flushed to timeline upon their release */
+ list_for_each_entry(rq, &engine->active.hold, sched.link)
+ mark_eio(rq);
+
/* Cancel all attached virtual engines */
while ((rb = rb_first_cached(&execlists->virtual))) {
struct virtual_engine *ve =
@@ -3669,26 +4000,6 @@ static int gen12_emit_flush_render(struct i915_request *request,
*cs++ = preparser_disable(false);
intel_ring_advance(request, cs);
-
- /*
- * Wa_1604544889:tgl
- */
- if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
- flags = 0;
- flags |= PIPE_CONTROL_CS_STALL;
- flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
-
- flags |= PIPE_CONTROL_STORE_DATA_INDEX;
- flags |= PIPE_CONTROL_QW_WRITE;
-
- cs = intel_ring_begin(request, 6);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- cs = gen8_emit_pipe_control(cs, flags,
- LRC_PPHWSP_SCRATCH_ADDR);
- intel_ring_advance(request, cs);
- }
}
return 0;
@@ -4892,10 +5203,7 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
restore_default_state(ce, engine);
/* Rerun the request; its payload has been neutered (if guilty). */
- ce->ring->head = head;
- intel_ring_update_space(ce->ring);
-
- __execlists_update_reg_state(ce, engine);
+ __execlists_update_reg_state(ce, engine, head);
}
bool
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index 374b28f..6ff803f 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -145,6 +145,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
kref_init(&ring->ref);
ring->size = size;
+ ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(size);
/*
* Workaround an erratum on the i830 which causes a hang if
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
index ea2839d..5bdce24 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.h
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -56,6 +56,14 @@ static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
return pos & (ring->size - 1);
}
+static inline int intel_ring_direction(const struct intel_ring *ring,
+ u32 next, u32 prev)
+{
+ typecheck(typeof(ring->size), next);
+ typecheck(typeof(ring->size), prev);
+ return (next - prev) << ring->wrap;
+}
+
static inline bool
intel_ring_offset_valid(const struct intel_ring *ring,
unsigned int pos)
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_types.h b/drivers/gpu/drm/i915/gt/intel_ring_types.h
index d9f17f3..1a189ea 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_ring_types.h
@@ -39,12 +39,13 @@ struct intel_ring {
*/
atomic_t pin_count;
- u32 head;
- u32 tail;
- u32 emit;
+ u32 head; /* updated during retire, loosely tracks RING_HEAD */
+ u32 tail; /* updated on submission, used for RING_TAIL */
+ u32 emit; /* updated during request construction */
u32 space;
u32 size;
+ u32 wrap;
u32 effective_size;
};
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 8771652..d8d9f11 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -192,11 +192,15 @@ static void cacheline_release(struct intel_timeline_cacheline *cl)
static void cacheline_free(struct intel_timeline_cacheline *cl)
{
+ if (!i915_active_acquire_if_busy(&cl->active)) {
+ __idle_cacheline_free(cl);
+ return;
+ }
+
GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);
- if (i915_active_is_idle(&cl->active))
- __idle_cacheline_free(cl);
+ i915_active_release(&cl->active);
}
int intel_timeline_init(struct intel_timeline *timeline,
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 4e292d4..6c2f846 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -575,24 +575,19 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
- u32 val;
-
/* Wa_1409142259:tgl */
WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
- /* Wa_1604555607:tgl */
- val = intel_uncore_read(engine->uncore, FF_MODE2);
- val &= ~FF_MODE2_TDS_TIMER_MASK;
- val |= FF_MODE2_TDS_TIMER_128;
/*
- * FIXME: FF_MODE2 register is not readable till TGL B0. We can
- * enable verification of WA from the later steppings, which enables
- * the read of FF_MODE2.
+ * Wa_1604555607:gen12 and Wa_1608008084:gen12
+ * FF_MODE2 register will return the wrong value when read. The default
+ * value for this register is zero for all fields and there are no bit
+ * masks. So instead of doing a RMW we should just write the TDS timer
+ * value for Wa_1604555607.
*/
- wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, val,
- IS_TGL_REVID(engine->i915, TGL_REVID_A0, TGL_REVID_A0) ? 0 :
- FF_MODE2_TDS_TIMER_MASK);
+ wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK,
+ FF_MODE2_TDS_TIMER_128, 0);
}
static void
@@ -1534,15 +1529,34 @@ create_scratch(struct i915_address_space *vm, int count)
return ERR_PTR(err);
}
+static const struct {
+ u32 start;
+ u32 end;
+} mcr_ranges_gen8[] = {
+ { .start = 0x5500, .end = 0x55ff },
+ { .start = 0x7000, .end = 0x7fff },
+ { .start = 0x9400, .end = 0x97ff },
+ { .start = 0xb000, .end = 0xb3ff },
+ { .start = 0xe000, .end = 0xe7ff },
+ {},
+};
+
static bool mcr_range(struct drm_i915_private *i915, u32 offset)
{
+ int i;
+
+ if (INTEL_GEN(i915) < 8)
+ return false;
+
/*
- * Registers in this range are affected by the MCR selector
+ * Registers in these ranges are affected by the MCR selector
* which only controls CPU initiated MMIO. Routing does not
* work for CS access so we cannot verify them on this path.
*/
- if (INTEL_GEN(i915) >= 8 && (offset >= 0xb000 && offset <= 0xb4ff))
- return true;
+ for (i = 0; mcr_ranges_gen8[i].start; i++)
+ if (offset >= mcr_ranges_gen8[i].start &&
+ offset <= mcr_ranges_gen8[i].end)
+ return true;
return false;
}
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index a560b7e..f2806381 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -59,11 +59,26 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
ring->vaddr = (void *)(ring + 1);
atomic_set(&ring->pin_count, 1);
+ ring->vma = i915_vma_alloc();
+ if (!ring->vma) {
+ kfree(ring);
+ return NULL;
+ }
+ i915_active_init(&ring->vma->active, NULL, NULL);
+
intel_ring_update_space(ring);
return ring;
}
+static void mock_ring_free(struct intel_ring *ring)
+{
+ i915_active_fini(&ring->vma->active);
+ i915_vma_free(ring->vma);
+
+ kfree(ring);
+}
+
static struct i915_request *first_request(struct mock_engine *engine)
{
return list_first_entry_or_null(&engine->hw_queue,
@@ -121,7 +136,7 @@ static void mock_context_destroy(struct kref *ref)
GEM_BUG_ON(intel_context_is_pinned(ce));
if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
- kfree(ce->ring);
+ mock_ring_free(ce->ring);
mock_timeline_unpin(ce->timeline);
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 15cda02..b292f8c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -186,7 +186,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio)
}
GEM_BUG_ON(!ce[1]->ring->size);
intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
- __execlists_update_reg_state(ce[1], engine);
+ __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
if (IS_ERR(rq[0])) {
@@ -285,6 +285,107 @@ static int live_unlite_preempt(void *arg)
return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
}
+static int live_hold_reset(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = 0;
+
+ /*
+ * In order to support offline error capture for fast preempt reset,
+ * we need to decouple the guilty request and ensure that it and its
+ * descendents are not executed while the capture is in progress.
+ */
+
+ if (!intel_has_reset_engine(gt))
+ return 0;
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
+ unsigned long heartbeat;
+ struct i915_request *rq;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ break;
+ }
+
+ engine_heartbeat_disable(engine, &heartbeat);
+
+ rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out;
+ }
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ intel_gt_set_wedged(gt);
+ err = -ETIME;
+ goto out;
+ }
+
+ /* We have our request executing, now remove it and reset */
+
+ if (test_and_set_bit(I915_RESET_ENGINE + id,
+ >->reset.flags)) {
+ intel_gt_set_wedged(gt);
+ err = -EBUSY;
+ goto out;
+ }
+ tasklet_disable(&engine->execlists.tasklet);
+
+ engine->execlists.tasklet.func(engine->execlists.tasklet.data);
+ GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
+
+ i915_request_get(rq);
+ execlists_hold(engine, rq);
+ GEM_BUG_ON(!i915_request_on_hold(rq));
+
+ intel_engine_reset(engine, NULL);
+ GEM_BUG_ON(rq->fence.error != -EIO);
+
+ tasklet_enable(&engine->execlists.tasklet);
+ clear_and_wake_up_bit(I915_RESET_ENGINE + id,
+ >->reset.flags);
+
+ /* Check that we do not resubmit the held request */
+ if (!i915_request_wait(rq, 0, HZ / 5)) {
+ pr_err("%s: on hold request completed!\n",
+ engine->name);
+ i915_request_put(rq);
+ err = -EIO;
+ goto out;
+ }
+ GEM_BUG_ON(!i915_request_on_hold(rq));
+
+ /* But is resubmitted on release */
+ execlists_unhold(engine, rq);
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ pr_err("%s: held request did not complete!\n",
+ engine->name);
+ intel_gt_set_wedged(gt);
+ err = -ETIME;
+ }
+ i915_request_put(rq);
+
+out:
+ engine_heartbeat_enable(engine, heartbeat);
+ intel_context_put(ce);
+ if (err)
+ break;
+ }
+
+ igt_spinner_fini(&spin);
+ return err;
+}
+
static int
emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
{
@@ -3309,12 +3410,168 @@ static int live_virtual_bond(void *arg)
return 0;
}
+static int reset_virtual_engine(struct intel_gt *gt,
+ struct intel_engine_cs **siblings,
+ unsigned int nsibling)
+{
+ struct intel_engine_cs *engine;
+ struct intel_context *ve;
+ unsigned long *heartbeat;
+ struct igt_spinner spin;
+ struct i915_request *rq;
+ unsigned int n;
+ int err = 0;
+
+ /*
+ * In order to support offline error capture for fast preempt reset,
+ * we need to decouple the guilty request and ensure that it and its
+ * descendents are not executed while the capture is in progress.
+ */
+
+ heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL);
+ if (!heartbeat)
+ return -ENOMEM;
+
+ if (igt_spinner_init(&spin, gt)) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ ve = intel_execlists_create_virtual(siblings, nsibling);
+ if (IS_ERR(ve)) {
+ err = PTR_ERR(ve);
+ goto out_spin;
+ }
+
+ for (n = 0; n < nsibling; n++)
+ engine_heartbeat_disable(siblings[n], &heartbeat[n]);
+
+ rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_heartbeat;
+ }
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ intel_gt_set_wedged(gt);
+ err = -ETIME;
+ goto out_heartbeat;
+ }
+
+ engine = rq->engine;
+ GEM_BUG_ON(engine == ve->engine);
+
+ /* Take ownership of the reset and tasklet */
+ if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
+ >->reset.flags)) {
+ intel_gt_set_wedged(gt);
+ err = -EBUSY;
+ goto out_heartbeat;
+ }
+ tasklet_disable(&engine->execlists.tasklet);
+
+ engine->execlists.tasklet.func(engine->execlists.tasklet.data);
+ GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
+
+ /* Fake a preemption event; failed of course */
+ spin_lock_irq(&engine->active.lock);
+ __unwind_incomplete_requests(engine);
+ spin_unlock_irq(&engine->active.lock);
+ GEM_BUG_ON(rq->engine != ve->engine);
+
+ /* Reset the engine while keeping our active request on hold */
+ execlists_hold(engine, rq);
+ GEM_BUG_ON(!i915_request_on_hold(rq));
+
+ intel_engine_reset(engine, NULL);
+ GEM_BUG_ON(rq->fence.error != -EIO);
+
+ /* Release our grasp on the engine, letting CS flow again */
+ tasklet_enable(&engine->execlists.tasklet);
+ clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags);
+
+ /* Check that we do not resubmit the held request */
+ i915_request_get(rq);
+ if (!i915_request_wait(rq, 0, HZ / 5)) {
+ pr_err("%s: on hold request completed!\n",
+ engine->name);
+ intel_gt_set_wedged(gt);
+ err = -EIO;
+ goto out_rq;
+ }
+ GEM_BUG_ON(!i915_request_on_hold(rq));
+
+ /* But is resubmitted on release */
+ execlists_unhold(engine, rq);
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ pr_err("%s: held request did not complete!\n",
+ engine->name);
+ intel_gt_set_wedged(gt);
+ err = -ETIME;
+ }
+
+out_rq:
+ i915_request_put(rq);
+out_heartbeat:
+ for (n = 0; n < nsibling; n++)
+ engine_heartbeat_enable(siblings[n], heartbeat[n]);
+
+ intel_context_put(ve);
+out_spin:
+ igt_spinner_fini(&spin);
+out_free:
+ kfree(heartbeat);
+ return err;
+}
+
+static int live_virtual_reset(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+ unsigned int class, inst;
+
+ /*
+ * Check that we handle a reset event within a virtual engine.
+ * Only the physical engine is reset, but we have to check the flow
+ * of the virtual requests around the reset, and make sure it is not
+ * forgotten.
+ */
+
+ if (USES_GUC_SUBMISSION(gt->i915))
+ return 0;
+
+ if (!intel_has_reset_engine(gt))
+ return 0;
+
+ for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+ int nsibling, err;
+
+ nsibling = 0;
+ for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+ if (!gt->engine_class[class][inst])
+ continue;
+
+ siblings[nsibling++] = gt->engine_class[class][inst];
+ }
+ if (nsibling < 2)
+ continue;
+
+ err = reset_virtual_engine(gt, siblings, nsibling);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
int intel_execlists_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(live_sanitycheck),
SUBTEST(live_unlite_switch),
SUBTEST(live_unlite_preempt),
+ SUBTEST(live_hold_reset),
SUBTEST(live_timeslice_preempt),
SUBTEST(live_timeslice_queue),
SUBTEST(live_busywait_preempt),
@@ -3333,6 +3590,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_virtual_mask),
SUBTEST(live_virtual_preserved),
SUBTEST(live_virtual_bond),
+ SUBTEST(live_virtual_reset),
};
if (!HAS_EXECLISTS(i915))
diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index e1c313d..a62bdf9 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -457,7 +457,8 @@ void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected)
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
/* TODO: add more platforms support */
- if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
+ if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ||
+ IS_COFFEELAKE(dev_priv)) {
if (connected) {
vgpu_vreg_t(vgpu, SFUSE_STRAP) |=
SFUSE_STRAP_DDID_DETECTED;
diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
index 2477a1e..ae139f0 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -151,12 +151,12 @@ static void dmabuf_gem_object_free(struct kref *kref)
dmabuf_obj = container_of(pos,
struct intel_vgpu_dmabuf_obj, list);
if (dmabuf_obj == obj) {
+ list_del(pos);
intel_gvt_hypervisor_put_vfio_device(vgpu);
idr_remove(&vgpu->object_idr,
dmabuf_obj->dmabuf_id);
kfree(dmabuf_obj->info);
kfree(dmabuf_obj);
- list_del(pos);
break;
}
}
diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c
index 049775e..b0c1fda 100644
--- a/drivers/gpu/drm/i915/gvt/firmware.c
+++ b/drivers/gpu/drm/i915/gvt/firmware.c
@@ -146,7 +146,7 @@ void intel_gvt_free_firmware(struct intel_gvt *gvt)
clean_firmware_sysfs(gvt);
kfree(gvt->firmware.cfg_space);
- kfree(gvt->firmware.mmio);
+ vfree(gvt->firmware.mmio);
}
static int verify_firmware(struct intel_gvt *gvt,
@@ -229,7 +229,7 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt)
firmware->cfg_space = mem;
- mem = kmalloc(info->mmio_size, GFP_KERNEL);
+ mem = vmalloc(info->mmio_size);
if (!mem) {
kfree(path);
kfree(firmware->cfg_space);
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 34cb404..4a48280 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -1956,7 +1956,11 @@ void _intel_vgpu_mm_release(struct kref *mm_ref)
if (mm->type == INTEL_GVT_MM_PPGTT) {
list_del(&mm->ppgtt_mm.list);
+
+ mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
list_del(&mm->ppgtt_mm.lru_list);
+ mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
+
invalidate_ppgtt_mm(mm);
} else {
vfree(mm->ggtt_mm.virtual_ggtt);
diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c
index 867e7629..33569b9 100644
--- a/drivers/gpu/drm/i915/gvt/opregion.c
+++ b/drivers/gpu/drm/i915/gvt/opregion.c
@@ -147,15 +147,14 @@ static void virt_vbt_generation(struct vbt *v)
/* there's features depending on version! */
v->header.version = 155;
v->header.header_size = sizeof(v->header);
- v->header.vbt_size = sizeof(struct vbt) - sizeof(v->header);
+ v->header.vbt_size = sizeof(struct vbt);
v->header.bdb_offset = offsetof(struct vbt, bdb_header);
strcpy(&v->bdb_header.signature[0], "BIOS_DATA_BLOCK");
v->bdb_header.version = 186; /* child_dev_size = 33 */
v->bdb_header.header_size = sizeof(v->bdb_header);
- v->bdb_header.bdb_size = sizeof(struct vbt) - sizeof(struct vbt_header)
- - sizeof(struct bdb_header);
+ v->bdb_header.bdb_size = sizeof(struct vbt) - sizeof(struct vbt_header);
/* general features */
v->general_features_header.id = BDB_GENERAL_FEATURES;
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 85bd9bf..345c2aa 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -272,10 +272,17 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
{
struct intel_gvt *gvt = vgpu->gvt;
- mutex_lock(&vgpu->vgpu_lock);
-
WARN(vgpu->active, "vGPU is still active!\n");
+ /*
+ * remove idr first so later clean can judge if need to stop
+ * service if no active vgpu.
+ */
+ mutex_lock(&gvt->lock);
+ idr_remove(&gvt->vgpu_idr, vgpu->id);
+ mutex_unlock(&gvt->lock);
+
+ mutex_lock(&vgpu->vgpu_lock);
intel_gvt_debugfs_remove_vgpu(vgpu);
intel_vgpu_clean_sched_policy(vgpu);
intel_vgpu_clean_submission(vgpu);
@@ -290,7 +297,6 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
mutex_unlock(&vgpu->vgpu_lock);
mutex_lock(&gvt->lock);
- idr_remove(&gvt->vgpu_idr, vgpu->id);
if (idr_is_empty(&gvt->vgpu_idr))
intel_gvt_clean_irq(gvt);
intel_gvt_update_vgpu_types(gvt);
@@ -560,9 +566,9 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
intel_vgpu_reset_mmio(vgpu, dmlr);
populate_pvinfo_page(vgpu);
- intel_vgpu_reset_display(vgpu);
if (dmlr) {
+ intel_vgpu_reset_display(vgpu);
intel_vgpu_reset_cfg_space(vgpu);
/* only reset the failsafe mode when dmlr reset */
vgpu->failsafe = false;
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index f3da5c0..b0a4997 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -416,13 +416,15 @@ int i915_active_acquire(struct i915_active *ref)
if (err)
return err;
- if (!atomic_read(&ref->count) && ref->active)
- err = ref->active(ref);
- if (!err) {
- spin_lock_irq(&ref->tree_lock); /* vs __active_retire() */
- debug_active_activate(ref);
- atomic_inc(&ref->count);
- spin_unlock_irq(&ref->tree_lock);
+ if (likely(!i915_active_acquire_if_busy(ref))) {
+ if (ref->active)
+ err = ref->active(ref);
+ if (!err) {
+ spin_lock_irq(&ref->tree_lock); /* __active_retire() */
+ debug_active_activate(ref);
+ atomic_inc(&ref->count);
+ spin_unlock_irq(&ref->tree_lock);
+ }
}
mutex_unlock(&ref->mutex);
@@ -605,7 +607,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
struct intel_engine_cs *engine)
{
intel_engine_mask_t tmp, mask = engine->mask;
- struct llist_node *pos = NULL, *next;
+ struct llist_node *first = NULL, *last = NULL;
struct intel_gt *gt = engine->gt;
int err;
@@ -623,6 +625,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
*/
for_each_engine_masked(engine, gt, mask, tmp) {
u64 idx = engine->kernel_context->timeline->fence_context;
+ struct llist_node *prev = first;
struct active_node *node;
node = reuse_idle_barrier(ref, idx);
@@ -656,23 +659,23 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));
GEM_BUG_ON(barrier_to_engine(node) != engine);
- next = barrier_to_ll(node);
- next->next = pos;
- if (!pos)
- pos = next;
+ first = barrier_to_ll(node);
+ first->next = prev;
+ if (!last)
+ last = first;
intel_engine_pm_get(engine);
}
GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
- llist_add_batch(next, pos, &ref->preallocated_barriers);
+ llist_add_batch(first, last, &ref->preallocated_barriers);
return 0;
unwind:
- while (pos) {
- struct active_node *node = barrier_from_ll(pos);
+ while (first) {
+ struct active_node *node = barrier_from_ll(first);
- pos = pos->next;
+ first = first->next;
atomic_dec(&ref->count);
intel_engine_pm_put(barrier_to_engine(node));
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index b571f67..51e1e854 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -188,6 +188,12 @@ int i915_active_acquire(struct i915_active *ref);
bool i915_active_acquire_if_busy(struct i915_active *ref);
void i915_active_release(struct i915_active *ref);
+static inline void __i915_active_acquire(struct i915_active *ref)
+{
+ GEM_BUG_ON(!atomic_read(&ref->count));
+ atomic_inc(&ref->count);
+}
+
static inline bool
i915_active_is_idle(const struct i915_active *ref)
{
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f7385ab..8410330 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -56,6 +56,7 @@
#include "display/intel_hotplug.h"
#include "display/intel_overlay.h"
#include "display/intel_pipe_crc.h"
+#include "display/intel_psr.h"
#include "display/intel_sprite.h"
#include "display/intel_vga.h"
@@ -330,6 +331,8 @@ static int i915_driver_modeset_probe(struct drm_i915_private *i915)
intel_init_ipc(i915);
+ intel_psr_set_force_mode_changed(i915->psr.dp);
+
return 0;
cleanup_gem:
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 077af22..810e3cc 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -505,7 +505,7 @@ struct i915_psr {
bool dc3co_enabled;
u32 dc3co_exit_delay;
struct delayed_work idle_work;
- bool initially_probed;
+ bool force_mode_changed;
};
#define QUIRK_LVDS_SSC_DISABLE (1<<1)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 94f993e..5f6e639 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -180,7 +180,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
struct drm_i915_gem_pwrite *args,
struct drm_file *file)
{
- void *vaddr = obj->phys_handle->vaddr + args->offset;
+ void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset;
char __user *user_data = u64_to_user_ptr(args->data_ptr);
/*
@@ -265,7 +265,10 @@ i915_gem_dumb_create(struct drm_file *file,
DRM_FORMAT_MOD_LINEAR))
args->pitch = ALIGN(args->pitch, 4096);
- args->size = args->pitch * args->height;
+ if (args->pitch < args->width)
+ return -EINVAL;
+
+ args->size = mul_u32_u32(args->pitch, args->height);
mem_type = INTEL_MEMORY_SYSTEM;
if (HAS_LMEM(to_i915(dev)))
@@ -841,10 +844,10 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
ret = i915_gem_gtt_pwrite_fast(obj, args);
if (ret == -EFAULT || ret == -ENOSPC) {
- if (obj->phys_handle)
- ret = i915_gem_phys_pwrite(obj, args, file);
- else
+ if (i915_gem_object_has_struct_page(obj))
ret = i915_gem_shmem_pwrite(obj, args);
+ else
+ ret = i915_gem_phys_pwrite(obj, args, file);
}
i915_gem_object_unpin_pages(obj);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 4c1836f..9e401a5 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1681,7 +1681,7 @@ static const char *error_msg(struct i915_gpu_coredump *error)
"GPU HANG: ecode %d:%x:%08x",
INTEL_GEN(error->i915), engines,
generate_ecode(first));
- if (first) {
+ if (first && first->context.pid) {
/* Just show the first executing process, more is confusing */
len += scnprintf(error->error_msg + len,
sizeof(error->error_msg) - len,
@@ -1852,7 +1852,8 @@ void i915_error_state_store(struct i915_gpu_coredump *error)
if (!xchg(&warned, true) &&
ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) {
pr_info("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n");
- pr_info("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n");
+ pr_info("Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/intel/issues/new.\n");
+ pr_info("Please see https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs for details.\n");
pr_info("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n");
pr_info("The GPU crash dump is required to analyze GPU hangs, so please always attach it.\n");
pr_info("GPU crash dump saved to /sys/class/drm/card%d/error\n",
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 9109004..e4a6afe 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -314,8 +314,11 @@ i915_vma_capture_finish(struct intel_gt_coredump *gt,
}
static inline void
-i915_error_state_store(struct drm_i915_private *i915,
- struct i915_gpu_coredump *error)
+i915_error_state_store(struct i915_gpu_coredump *error)
+{
+}
+
+static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
{
}
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 83f0140..f631f6d 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -437,7 +437,7 @@ static const struct intel_device_info snb_m_gt2_info = {
.has_rc6 = 1, \
.has_rc6p = 1, \
.has_rps = true, \
- .ppgtt_type = INTEL_PPGTT_FULL, \
+ .ppgtt_type = INTEL_PPGTT_ALIASING, \
.ppgtt_size = 31, \
IVB_PIPE_OFFSETS, \
IVB_CURSOR_OFFSETS, \
@@ -494,7 +494,7 @@ static const struct intel_device_info vlv_info = {
.has_rps = true,
.display.has_gmch = 1,
.display.has_hotplug = 1,
- .ppgtt_type = INTEL_PPGTT_FULL,
+ .ppgtt_type = INTEL_PPGTT_ALIASING,
.ppgtt_size = 31,
.has_snoop = true,
.has_coherent_ggtt = false,
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 0f556d8..3b6b913 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1954,9 +1954,10 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
return i915_vma_get(oa_bo->vma);
}
-static int emit_oa_config(struct i915_perf_stream *stream,
- struct i915_oa_config *oa_config,
- struct intel_context *ce)
+static struct i915_request *
+emit_oa_config(struct i915_perf_stream *stream,
+ struct i915_oa_config *oa_config,
+ struct intel_context *ce)
{
struct i915_request *rq;
struct i915_vma *vma;
@@ -1964,7 +1965,7 @@ static int emit_oa_config(struct i915_perf_stream *stream,
vma = get_oa_vma(stream, oa_config);
if (IS_ERR(vma))
- return PTR_ERR(vma);
+ return ERR_CAST(vma);
err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (err)
@@ -1989,13 +1990,17 @@ static int emit_oa_config(struct i915_perf_stream *stream,
err = rq->engine->emit_bb_start(rq,
vma->node.start, 0,
I915_DISPATCH_SECURE);
+ if (err)
+ goto err_add_request;
+
+ i915_request_get(rq);
err_add_request:
i915_request_add(rq);
err_vma_unpin:
i915_vma_unpin(vma);
err_vma_put:
i915_vma_put(vma);
- return err;
+ return err ? ERR_PTR(err) : rq;
}
static struct intel_context *oa_context(struct i915_perf_stream *stream)
@@ -2003,7 +2008,8 @@ static struct intel_context *oa_context(struct i915_perf_stream *stream)
return stream->pinned_ctx ?: stream->engine->kernel_context;
}
-static int hsw_enable_metric_set(struct i915_perf_stream *stream)
+static struct i915_request *
+hsw_enable_metric_set(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
@@ -2406,7 +2412,8 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
}
-static int gen8_enable_metric_set(struct i915_perf_stream *stream)
+static struct i915_request *
+gen8_enable_metric_set(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
struct i915_oa_config *oa_config = stream->oa_config;
@@ -2448,7 +2455,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream)
*/
ret = lrc_configure_all_contexts(stream, oa_config);
if (ret)
- return ret;
+ return ERR_PTR(ret);
return emit_oa_config(stream, oa_config, oa_context(stream));
}
@@ -2460,7 +2467,8 @@ static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
}
-static int gen12_enable_metric_set(struct i915_perf_stream *stream)
+static struct i915_request *
+gen12_enable_metric_set(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
struct i915_oa_config *oa_config = stream->oa_config;
@@ -2491,7 +2499,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
*/
ret = gen12_configure_all_contexts(stream, oa_config);
if (ret)
- return ret;
+ return ERR_PTR(ret);
/*
* For Gen12, performance counters are context
@@ -2501,7 +2509,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
if (stream->ctx) {
ret = gen12_configure_oar_context(stream, true);
if (ret)
- return ret;
+ return ERR_PTR(ret);
}
return emit_oa_config(stream, oa_config, oa_context(stream));
@@ -2696,6 +2704,20 @@ static const struct i915_perf_stream_ops i915_oa_stream_ops = {
.read = i915_oa_read,
};
+static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream)
+{
+ struct i915_request *rq;
+
+ rq = stream->perf->ops.enable_metric_set(stream);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+ i915_request_put(rq);
+
+ return 0;
+}
+
/**
* i915_oa_stream_init - validate combined props for OA stream and init
* @stream: An i915 perf stream
@@ -2829,7 +2851,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
stream->ops = &i915_oa_stream_ops;
perf->exclusive_stream = stream;
- ret = perf->ops.enable_metric_set(stream);
+ ret = i915_perf_stream_enable_sync(stream);
if (ret) {
DRM_DEBUG("Unable to enable metric set\n");
goto err_enable;
@@ -3147,7 +3169,7 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
return -EINVAL;
if (config != stream->oa_config) {
- int err;
+ struct i915_request *rq;
/*
* If OA is bound to a specific context, emit the
@@ -3158,11 +3180,13 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
* When set globally, we use a low priority kernel context,
* so it will effectively take effect when idle.
*/
- err = emit_oa_config(stream, config, oa_context(stream));
- if (err == 0)
+ rq = emit_oa_config(stream, config, oa_context(stream));
+ if (!IS_ERR(rq)) {
config = xchg(&stream->oa_config, config);
- else
- ret = err;
+ i915_request_put(rq);
+ } else {
+ ret = PTR_ERR(rq);
+ }
}
i915_oa_config_put(config);
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index 45e5814..a0e22f0 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -339,7 +339,8 @@ struct i915_oa_ops {
* counter reports being sampled. May apply system constraints such as
* disabling EU clock gating as required.
*/
- int (*enable_metric_set)(struct i915_perf_stream *stream);
+ struct i915_request *
+ (*enable_metric_set)(struct i915_perf_stream *stream);
/**
* @disable_metric_set: Remove system constraints associated with using
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 28a82c8..aa729d0 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -637,8 +637,10 @@ static void i915_pmu_enable(struct perf_event *event)
container_of(event->pmu, typeof(*i915), pmu.base);
unsigned int bit = event_enabled_bit(event);
struct i915_pmu *pmu = &i915->pmu;
+ intel_wakeref_t wakeref;
unsigned long flags;
+ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
spin_lock_irqsave(&pmu->lock, flags);
/*
@@ -648,6 +650,14 @@ static void i915_pmu_enable(struct perf_event *event)
BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
GEM_BUG_ON(pmu->enable_count[bit] == ~0);
+
+ if (pmu->enable_count[bit] == 0 &&
+ config_enabled_mask(I915_PMU_RC6_RESIDENCY) & BIT_ULL(bit)) {
+ pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = 0;
+ pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);
+ pmu->sleep_last = ktime_get();
+ }
+
pmu->enable |= BIT_ULL(bit);
pmu->enable_count[bit]++;
@@ -688,6 +698,8 @@ static void i915_pmu_enable(struct perf_event *event)
* an existing non-zero value.
*/
local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
+
+ intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}
static void i915_pmu_disable(struct perf_event *event)
@@ -810,11 +822,6 @@ static ssize_t i915_pmu_event_show(struct device *dev,
return sprintf(buf, "config=0x%lx\n", eattr->val);
}
-static struct attribute_group i915_pmu_events_attr_group = {
- .name = "events",
- /* Patch in attrs at runtime. */
-};
-
static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
struct device_attribute *attr,
@@ -834,13 +841,6 @@ static const struct attribute_group i915_pmu_cpumask_attr_group = {
.attrs = i915_cpumask_attrs,
};
-static const struct attribute_group *i915_pmu_attr_groups[] = {
- &i915_pmu_format_attr_group,
- &i915_pmu_events_attr_group,
- &i915_pmu_cpumask_attr_group,
- NULL
-};
-
#define __event(__config, __name, __unit) \
{ \
.config = (__config), \
@@ -1014,23 +1014,23 @@ err:;
static void free_event_attributes(struct i915_pmu *pmu)
{
- struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;
+ struct attribute **attr_iter = pmu->events_attr_group.attrs;
for (; *attr_iter; attr_iter++)
kfree((*attr_iter)->name);
- kfree(i915_pmu_events_attr_group.attrs);
+ kfree(pmu->events_attr_group.attrs);
kfree(pmu->i915_attr);
kfree(pmu->pmu_attr);
- i915_pmu_events_attr_group.attrs = NULL;
+ pmu->events_attr_group.attrs = NULL;
pmu->i915_attr = NULL;
pmu->pmu_attr = NULL;
}
static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
- struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
+ struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
GEM_BUG_ON(!pmu->base.event_init);
@@ -1043,7 +1043,7 @@ static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
- struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
+ struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
unsigned int target;
GEM_BUG_ON(!pmu->base.event_init);
@@ -1060,8 +1060,6 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
return 0;
}
-static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
-
static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
{
enum cpuhp_state slot;
@@ -1075,21 +1073,22 @@ static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
return ret;
slot = ret;
- ret = cpuhp_state_add_instance(slot, &pmu->node);
+ ret = cpuhp_state_add_instance(slot, &pmu->cpuhp.node);
if (ret) {
cpuhp_remove_multi_state(slot);
return ret;
}
- cpuhp_slot = slot;
+ pmu->cpuhp.slot = slot;
return 0;
}
static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
{
- WARN_ON(cpuhp_slot == CPUHP_INVALID);
- WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &pmu->node));
- cpuhp_remove_multi_state(cpuhp_slot);
+ WARN_ON(pmu->cpuhp.slot == CPUHP_INVALID);
+ WARN_ON(cpuhp_state_remove_instance(pmu->cpuhp.slot, &pmu->cpuhp.node));
+ cpuhp_remove_multi_state(pmu->cpuhp.slot);
+ pmu->cpuhp.slot = CPUHP_INVALID;
}
static bool is_igp(struct drm_i915_private *i915)
@@ -1106,6 +1105,13 @@ static bool is_igp(struct drm_i915_private *i915)
void i915_pmu_register(struct drm_i915_private *i915)
{
struct i915_pmu *pmu = &i915->pmu;
+ const struct attribute_group *attr_groups[] = {
+ &i915_pmu_format_attr_group,
+ &pmu->events_attr_group,
+ &i915_pmu_cpumask_attr_group,
+ NULL
+ };
+
int ret = -ENOMEM;
if (INTEL_GEN(i915) <= 2) {
@@ -1116,6 +1122,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
spin_lock_init(&pmu->lock);
hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
pmu->timer.function = i915_sample;
+ pmu->cpuhp.slot = CPUHP_INVALID;
if (!is_igp(i915)) {
pmu->name = kasprintf(GFP_KERNEL,
@@ -1131,11 +1138,16 @@ void i915_pmu_register(struct drm_i915_private *i915)
if (!pmu->name)
goto err;
- i915_pmu_events_attr_group.attrs = create_event_attributes(pmu);
- if (!i915_pmu_events_attr_group.attrs)
+ pmu->events_attr_group.name = "events";
+ pmu->events_attr_group.attrs = create_event_attributes(pmu);
+ if (!pmu->events_attr_group.attrs)
goto err_name;
- pmu->base.attr_groups = i915_pmu_attr_groups;
+ pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
+ GFP_KERNEL);
+ if (!pmu->base.attr_groups)
+ goto err_attr;
+
pmu->base.task_ctx_nr = perf_invalid_context;
pmu->base.event_init = i915_pmu_event_init;
pmu->base.add = i915_pmu_event_add;
@@ -1147,7 +1159,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
ret = perf_pmu_register(&pmu->base, pmu->name, -1);
if (ret)
- goto err_attr;
+ goto err_groups;
ret = i915_pmu_register_cpuhp_state(pmu);
if (ret)
@@ -1157,6 +1169,8 @@ void i915_pmu_register(struct drm_i915_private *i915)
err_unreg:
perf_pmu_unregister(&pmu->base);
+err_groups:
+ kfree(pmu->base.attr_groups);
err_attr:
pmu->base.event_init = NULL;
free_event_attributes(pmu);
@@ -1182,6 +1196,7 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
perf_pmu_unregister(&pmu->base);
pmu->base.event_init = NULL;
+ kfree(pmu->base.attr_groups);
if (!is_igp(i915))
kfree(pmu->name);
free_event_attributes(pmu);
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 6c1647c..f1d6cad 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -39,9 +39,12 @@ struct i915_pmu_sample {
struct i915_pmu {
/**
- * @node: List node for CPU hotplug handling.
+ * @cpuhp: Struct used for CPU hotplug handling.
*/
- struct hlist_node node;
+ struct {
+ struct hlist_node node;
+ enum cpuhp_state slot;
+ } cpuhp;
/**
* @base: PMU base.
*/
@@ -105,6 +108,10 @@ struct i915_pmu {
*/
ktime_t sleep_last;
/**
+ * @events_attr_group: Device events attribute group.
+ */
+ struct attribute_group events_attr_group;
+ /**
* @i915_attr: Memory block holding device attributes.
*/
void *i915_attr;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 6cc55c1..3575fd3 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7757,6 +7757,7 @@ enum {
#define BW_BUDDY1_CTL _MMIO(0x45140)
#define BW_BUDDY2_CTL _MMIO(0x45150)
#define BW_BUDDY_DISABLE REG_BIT(31)
+#define BW_BUDDY_TLB_REQ_TIMER_MASK REG_GENMASK(21, 16)
#define BW_BUDDY1_PAGE_MASK _MMIO(0x45144)
#define BW_BUDDY2_PAGE_MASK _MMIO(0x45154)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index be185886..a18b2a2 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -221,6 +221,8 @@ static void remove_from_engine(struct i915_request *rq)
locked = engine;
}
list_del_init(&rq->sched.link);
+ clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+ clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
spin_unlock_irq(&locked->active.lock);
}
@@ -273,7 +275,7 @@ bool i915_request_retire(struct i915_request *rq)
spin_unlock_irq(&rq->lock);
remove_from_client(rq);
- list_del(&rq->link);
+ list_del_rcu(&rq->link);
intel_context_exit(rq->context);
intel_context_unpin(rq->context);
@@ -408,8 +410,10 @@ bool __i915_request_submit(struct i915_request *request)
xfer: /* We may be recursing from the signal callback of another i915 fence */
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
- if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags))
+ if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) {
list_move_tail(&request->sched.link, &engine->active.requests);
+ clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
+ }
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
@@ -523,19 +527,31 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
return NOTIFY_DONE;
}
+static void irq_semaphore_cb(struct irq_work *wrk)
+{
+ struct i915_request *rq =
+ container_of(wrk, typeof(*rq), semaphore_work);
+
+ i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE);
+ i915_request_put(rq);
+}
+
static int __i915_sw_fence_call
semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
- struct i915_request *request =
- container_of(fence, typeof(*request), semaphore);
+ struct i915_request *rq = container_of(fence, typeof(*rq), semaphore);
switch (state) {
case FENCE_COMPLETE:
- i915_schedule_bump_priority(request, I915_PRIORITY_NOSEMAPHORE);
+ if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) {
+ i915_request_get(rq);
+ init_irq_work(&rq->semaphore_work, irq_semaphore_cb);
+ irq_work_queue(&rq->semaphore_work);
+ }
break;
case FENCE_FREE:
- i915_request_put(request);
+ i915_request_put(rq);
break;
}
@@ -591,6 +607,8 @@ static void __i915_request_ctor(void *arg)
i915_sw_fence_init(&rq->submit, submit_notify);
i915_sw_fence_init(&rq->semaphore, semaphore_notify);
+ dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, 0, 0);
+
rq->file_priv = NULL;
rq->capture_list = NULL;
@@ -649,25 +667,30 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
}
}
- ret = intel_timeline_get_seqno(tl, rq, &seqno);
- if (ret)
- goto err_free;
-
rq->i915 = ce->engine->i915;
rq->context = ce;
rq->engine = ce->engine;
rq->ring = ce->ring;
rq->execution_mask = ce->engine->mask;
+ kref_init(&rq->fence.refcount);
+ rq->fence.flags = 0;
+ rq->fence.error = 0;
+ INIT_LIST_HEAD(&rq->fence.cb_list);
+
+ ret = intel_timeline_get_seqno(tl, rq, &seqno);
+ if (ret)
+ goto err_free;
+
+ rq->fence.context = tl->fence_context;
+ rq->fence.seqno = seqno;
+
RCU_INIT_POINTER(rq->timeline, tl);
RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline);
rq->hwsp_seqno = tl->hwsp_seqno;
rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
- dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
- tl->fence_context, seqno);
-
/* We bump the ref for the fence chain */
i915_sw_fence_reinit(&i915_request_get(rq)->submit);
i915_sw_fence_reinit(&i915_request_get(rq)->semaphore);
@@ -710,6 +733,8 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
rq->infix = rq->ring->emit; /* end of header; start of user payload */
intel_context_mark_active(ce);
+ list_add_tail_rcu(&rq->link, &tl->requests);
+
return rq;
err_unwind:
@@ -763,16 +788,26 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
struct dma_fence *fence;
int err;
- GEM_BUG_ON(i915_request_timeline(rq) ==
- rcu_access_pointer(signal->timeline));
+ if (i915_request_timeline(rq) == rcu_access_pointer(signal->timeline))
+ return 0;
+
+ if (i915_request_started(signal))
+ return 0;
fence = NULL;
rcu_read_lock();
spin_lock_irq(&signal->lock);
- if (!i915_request_started(signal) &&
- !list_is_first(&signal->link,
- &rcu_dereference(signal->timeline)->requests)) {
- struct i915_request *prev = list_prev_entry(signal, link);
+ do {
+ struct list_head *pos = READ_ONCE(signal->link.prev);
+ struct i915_request *prev;
+
+ /* Confirm signal has not been retired, the link is valid */
+ if (unlikely(i915_request_started(signal)))
+ break;
+
+ /* Is signal the earliest request on its timeline? */
+ if (pos == &rcu_dereference(signal->timeline)->requests)
+ break;
/*
* Peek at the request before us in the timeline. That
@@ -780,20 +815,25 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
* after acquiring a reference to it, confirm that it is
* still part of the signaler's timeline.
*/
- if (i915_request_get_rcu(prev)) {
- if (list_next_entry(prev, link) == signal)
- fence = &prev->fence;
- else
- i915_request_put(prev);
+ prev = list_entry(pos, typeof(*prev), link);
+ if (!i915_request_get_rcu(prev))
+ break;
+
+ /* After the strong barrier, confirm prev is still attached */
+ if (unlikely(READ_ONCE(prev->link.next) != &signal->link)) {
+ i915_request_put(prev);
+ break;
}
- }
+
+ fence = &prev->fence;
+ } while (0);
spin_unlock_irq(&signal->lock);
rcu_read_unlock();
if (!fence)
return 0;
err = 0;
- if (intel_timeline_sync_is_later(i915_request_timeline(rq), fence))
+ if (!intel_timeline_sync_is_later(i915_request_timeline(rq), fence))
err = i915_sw_fence_await_dma_fence(&rq->submit,
fence, 0,
I915_FENCE_GFP);
@@ -1231,8 +1271,6 @@ __i915_request_add_to_timeline(struct i915_request *rq)
0);
}
- list_add_tail(&rq->link, &timeline->requests);
-
/*
* Make sure that no request gazumped us - if it was allocated after
* our i915_request_alloc() and called __i915_request_add() before
@@ -1292,9 +1330,9 @@ void __i915_request_queue(struct i915_request *rq,
* decide whether to preempt the entire chain so that it is ready to
* run at the earliest possible convenience.
*/
- i915_sw_fence_commit(&rq->semaphore);
if (attr && rq->engine->schedule)
rq->engine->schedule(rq, attr);
+ i915_sw_fence_commit(&rq->semaphore);
i915_sw_fence_commit(&rq->submit);
}
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 0314336..fccc339 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -26,6 +26,7 @@
#define I915_REQUEST_H
#include <linux/dma-fence.h>
+#include <linux/irq_work.h>
#include <linux/lockdep.h>
#include "gem/i915_gem_context_types.h"
@@ -71,6 +72,18 @@ enum {
I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS,
/*
+ * I915_FENCE_FLAG_PQUEUE - this request is ready for execution
+ *
+ * Using the scheduler, when a request is ready for execution it is put
+ * into the priority queue, and removed from that queue when transferred
+ * to the HW runlists. We want to track its membership within the
+ * priority queue so that we can easily check before rescheduling.
+ *
+ * See i915_request_in_priority_queue()
+ */
+ I915_FENCE_FLAG_PQUEUE,
+
+ /*
* I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list
*
* Internal bookkeeping used by the breadcrumb code to track when
@@ -79,6 +92,13 @@ enum {
I915_FENCE_FLAG_SIGNAL,
/*
+ * I915_FENCE_FLAG_HOLD - this request is currently on hold
+ *
+ * This request has been suspended, pending an ongoing investigation.
+ */
+ I915_FENCE_FLAG_HOLD,
+
+ /*
* I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted
*
* The execution of some requests should not be interrupted. This is
@@ -189,6 +209,7 @@ struct i915_request {
};
struct list_head execute_cb;
struct i915_sw_fence semaphore;
+ struct irq_work semaphore_work;
/*
* A list of everyone we wait upon, and everyone who waits upon us.
@@ -361,6 +382,11 @@ static inline bool i915_request_is_active(const struct i915_request *rq)
return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
}
+static inline bool i915_request_in_priority_queue(const struct i915_request *rq)
+{
+ return test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+}
+
/**
* Returns true if seq1 is later than seq2.
*/
@@ -454,6 +480,27 @@ static inline bool i915_request_is_running(const struct i915_request *rq)
return __i915_request_has_started(rq);
}
+/**
+ * i915_request_is_running - check if the request is ready for execution
+ * @rq: the request
+ *
+ * Upon construction, the request is instructed to wait upon various
+ * signals before it is ready to be executed by the HW. That is, we do
+ * not want to start execution and read data before it is written. In practice,
+ * this is controlled with a mixture of interrupts and semaphores. Once
+ * the submit fence is completed, the backend scheduler will place the
+ * request into its queue and from there submit it for execution. So we
+ * can detect when a request is eligible for execution (and is under control
+ * of the scheduler) by querying where it is in any of the scheduler's lists.
+ *
+ * Returns true if the request is ready for execution (it may be inflight),
+ * false otherwise.
+ */
+static inline bool i915_request_is_ready(const struct i915_request *rq)
+{
+ return !list_empty(&rq->sched.link);
+}
+
static inline bool i915_request_completed(const struct i915_request *rq)
{
if (i915_request_signaled(rq))
@@ -483,6 +530,21 @@ static inline bool i915_request_has_sentinel(const struct i915_request *rq)
return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags));
}
+static inline bool i915_request_on_hold(const struct i915_request *rq)
+{
+ return unlikely(test_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags));
+}
+
+static inline void i915_request_set_hold(struct i915_request *rq)
+{
+ set_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
+}
+
+static inline void i915_request_clear_hold(struct i915_request *rq)
+{
+ clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
+}
+
static inline struct intel_timeline *
i915_request_timeline(struct i915_request *rq)
{
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index bf87c70b..34b654b 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -326,20 +326,18 @@ static void __i915_schedule(struct i915_sched_node *node,
node->attr.priority = prio;
- if (list_empty(&node->link)) {
- /*
- * If the request is not in the priolist queue because
- * it is not yet runnable, then it doesn't contribute
- * to our preemption decisions. On the other hand,
- * if the request is on the HW, it too is not in the
- * queue; but in that case we may still need to reorder
- * the inflight requests.
- */
+ /*
+ * Once the request is ready, it will be placed into the
+ * priority lists and then onto the HW runlist. Before the
+ * request is ready, it does not contribute to our preemption
+ * decisions and we can safely ignore it, as it will, and
+ * any preemption required, be dealt with upon submission.
+ * See engine->submit_request()
+ */
+ if (list_empty(&node->link))
continue;
- }
- if (!intel_engine_is_virtual(engine) &&
- !i915_request_is_active(node_to_request(node))) {
+ if (i915_request_in_priority_queue(node_to_request(node))) {
if (!cache.priolist)
cache.priolist =
i915_sched_lookup_priolist(engine,
@@ -425,8 +423,6 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
if (!node_signaled(signal)) {
INIT_LIST_HEAD(&dep->dfs_link);
- list_add(&dep->wait_link, &signal->waiters_list);
- list_add(&dep->signal_link, &node->signalers_list);
dep->signaler = signal;
dep->waiter = node;
dep->flags = flags;
@@ -436,6 +432,10 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
!node_started(signal))
node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
+ /* All set, now publish. Beware the lockless walkers. */
+ list_add(&dep->signal_link, &node->signalers_list);
+ list_add_rcu(&dep->wait_link, &signal->waiters_list);
+
/*
* As we do not allow WAIT to preempt inflight requests,
* once we have executed a request, along with triggering
diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c
index c47261a..632d695 100644
--- a/drivers/gpu/drm/i915/i915_utils.c
+++ b/drivers/gpu/drm/i915/i915_utils.c
@@ -8,9 +8,8 @@
#include "i915_drv.h"
#include "i915_utils.h"
-#define FDO_BUG_URL "https://bugs.freedesktop.org/enter_bug.cgi?product=DRI"
-#define FDO_BUG_MSG "Please file a bug at " FDO_BUG_URL " against DRM/Intel " \
- "providing the dmesg log by booting with drm.debug=0xf"
+#define FDO_BUG_URL "https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs"
+#define FDO_BUG_MSG "Please file a bug on drm/i915; see " FDO_BUG_URL " for details."
void
__i915_printk(struct drm_i915_private *dev_priv, const char *level,
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index b0ade76..d34141f 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -234,6 +234,11 @@ static inline u64 ptr_to_u64(const void *ptr)
__idx; \
})
+static inline bool is_power_of_2_u64(u64 n)
+{
+ return (n != 0 && ((n & (n - 1)) == 0));
+}
+
static inline void __list_del_many(struct list_head *head,
struct list_head *first)
{
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 17d7c52..4ff3807 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1202,16 +1202,26 @@ int __i915_vma_unbind(struct i915_vma *vma)
if (ret)
return ret;
- GEM_BUG_ON(i915_vma_is_active(vma));
if (i915_vma_is_pinned(vma)) {
vma_print_allocator(vma, "is pinned");
return -EAGAIN;
}
- GEM_BUG_ON(i915_vma_is_active(vma));
+ /*
+ * After confirming that no one else is pinning this vma, wait for
+ * any laggards who may have crept in during the wait (through
+ * a residual pin skipping the vm->mutex) to complete.
+ */
+ ret = i915_vma_sync(vma);
+ if (ret)
+ return ret;
+
if (!drm_mm_node_allocated(&vma->node))
return 0;
+ GEM_BUG_ON(i915_vma_is_pinned(vma));
+ GEM_BUG_ON(i915_vma_is_active(vma));
+
if (i915_vma_is_map_and_fenceable(vma)) {
/*
* Check that we have flushed all writes through the GGTT
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
index 0dfcd178..fe85e48 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
@@ -486,6 +486,7 @@ static void mtk_drm_crtc_hw_config(struct mtk_drm_crtc *mtk_crtc)
}
#if IS_REACHABLE(CONFIG_MTK_CMDQ)
if (mtk_crtc->cmdq_client) {
+ mbox_flush(mtk_crtc->cmdq_client->chan, 2000);
cmdq_handle = cmdq_pkt_create(mtk_crtc->cmdq_client, PAGE_SIZE);
cmdq_pkt_clear_event(cmdq_handle, mtk_crtc->cmdq_event);
cmdq_pkt_wfe(cmdq_handle, mtk_crtc->cmdq_event);
@@ -636,10 +637,18 @@ static const struct drm_crtc_helper_funcs mtk_crtc_helper_funcs = {
static int mtk_drm_crtc_init(struct drm_device *drm,
struct mtk_drm_crtc *mtk_crtc,
- struct drm_plane *primary,
- struct drm_plane *cursor, unsigned int pipe)
+ unsigned int pipe)
{
- int ret;
+ struct drm_plane *primary = NULL;
+ struct drm_plane *cursor = NULL;
+ int i, ret;
+
+ for (i = 0; i < mtk_crtc->layer_nr; i++) {
+ if (mtk_crtc->planes[i].type == DRM_PLANE_TYPE_PRIMARY)
+ primary = &mtk_crtc->planes[i];
+ else if (mtk_crtc->planes[i].type == DRM_PLANE_TYPE_CURSOR)
+ cursor = &mtk_crtc->planes[i];
+ }
ret = drm_crtc_init_with_planes(drm, &mtk_crtc->base, primary, cursor,
&mtk_crtc_funcs, NULL);
@@ -689,11 +698,12 @@ static int mtk_drm_crtc_num_comp_planes(struct mtk_drm_crtc *mtk_crtc,
}
static inline
-enum drm_plane_type mtk_drm_crtc_plane_type(unsigned int plane_idx)
+enum drm_plane_type mtk_drm_crtc_plane_type(unsigned int plane_idx,
+ unsigned int num_planes)
{
if (plane_idx == 0)
return DRM_PLANE_TYPE_PRIMARY;
- else if (plane_idx == 1)
+ else if (plane_idx == (num_planes - 1))
return DRM_PLANE_TYPE_CURSOR;
else
return DRM_PLANE_TYPE_OVERLAY;
@@ -712,7 +722,8 @@ static int mtk_drm_crtc_init_comp_planes(struct drm_device *drm_dev,
ret = mtk_plane_init(drm_dev,
&mtk_crtc->planes[mtk_crtc->layer_nr],
BIT(pipe),
- mtk_drm_crtc_plane_type(mtk_crtc->layer_nr),
+ mtk_drm_crtc_plane_type(mtk_crtc->layer_nr,
+ num_planes),
mtk_ddp_comp_supported_rotations(comp));
if (ret)
return ret;
@@ -807,9 +818,7 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
return ret;
}
- ret = mtk_drm_crtc_init(drm_dev, mtk_crtc, &mtk_crtc->planes[0],
- mtk_crtc->layer_nr > 1 ? &mtk_crtc->planes[1] :
- NULL, pipe);
+ ret = mtk_drm_crtc_init(drm_dev, mtk_crtc, pipe);
if (ret < 0)
return ret;
@@ -828,7 +837,8 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
drm_crtc_index(&mtk_crtc->base));
mtk_crtc->cmdq_client = NULL;
}
- ret = of_property_read_u32_index(dev->of_node, "mediatek,gce-events",
+ ret = of_property_read_u32_index(priv->mutex_node,
+ "mediatek,gce-events",
drm_crtc_index(&mtk_crtc->base),
&mtk_crtc->cmdq_event);
if (ret)
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
index 1f5a112..57c88de 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
@@ -471,6 +471,7 @@ int mtk_ddp_comp_init(struct device *dev, struct device_node *node,
/* Only DMA capable components need the LARB property */
comp->larb_dev = NULL;
if (type != MTK_DISP_OVL &&
+ type != MTK_DISP_OVL_2L &&
type != MTK_DISP_RDMA &&
type != MTK_DISP_WDMA)
return 0;
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
index 914cc76..c2bd683 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
@@ -80,6 +80,7 @@ static int mtk_plane_atomic_async_check(struct drm_plane *plane,
struct drm_plane_state *state)
{
struct drm_crtc_state *crtc_state;
+ int ret;
if (plane != state->crtc->cursor)
return -EINVAL;
@@ -90,6 +91,11 @@ static int mtk_plane_atomic_async_check(struct drm_plane *plane,
if (!plane->state->fb)
return -EINVAL;
+ ret = mtk_drm_crtc_plane_check(state->crtc, plane,
+ to_mtk_plane_state(state));
+ if (ret)
+ return ret;
+
if (state->state)
crtc_state = drm_atomic_get_existing_crtc_state(state->state,
state->crtc);
@@ -115,6 +121,7 @@ static void mtk_plane_atomic_async_update(struct drm_plane *plane,
plane->state->src_y = new_state->src_y;
plane->state->src_h = new_state->src_h;
plane->state->src_w = new_state->src_w;
+ swap(plane->state->fb, new_state->fb);
state->pending.async_dirty = true;
mtk_drm_crtc_async_update(new_state->crtc, plane, new_state);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 983afea..748cd37 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -796,12 +796,41 @@ bool a6xx_gmu_isidle(struct a6xx_gmu *gmu)
return true;
}
+#define GBIF_CLIENT_HALT_MASK BIT(0)
+#define GBIF_ARB_HALT_MASK BIT(1)
+
+static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu)
+{
+ struct msm_gpu *gpu = &adreno_gpu->base;
+
+ if (!a6xx_has_gbif(adreno_gpu)) {
+ gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf);
+ spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) &
+ 0xf) == 0xf);
+ gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
+
+ return;
+ }
+
+ /* Halt new client requests on GBIF */
+ gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
+ spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
+ (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
+
+ /* Halt all AXI requests on GBIF */
+ gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
+ spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
+ (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
+
+ /* The GBIF halt needs to be explicitly cleared */
+ gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
+}
+
/* Gracefully try to shut down the GMU and by extension the GPU */
static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu)
{
struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
- struct msm_gpu *gpu = &adreno_gpu->base;
u32 val;
/*
@@ -819,11 +848,7 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu)
return;
}
- /* Clear the VBIF pipe before shutting down */
- gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf);
- spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) & 0xf)
- == 0xf);
- gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
+ a6xx_bus_clear_pending_transactions(adreno_gpu);
/* tell the GMU we want to slumber */
a6xx_gmu_notify_slumber(gmu);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index daf0780..68af241 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -378,18 +378,6 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
int ret;
- /*
- * During a previous slumber, GBIF halt is asserted to ensure
- * no further transaction can go through GPU before GPU
- * headswitch is turned off.
- *
- * This halt is deasserted once headswitch goes off but
- * incase headswitch doesn't goes off clear GBIF halt
- * here to ensure GPU wake-up doesn't fail because of
- * halted GPU transactions.
- */
- gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
-
/* Make sure the GMU keeps the GPU on while we set it up */
a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
@@ -470,10 +458,12 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
/* Select CP0 to always count cycles */
gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
- gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, 2 << 1);
- gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, 2 << 1);
- gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, 2 << 1);
- gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, 2 << 21);
+ if (adreno_is_a630(adreno_gpu)) {
+ gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, 2 << 1);
+ gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, 2 << 1);
+ gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, 2 << 1);
+ gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, 2 << 21);
+ }
/* Enable fault detection */
gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL,
@@ -748,39 +738,6 @@ static const u32 a6xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A6XX_CP_RB_CNTL),
};
-#define GBIF_CLIENT_HALT_MASK BIT(0)
-#define GBIF_ARB_HALT_MASK BIT(1)
-
-static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu)
-{
- struct msm_gpu *gpu = &adreno_gpu->base;
-
- if(!a6xx_has_gbif(adreno_gpu)){
- gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf);
- spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) &
- 0xf) == 0xf);
- gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
-
- return;
- }
-
- /* Halt new client requests on GBIF */
- gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
- spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
- (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
-
- /* Halt all AXI requests on GBIF */
- gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
- spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
- (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
-
- /*
- * GMU needs DDR access in slumber path. Deassert GBIF halt now
- * to allow for GMU to access system memory.
- */
- gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
-}
-
static int a6xx_pm_resume(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -805,16 +762,6 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
devfreq_suspend_device(gpu->devfreq.devfreq);
- /*
- * Make sure the GMU is idle before continuing (because some transitions
- * may use VBIF
- */
- a6xx_gmu_wait_for_idle(&a6xx_gpu->gmu);
-
- /* Clear the VBIF pipe before shutting down */
- /* FIXME: This accesses the GPU - do we need to make sure it is on? */
- a6xx_bus_clear_pending_transactions(adreno_gpu);
-
return a6xx_gmu_stop(a6xx_gpu);
}
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
index eda11ab..e450e0b 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
@@ -7,6 +7,7 @@
#include "a6xx_gmu.h"
#include "a6xx_gmu.xml.h"
+#include "a6xx_gpu.h"
#define HFI_MSG_ID(val) [val] = #val
@@ -216,48 +217,82 @@ static int a6xx_hfi_send_perf_table(struct a6xx_gmu *gmu)
NULL, 0);
}
-static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu)
+static void a618_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
{
- struct a6xx_hfi_msg_bw_table msg = { 0 };
+ /* Send a single "off" entry since the 618 GMU doesn't do bus scaling */
+ msg->bw_level_num = 1;
+
+ msg->ddr_cmds_num = 3;
+ msg->ddr_wait_bitmask = 0x01;
+
+ msg->ddr_cmds_addrs[0] = 0x50000;
+ msg->ddr_cmds_addrs[1] = 0x5003c;
+ msg->ddr_cmds_addrs[2] = 0x5000c;
+
+ msg->ddr_cmds_data[0][0] = 0x40000000;
+ msg->ddr_cmds_data[0][1] = 0x40000000;
+ msg->ddr_cmds_data[0][2] = 0x40000000;
/*
- * The sdm845 GMU doesn't do bus frequency scaling on its own but it
- * does need at least one entry in the list because it might be accessed
- * when the GMU is shutting down. Send a single "off" entry.
+ * These are the CX (CNOC) votes - these are used by the GMU but the
+ * votes are known and fixed for the target
*/
+ msg->cnoc_cmds_num = 1;
+ msg->cnoc_wait_bitmask = 0x01;
- msg.bw_level_num = 1;
+ msg->cnoc_cmds_addrs[0] = 0x5007c;
+ msg->cnoc_cmds_data[0][0] = 0x40000000;
+ msg->cnoc_cmds_data[1][0] = 0x60000001;
+}
- msg.ddr_cmds_num = 3;
- msg.ddr_wait_bitmask = 0x07;
+static void a6xx_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
+{
+ /* Send a single "off" entry since the 630 GMU doesn't do bus scaling */
+ msg->bw_level_num = 1;
- msg.ddr_cmds_addrs[0] = 0x50000;
- msg.ddr_cmds_addrs[1] = 0x5005c;
- msg.ddr_cmds_addrs[2] = 0x5000c;
+ msg->ddr_cmds_num = 3;
+ msg->ddr_wait_bitmask = 0x07;
- msg.ddr_cmds_data[0][0] = 0x40000000;
- msg.ddr_cmds_data[0][1] = 0x40000000;
- msg.ddr_cmds_data[0][2] = 0x40000000;
+ msg->ddr_cmds_addrs[0] = 0x50000;
+ msg->ddr_cmds_addrs[1] = 0x5005c;
+ msg->ddr_cmds_addrs[2] = 0x5000c;
+
+ msg->ddr_cmds_data[0][0] = 0x40000000;
+ msg->ddr_cmds_data[0][1] = 0x40000000;
+ msg->ddr_cmds_data[0][2] = 0x40000000;
/*
* These are the CX (CNOC) votes. This is used but the values for the
* sdm845 GMU are known and fixed so we can hard code them.
*/
- msg.cnoc_cmds_num = 3;
- msg.cnoc_wait_bitmask = 0x05;
+ msg->cnoc_cmds_num = 3;
+ msg->cnoc_wait_bitmask = 0x05;
- msg.cnoc_cmds_addrs[0] = 0x50034;
- msg.cnoc_cmds_addrs[1] = 0x5007c;
- msg.cnoc_cmds_addrs[2] = 0x5004c;
+ msg->cnoc_cmds_addrs[0] = 0x50034;
+ msg->cnoc_cmds_addrs[1] = 0x5007c;
+ msg->cnoc_cmds_addrs[2] = 0x5004c;
- msg.cnoc_cmds_data[0][0] = 0x40000000;
- msg.cnoc_cmds_data[0][1] = 0x00000000;
- msg.cnoc_cmds_data[0][2] = 0x40000000;
+ msg->cnoc_cmds_data[0][0] = 0x40000000;
+ msg->cnoc_cmds_data[0][1] = 0x00000000;
+ msg->cnoc_cmds_data[0][2] = 0x40000000;
- msg.cnoc_cmds_data[1][0] = 0x60000001;
- msg.cnoc_cmds_data[1][1] = 0x20000001;
- msg.cnoc_cmds_data[1][2] = 0x60000001;
+ msg->cnoc_cmds_data[1][0] = 0x60000001;
+ msg->cnoc_cmds_data[1][1] = 0x20000001;
+ msg->cnoc_cmds_data[1][2] = 0x60000001;
+}
+
+
+static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu)
+{
+ struct a6xx_hfi_msg_bw_table msg = { 0 };
+ struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
+ struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+
+ if (adreno_is_a618(adreno_gpu))
+ a618_build_bw_table(&msg);
+ else
+ a6xx_build_bw_table(&msg);
return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_BW_TABLE, &msg, sizeof(msg),
NULL, 0);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
index 5286326..a05282d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
@@ -255,13 +255,13 @@ static const struct dpu_format dpu_format_map[] = {
INTERLEAVED_RGB_FMT(RGB565,
0, COLOR_5BIT, COLOR_6BIT, COLOR_5BIT,
- C2_R_Cr, C0_G_Y, C1_B_Cb, 0, 3,
+ C1_B_Cb, C0_G_Y, C2_R_Cr, 0, 3,
false, 2, 0,
DPU_FETCH_LINEAR, 1),
INTERLEAVED_RGB_FMT(BGR565,
0, COLOR_5BIT, COLOR_6BIT, COLOR_5BIT,
- C1_B_Cb, C0_G_Y, C2_R_Cr, 0, 3,
+ C2_R_Cr, C0_G_Y, C1_B_Cb, 0, 3,
false, 2, 0,
DPU_FETCH_LINEAR, 1),
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
index 29705e7..80d3cfc 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
@@ -12,6 +12,7 @@
#define to_dpu_mdss(x) container_of(x, struct dpu_mdss, base)
+#define HW_REV 0x0
#define HW_INTR_STATUS 0x0010
/* Max BW defined in KBps */
@@ -22,6 +23,17 @@ struct dpu_irq_controller {
struct irq_domain *domain;
};
+struct dpu_hw_cfg {
+ u32 val;
+ u32 offset;
+};
+
+struct dpu_mdss_hw_init_handler {
+ u32 hw_rev;
+ u32 hw_reg_count;
+ struct dpu_hw_cfg* hw_cfg;
+};
+
struct dpu_mdss {
struct msm_mdss base;
void __iomem *mmio;
@@ -32,6 +44,44 @@ struct dpu_mdss {
u32 num_paths;
};
+static struct dpu_hw_cfg hw_cfg[] = {
+ {
+ /* UBWC global settings */
+ .val = 0x1E,
+ .offset = 0x144,
+ }
+};
+
+static struct dpu_mdss_hw_init_handler cfg_handler[] = {
+ { .hw_rev = DPU_HW_VER_620,
+ .hw_reg_count = ARRAY_SIZE(hw_cfg),
+ .hw_cfg = hw_cfg
+ },
+};
+
+static void dpu_mdss_hw_init(struct dpu_mdss *dpu_mdss, u32 hw_rev)
+{
+ int i;
+ u32 count = 0;
+ struct dpu_hw_cfg *hw_cfg = NULL;
+
+ for (i = 0; i < ARRAY_SIZE(cfg_handler); i++) {
+ if (cfg_handler[i].hw_rev == hw_rev) {
+ hw_cfg = cfg_handler[i].hw_cfg;
+ count = cfg_handler[i].hw_reg_count;
+ break;
+ }
+ }
+
+ for (i = 0; i < count; i++ ) {
+ writel_relaxed(hw_cfg->val,
+ dpu_mdss->mmio + hw_cfg->offset);
+ hw_cfg++;
+ }
+
+ return;
+}
+
static int dpu_mdss_parse_data_bus_icc_path(struct drm_device *dev,
struct dpu_mdss *dpu_mdss)
{
@@ -174,12 +224,18 @@ static int dpu_mdss_enable(struct msm_mdss *mdss)
struct dpu_mdss *dpu_mdss = to_dpu_mdss(mdss);
struct dss_module_power *mp = &dpu_mdss->mp;
int ret;
+ u32 mdss_rev;
dpu_mdss_icc_request_bw(mdss);
ret = msm_dss_enable_clk(mp->clk_config, mp->num_clk, true);
- if (ret)
+ if (ret) {
DPU_ERROR("clock enable failed, ret:%d\n", ret);
+ return ret;
+ }
+
+ mdss_rev = readl_relaxed(dpu_mdss->mmio + HW_REV);
+ dpu_mdss_hw_init(dpu_mdss, mdss_rev);
return ret;
}
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
index 05cc04f..e1cc541 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
@@ -1109,8 +1109,8 @@ static void mdp5_crtc_wait_for_pp_done(struct drm_crtc *crtc)
ret = wait_for_completion_timeout(&mdp5_crtc->pp_completion,
msecs_to_jiffies(50));
if (ret == 0)
- dev_warn(dev->dev, "pp done time out, lm=%d\n",
- mdp5_cstate->pipeline.mixer->lm);
+ dev_warn_ratelimited(dev->dev, "pp done time out, lm=%d\n",
+ mdp5_cstate->pipeline.mixer->lm);
}
static void mdp5_crtc_wait_for_flush_done(struct drm_crtc *crtc)
diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c
index 104115d..4864b95 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_manager.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c
@@ -336,7 +336,7 @@ static int dsi_mgr_connector_get_modes(struct drm_connector *connector)
return num;
}
-static int dsi_mgr_connector_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status dsi_mgr_connector_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
int id = dsi_mgr_connector_get_id(connector);
@@ -506,6 +506,7 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge)
struct msm_dsi *msm_dsi1 = dsi_mgr_get_dsi(DSI_1);
struct mipi_dsi_host *host = msm_dsi->host;
struct drm_panel *panel = msm_dsi->panel;
+ struct msm_dsi_pll *src_pll;
bool is_dual_dsi = IS_DUAL_DSI();
int ret;
@@ -539,6 +540,10 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge)
id, ret);
}
+ /* Save PLL status if it is a clock source */
+ src_pll = msm_dsi_phy_get_pll(msm_dsi->phy);
+ msm_dsi_pll_save_state(src_pll);
+
ret = msm_dsi_host_power_off(host);
if (ret)
pr_err("%s: host %d power off failed,%d\n", __func__, id, ret);
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index b0cfa67..f509ebd 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -724,10 +724,6 @@ void msm_dsi_phy_disable(struct msm_dsi_phy *phy)
if (!phy || !phy->cfg->ops.disable)
return;
- /* Save PLL status if it is a clock source */
- if (phy->usecase != MSM_DSI_PHY_SLAVE)
- msm_dsi_pll_save_state(phy->pll);
-
phy->cfg->ops.disable(phy);
dsi_phy_regulator_disable(phy);
diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c
index 1c89454..6ac04fc 100644
--- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c
+++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c
@@ -411,6 +411,12 @@ static int dsi_pll_10nm_vco_prepare(struct clk_hw *hw)
if (pll_10nm->slave)
dsi_pll_enable_pll_bias(pll_10nm->slave);
+ rc = dsi_pll_10nm_vco_set_rate(hw,pll_10nm->vco_current_rate, 0);
+ if (rc) {
+ pr_err("vco_set_rate failed, rc=%d\n", rc);
+ return rc;
+ }
+
/* Start PLL */
pll_write(pll_10nm->phy_cmn_mmio + REG_DSI_10nm_PHY_CMN_PLL_CNTRL,
0x01);
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index c26219c..e4b750b 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -441,6 +441,14 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
if (ret)
goto err_msm_uninit;
+ if (!dev->dma_parms) {
+ dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms),
+ GFP_KERNEL);
+ if (!dev->dma_parms)
+ return -ENOMEM;
+ }
+ dma_set_max_seg_size(dev, DMA_BIT_MASK(32));
+
msm_gem_shrinker_init(ddev);
switch (get_mdp_ver(pdev)) {
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
index 8903152..bb737f9 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
@@ -458,6 +458,8 @@ nv50_wndw_atomic_check(struct drm_plane *plane, struct drm_plane_state *state)
asyw->clr.ntfy = armw->ntfy.handle != 0;
asyw->clr.sema = armw->sema.handle != 0;
asyw->clr.xlut = armw->xlut.handle != 0;
+ if (asyw->clr.xlut && asyw->visible)
+ asyw->set.xlut = asyw->xlut.handle != 0;
asyw->clr.csc = armw->csc.valid;
if (wndw->func->image_clr)
asyw->clr.image = armw->image.handle[0] != 0;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index c7d700916..8ebbe16 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2579,6 +2579,7 @@ nv166_chipset = {
static const struct nvkm_device_chip
nv167_chipset = {
.name = "TU117",
+ .acr = tu102_acr_new,
.bar = tu102_bar_new,
.bios = nvkm_bios_new,
.bus = gf100_bus_new,
@@ -2607,6 +2608,7 @@ nv167_chipset = {
.disp = tu102_disp_new,
.dma = gv100_dma_new,
.fifo = tu102_fifo_new,
+ .gr = tu102_gr_new,
.nvdec[0] = gm107_nvdec_new,
.nvenc[0] = gm107_nvenc_new,
.sec2 = tu102_sec2_new,
@@ -2615,6 +2617,7 @@ nv167_chipset = {
static const struct nvkm_device_chip
nv168_chipset = {
.name = "TU116",
+ .acr = tu102_acr_new,
.bar = tu102_bar_new,
.bios = nvkm_bios_new,
.bus = gf100_bus_new,
@@ -2643,6 +2646,7 @@ nv168_chipset = {
.disp = tu102_disp_new,
.dma = gv100_dma_new,
.fifo = tu102_fifo_new,
+ .gr = tu102_gr_new,
.nvdec[0] = gm107_nvdec_new,
.nvenc[0] = gm107_nvenc_new,
.sec2 = tu102_sec2_new,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c
index 454668b..a9efa4d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c
@@ -164,6 +164,32 @@ MODULE_FIRMWARE("nvidia/tu106/gr/sw_nonctx.bin");
MODULE_FIRMWARE("nvidia/tu106/gr/sw_bundle_init.bin");
MODULE_FIRMWARE("nvidia/tu106/gr/sw_method_init.bin");
+MODULE_FIRMWARE("nvidia/tu117/gr/fecs_bl.bin");
+MODULE_FIRMWARE("nvidia/tu117/gr/fecs_inst.bin");
+MODULE_FIRMWARE("nvidia/tu117/gr/fecs_data.bin");
+MODULE_FIRMWARE("nvidia/tu117/gr/fecs_sig.bin");
+MODULE_FIRMWARE("nvidia/tu117/gr/gpccs_bl.bin");
+MODULE_FIRMWARE("nvidia/tu117/gr/gpccs_inst.bin");
+MODULE_FIRMWARE("nvidia/tu117/gr/gpccs_data.bin");
+MODULE_FIRMWARE("nvidia/tu117/gr/gpccs_sig.bin");
+MODULE_FIRMWARE("nvidia/tu117/gr/sw_ctx.bin");
+MODULE_FIRMWARE("nvidia/tu117/gr/sw_nonctx.bin");
+MODULE_FIRMWARE("nvidia/tu117/gr/sw_bundle_init.bin");
+MODULE_FIRMWARE("nvidia/tu117/gr/sw_method_init.bin");
+
+MODULE_FIRMWARE("nvidia/tu116/gr/fecs_bl.bin");
+MODULE_FIRMWARE("nvidia/tu116/gr/fecs_inst.bin");
+MODULE_FIRMWARE("nvidia/tu116/gr/fecs_data.bin");
+MODULE_FIRMWARE("nvidia/tu116/gr/fecs_sig.bin");
+MODULE_FIRMWARE("nvidia/tu116/gr/gpccs_bl.bin");
+MODULE_FIRMWARE("nvidia/tu116/gr/gpccs_inst.bin");
+MODULE_FIRMWARE("nvidia/tu116/gr/gpccs_data.bin");
+MODULE_FIRMWARE("nvidia/tu116/gr/gpccs_sig.bin");
+MODULE_FIRMWARE("nvidia/tu116/gr/sw_ctx.bin");
+MODULE_FIRMWARE("nvidia/tu116/gr/sw_nonctx.bin");
+MODULE_FIRMWARE("nvidia/tu116/gr/sw_bundle_init.bin");
+MODULE_FIRMWARE("nvidia/tu116/gr/sw_method_init.bin");
+
static const struct gf100_gr_fwif
tu102_gr_fwif[] = {
{ 0, gm200_gr_load, &tu102_gr, &gp108_gr_fecs_acr, &gp108_gr_gpccs_acr },
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/tu102.c
index 7f4b89d..d28d8f3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/tu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/tu102.c
@@ -107,6 +107,12 @@ MODULE_FIRMWARE("nvidia/tu104/acr/ucode_unload.bin");
MODULE_FIRMWARE("nvidia/tu106/acr/unload_bl.bin");
MODULE_FIRMWARE("nvidia/tu106/acr/ucode_unload.bin");
+MODULE_FIRMWARE("nvidia/tu116/acr/unload_bl.bin");
+MODULE_FIRMWARE("nvidia/tu116/acr/ucode_unload.bin");
+
+MODULE_FIRMWARE("nvidia/tu117/acr/unload_bl.bin");
+MODULE_FIRMWARE("nvidia/tu117/acr/ucode_unload.bin");
+
static const struct nvkm_acr_hsf_fwif
tu102_acr_unload_fwif[] = {
{ 0, nvkm_acr_hsfw_load, &gp108_acr_unload_0 },
@@ -130,6 +136,8 @@ tu102_acr_asb_0 = {
MODULE_FIRMWARE("nvidia/tu102/acr/ucode_asb.bin");
MODULE_FIRMWARE("nvidia/tu104/acr/ucode_asb.bin");
MODULE_FIRMWARE("nvidia/tu106/acr/ucode_asb.bin");
+MODULE_FIRMWARE("nvidia/tu116/acr/ucode_asb.bin");
+MODULE_FIRMWARE("nvidia/tu117/acr/ucode_asb.bin");
static const struct nvkm_acr_hsf_fwif
tu102_acr_asb_fwif[] = {
@@ -154,6 +162,12 @@ MODULE_FIRMWARE("nvidia/tu104/acr/ucode_ahesasc.bin");
MODULE_FIRMWARE("nvidia/tu106/acr/bl.bin");
MODULE_FIRMWARE("nvidia/tu106/acr/ucode_ahesasc.bin");
+MODULE_FIRMWARE("nvidia/tu116/acr/bl.bin");
+MODULE_FIRMWARE("nvidia/tu116/acr/ucode_ahesasc.bin");
+
+MODULE_FIRMWARE("nvidia/tu117/acr/bl.bin");
+MODULE_FIRMWARE("nvidia/tu117/acr/ucode_ahesasc.bin");
+
static const struct nvkm_acr_hsf_fwif
tu102_acr_ahesasc_fwif[] = {
{ 0, nvkm_acr_hsfw_load, &tu102_acr_ahesasc_0 },
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c
index 389bad3..10ff5d05 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c
@@ -51,3 +51,5 @@ MODULE_FIRMWARE("nvidia/gv100/nvdec/scrubber.bin");
MODULE_FIRMWARE("nvidia/tu102/nvdec/scrubber.bin");
MODULE_FIRMWARE("nvidia/tu104/nvdec/scrubber.bin");
MODULE_FIRMWARE("nvidia/tu106/nvdec/scrubber.bin");
+MODULE_FIRMWARE("nvidia/tu116/nvdec/scrubber.bin");
+MODULE_FIRMWARE("nvidia/tu117/nvdec/scrubber.bin");
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 6da59f4..b7a618d 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -166,6 +166,7 @@ panfrost_lookup_bos(struct drm_device *dev,
break;
}
+ atomic_inc(&bo->gpu_usecount);
job->mappings[i] = mapping;
}
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
index ca1bc90..b3517ff 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.h
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
@@ -30,6 +30,12 @@ struct panfrost_gem_object {
struct mutex lock;
} mappings;
+ /*
+ * Count the number of jobs referencing this BO so we don't let the
+ * shrinker reclaim this object prematurely.
+ */
+ atomic_t gpu_usecount;
+
bool noexec :1;
bool is_heap :1;
};
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
index f5dd7b2..288e46c 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
@@ -41,6 +41,9 @@ static bool panfrost_gem_purge(struct drm_gem_object *obj)
struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
struct panfrost_gem_object *bo = to_panfrost_bo(obj);
+ if (atomic_read(&bo->gpu_usecount))
+ return false;
+
if (!mutex_trylock(&shmem->pages_lock))
return false;
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index 7c36ec6..9a1a72a 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -269,18 +269,19 @@ static void panfrost_job_cleanup(struct kref *ref)
dma_fence_put(job->render_done_fence);
if (job->mappings) {
- for (i = 0; i < job->bo_count; i++)
+ for (i = 0; i < job->bo_count; i++) {
+ if (!job->mappings[i])
+ break;
+
+ atomic_dec(&job->mappings[i]->obj->gpu_usecount);
panfrost_gem_mapping_put(job->mappings[i]);
+ }
kvfree(job->mappings);
}
if (job->bos) {
- struct panfrost_gem_object *bo;
-
- for (i = 0; i < job->bo_count; i++) {
- bo = to_panfrost_bo(job->bos[i]);
+ for (i = 0; i < job->bo_count; i++)
drm_gem_object_put_unlocked(job->bos[i]);
- }
kvfree(job->bos);
}
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index 763cfca..5d75f8c 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -151,7 +151,12 @@ u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu)
as = mmu->as;
if (as >= 0) {
int en = atomic_inc_return(&mmu->as_count);
- WARN_ON(en >= NUM_JOB_SLOTS);
+
+ /*
+ * AS can be retained by active jobs or a perfcnt context,
+ * hence the '+ 1' here.
+ */
+ WARN_ON(en >= (NUM_JOB_SLOTS + 1));
list_move(&mmu->list, &pfdev->as_lru_list);
goto out;
@@ -596,33 +601,27 @@ static irqreturn_t panfrost_mmu_irq_handler_thread(int irq, void *data)
source_id = (fault_status >> 16);
/* Page fault only */
- if ((status & mask) == BIT(i)) {
- WARN_ON(exception_type < 0xC1 || exception_type > 0xC4);
-
+ ret = -1;
+ if ((status & mask) == BIT(i) && (exception_type & 0xF8) == 0xC0)
ret = panfrost_mmu_map_fault_addr(pfdev, i, addr);
- if (!ret) {
- mmu_write(pfdev, MMU_INT_CLEAR, BIT(i));
- status &= ~mask;
- continue;
- }
- }
- /* terminal fault, print info about the fault */
- dev_err(pfdev->dev,
- "Unhandled Page fault in AS%d at VA 0x%016llX\n"
- "Reason: %s\n"
- "raw fault status: 0x%X\n"
- "decoded fault status: %s\n"
- "exception type 0x%X: %s\n"
- "access type 0x%X: %s\n"
- "source id 0x%X\n",
- i, addr,
- "TODO",
- fault_status,
- (fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
- exception_type, panfrost_exception_name(pfdev, exception_type),
- access_type, access_type_name(pfdev, fault_status),
- source_id);
+ if (ret)
+ /* terminal fault, print info about the fault */
+ dev_err(pfdev->dev,
+ "Unhandled Page fault in AS%d at VA 0x%016llX\n"
+ "Reason: %s\n"
+ "raw fault status: 0x%X\n"
+ "decoded fault status: %s\n"
+ "exception type 0x%X: %s\n"
+ "access type 0x%X: %s\n"
+ "source id 0x%X\n",
+ i, addr,
+ "TODO",
+ fault_status,
+ (fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
+ exception_type, panfrost_exception_name(pfdev, exception_type),
+ access_type, access_type_name(pfdev, fault_status),
+ source_id);
mmu_write(pfdev, MMU_INT_CLEAR, mask);
diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
index 6848204..6913578 100644
--- a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
+++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
@@ -73,7 +73,7 @@ static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev,
struct panfrost_file_priv *user = file_priv->driver_priv;
struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
struct drm_gem_shmem_object *bo;
- u32 cfg;
+ u32 cfg, as;
int ret;
if (user == perfcnt->user)
@@ -126,12 +126,8 @@ static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev,
perfcnt->user = user;
- /*
- * Always use address space 0 for now.
- * FIXME: this needs to be updated when we start using different
- * address space.
- */
- cfg = GPU_PERFCNT_CFG_AS(0) |
+ as = panfrost_mmu_as_get(pfdev, perfcnt->mapping->mmu);
+ cfg = GPU_PERFCNT_CFG_AS(as) |
GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_MANUAL);
/*
@@ -195,6 +191,7 @@ static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev,
drm_gem_shmem_vunmap(&perfcnt->mapping->obj->base.base, perfcnt->buf);
perfcnt->buf = NULL;
panfrost_gem_close(&perfcnt->mapping->obj->base.base, file_priv);
+ panfrost_mmu_as_put(pfdev, perfcnt->mapping->mmu);
panfrost_gem_mapping_put(perfcnt->mapping);
perfcnt->mapping = NULL;
pm_runtime_mark_last_busy(pfdev->dev);
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index fd74e26..8696af1 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -37,6 +37,7 @@
#include <linux/vga_switcheroo.h>
#include <linux/mmu_notifier.h>
+#include <drm/drm_agpsupport.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_drv.h>
#include <drm/drm_fb_helper.h>
@@ -325,6 +326,7 @@ static int radeon_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
unsigned long flags = 0;
+ struct drm_device *dev;
int ret;
if (!ent)
@@ -365,7 +367,44 @@ static int radeon_pci_probe(struct pci_dev *pdev,
if (ret)
return ret;
- return drm_get_pci_dev(pdev, ent, &kms_driver);
+ dev = drm_dev_alloc(&kms_driver, &pdev->dev);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
+
+ ret = pci_enable_device(pdev);
+ if (ret)
+ goto err_free;
+
+ dev->pdev = pdev;
+#ifdef __alpha__
+ dev->hose = pdev->sysdata;
+#endif
+
+ pci_set_drvdata(pdev, dev);
+
+ if (pci_find_capability(dev->pdev, PCI_CAP_ID_AGP))
+ dev->agp = drm_agp_init(dev);
+ if (dev->agp) {
+ dev->agp->agp_mtrr = arch_phys_wc_add(
+ dev->agp->agp_info.aper_base,
+ dev->agp->agp_info.aper_size *
+ 1024 * 1024);
+ }
+
+ ret = drm_dev_register(dev, ent->driver_data);
+ if (ret)
+ goto err_agp;
+
+ return 0;
+
+err_agp:
+ if (dev->agp)
+ arch_phys_wc_del(dev->agp->agp_mtrr);
+ kfree(dev->agp);
+ pci_disable_device(pdev);
+err_free:
+ drm_dev_put(dev);
+ return ret;
}
static void
@@ -575,7 +614,7 @@ radeon_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
static struct drm_driver kms_driver = {
.driver_features =
- DRIVER_USE_AGP | DRIVER_GEM | DRIVER_RENDER,
+ DRIVER_GEM | DRIVER_RENDER,
.load = radeon_driver_load_kms,
.open = radeon_driver_open_kms,
.postclose = radeon_driver_postclose_kms,
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index d24f23a..dd2f19b 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -32,6 +32,7 @@
#include <linux/uaccess.h>
#include <linux/vga_switcheroo.h>
+#include <drm/drm_agpsupport.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_file.h>
#include <drm/drm_ioctl.h>
@@ -77,6 +78,11 @@ void radeon_driver_unload_kms(struct drm_device *dev)
radeon_modeset_fini(rdev);
radeon_device_fini(rdev);
+ if (dev->agp)
+ arch_phys_wc_del(dev->agp->agp_mtrr);
+ kfree(dev->agp);
+ dev->agp = NULL;
+
done_free:
kfree(rdev);
dev->dev_private = NULL;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 3b92311..b3380ff 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -528,7 +528,7 @@ static int radeon_ttm_tt_pin_userptr(struct ttm_tt *ttm)
r = -ENOMEM;
nents = dma_map_sg(rdev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
- if (nents != ttm->sg->nents)
+ if (nents == 0)
goto release_sg;
drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 71ce621..60c4c6a 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -661,7 +661,9 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
trace_drm_sched_process_job(s_fence);
+ dma_fence_get(&s_fence->finished);
drm_sched_fence_finished(s_fence);
+ dma_fence_put(&s_fence->finished);
wake_up_interruptible(&sched->wake_up_worker);
}
diff --git a/drivers/gpu/drm/selftests/drm_cmdline_selftests.h b/drivers/gpu/drm/selftests/drm_cmdline_selftests.h
index ceac7af..29e367d 100644
--- a/drivers/gpu/drm/selftests/drm_cmdline_selftests.h
+++ b/drivers/gpu/drm/selftests/drm_cmdline_selftests.h
@@ -53,6 +53,7 @@ cmdline_test(drm_cmdline_test_rotate_0)
cmdline_test(drm_cmdline_test_rotate_90)
cmdline_test(drm_cmdline_test_rotate_180)
cmdline_test(drm_cmdline_test_rotate_270)
+cmdline_test(drm_cmdline_test_rotate_multiple)
cmdline_test(drm_cmdline_test_rotate_invalid_val)
cmdline_test(drm_cmdline_test_rotate_truncated)
cmdline_test(drm_cmdline_test_hmirror)
diff --git a/drivers/gpu/drm/selftests/test-drm_cmdline_parser.c b/drivers/gpu/drm/selftests/test-drm_cmdline_parser.c
index 520f3e6..d96cd89 100644
--- a/drivers/gpu/drm/selftests/test-drm_cmdline_parser.c
+++ b/drivers/gpu/drm/selftests/test-drm_cmdline_parser.c
@@ -856,6 +856,17 @@ static int drm_cmdline_test_rotate_270(void *ignored)
return 0;
}
+static int drm_cmdline_test_rotate_multiple(void *ignored)
+{
+ struct drm_cmdline_mode mode = { };
+
+ FAIL_ON(drm_mode_parse_command_line_for_connector("720x480,rotate=0,rotate=90",
+ &no_connector,
+ &mode));
+
+ return 0;
+}
+
static int drm_cmdline_test_rotate_invalid_val(void *ignored)
{
struct drm_cmdline_mode mode = { };
@@ -888,7 +899,7 @@ static int drm_cmdline_test_hmirror(void *ignored)
FAIL_ON(!mode.specified);
FAIL_ON(mode.xres != 720);
FAIL_ON(mode.yres != 480);
- FAIL_ON(mode.rotation_reflection != DRM_MODE_REFLECT_X);
+ FAIL_ON(mode.rotation_reflection != (DRM_MODE_ROTATE_0 | DRM_MODE_REFLECT_X));
FAIL_ON(mode.refresh_specified);
@@ -913,7 +924,7 @@ static int drm_cmdline_test_vmirror(void *ignored)
FAIL_ON(!mode.specified);
FAIL_ON(mode.xres != 720);
FAIL_ON(mode.yres != 480);
- FAIL_ON(mode.rotation_reflection != DRM_MODE_REFLECT_Y);
+ FAIL_ON(mode.rotation_reflection != (DRM_MODE_ROTATE_0 | DRM_MODE_REFLECT_Y));
FAIL_ON(mode.refresh_specified);
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c
index 5ae67d5..328272f 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -85,7 +85,6 @@ static int sun4i_drv_bind(struct device *dev)
}
drm_mode_config_init(drm);
- drm->mode_config.allow_fb_modifiers = true;
ret = component_bind_all(drm->dev, drm);
if (ret) {
diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.c b/drivers/gpu/drm/sun4i/sun8i_mixer.c
index 7c24f8f..4a64f7a 100644
--- a/drivers/gpu/drm/sun4i/sun8i_mixer.c
+++ b/drivers/gpu/drm/sun4i/sun8i_mixer.c
@@ -107,48 +107,128 @@ static const struct de2_fmt_info de2_formats[] = {
.csc = SUN8I_CSC_MODE_OFF,
},
{
+ /* for DE2 VI layer which ignores alpha */
+ .drm_fmt = DRM_FORMAT_XRGB4444,
+ .de2_fmt = SUN8I_MIXER_FBFMT_ARGB4444,
+ .rgb = true,
+ .csc = SUN8I_CSC_MODE_OFF,
+ },
+ {
.drm_fmt = DRM_FORMAT_ABGR4444,
.de2_fmt = SUN8I_MIXER_FBFMT_ABGR4444,
.rgb = true,
.csc = SUN8I_CSC_MODE_OFF,
},
{
+ /* for DE2 VI layer which ignores alpha */
+ .drm_fmt = DRM_FORMAT_XBGR4444,
+ .de2_fmt = SUN8I_MIXER_FBFMT_ABGR4444,
+ .rgb = true,
+ .csc = SUN8I_CSC_MODE_OFF,
+ },
+ {
.drm_fmt = DRM_FORMAT_RGBA4444,
.de2_fmt = SUN8I_MIXER_FBFMT_RGBA4444,
.rgb = true,
.csc = SUN8I_CSC_MODE_OFF,
},
{
+ /* for DE2 VI layer which ignores alpha */
+ .drm_fmt = DRM_FORMAT_RGBX4444,
+ .de2_fmt = SUN8I_MIXER_FBFMT_RGBA4444,
+ .rgb = true,
+ .csc = SUN8I_CSC_MODE_OFF,
+ },
+ {
.drm_fmt = DRM_FORMAT_BGRA4444,
.de2_fmt = SUN8I_MIXER_FBFMT_BGRA4444,
.rgb = true,
.csc = SUN8I_CSC_MODE_OFF,
},
{
+ /* for DE2 VI layer which ignores alpha */
+ .drm_fmt = DRM_FORMAT_BGRX4444,
+ .de2_fmt = SUN8I_MIXER_FBFMT_BGRA4444,
+ .rgb = true,
+ .csc = SUN8I_CSC_MODE_OFF,
+ },
+ {
.drm_fmt = DRM_FORMAT_ARGB1555,
.de2_fmt = SUN8I_MIXER_FBFMT_ARGB1555,
.rgb = true,
.csc = SUN8I_CSC_MODE_OFF,
},
{
+ /* for DE2 VI layer which ignores alpha */
+ .drm_fmt = DRM_FORMAT_XRGB1555,
+ .de2_fmt = SUN8I_MIXER_FBFMT_ARGB1555,
+ .rgb = true,
+ .csc = SUN8I_CSC_MODE_OFF,
+ },
+ {
.drm_fmt = DRM_FORMAT_ABGR1555,
.de2_fmt = SUN8I_MIXER_FBFMT_ABGR1555,
.rgb = true,
.csc = SUN8I_CSC_MODE_OFF,
},
{
+ /* for DE2 VI layer which ignores alpha */
+ .drm_fmt = DRM_FORMAT_XBGR1555,
+ .de2_fmt = SUN8I_MIXER_FBFMT_ABGR1555,
+ .rgb = true,
+ .csc = SUN8I_CSC_MODE_OFF,
+ },
+ {
.drm_fmt = DRM_FORMAT_RGBA5551,
.de2_fmt = SUN8I_MIXER_FBFMT_RGBA5551,
.rgb = true,
.csc = SUN8I_CSC_MODE_OFF,
},
{
+ /* for DE2 VI layer which ignores alpha */
+ .drm_fmt = DRM_FORMAT_RGBX5551,
+ .de2_fmt = SUN8I_MIXER_FBFMT_RGBA5551,
+ .rgb = true,
+ .csc = SUN8I_CSC_MODE_OFF,
+ },
+ {
.drm_fmt = DRM_FORMAT_BGRA5551,
.de2_fmt = SUN8I_MIXER_FBFMT_BGRA5551,
.rgb = true,
.csc = SUN8I_CSC_MODE_OFF,
},
{
+ /* for DE2 VI layer which ignores alpha */
+ .drm_fmt = DRM_FORMAT_BGRX5551,
+ .de2_fmt = SUN8I_MIXER_FBFMT_BGRA5551,
+ .rgb = true,
+ .csc = SUN8I_CSC_MODE_OFF,
+ },
+ {
+ .drm_fmt = DRM_FORMAT_ARGB2101010,
+ .de2_fmt = SUN8I_MIXER_FBFMT_ARGB2101010,
+ .rgb = true,
+ .csc = SUN8I_CSC_MODE_OFF,
+ },
+ {
+ .drm_fmt = DRM_FORMAT_ABGR2101010,
+ .de2_fmt = SUN8I_MIXER_FBFMT_ABGR2101010,
+ .rgb = true,
+ .csc = SUN8I_CSC_MODE_OFF,
+ },
+ {
+ .drm_fmt = DRM_FORMAT_RGBA1010102,
+ .de2_fmt = SUN8I_MIXER_FBFMT_RGBA1010102,
+ .rgb = true,
+ .csc = SUN8I_CSC_MODE_OFF,
+ },
+ {
+ .drm_fmt = DRM_FORMAT_BGRA1010102,
+ .de2_fmt = SUN8I_MIXER_FBFMT_BGRA1010102,
+ .rgb = true,
+ .csc = SUN8I_CSC_MODE_OFF,
+ },
+ {
.drm_fmt = DRM_FORMAT_UYVY,
.de2_fmt = SUN8I_MIXER_FBFMT_UYVY,
.rgb = false,
@@ -197,12 +277,6 @@ static const struct de2_fmt_info de2_formats[] = {
.csc = SUN8I_CSC_MODE_YUV2RGB,
},
{
- .drm_fmt = DRM_FORMAT_YUV444,
- .de2_fmt = SUN8I_MIXER_FBFMT_RGB888,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_YUV2RGB,
- },
- {
.drm_fmt = DRM_FORMAT_YUV422,
.de2_fmt = SUN8I_MIXER_FBFMT_YUV422,
.rgb = false,
@@ -221,12 +295,6 @@ static const struct de2_fmt_info de2_formats[] = {
.csc = SUN8I_CSC_MODE_YUV2RGB,
},
{
- .drm_fmt = DRM_FORMAT_YVU444,
- .de2_fmt = SUN8I_MIXER_FBFMT_RGB888,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_YVU2RGB,
- },
- {
.drm_fmt = DRM_FORMAT_YVU422,
.de2_fmt = SUN8I_MIXER_FBFMT_YUV422,
.rgb = false,
@@ -244,6 +312,18 @@ static const struct de2_fmt_info de2_formats[] = {
.rgb = false,
.csc = SUN8I_CSC_MODE_YVU2RGB,
},
+ {
+ .drm_fmt = DRM_FORMAT_P010,
+ .de2_fmt = SUN8I_MIXER_FBFMT_P010_YUV,
+ .rgb = false,
+ .csc = SUN8I_CSC_MODE_YUV2RGB,
+ },
+ {
+ .drm_fmt = DRM_FORMAT_P210,
+ .de2_fmt = SUN8I_MIXER_FBFMT_P210_YUV,
+ .rgb = false,
+ .csc = SUN8I_CSC_MODE_YUV2RGB,
+ },
};
const struct de2_fmt_info *sun8i_mixer_format_info(u32 format)
diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.h b/drivers/gpu/drm/sun4i/sun8i_mixer.h
index c6cc940..345b28b0 100644
--- a/drivers/gpu/drm/sun4i/sun8i_mixer.h
+++ b/drivers/gpu/drm/sun4i/sun8i_mixer.h
@@ -93,6 +93,10 @@
#define SUN8I_MIXER_FBFMT_ABGR1555 17
#define SUN8I_MIXER_FBFMT_RGBA5551 18
#define SUN8I_MIXER_FBFMT_BGRA5551 19
+#define SUN8I_MIXER_FBFMT_ARGB2101010 20
+#define SUN8I_MIXER_FBFMT_ABGR2101010 21
+#define SUN8I_MIXER_FBFMT_RGBA1010102 22
+#define SUN8I_MIXER_FBFMT_BGRA1010102 23
#define SUN8I_MIXER_FBFMT_YUYV 0
#define SUN8I_MIXER_FBFMT_UYVY 1
@@ -109,6 +113,13 @@
/* format 12 is semi-planar YUV411 UVUV */
/* format 13 is semi-planar YUV411 VUVU */
#define SUN8I_MIXER_FBFMT_YUV411 14
+/* format 15 doesn't exist */
+/* format 16 is P010 YVU */
+#define SUN8I_MIXER_FBFMT_P010_YUV 17
+/* format 18 is P210 YVU */
+#define SUN8I_MIXER_FBFMT_P210_YUV 19
+/* format 20 is packed YVU444 10-bit */
+/* format 21 is packed YUV444 10-bit */
/*
* Sub-engines listed bellow are unused for now. The EN registers are here only
diff --git a/drivers/gpu/drm/sun4i/sun8i_vi_layer.c b/drivers/gpu/drm/sun4i/sun8i_vi_layer.c
index 42d445d..b8398ca 100644
--- a/drivers/gpu/drm/sun4i/sun8i_vi_layer.c
+++ b/drivers/gpu/drm/sun4i/sun8i_vi_layer.c
@@ -398,26 +398,26 @@ static const struct drm_plane_funcs sun8i_vi_layer_funcs = {
};
/*
- * While all RGB formats are supported, VI planes don't support
- * alpha blending, so there is no point having formats with alpha
- * channel if their opaque analog exist.
+ * While DE2 VI layer supports same RGB formats as UI layer, alpha
+ * channel is ignored. This structure lists all unique variants
+ * where alpha channel is replaced with "don't care" (X) channel.
*/
static const u32 sun8i_vi_layer_formats[] = {
- DRM_FORMAT_ABGR1555,
- DRM_FORMAT_ABGR4444,
- DRM_FORMAT_ARGB1555,
- DRM_FORMAT_ARGB4444,
DRM_FORMAT_BGR565,
DRM_FORMAT_BGR888,
- DRM_FORMAT_BGRA5551,
- DRM_FORMAT_BGRA4444,
+ DRM_FORMAT_BGRX4444,
+ DRM_FORMAT_BGRX5551,
DRM_FORMAT_BGRX8888,
DRM_FORMAT_RGB565,
DRM_FORMAT_RGB888,
- DRM_FORMAT_RGBA4444,
- DRM_FORMAT_RGBA5551,
+ DRM_FORMAT_RGBX4444,
+ DRM_FORMAT_RGBX5551,
DRM_FORMAT_RGBX8888,
+ DRM_FORMAT_XBGR1555,
+ DRM_FORMAT_XBGR4444,
DRM_FORMAT_XBGR8888,
+ DRM_FORMAT_XRGB1555,
+ DRM_FORMAT_XRGB4444,
DRM_FORMAT_XRGB8888,
DRM_FORMAT_NV16,
@@ -431,11 +431,53 @@ static const u32 sun8i_vi_layer_formats[] = {
DRM_FORMAT_YUV411,
DRM_FORMAT_YUV420,
DRM_FORMAT_YUV422,
- DRM_FORMAT_YUV444,
DRM_FORMAT_YVU411,
DRM_FORMAT_YVU420,
DRM_FORMAT_YVU422,
- DRM_FORMAT_YVU444,
+};
+
+static const u32 sun8i_vi_layer_de3_formats[] = {
+ DRM_FORMAT_ABGR1555,
+ DRM_FORMAT_ABGR2101010,
+ DRM_FORMAT_ABGR4444,
+ DRM_FORMAT_ABGR8888,
+ DRM_FORMAT_ARGB1555,
+ DRM_FORMAT_ARGB2101010,
+ DRM_FORMAT_ARGB4444,
+ DRM_FORMAT_ARGB8888,
+ DRM_FORMAT_BGR565,
+ DRM_FORMAT_BGR888,
+ DRM_FORMAT_BGRA1010102,
+ DRM_FORMAT_BGRA5551,
+ DRM_FORMAT_BGRA4444,
+ DRM_FORMAT_BGRA8888,
+ DRM_FORMAT_BGRX8888,
+ DRM_FORMAT_RGB565,
+ DRM_FORMAT_RGB888,
+ DRM_FORMAT_RGBA1010102,
+ DRM_FORMAT_RGBA4444,
+ DRM_FORMAT_RGBA5551,
+ DRM_FORMAT_RGBA8888,
+ DRM_FORMAT_RGBX8888,
+ DRM_FORMAT_XBGR8888,
+ DRM_FORMAT_XRGB8888,
+
+ DRM_FORMAT_NV16,
+ DRM_FORMAT_NV12,
+ DRM_FORMAT_NV21,
+ DRM_FORMAT_NV61,
+ DRM_FORMAT_P010,
+ DRM_FORMAT_P210,
+ DRM_FORMAT_UYVY,
+ DRM_FORMAT_VYUY,
+ DRM_FORMAT_YUYV,
+ DRM_FORMAT_YVYU,
+ DRM_FORMAT_YUV411,
+ DRM_FORMAT_YUV420,
+ DRM_FORMAT_YUV422,
+ DRM_FORMAT_YVU411,
+ DRM_FORMAT_YVU420,
+ DRM_FORMAT_YVU422,
};
struct sun8i_vi_layer *sun8i_vi_layer_init_one(struct drm_device *drm,
@@ -443,19 +485,27 @@ struct sun8i_vi_layer *sun8i_vi_layer_init_one(struct drm_device *drm,
int index)
{
u32 supported_encodings, supported_ranges;
+ unsigned int plane_cnt, format_count;
struct sun8i_vi_layer *layer;
- unsigned int plane_cnt;
+ const u32 *formats;
int ret;
layer = devm_kzalloc(drm->dev, sizeof(*layer), GFP_KERNEL);
if (!layer)
return ERR_PTR(-ENOMEM);
+ if (mixer->cfg->is_de3) {
+ formats = sun8i_vi_layer_de3_formats;
+ format_count = ARRAY_SIZE(sun8i_vi_layer_de3_formats);
+ } else {
+ formats = sun8i_vi_layer_formats;
+ format_count = ARRAY_SIZE(sun8i_vi_layer_formats);
+ }
+
/* possible crtcs are set later */
ret = drm_universal_plane_init(drm, &layer->plane, 0,
&sun8i_vi_layer_funcs,
- sun8i_vi_layer_formats,
- ARRAY_SIZE(sun8i_vi_layer_formats),
+ formats, format_count,
NULL, DRM_PLANE_TYPE_OVERLAY, NULL);
if (ret) {
dev_err(drm->dev, "Couldn't initialize layer\n");
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 49ed5577..953c82a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -515,6 +515,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
fbo->base.base.resv = &fbo->base.base._resv;
dma_resv_init(&fbo->base.base._resv);
+ fbo->base.base.dev = NULL;
ret = dma_resv_trylock(&fbo->base.base._resv);
WARN_ON(!ret);
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index 5bd60ded..909eba4 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -196,9 +196,10 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
return ERR_CAST(obj);
ret = drm_gem_handle_create(file, &obj->base, handle);
- drm_gem_object_put_unlocked(&obj->base);
- if (ret)
+ if (ret) {
+ drm_gem_object_put_unlocked(&obj->base);
return ERR_PTR(ret);
+ }
return &obj->base;
}
@@ -221,7 +222,9 @@ static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
args->size = gem_object->size;
args->pitch = pitch;
- DRM_DEBUG("Created object of size %lld\n", size);
+ drm_gem_object_put_unlocked(gem_object);
+
+ DRM_DEBUG("Created object of size %llu\n", args->size);
return 0;
}
diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c
index 017a9e0..3af7ec8 100644
--- a/drivers/gpu/drm/virtio/virtgpu_object.c
+++ b/drivers/gpu/drm/virtio/virtgpu_object.c
@@ -42,8 +42,8 @@ static int virtio_gpu_resource_id_get(struct virtio_gpu_device *vgdev,
* "f91a9dd35715 Fix unlinking resources from hash
* table." (Feb 2019) fixes the bug.
*/
- static int handle;
- handle++;
+ static atomic_t seqno = ATOMIC_INIT(0);
+ int handle = atomic_inc_return(&seqno);
*resid = handle + 1;
} else {
int handle = ida_alloc(&vgdev->resource_ida, GFP_KERNEL);
@@ -99,6 +99,7 @@ struct drm_gem_object *virtio_gpu_create_object(struct drm_device *dev,
return NULL;
bo->base.base.funcs = &virtio_gpu_gem_funcs;
+ bo->base.map_cached = true;
return &bo->base.base;
}
diff --git a/drivers/hid/hid-alps.c b/drivers/hid/hid-alps.c
index ae79a7c..fa70415 100644
--- a/drivers/hid/hid-alps.c
+++ b/drivers/hid/hid-alps.c
@@ -730,7 +730,7 @@ static int alps_input_configured(struct hid_device *hdev, struct hid_input *hi)
if (data->has_sp) {
input2 = input_allocate_device();
if (!input2) {
- input_free_device(input2);
+ ret = -ENOMEM;
goto exit;
}
diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c
index 6ac8bec..d732d1d 100644
--- a/drivers/hid/hid-apple.c
+++ b/drivers/hid/hid-apple.c
@@ -340,7 +340,8 @@ static int apple_input_mapping(struct hid_device *hdev, struct hid_input *hi,
unsigned long **bit, int *max)
{
if (usage->hid == (HID_UP_CUSTOM | 0x0003) ||
- usage->hid == (HID_UP_MSVENDOR | 0x0003)) {
+ usage->hid == (HID_UP_MSVENDOR | 0x0003) ||
+ usage->hid == (HID_UP_HPVENDOR2 | 0x0003)) {
/* The fn key on Apple USB keyboards */
set_bit(EV_REP, hi->input->evbit);
hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_FN);
diff --git a/drivers/hid/hid-bigbenff.c b/drivers/hid/hid-bigbenff.c
index 3f6abd1..db6da21 100644
--- a/drivers/hid/hid-bigbenff.c
+++ b/drivers/hid/hid-bigbenff.c
@@ -174,6 +174,7 @@ static __u8 pid0902_rdesc_fixed[] = {
struct bigben_device {
struct hid_device *hid;
struct hid_report *report;
+ bool removed;
u8 led_state; /* LED1 = 1 .. LED4 = 8 */
u8 right_motor_on; /* right motor off/on 0/1 */
u8 left_motor_force; /* left motor force 0-255 */
@@ -190,6 +191,9 @@ static void bigben_worker(struct work_struct *work)
struct bigben_device, worker);
struct hid_field *report_field = bigben->report->field[0];
+ if (bigben->removed)
+ return;
+
if (bigben->work_led) {
bigben->work_led = false;
report_field->value[0] = 0x01; /* 1 = led message */
@@ -220,10 +224,16 @@ static void bigben_worker(struct work_struct *work)
static int hid_bigben_play_effect(struct input_dev *dev, void *data,
struct ff_effect *effect)
{
- struct bigben_device *bigben = data;
+ struct hid_device *hid = input_get_drvdata(dev);
+ struct bigben_device *bigben = hid_get_drvdata(hid);
u8 right_motor_on;
u8 left_motor_force;
+ if (!bigben) {
+ hid_err(hid, "no device data\n");
+ return 0;
+ }
+
if (effect->type != FF_RUMBLE)
return 0;
@@ -298,8 +308,8 @@ static void bigben_remove(struct hid_device *hid)
{
struct bigben_device *bigben = hid_get_drvdata(hid);
+ bigben->removed = true;
cancel_work_sync(&bigben->worker);
- hid_hw_close(hid);
hid_hw_stop(hid);
}
@@ -319,6 +329,7 @@ static int bigben_probe(struct hid_device *hid,
return -ENOMEM;
hid_set_drvdata(hid, bigben);
bigben->hid = hid;
+ bigben->removed = false;
error = hid_parse(hid);
if (error) {
@@ -341,10 +352,10 @@ static int bigben_probe(struct hid_device *hid,
INIT_WORK(&bigben->worker, bigben_worker);
- error = input_ff_create_memless(hidinput->input, bigben,
+ error = input_ff_create_memless(hidinput->input, NULL,
hid_bigben_play_effect);
if (error)
- return error;
+ goto error_hw_stop;
name_sz = strlen(dev_name(&hid->dev)) + strlen(":red:bigben#") + 1;
@@ -354,8 +365,10 @@ static int bigben_probe(struct hid_device *hid,
sizeof(struct led_classdev) + name_sz,
GFP_KERNEL
);
- if (!led)
- return -ENOMEM;
+ if (!led) {
+ error = -ENOMEM;
+ goto error_hw_stop;
+ }
name = (void *)(&led[1]);
snprintf(name, name_sz,
"%s:red:bigben%d",
@@ -369,7 +382,7 @@ static int bigben_probe(struct hid_device *hid,
bigben->leds[n] = led;
error = devm_led_classdev_register(&hid->dev, led);
if (error)
- return error;
+ goto error_hw_stop;
}
/* initial state: LED1 is on, no rumble effect */
@@ -383,6 +396,10 @@ static int bigben_probe(struct hid_device *hid,
hid_info(hid, "LED and force feedback support for BigBen gamepad\n");
return 0;
+
+error_hw_stop:
+ hid_hw_stop(hid);
+ return error;
}
static __u8 *bigben_report_fixup(struct hid_device *hid, __u8 *rdesc,
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 851fe54..359616e 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1741,7 +1741,9 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size,
rsize = ((report->size - 1) >> 3) + 1;
- if (rsize > HID_MAX_BUFFER_SIZE)
+ if (report_enum->numbered && rsize >= HID_MAX_BUFFER_SIZE)
+ rsize = HID_MAX_BUFFER_SIZE - 1;
+ else if (rsize > HID_MAX_BUFFER_SIZE)
rsize = HID_MAX_BUFFER_SIZE;
if (csize < rsize) {
diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c
index 2aa4ed1..85a054f 100644
--- a/drivers/hid/hid-google-hammer.c
+++ b/drivers/hid/hid-google-hammer.c
@@ -533,6 +533,8 @@ static const struct hid_device_id hammer_devices[] = {
{ HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MASTERBALL) },
{ HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
+ USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MOONBALL) },
+ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_STAFF) },
{ HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_WAND) },
diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c
index dddfca5..0b6ee1d 100644
--- a/drivers/hid/hid-hyperv.c
+++ b/drivers/hid/hid-hyperv.c
@@ -193,8 +193,7 @@ static void mousevsc_on_receive_device_info(struct mousevsc_dev *input_device,
goto cleanup;
/* The pointer is not NULL when we resume from hibernation */
- if (input_device->hid_desc != NULL)
- kfree(input_device->hid_desc);
+ kfree(input_device->hid_desc);
input_device->hid_desc = kmemdup(desc, desc->bLength, GFP_ATOMIC);
if (!input_device->hid_desc)
@@ -207,8 +206,7 @@ static void mousevsc_on_receive_device_info(struct mousevsc_dev *input_device,
}
/* The pointer is not NULL when we resume from hibernation */
- if (input_device->report_desc != NULL)
- kfree(input_device->report_desc);
+ kfree(input_device->report_desc);
input_device->report_desc = kzalloc(input_device->report_desc_size,
GFP_ATOMIC);
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 3a400ce..9f22134 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -478,6 +478,7 @@
#define USB_DEVICE_ID_GOOGLE_WHISKERS 0x5030
#define USB_DEVICE_ID_GOOGLE_MASTERBALL 0x503c
#define USB_DEVICE_ID_GOOGLE_MAGNEMITE 0x503d
+#define USB_DEVICE_ID_GOOGLE_MOONBALL 0x5044
#define USB_VENDOR_ID_GOTOP 0x08f2
#define USB_DEVICE_ID_SUPER_Q2 0x007f
@@ -726,6 +727,7 @@
#define USB_DEVICE_ID_LENOVO_X1_COVER 0x6085
#define USB_DEVICE_ID_LENOVO_X1_TAB 0x60a3
#define USB_DEVICE_ID_LENOVO_X1_TAB3 0x60b5
+#define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D 0x608d
#define USB_VENDOR_ID_LG 0x1fd2
#define USB_DEVICE_ID_LG_MULTITOUCH 0x0064
diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
index c436e12..6c55682 100644
--- a/drivers/hid/hid-ite.c
+++ b/drivers/hid/hid-ite.c
@@ -41,8 +41,9 @@ static const struct hid_device_id ite_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) },
{ HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) },
/* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */
- { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS,
- USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
+ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
+ USB_VENDOR_ID_SYNAPTICS,
+ USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
{ }
};
MODULE_DEVICE_TABLE(hid, ite_devices);
diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index 70e1cb9..094f4f1 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -1256,36 +1256,35 @@ static int hidpp20_battery_map_status_voltage(u8 data[3], int *voltage,
{
int status;
- long charge_sts = (long)data[2];
+ long flags = (long) data[2];
- *level = POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN;
- switch (data[2] & 0xe0) {
- case 0x00:
- status = POWER_SUPPLY_STATUS_CHARGING;
- break;
- case 0x20:
- status = POWER_SUPPLY_STATUS_FULL;
- *level = POWER_SUPPLY_CAPACITY_LEVEL_FULL;
- break;
- case 0x40:
+ if (flags & 0x80)
+ switch (flags & 0x07) {
+ case 0:
+ status = POWER_SUPPLY_STATUS_CHARGING;
+ break;
+ case 1:
+ status = POWER_SUPPLY_STATUS_FULL;
+ *level = POWER_SUPPLY_CAPACITY_LEVEL_FULL;
+ break;
+ case 2:
+ status = POWER_SUPPLY_STATUS_NOT_CHARGING;
+ break;
+ default:
+ status = POWER_SUPPLY_STATUS_UNKNOWN;
+ break;
+ }
+ else
status = POWER_SUPPLY_STATUS_DISCHARGING;
- break;
- case 0xe0:
- status = POWER_SUPPLY_STATUS_NOT_CHARGING;
- break;
- default:
- status = POWER_SUPPLY_STATUS_UNKNOWN;
- }
*charge_type = POWER_SUPPLY_CHARGE_TYPE_STANDARD;
- if (test_bit(3, &charge_sts)) {
+ if (test_bit(3, &flags)) {
*charge_type = POWER_SUPPLY_CHARGE_TYPE_FAST;
}
- if (test_bit(4, &charge_sts)) {
+ if (test_bit(4, &flags)) {
*charge_type = POWER_SUPPLY_CHARGE_TYPE_TRICKLE;
}
-
- if (test_bit(5, &charge_sts)) {
+ if (test_bit(5, &flags)) {
*level = POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL;
}
diff --git a/drivers/hid/hid-picolcd_fb.c b/drivers/hid/hid-picolcd_fb.c
index a549c42..33c102a 100644
--- a/drivers/hid/hid-picolcd_fb.c
+++ b/drivers/hid/hid-picolcd_fb.c
@@ -458,9 +458,9 @@ static ssize_t picolcd_fb_update_rate_show(struct device *dev,
if (ret >= PAGE_SIZE)
break;
else if (i == fb_update_rate)
- ret += snprintf(buf+ret, PAGE_SIZE-ret, "[%u] ", i);
+ ret += scnprintf(buf+ret, PAGE_SIZE-ret, "[%u] ", i);
else
- ret += snprintf(buf+ret, PAGE_SIZE-ret, "%u ", i);
+ ret += scnprintf(buf+ret, PAGE_SIZE-ret, "%u ", i);
if (ret > 0)
buf[min(ret, (size_t)PAGE_SIZE)-1] = '\n';
return ret;
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index 0e7b2d9..3735546 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -103,6 +103,7 @@ static const struct hid_device_id hid_quirks[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_M912), HID_QUIRK_MULTI_INPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M406XE), HID_QUIRK_MULTI_INPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE_ID2), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C007), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C077), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_KEYBOARD_G710_PLUS), HID_QUIRK_NOGET },
diff --git a/drivers/hid/hid-sensor-custom.c b/drivers/hid/hid-sensor-custom.c
index fb827c2..4d25577 100644
--- a/drivers/hid/hid-sensor-custom.c
+++ b/drivers/hid/hid-sensor-custom.c
@@ -313,7 +313,7 @@ static ssize_t show_value(struct device *dev, struct device_attribute *attr,
while (i < ret) {
if (i + attribute->size > ret) {
- len += snprintf(&buf[len],
+ len += scnprintf(&buf[len],
PAGE_SIZE - len,
"%d ", values[i]);
break;
@@ -336,10 +336,10 @@ static ssize_t show_value(struct device *dev, struct device_attribute *attr,
++i;
break;
}
- len += snprintf(&buf[len], PAGE_SIZE - len,
+ len += scnprintf(&buf[len], PAGE_SIZE - len,
"%lld ", value);
}
- len += snprintf(&buf[len], PAGE_SIZE - len, "\n");
+ len += scnprintf(&buf[len], PAGE_SIZE - len, "\n");
return len;
} else if (input)
diff --git a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c
index d31ea82..a66f080 100644
--- a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c
+++ b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c
@@ -342,6 +342,14 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = {
.driver_data = (void *)&sipodev_desc
},
{
+ .ident = "Trekstor SURFBOOK E11B",
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TREKSTOR"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "SURFBOOK E11B"),
+ },
+ .driver_data = (void *)&sipodev_desc
+ },
+ {
.ident = "Direkt-Tek DTLAPY116-2",
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Direkt-Tek"),
diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index a970b80..4140dea 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -932,9 +932,9 @@ void hiddev_disconnect(struct hid_device *hid)
hiddev->exist = 0;
if (hiddev->open) {
- mutex_unlock(&hiddev->existancelock);
hid_hw_close(hiddev->hid);
wake_up_interruptible(&hiddev->wait);
+ mutex_unlock(&hiddev->existancelock);
} else {
mutex_unlock(&hiddev->existancelock);
kfree(hiddev);
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 47ac20a..05a3083 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -280,6 +280,15 @@
This driver can also be built as a module. If so, the module
will be called asc7621.
+config SENSORS_AXI_FAN_CONTROL
+ tristate "Analog Devices FAN Control HDL Core driver"
+ help
+ If you say yes here you get support for the Analog Devices
+ AXI HDL FAN monitoring core.
+
+ This driver can also be built as a module. If so, the module
+ will be called axi-fan-control
+
config SENSORS_K8TEMP
tristate "AMD Athlon64/FX or Opteron temperature sensor"
depends on X86 && PCI
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 613f509..b0b9c8e 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -52,6 +52,7 @@
obj-$(CONFIG_SENSORS_ASC7621) += asc7621.o
obj-$(CONFIG_SENSORS_ASPEED) += aspeed-pwm-tacho.o
obj-$(CONFIG_SENSORS_ATXP1) += atxp1.o
+obj-$(CONFIG_SENSORS_AXI_FAN_CONTROL) += axi-fan-control.o
obj-$(CONFIG_SENSORS_CORETEMP) += coretemp.o
obj-$(CONFIG_SENSORS_DA9052_ADC)+= da9052-hwmon.o
obj-$(CONFIG_SENSORS_DA9055)+= da9055-hwmon.o
diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c
index 4cf2545..0db8ef4 100644
--- a/drivers/hwmon/acpi_power_meter.c
+++ b/drivers/hwmon/acpi_power_meter.c
@@ -355,7 +355,9 @@ static ssize_t show_str(struct device *dev,
struct acpi_device *acpi_dev = to_acpi_device(dev);
struct acpi_power_meter_resource *resource = acpi_dev->driver_data;
acpi_string val;
+ int ret;
+ mutex_lock(&resource->lock);
switch (attr->index) {
case 0:
val = resource->model_number;
@@ -372,8 +374,9 @@ static ssize_t show_str(struct device *dev,
val = "";
break;
}
-
- return sprintf(buf, "%s\n", val);
+ ret = sprintf(buf, "%s\n", val);
+ mutex_unlock(&resource->lock);
+ return ret;
}
static ssize_t show_val(struct device *dev,
@@ -817,11 +820,12 @@ static void acpi_power_meter_notify(struct acpi_device *device, u32 event)
resource = acpi_driver_data(device);
- mutex_lock(&resource->lock);
switch (event) {
case METER_NOTIFY_CONFIG:
+ mutex_lock(&resource->lock);
free_capabilities(resource);
res = read_capabilities(resource);
+ mutex_unlock(&resource->lock);
if (res)
break;
@@ -830,15 +834,12 @@ static void acpi_power_meter_notify(struct acpi_device *device, u32 event)
break;
case METER_NOTIFY_TRIP:
sysfs_notify(&device->dev.kobj, NULL, POWER_AVERAGE_NAME);
- update_meter(resource);
break;
case METER_NOTIFY_CAP:
sysfs_notify(&device->dev.kobj, NULL, POWER_CAP_NAME);
- update_cap(resource);
break;
case METER_NOTIFY_INTERVAL:
sysfs_notify(&device->dev.kobj, NULL, POWER_AVG_INTERVAL_NAME);
- update_avg_interval(resource);
break;
case METER_NOTIFY_CAPPING:
sysfs_notify(&device->dev.kobj, NULL, POWER_ALARM_NAME);
@@ -848,7 +849,6 @@ static void acpi_power_meter_notify(struct acpi_device *device, u32 event)
WARN(1, "Unexpected event %d\n", event);
break;
}
- mutex_unlock(&resource->lock);
acpi_bus_generate_netlink_event(ACPI_POWER_METER_CLASS,
dev_name(&device->dev), event, 0);
@@ -912,8 +912,8 @@ static int acpi_power_meter_remove(struct acpi_device *device)
resource = acpi_driver_data(device);
hwmon_device_unregister(resource->hwmon_dev);
- free_capabilities(resource);
remove_attrs(resource);
+ free_capabilities(resource);
kfree(resource);
return 0;
diff --git a/drivers/hwmon/adt7462.c b/drivers/hwmon/adt7462.c
index 9632e2e..319a051 100644
--- a/drivers/hwmon/adt7462.c
+++ b/drivers/hwmon/adt7462.c
@@ -413,7 +413,7 @@ static int ADT7462_REG_VOLT(struct adt7462_data *data, int which)
return 0x95;
break;
}
- return -ENODEV;
+ return 0;
}
/* Provide labels for sysfs */
diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c
index 01c2eeb..0540804 100644
--- a/drivers/hwmon/adt7475.c
+++ b/drivers/hwmon/adt7475.c
@@ -19,6 +19,7 @@
#include <linux/hwmon-vid.h>
#include <linux/err.h>
#include <linux/jiffies.h>
+#include <linux/of.h>
#include <linux/util_macros.h>
/* Indexes for the sysfs hooks */
@@ -193,6 +194,7 @@ struct adt7475_data {
unsigned long measure_updated;
bool valid;
+ u8 config2;
u8 config4;
u8 config5;
u8 has_voltage;
@@ -1458,6 +1460,85 @@ static int adt7475_update_limits(struct i2c_client *client)
return 0;
}
+static int set_property_bit(const struct i2c_client *client, char *property,
+ u8 *config, u8 bit_index)
+{
+ u32 prop_value = 0;
+ int ret = of_property_read_u32(client->dev.of_node, property,
+ &prop_value);
+
+ if (!ret) {
+ if (prop_value)
+ *config |= (1 << bit_index);
+ else
+ *config &= ~(1 << bit_index);
+ }
+
+ return ret;
+}
+
+static int load_attenuators(const struct i2c_client *client, int chip,
+ struct adt7475_data *data)
+{
+ int ret;
+
+ if (chip == adt7476 || chip == adt7490) {
+ set_property_bit(client, "adi,bypass-attenuator-in0",
+ &data->config4, 4);
+ set_property_bit(client, "adi,bypass-attenuator-in1",
+ &data->config4, 5);
+ set_property_bit(client, "adi,bypass-attenuator-in3",
+ &data->config4, 6);
+ set_property_bit(client, "adi,bypass-attenuator-in4",
+ &data->config4, 7);
+
+ ret = i2c_smbus_write_byte_data(client, REG_CONFIG4,
+ data->config4);
+ if (ret < 0)
+ return ret;
+ } else if (chip == adt7473 || chip == adt7475) {
+ set_property_bit(client, "adi,bypass-attenuator-in1",
+ &data->config2, 5);
+
+ ret = i2c_smbus_write_byte_data(client, REG_CONFIG2,
+ data->config2);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int adt7475_set_pwm_polarity(struct i2c_client *client)
+{
+ u32 states[ADT7475_PWM_COUNT];
+ int ret, i;
+ u8 val;
+
+ ret = of_property_read_u32_array(client->dev.of_node,
+ "adi,pwm-active-state", states,
+ ARRAY_SIZE(states));
+ if (ret)
+ return ret;
+
+ for (i = 0; i < ADT7475_PWM_COUNT; i++) {
+ ret = adt7475_read(PWM_CONFIG_REG(i));
+ if (ret < 0)
+ return ret;
+ val = ret;
+ if (states[i])
+ val &= ~BIT(4);
+ else
+ val |= BIT(4);
+
+ ret = i2c_smbus_write_byte_data(client, PWM_CONFIG_REG(i), val);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int adt7475_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
@@ -1472,7 +1553,7 @@ static int adt7475_probe(struct i2c_client *client,
struct adt7475_data *data;
struct device *hwmon_dev;
int i, ret = 0, revision, group_num = 0;
- u8 config2, config3;
+ u8 config3;
data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL);
if (data == NULL)
@@ -1546,8 +1627,12 @@ static int adt7475_probe(struct i2c_client *client,
}
/* Voltage attenuators can be bypassed, globally or individually */
- config2 = adt7475_read(REG_CONFIG2);
- if (config2 & CONFIG2_ATTN) {
+ data->config2 = adt7475_read(REG_CONFIG2);
+ ret = load_attenuators(client, chip, data);
+ if (ret)
+ dev_warn(&client->dev, "Error configuring attenuator bypass\n");
+
+ if (data->config2 & CONFIG2_ATTN) {
data->bypass_attn = (0x3 << 3) | 0x3;
} else {
data->bypass_attn = ((data->config4 & CONFIG4_ATTN_IN10) >> 4) |
@@ -1562,6 +1647,10 @@ static int adt7475_probe(struct i2c_client *client,
for (i = 0; i < ADT7475_PWM_COUNT; i++)
adt7475_read_pwm(client, i);
+ ret = adt7475_set_pwm_polarity(client);
+ if (ret && ret != -EINVAL)
+ dev_warn(&client->dev, "Error configuring pwm polarity\n");
+
/* Start monitoring */
switch (chip) {
case adt7475:
diff --git a/drivers/hwmon/axi-fan-control.c b/drivers/hwmon/axi-fan-control.c
new file mode 100644
index 0000000..38d9cdb
--- /dev/null
+++ b/drivers/hwmon/axi-fan-control.c
@@ -0,0 +1,469 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Fan Control HDL CORE driver
+ *
+ * Copyright 2019 Analog Devices Inc.
+ */
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/fpga/adi-axi-common.h>
+#include <linux/hwmon.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#define ADI_AXI_PCORE_VER_MAJOR(version) (((version) >> 16) & 0xff)
+#define ADI_AXI_PCORE_VER_MINOR(version) (((version) >> 8) & 0xff)
+#define ADI_AXI_PCORE_VER_PATCH(version) ((version) & 0xff)
+
+/* register map */
+#define ADI_REG_RSTN 0x0080
+#define ADI_REG_PWM_WIDTH 0x0084
+#define ADI_REG_TACH_PERIOD 0x0088
+#define ADI_REG_TACH_TOLERANCE 0x008c
+#define ADI_REG_PWM_PERIOD 0x00c0
+#define ADI_REG_TACH_MEASUR 0x00c4
+#define ADI_REG_TEMPERATURE 0x00c8
+
+#define ADI_REG_IRQ_MASK 0x0040
+#define ADI_REG_IRQ_PENDING 0x0044
+#define ADI_REG_IRQ_SRC 0x0048
+
+/* IRQ sources */
+#define ADI_IRQ_SRC_PWM_CHANGED BIT(0)
+#define ADI_IRQ_SRC_TACH_ERR BIT(1)
+#define ADI_IRQ_SRC_TEMP_INCREASE BIT(2)
+#define ADI_IRQ_SRC_NEW_MEASUR BIT(3)
+#define ADI_IRQ_SRC_MASK GENMASK(3, 0)
+#define ADI_IRQ_MASK_OUT_ALL 0xFFFFFFFFU
+
+#define SYSFS_PWM_MAX 255
+
+struct axi_fan_control_data {
+ void __iomem *base;
+ struct device *hdev;
+ unsigned long clk_rate;
+ int irq;
+ /* pulses per revolution */
+ u32 ppr;
+ bool hw_pwm_req;
+ bool update_tacho_params;
+ u8 fan_fault;
+};
+
+static inline void axi_iowrite(const u32 val, const u32 reg,
+ const struct axi_fan_control_data *ctl)
+{
+ iowrite32(val, ctl->base + reg);
+}
+
+static inline u32 axi_ioread(const u32 reg,
+ const struct axi_fan_control_data *ctl)
+{
+ return ioread32(ctl->base + reg);
+}
+
+static long axi_fan_control_get_pwm_duty(const struct axi_fan_control_data *ctl)
+{
+ u32 pwm_width = axi_ioread(ADI_REG_PWM_WIDTH, ctl);
+ u32 pwm_period = axi_ioread(ADI_REG_PWM_PERIOD, ctl);
+ /*
+ * PWM_PERIOD is a RO register set by the core. It should never be 0.
+ * For now we are trusting the HW...
+ */
+ return DIV_ROUND_CLOSEST(pwm_width * SYSFS_PWM_MAX, pwm_period);
+}
+
+static int axi_fan_control_set_pwm_duty(const long val,
+ struct axi_fan_control_data *ctl)
+{
+ u32 pwm_period = axi_ioread(ADI_REG_PWM_PERIOD, ctl);
+ u32 new_width;
+ long __val = clamp_val(val, 0, SYSFS_PWM_MAX);
+
+ new_width = DIV_ROUND_CLOSEST(__val * pwm_period, SYSFS_PWM_MAX);
+
+ axi_iowrite(new_width, ADI_REG_PWM_WIDTH, ctl);
+
+ return 0;
+}
+
+static long axi_fan_control_get_fan_rpm(const struct axi_fan_control_data *ctl)
+{
+ const u32 tach = axi_ioread(ADI_REG_TACH_MEASUR, ctl);
+
+ if (tach == 0)
+ /* should we return error, EAGAIN maybe? */
+ return 0;
+ /*
+ * The tacho period should be:
+ * TACH = 60/(ppr * rpm), where rpm is revolutions per second
+ * and ppr is pulses per revolution.
+ * Given the tacho period, we can multiply it by the input clock
+ * so that we know how many clocks we need to have this period.
+ * From this, we can derive the RPM value.
+ */
+ return DIV_ROUND_CLOSEST(60 * ctl->clk_rate, ctl->ppr * tach);
+}
+
+static int axi_fan_control_read_temp(struct device *dev, u32 attr, long *val)
+{
+ struct axi_fan_control_data *ctl = dev_get_drvdata(dev);
+ long raw_temp;
+
+ switch (attr) {
+ case hwmon_temp_input:
+ raw_temp = axi_ioread(ADI_REG_TEMPERATURE, ctl);
+ /*
+ * The formula for the temperature is:
+ * T = (ADC * 501.3743 / 2^bits) - 273.6777
+ * It's multiplied by 1000 to have millidegrees as
+ * specified by the hwmon sysfs interface.
+ */
+ *val = ((raw_temp * 501374) >> 16) - 273677;
+ return 0;
+ default:
+ return -ENOTSUPP;
+ }
+}
+
+static int axi_fan_control_read_fan(struct device *dev, u32 attr, long *val)
+{
+ struct axi_fan_control_data *ctl = dev_get_drvdata(dev);
+
+ switch (attr) {
+ case hwmon_fan_fault:
+ *val = ctl->fan_fault;
+ /* clear it now */
+ ctl->fan_fault = 0;
+ return 0;
+ case hwmon_fan_input:
+ *val = axi_fan_control_get_fan_rpm(ctl);
+ return 0;
+ default:
+ return -ENOTSUPP;
+ }
+}
+
+static int axi_fan_control_read_pwm(struct device *dev, u32 attr, long *val)
+{
+ struct axi_fan_control_data *ctl = dev_get_drvdata(dev);
+
+ switch (attr) {
+ case hwmon_pwm_input:
+ *val = axi_fan_control_get_pwm_duty(ctl);
+ return 0;
+ default:
+ return -ENOTSUPP;
+ }
+}
+
+static int axi_fan_control_write_pwm(struct device *dev, u32 attr, long val)
+{
+ struct axi_fan_control_data *ctl = dev_get_drvdata(dev);
+
+ switch (attr) {
+ case hwmon_pwm_input:
+ return axi_fan_control_set_pwm_duty(val, ctl);
+ default:
+ return -ENOTSUPP;
+ }
+}
+
+static int axi_fan_control_read_labels(struct device *dev,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel, const char **str)
+{
+ switch (type) {
+ case hwmon_fan:
+ *str = "FAN";
+ return 0;
+ case hwmon_temp:
+ *str = "SYSMON4";
+ return 0;
+ default:
+ return -ENOTSUPP;
+ }
+}
+
+static int axi_fan_control_read(struct device *dev,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ switch (type) {
+ case hwmon_fan:
+ return axi_fan_control_read_fan(dev, attr, val);
+ case hwmon_pwm:
+ return axi_fan_control_read_pwm(dev, attr, val);
+ case hwmon_temp:
+ return axi_fan_control_read_temp(dev, attr, val);
+ default:
+ return -ENOTSUPP;
+ }
+}
+
+static int axi_fan_control_write(struct device *dev,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel, long val)
+{
+ switch (type) {
+ case hwmon_pwm:
+ return axi_fan_control_write_pwm(dev, attr, val);
+ default:
+ return -ENOTSUPP;
+ }
+}
+
+static umode_t axi_fan_control_fan_is_visible(const u32 attr)
+{
+ switch (attr) {
+ case hwmon_fan_input:
+ case hwmon_fan_fault:
+ case hwmon_fan_label:
+ return 0444;
+ default:
+ return 0;
+ }
+}
+
+static umode_t axi_fan_control_pwm_is_visible(const u32 attr)
+{
+ switch (attr) {
+ case hwmon_pwm_input:
+ return 0644;
+ default:
+ return 0;
+ }
+}
+
+static umode_t axi_fan_control_temp_is_visible(const u32 attr)
+{
+ switch (attr) {
+ case hwmon_temp_input:
+ case hwmon_temp_label:
+ return 0444;
+ default:
+ return 0;
+ }
+}
+
+static umode_t axi_fan_control_is_visible(const void *data,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel)
+{
+ switch (type) {
+ case hwmon_fan:
+ return axi_fan_control_fan_is_visible(attr);
+ case hwmon_pwm:
+ return axi_fan_control_pwm_is_visible(attr);
+ case hwmon_temp:
+ return axi_fan_control_temp_is_visible(attr);
+ default:
+ return 0;
+ }
+}
+
+/*
+ * This core has two main ways of changing the PWM duty cycle. It is done,
+ * either by a request from userspace (writing on pwm1_input) or by the
+ * core itself. When the change is done by the core, it will use predefined
+ * parameters to evaluate the tach signal and, on that case we cannot set them.
+ * On the other hand, when the request is done by the user, with some arbitrary
+ * value that the core does not now about, we have to provide the tach
+ * parameters so that, the core can evaluate the signal. On the IRQ handler we
+ * distinguish this by using the ADI_IRQ_SRC_TEMP_INCREASE interrupt. This tell
+ * us that the CORE requested a new duty cycle. After this, there is 5s delay
+ * on which the core waits for the fan rotation speed to stabilize. After this
+ * we get ADI_IRQ_SRC_PWM_CHANGED irq where we will decide if we need to set
+ * the tach parameters or not on the next tach measurement cycle (corresponding
+ * already to the ney duty cycle) based on the %ctl->hw_pwm_req flag.
+ */
+static irqreturn_t axi_fan_control_irq_handler(int irq, void *data)
+{
+ struct axi_fan_control_data *ctl = (struct axi_fan_control_data *)data;
+ u32 irq_pending = axi_ioread(ADI_REG_IRQ_PENDING, ctl);
+ u32 clear_mask;
+
+ if (irq_pending & ADI_IRQ_SRC_NEW_MEASUR) {
+ if (ctl->update_tacho_params) {
+ u32 new_tach = axi_ioread(ADI_REG_TACH_MEASUR, ctl);
+
+ /* get 25% tolerance */
+ u32 tach_tol = DIV_ROUND_CLOSEST(new_tach * 25, 100);
+ /* set new tacho parameters */
+ axi_iowrite(new_tach, ADI_REG_TACH_PERIOD, ctl);
+ axi_iowrite(tach_tol, ADI_REG_TACH_TOLERANCE, ctl);
+ ctl->update_tacho_params = false;
+ }
+ }
+
+ if (irq_pending & ADI_IRQ_SRC_PWM_CHANGED) {
+ /*
+ * if the pwm changes on behalf of software,
+ * we need to provide new tacho parameters to the core.
+ * Wait for the next measurement for that...
+ */
+ if (!ctl->hw_pwm_req) {
+ ctl->update_tacho_params = true;
+ } else {
+ ctl->hw_pwm_req = false;
+ sysfs_notify(&ctl->hdev->kobj, NULL, "pwm1");
+ }
+ }
+
+ if (irq_pending & ADI_IRQ_SRC_TEMP_INCREASE)
+ /* hardware requested a new pwm */
+ ctl->hw_pwm_req = true;
+
+ if (irq_pending & ADI_IRQ_SRC_TACH_ERR)
+ ctl->fan_fault = 1;
+
+ /* clear all interrupts */
+ clear_mask = irq_pending & ADI_IRQ_SRC_MASK;
+ axi_iowrite(clear_mask, ADI_REG_IRQ_PENDING, ctl);
+
+ return IRQ_HANDLED;
+}
+
+static int axi_fan_control_init(struct axi_fan_control_data *ctl,
+ const struct device_node *np)
+{
+ int ret;
+
+ /* get fan pulses per revolution */
+ ret = of_property_read_u32(np, "pulses-per-revolution", &ctl->ppr);
+ if (ret)
+ return ret;
+
+ /* 1, 2 and 4 are the typical and accepted values */
+ if (ctl->ppr != 1 && ctl->ppr != 2 && ctl->ppr != 4)
+ return -EINVAL;
+ /*
+ * Enable all IRQs
+ */
+ axi_iowrite(ADI_IRQ_MASK_OUT_ALL &
+ ~(ADI_IRQ_SRC_NEW_MEASUR | ADI_IRQ_SRC_TACH_ERR |
+ ADI_IRQ_SRC_PWM_CHANGED | ADI_IRQ_SRC_TEMP_INCREASE),
+ ADI_REG_IRQ_MASK, ctl);
+
+ /* bring the device out of reset */
+ axi_iowrite(0x01, ADI_REG_RSTN, ctl);
+
+ return ret;
+}
+
+static const struct hwmon_channel_info *axi_fan_control_info[] = {
+ HWMON_CHANNEL_INFO(pwm, HWMON_PWM_INPUT),
+ HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT | HWMON_F_FAULT | HWMON_F_LABEL),
+ HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_LABEL),
+ NULL
+};
+
+static const struct hwmon_ops axi_fan_control_hwmon_ops = {
+ .is_visible = axi_fan_control_is_visible,
+ .read = axi_fan_control_read,
+ .write = axi_fan_control_write,
+ .read_string = axi_fan_control_read_labels,
+};
+
+static const struct hwmon_chip_info axi_chip_info = {
+ .ops = &axi_fan_control_hwmon_ops,
+ .info = axi_fan_control_info,
+};
+
+static const u32 version_1_0_0 = ADI_AXI_PCORE_VER(1, 0, 'a');
+
+static const struct of_device_id axi_fan_control_of_match[] = {
+ { .compatible = "adi,axi-fan-control-1.00.a",
+ .data = (void *)&version_1_0_0},
+ {},
+};
+MODULE_DEVICE_TABLE(of, axi_fan_control_of_match);
+
+static int axi_fan_control_probe(struct platform_device *pdev)
+{
+ struct axi_fan_control_data *ctl;
+ struct clk *clk;
+ const struct of_device_id *id;
+ const char *name = "axi_fan_control";
+ u32 version;
+ int ret;
+
+ id = of_match_node(axi_fan_control_of_match, pdev->dev.of_node);
+ if (!id)
+ return -EINVAL;
+
+ ctl = devm_kzalloc(&pdev->dev, sizeof(*ctl), GFP_KERNEL);
+ if (!ctl)
+ return -ENOMEM;
+
+ ctl->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(ctl->base))
+ return PTR_ERR(ctl->base);
+
+ clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(clk)) {
+ dev_err(&pdev->dev, "clk_get failed with %ld\n", PTR_ERR(clk));
+ return PTR_ERR(clk);
+ }
+
+ ctl->clk_rate = clk_get_rate(clk);
+ if (!ctl->clk_rate)
+ return -EINVAL;
+
+ version = axi_ioread(ADI_AXI_REG_VERSION, ctl);
+ if (ADI_AXI_PCORE_VER_MAJOR(version) !=
+ ADI_AXI_PCORE_VER_MAJOR((*(u32 *)id->data))) {
+ dev_err(&pdev->dev, "Major version mismatch. Expected %d.%.2d.%c, Reported %d.%.2d.%c\n",
+ ADI_AXI_PCORE_VER_MAJOR((*(u32 *)id->data)),
+ ADI_AXI_PCORE_VER_MINOR((*(u32 *)id->data)),
+ ADI_AXI_PCORE_VER_PATCH((*(u32 *)id->data)),
+ ADI_AXI_PCORE_VER_MAJOR(version),
+ ADI_AXI_PCORE_VER_MINOR(version),
+ ADI_AXI_PCORE_VER_PATCH(version));
+ return -ENODEV;
+ }
+
+ ctl->irq = platform_get_irq(pdev, 0);
+ if (ctl->irq < 0)
+ return ctl->irq;
+
+ ret = devm_request_threaded_irq(&pdev->dev, ctl->irq, NULL,
+ axi_fan_control_irq_handler,
+ IRQF_ONESHOT | IRQF_TRIGGER_HIGH,
+ pdev->driver_override, ctl);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to request an irq, %d", ret);
+ return ret;
+ }
+
+ ret = axi_fan_control_init(ctl, pdev->dev.of_node);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to initialize device\n");
+ return ret;
+ }
+
+ ctl->hdev = devm_hwmon_device_register_with_info(&pdev->dev,
+ name,
+ ctl,
+ &axi_chip_info,
+ NULL);
+
+ return PTR_ERR_OR_ZERO(ctl->hdev);
+}
+
+static struct platform_driver axi_fan_control_driver = {
+ .driver = {
+ .name = "axi_fan_control_driver",
+ .of_match_table = axi_fan_control_of_match,
+ },
+ .probe = axi_fan_control_probe,
+};
+module_platform_driver(axi_fan_control_driver);
+
+MODULE_AUTHOR("Nuno Sa <nuno.sa@analog.com>");
+MODULE_DESCRIPTION("Analog Devices Fan Control HDL CORE driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/hwmon/ibmaem.c b/drivers/hwmon/ibmaem.c
index d05ab71..a4ec852 100644
--- a/drivers/hwmon/ibmaem.c
+++ b/drivers/hwmon/ibmaem.c
@@ -219,7 +219,7 @@ struct aem_read_sensor_req {
struct aem_read_sensor_resp {
struct aem_iana_id id;
- u8 bytes[0];
+ u8 bytes[];
} __packed;
/* Data structures to talk to the IPMI layer */
diff --git a/drivers/hwmon/ibmpowernv.c b/drivers/hwmon/ibmpowernv.c
index 0e525cf..a750647 100644
--- a/drivers/hwmon/ibmpowernv.c
+++ b/drivers/hwmon/ibmpowernv.c
@@ -186,7 +186,7 @@ static void make_sensor_label(struct device_node *np,
u32 id;
size_t n;
- n = snprintf(sdata->label, sizeof(sdata->label), "%s", label);
+ n = scnprintf(sdata->label, sizeof(sdata->label), "%s", label);
/*
* Core temp pretty print
@@ -199,11 +199,11 @@ static void make_sensor_label(struct device_node *np,
* The digital thermal sensors are associated
* with a core.
*/
- n += snprintf(sdata->label + n,
+ n += scnprintf(sdata->label + n,
sizeof(sdata->label) - n, " %d",
cpuid);
else
- n += snprintf(sdata->label + n,
+ n += scnprintf(sdata->label + n,
sizeof(sdata->label) - n, " phy%d", id);
}
@@ -211,7 +211,7 @@ static void make_sensor_label(struct device_node *np,
* Membuffer pretty print
*/
if (!of_property_read_u32(np, "ibm,chip-id", &id))
- n += snprintf(sdata->label + n, sizeof(sdata->label) - n,
+ n += scnprintf(sdata->label + n, sizeof(sdata->label) - n,
" %d", id & 0xffff);
}
diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index e39354f..3f37d5d 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -96,13 +96,20 @@ struct k10temp_data {
void (*read_tempreg)(struct pci_dev *pdev, u32 *regval);
int temp_offset;
u32 temp_adjust_mask;
- bool show_tdie;
- u32 show_tccd;
+ u32 show_temp;
u32 svi_addr[2];
+ bool is_zen;
bool show_current;
int cfactor[2];
};
+#define TCTL_BIT 0
+#define TDIE_BIT 1
+#define TCCD_BIT(x) ((x) + 2)
+
+#define HAVE_TEMP(d, channel) ((d)->show_temp & BIT(channel))
+#define HAVE_TDIE(d) HAVE_TEMP(d, TDIE_BIT)
+
struct tctl_offset {
u8 model;
char const *id;
@@ -180,8 +187,8 @@ static long get_raw_temp(struct k10temp_data *data)
}
const char *k10temp_temp_label[] = {
- "Tdie",
"Tctl",
+ "Tdie",
"Tccd1",
"Tccd2",
"Tccd3",
@@ -269,13 +276,13 @@ static int k10temp_read_temp(struct device *dev, u32 attr, int channel,
switch (attr) {
case hwmon_temp_input:
switch (channel) {
- case 0: /* Tdie */
- *val = get_raw_temp(data) - data->temp_offset;
+ case 0: /* Tctl */
+ *val = get_raw_temp(data);
if (*val < 0)
*val = 0;
break;
- case 1: /* Tctl */
- *val = get_raw_temp(data);
+ case 1: /* Tdie */
+ *val = get_raw_temp(data) - data->temp_offset;
if (*val < 0)
*val = 0;
break;
@@ -333,23 +340,11 @@ static umode_t k10temp_is_visible(const void *_data,
case hwmon_temp:
switch (attr) {
case hwmon_temp_input:
- switch (channel) {
- case 0: /* Tdie, or Tctl if we don't show it */
- break;
- case 1: /* Tctl */
- if (!data->show_tdie)
- return 0;
- break;
- case 2 ... 9: /* Tccd{1-8} */
- if (!(data->show_tccd & BIT(channel - 2)))
- return 0;
- break;
- default:
+ if (!HAVE_TEMP(data, channel))
return 0;
- }
break;
case hwmon_temp_max:
- if (channel || data->show_tdie)
+ if (channel || data->is_zen)
return 0;
break;
case hwmon_temp_crit:
@@ -368,20 +363,9 @@ static umode_t k10temp_is_visible(const void *_data,
return 0;
break;
case hwmon_temp_label:
- /* No labels if we don't show the die temperature */
- if (!data->show_tdie)
+ /* Show temperature labels only on Zen CPUs */
+ if (!data->is_zen || !HAVE_TEMP(data, channel))
return 0;
- switch (channel) {
- case 0: /* Tdie */
- case 1: /* Tctl */
- break;
- case 2 ... 9: /* Tccd{1-8} */
- if (!(data->show_tccd & BIT(channel - 2)))
- return 0;
- break;
- default:
- return 0;
- }
break;
default:
return 0;
@@ -480,7 +464,7 @@ static void k10temp_init_debugfs(struct k10temp_data *data)
char name[32];
/* Only show debugfs data for Family 17h/18h CPUs */
- if (!data->show_tdie)
+ if (!data->is_zen)
return;
scnprintf(name, sizeof(name), "k10temp-%s", pci_name(data->pdev));
@@ -546,7 +530,7 @@ static void k10temp_get_ccd_support(struct pci_dev *pdev,
amd_smn_read(amd_pci_dev_to_node_id(pdev),
F17H_M70H_CCD_TEMP(i), ®val);
if (regval & F17H_M70H_CCD_TEMP_VALID)
- data->show_tccd |= BIT(i);
+ data->show_temp |= BIT(TCCD_BIT(i));
}
}
@@ -573,6 +557,7 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return -ENOMEM;
data->pdev = pdev;
+ data->show_temp |= BIT(TCTL_BIT); /* Always show Tctl */
if (boot_cpu_data.x86 == 0x15 &&
((boot_cpu_data.x86_model & 0xf0) == 0x60 ||
@@ -582,7 +567,8 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
} else if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) {
data->temp_adjust_mask = CUR_TEMP_RANGE_SEL_MASK;
data->read_tempreg = read_tempreg_nb_f17;
- data->show_tdie = true;
+ data->show_temp |= BIT(TDIE_BIT); /* show Tdie */
+ data->is_zen = true;
switch (boot_cpu_data.x86_model) {
case 0x1: /* Zen */
diff --git a/drivers/hwmon/lm73.c b/drivers/hwmon/lm73.c
index 1eeb9d7..733c48b 100644
--- a/drivers/hwmon/lm73.c
+++ b/drivers/hwmon/lm73.c
@@ -262,10 +262,20 @@ static int lm73_detect(struct i2c_client *new_client,
return 0;
}
+static const struct of_device_id lm73_of_match[] = {
+ {
+ .compatible = "ti,lm73",
+ },
+ { },
+};
+
+MODULE_DEVICE_TABLE(of, lm73_of_match);
+
static struct i2c_driver lm73_driver = {
.class = I2C_CLASS_HWMON,
.driver = {
.name = "lm73",
+ .of_match_table = lm73_of_match,
},
.probe = lm73_probe,
.id_table = lm73_ids,
diff --git a/drivers/hwmon/nct7904.c b/drivers/hwmon/nct7904.c
index 281c81e..1f5743d 100644
--- a/drivers/hwmon/nct7904.c
+++ b/drivers/hwmon/nct7904.c
@@ -7,6 +7,11 @@
*
* Copyright (c) 2019 Advantech
* Author: Amy.Shih <amy.shih@advantech.com.tw>
+ *
+ * Supports the following chips:
+ *
+ * Chip #vin #fan #pwm #temp #dts chip ID
+ * nct7904d 20 12 4 5 8 0xc5
*/
#include <linux/module.h>
@@ -820,6 +825,10 @@ static const struct hwmon_channel_info *nct7904_info[] = {
HWMON_F_INPUT | HWMON_F_MIN | HWMON_F_ALARM,
HWMON_F_INPUT | HWMON_F_MIN | HWMON_F_ALARM,
HWMON_F_INPUT | HWMON_F_MIN | HWMON_F_ALARM,
+ HWMON_F_INPUT | HWMON_F_MIN | HWMON_F_ALARM,
+ HWMON_F_INPUT | HWMON_F_MIN | HWMON_F_ALARM,
+ HWMON_F_INPUT | HWMON_F_MIN | HWMON_F_ALARM,
+ HWMON_F_INPUT | HWMON_F_MIN | HWMON_F_ALARM,
HWMON_F_INPUT | HWMON_F_MIN | HWMON_F_ALARM),
HWMON_CHANNEL_INFO(pwm,
HWMON_PWM_INPUT | HWMON_PWM_ENABLE,
@@ -853,6 +862,18 @@ static const struct hwmon_channel_info *nct7904_info[] = {
HWMON_T_CRIT_HYST,
HWMON_T_INPUT | HWMON_T_ALARM | HWMON_T_MAX |
HWMON_T_MAX_HYST | HWMON_T_TYPE | HWMON_T_CRIT |
+ HWMON_T_CRIT_HYST,
+ HWMON_T_INPUT | HWMON_T_ALARM | HWMON_T_MAX |
+ HWMON_T_MAX_HYST | HWMON_T_TYPE | HWMON_T_CRIT |
+ HWMON_T_CRIT_HYST,
+ HWMON_T_INPUT | HWMON_T_ALARM | HWMON_T_MAX |
+ HWMON_T_MAX_HYST | HWMON_T_TYPE | HWMON_T_CRIT |
+ HWMON_T_CRIT_HYST,
+ HWMON_T_INPUT | HWMON_T_ALARM | HWMON_T_MAX |
+ HWMON_T_MAX_HYST | HWMON_T_TYPE | HWMON_T_CRIT |
+ HWMON_T_CRIT_HYST,
+ HWMON_T_INPUT | HWMON_T_ALARM | HWMON_T_MAX |
+ HWMON_T_MAX_HYST | HWMON_T_TYPE | HWMON_T_CRIT |
HWMON_T_CRIT_HYST),
NULL
};
diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig
index a9ea062..de12a56 100644
--- a/drivers/hwmon/pmbus/Kconfig
+++ b/drivers/hwmon/pmbus/Kconfig
@@ -92,10 +92,10 @@
be called irps5401.
config SENSORS_ISL68137
- tristate "Intersil ISL68137"
+ tristate "Renesas Digital Multiphase Voltage Regulators"
help
- If you say yes here you get hardware monitoring support for Intersil
- ISL68137.
+ If you say yes here you get hardware monitoring support for Renesas
+ digital multiphase voltage regulators.
This driver can also be built as a module. If so, the module will
be called isl68137.
@@ -113,8 +113,8 @@
tristate "Linear Technologies LTC2978 and compatibles"
help
If you say yes here you get hardware monitoring support for Linear
- Technology LTC2974, LTC2975, LTC2977, LTC2978, LTC2980, LTC3880,
- LTC3883, LTC3886, LTC3887, LTCM2987, LTM4675, and LTM4676.
+ Technology LTC2972, LTC2974, LTC2975, LTC2977, LTC2978, LTC2979,
+ LTC2980, and LTM2987.
This driver can also be built as a module. If so, the module will
be called ltc2978.
@@ -123,9 +123,10 @@
bool "Regulator support for LTC2978 and compatibles"
depends on SENSORS_LTC2978 && REGULATOR
help
- If you say yes here you get regulator support for Linear
- Technology LTC2974, LTC2977, LTC2978, LTC3880, LTC3883, LTM4676
- and LTM4686.
+ If you say yes here you get regulator support for Linear Technology
+ LTC3880, LTC3883, LTC3884, LTC3886, LTC3887, LTC3889, LTC7880,
+ LTM4644, LTM4675, LTM4676, LTM4677, LTM4678, LTM4680, LTM4686,
+ and LTM4700.
config SENSORS_LTC3815
tristate "Linear Technologies LTC3815"
@@ -209,10 +210,10 @@
be called tps40422.
config SENSORS_TPS53679
- tristate "TI TPS53679, TPS53688"
+ tristate "TI TPS53647, TPS53667, TPS53679, TPS53681, TPS53688"
help
If you say yes here you get hardware monitoring support for TI
- TPS53679, TPS53688
+ TPS53647, TPS53667, TPS53679, TPS53681, and TPS53688.
This driver can also be built as a module. If so, the module will
be called tps53679.
diff --git a/drivers/hwmon/pmbus/adm1275.c b/drivers/hwmon/pmbus/adm1275.c
index 5caa37fb..e25f541 100644
--- a/drivers/hwmon/pmbus/adm1275.c
+++ b/drivers/hwmon/pmbus/adm1275.c
@@ -226,7 +226,8 @@ static int adm1275_write_pmon_config(const struct adm1275_data *data,
return ret;
}
-static int adm1275_read_word_data(struct i2c_client *client, int page, int reg)
+static int adm1275_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
{
const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
const struct adm1275_data *data = to_adm1275_data(info);
@@ -239,58 +240,68 @@ static int adm1275_read_word_data(struct i2c_client *client, int page, int reg)
case PMBUS_IOUT_UC_FAULT_LIMIT:
if (!data->have_uc_fault)
return -ENXIO;
- ret = pmbus_read_word_data(client, 0, ADM1275_IOUT_WARN2_LIMIT);
+ ret = pmbus_read_word_data(client, 0, 0xff,
+ ADM1275_IOUT_WARN2_LIMIT);
break;
case PMBUS_IOUT_OC_FAULT_LIMIT:
if (!data->have_oc_fault)
return -ENXIO;
- ret = pmbus_read_word_data(client, 0, ADM1275_IOUT_WARN2_LIMIT);
+ ret = pmbus_read_word_data(client, 0, 0xff,
+ ADM1275_IOUT_WARN2_LIMIT);
break;
case PMBUS_VOUT_OV_WARN_LIMIT:
if (data->have_vout)
return -ENODATA;
- ret = pmbus_read_word_data(client, 0,
+ ret = pmbus_read_word_data(client, 0, 0xff,
ADM1075_VAUX_OV_WARN_LIMIT);
break;
case PMBUS_VOUT_UV_WARN_LIMIT:
if (data->have_vout)
return -ENODATA;
- ret = pmbus_read_word_data(client, 0,
+ ret = pmbus_read_word_data(client, 0, 0xff,
ADM1075_VAUX_UV_WARN_LIMIT);
break;
case PMBUS_READ_VOUT:
if (data->have_vout)
return -ENODATA;
- ret = pmbus_read_word_data(client, 0, ADM1075_READ_VAUX);
+ ret = pmbus_read_word_data(client, 0, 0xff,
+ ADM1075_READ_VAUX);
break;
case PMBUS_VIRT_READ_IOUT_MIN:
if (!data->have_iout_min)
return -ENXIO;
- ret = pmbus_read_word_data(client, 0, ADM1293_IOUT_MIN);
+ ret = pmbus_read_word_data(client, 0, 0xff,
+ ADM1293_IOUT_MIN);
break;
case PMBUS_VIRT_READ_IOUT_MAX:
- ret = pmbus_read_word_data(client, 0, ADM1275_PEAK_IOUT);
+ ret = pmbus_read_word_data(client, 0, 0xff,
+ ADM1275_PEAK_IOUT);
break;
case PMBUS_VIRT_READ_VOUT_MAX:
- ret = pmbus_read_word_data(client, 0, ADM1275_PEAK_VOUT);
+ ret = pmbus_read_word_data(client, 0, 0xff,
+ ADM1275_PEAK_VOUT);
break;
case PMBUS_VIRT_READ_VIN_MAX:
- ret = pmbus_read_word_data(client, 0, ADM1275_PEAK_VIN);
+ ret = pmbus_read_word_data(client, 0, 0xff,
+ ADM1275_PEAK_VIN);
break;
case PMBUS_VIRT_READ_PIN_MIN:
if (!data->have_pin_min)
return -ENXIO;
- ret = pmbus_read_word_data(client, 0, ADM1293_PIN_MIN);
+ ret = pmbus_read_word_data(client, 0, 0xff,
+ ADM1293_PIN_MIN);
break;
case PMBUS_VIRT_READ_PIN_MAX:
if (!data->have_pin_max)
return -ENXIO;
- ret = pmbus_read_word_data(client, 0, ADM1276_PEAK_PIN);
+ ret = pmbus_read_word_data(client, 0, 0xff,
+ ADM1276_PEAK_PIN);
break;
case PMBUS_VIRT_READ_TEMP_MAX:
if (!data->have_temp_max)
return -ENXIO;
- ret = pmbus_read_word_data(client, 0, ADM1278_PEAK_TEMP);
+ ret = pmbus_read_word_data(client, 0, 0xff,
+ ADM1278_PEAK_TEMP);
break;
case PMBUS_VIRT_RESET_IOUT_HISTORY:
case PMBUS_VIRT_RESET_VOUT_HISTORY:
diff --git a/drivers/hwmon/pmbus/ibm-cffps.c b/drivers/hwmon/pmbus/ibm-cffps.c
index 3795fe5..7d300f2 100644
--- a/drivers/hwmon/pmbus/ibm-cffps.c
+++ b/drivers/hwmon/pmbus/ibm-cffps.c
@@ -33,9 +33,12 @@
#define CFFPS_INPUT_HISTORY_CMD 0xD6
#define CFFPS_INPUT_HISTORY_SIZE 100
+#define CFFPS_CCIN_REVISION GENMASK(7, 0)
+#define CFFPS_CCIN_REVISION_LEGACY 0xde
#define CFFPS_CCIN_VERSION GENMASK(15, 8)
#define CFFPS_CCIN_VERSION_1 0x2b
#define CFFPS_CCIN_VERSION_2 0x2e
+#define CFFPS_CCIN_VERSION_3 0x51
/* STATUS_MFR_SPECIFIC bits */
#define CFFPS_MFR_FAN_FAULT BIT(0)
@@ -148,7 +151,7 @@ static ssize_t ibm_cffps_debugfs_read(struct file *file, char __user *buf,
struct ibm_cffps *psu = to_psu(idxp, idx);
char data[I2C_SMBUS_BLOCK_MAX + 2] = { 0 };
- pmbus_set_page(psu->client, 0);
+ pmbus_set_page(psu->client, 0, 0xff);
switch (idx) {
case CFFPS_DEBUGFS_INPUT_HISTORY:
@@ -247,7 +250,7 @@ static ssize_t ibm_cffps_debugfs_write(struct file *file,
switch (idx) {
case CFFPS_DEBUGFS_ON_OFF_CONFIG:
- pmbus_set_page(psu->client, 0);
+ pmbus_set_page(psu->client, 0, 0xff);
rc = simple_write_to_buffer(&data, 1, ppos, buf, count);
if (rc <= 0)
@@ -325,13 +328,13 @@ static int ibm_cffps_read_byte_data(struct i2c_client *client, int page,
}
static int ibm_cffps_read_word_data(struct i2c_client *client, int page,
- int reg)
+ int phase, int reg)
{
int rc, mfr;
switch (reg) {
case PMBUS_STATUS_WORD:
- rc = pmbus_read_word_data(client, page, reg);
+ rc = pmbus_read_word_data(client, page, phase, reg);
if (rc < 0)
return rc;
@@ -348,7 +351,8 @@ static int ibm_cffps_read_word_data(struct i2c_client *client, int page,
rc |= PB_STATUS_OFF;
break;
case PMBUS_VIRT_READ_VMON:
- rc = pmbus_read_word_data(client, page, CFFPS_12VCS_VOUT_CMD);
+ rc = pmbus_read_word_data(client, page, phase,
+ CFFPS_12VCS_VOUT_CMD);
break;
default:
rc = -ENODATA;
@@ -379,7 +383,7 @@ static int ibm_cffps_led_brightness_set(struct led_classdev *led_cdev,
dev_dbg(&psu->client->dev, "LED brightness set: %d. Command: %d.\n",
brightness, next_led_state);
- pmbus_set_page(psu->client, 0);
+ pmbus_set_page(psu->client, 0, 0xff);
rc = i2c_smbus_write_byte_data(psu->client, CFFPS_SYS_CONFIG_CMD,
next_led_state);
@@ -401,7 +405,7 @@ static int ibm_cffps_led_blink_set(struct led_classdev *led_cdev,
dev_dbg(&psu->client->dev, "LED blink set.\n");
- pmbus_set_page(psu->client, 0);
+ pmbus_set_page(psu->client, 0, 0xff);
rc = i2c_smbus_write_byte_data(psu->client, CFFPS_SYS_CONFIG_CMD,
CFFPS_LED_BLINK);
@@ -485,11 +489,14 @@ static int ibm_cffps_probe(struct i2c_client *client,
vs = (enum versions)id->driver_data;
if (vs == cffps_unknown) {
+ u16 ccin_revision = 0;
u16 ccin_version = CFFPS_CCIN_VERSION_1;
int ccin = i2c_smbus_read_word_swapped(client, CFFPS_CCIN_CMD);
- if (ccin > 0)
+ if (ccin > 0) {
+ ccin_revision = FIELD_GET(CFFPS_CCIN_REVISION, ccin);
ccin_version = FIELD_GET(CFFPS_CCIN_VERSION, ccin);
+ }
switch (ccin_version) {
default:
@@ -499,6 +506,12 @@ static int ibm_cffps_probe(struct i2c_client *client,
case CFFPS_CCIN_VERSION_2:
vs = cffps2;
break;
+ case CFFPS_CCIN_VERSION_3:
+ if (ccin_revision == CFFPS_CCIN_REVISION_LEGACY)
+ vs = cffps1;
+ else
+ vs = cffps2;
+ break;
}
/* Set the client name to include the version number. */
diff --git a/drivers/hwmon/pmbus/ir35221.c b/drivers/hwmon/pmbus/ir35221.c
index 0d878bc..3eea3e0 100644
--- a/drivers/hwmon/pmbus/ir35221.c
+++ b/drivers/hwmon/pmbus/ir35221.c
@@ -21,37 +21,42 @@
#define IR35221_MFR_IOUT_VALLEY 0xcb
#define IR35221_MFR_TEMP_VALLEY 0xcc
-static int ir35221_read_word_data(struct i2c_client *client, int page, int reg)
+static int ir35221_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
{
int ret;
switch (reg) {
case PMBUS_VIRT_READ_VIN_MAX:
- ret = pmbus_read_word_data(client, page, IR35221_MFR_VIN_PEAK);
+ ret = pmbus_read_word_data(client, page, phase,
+ IR35221_MFR_VIN_PEAK);
break;
case PMBUS_VIRT_READ_VOUT_MAX:
- ret = pmbus_read_word_data(client, page, IR35221_MFR_VOUT_PEAK);
+ ret = pmbus_read_word_data(client, page, phase,
+ IR35221_MFR_VOUT_PEAK);
break;
case PMBUS_VIRT_READ_IOUT_MAX:
- ret = pmbus_read_word_data(client, page, IR35221_MFR_IOUT_PEAK);
+ ret = pmbus_read_word_data(client, page, phase,
+ IR35221_MFR_IOUT_PEAK);
break;
case PMBUS_VIRT_READ_TEMP_MAX:
- ret = pmbus_read_word_data(client, page, IR35221_MFR_TEMP_PEAK);
+ ret = pmbus_read_word_data(client, page, phase,
+ IR35221_MFR_TEMP_PEAK);
break;
case PMBUS_VIRT_READ_VIN_MIN:
- ret = pmbus_read_word_data(client, page,
+ ret = pmbus_read_word_data(client, page, phase,
IR35221_MFR_VIN_VALLEY);
break;
case PMBUS_VIRT_READ_VOUT_MIN:
- ret = pmbus_read_word_data(client, page,
+ ret = pmbus_read_word_data(client, page, phase,
IR35221_MFR_VOUT_VALLEY);
break;
case PMBUS_VIRT_READ_IOUT_MIN:
- ret = pmbus_read_word_data(client, page,
+ ret = pmbus_read_word_data(client, page, phase,
IR35221_MFR_IOUT_VALLEY);
break;
case PMBUS_VIRT_READ_TEMP_MIN:
- ret = pmbus_read_word_data(client, page,
+ ret = pmbus_read_word_data(client, page, phase,
IR35221_MFR_TEMP_VALLEY);
break;
default:
diff --git a/drivers/hwmon/pmbus/isl68137.c b/drivers/hwmon/pmbus/isl68137.c
index 515596c..4d23152 100644
--- a/drivers/hwmon/pmbus/isl68137.c
+++ b/drivers/hwmon/pmbus/isl68137.c
@@ -1,8 +1,9 @@
// SPDX-License-Identifier: GPL-2.0+
/*
- * Hardware monitoring driver for Intersil ISL68137
+ * Hardware monitoring driver for Renesas Digital Multiphase Voltage Regulators
*
* Copyright (c) 2017 Google Inc
+ * Copyright (c) 2020 Renesas Electronics America
*
*/
@@ -14,9 +15,19 @@
#include <linux/module.h>
#include <linux/string.h>
#include <linux/sysfs.h>
+
#include "pmbus.h"
#define ISL68137_VOUT_AVS 0x30
+#define RAA_DMPVR2_READ_VMON 0xc8
+
+enum versions {
+ isl68137,
+ raa_dmpvr2_1rail,
+ raa_dmpvr2_2rail,
+ raa_dmpvr2_3rail,
+ raa_dmpvr2_hv,
+};
static ssize_t isl68137_avs_enable_show_page(struct i2c_client *client,
int page,
@@ -49,7 +60,8 @@ static ssize_t isl68137_avs_enable_store_page(struct i2c_client *client,
* enabling AVS control is the workaround.
*/
if (op_val == ISL68137_VOUT_AVS) {
- rc = pmbus_read_word_data(client, page, PMBUS_VOUT_COMMAND);
+ rc = pmbus_read_word_data(client, page, 0xff,
+ PMBUS_VOUT_COMMAND);
if (rc < 0)
return rc;
@@ -98,13 +110,31 @@ static const struct attribute_group enable_group = {
.attrs = enable_attrs,
};
-static const struct attribute_group *attribute_groups[] = {
+static const struct attribute_group *isl68137_attribute_groups[] = {
&enable_group,
NULL,
};
-static struct pmbus_driver_info isl68137_info = {
- .pages = 2,
+static int raa_dmpvr2_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
+{
+ int ret;
+
+ switch (reg) {
+ case PMBUS_VIRT_READ_VMON:
+ ret = pmbus_read_word_data(client, page, phase,
+ RAA_DMPVR2_READ_VMON);
+ break;
+ default:
+ ret = -ENODATA;
+ break;
+ }
+
+ return ret;
+}
+
+static struct pmbus_driver_info raa_dmpvr_info = {
+ .pages = 3,
.format[PSC_VOLTAGE_IN] = direct,
.format[PSC_VOLTAGE_OUT] = direct,
.format[PSC_CURRENT_IN] = direct,
@@ -113,7 +143,7 @@ static struct pmbus_driver_info isl68137_info = {
.format[PSC_TEMPERATURE] = direct,
.m[PSC_VOLTAGE_IN] = 1,
.b[PSC_VOLTAGE_IN] = 0,
- .R[PSC_VOLTAGE_IN] = 3,
+ .R[PSC_VOLTAGE_IN] = 2,
.m[PSC_VOLTAGE_OUT] = 1,
.b[PSC_VOLTAGE_OUT] = 0,
.R[PSC_VOLTAGE_OUT] = 3,
@@ -133,24 +163,76 @@ static struct pmbus_driver_info isl68137_info = {
| PMBUS_HAVE_STATUS_INPUT | PMBUS_HAVE_TEMP | PMBUS_HAVE_TEMP2
| PMBUS_HAVE_TEMP3 | PMBUS_HAVE_STATUS_TEMP
| PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT
- | PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT | PMBUS_HAVE_POUT,
- .func[1] = PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT
- | PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT | PMBUS_HAVE_POUT,
- .groups = attribute_groups,
+ | PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT | PMBUS_HAVE_POUT
+ | PMBUS_HAVE_VMON,
+ .func[1] = PMBUS_HAVE_IIN | PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT
+ | PMBUS_HAVE_TEMP | PMBUS_HAVE_TEMP3 | PMBUS_HAVE_STATUS_TEMP
+ | PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT | PMBUS_HAVE_IOUT
+ | PMBUS_HAVE_STATUS_IOUT | PMBUS_HAVE_POUT,
+ .func[2] = PMBUS_HAVE_IIN | PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT
+ | PMBUS_HAVE_TEMP | PMBUS_HAVE_TEMP3 | PMBUS_HAVE_STATUS_TEMP
+ | PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT | PMBUS_HAVE_IOUT
+ | PMBUS_HAVE_STATUS_IOUT | PMBUS_HAVE_POUT,
};
static int isl68137_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
- return pmbus_do_probe(client, id, &isl68137_info);
+ struct pmbus_driver_info *info;
+
+ info = devm_kzalloc(&client->dev, sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+ memcpy(info, &raa_dmpvr_info, sizeof(*info));
+
+ switch (id->driver_data) {
+ case isl68137:
+ info->pages = 2;
+ info->R[PSC_VOLTAGE_IN] = 3;
+ info->func[0] &= ~PMBUS_HAVE_VMON;
+ info->func[1] = PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT
+ | PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT
+ | PMBUS_HAVE_POUT;
+ info->groups = isl68137_attribute_groups;
+ break;
+ case raa_dmpvr2_1rail:
+ info->pages = 1;
+ info->read_word_data = raa_dmpvr2_read_word_data;
+ break;
+ case raa_dmpvr2_2rail:
+ info->pages = 2;
+ info->read_word_data = raa_dmpvr2_read_word_data;
+ break;
+ case raa_dmpvr2_3rail:
+ info->read_word_data = raa_dmpvr2_read_word_data;
+ break;
+ case raa_dmpvr2_hv:
+ info->pages = 1;
+ info->R[PSC_VOLTAGE_IN] = 1;
+ info->m[PSC_VOLTAGE_OUT] = 2;
+ info->R[PSC_VOLTAGE_OUT] = 2;
+ info->m[PSC_CURRENT_IN] = 2;
+ info->m[PSC_POWER] = 2;
+ info->R[PSC_POWER] = -1;
+ info->read_word_data = raa_dmpvr2_read_word_data;
+ break;
+ default:
+ return -ENODEV;
+ }
+
+ return pmbus_do_probe(client, id, info);
}
-static const struct i2c_device_id isl68137_id[] = {
- {"isl68137", 0},
+static const struct i2c_device_id raa_dmpvr_id[] = {
+ {"isl68137", isl68137},
+ {"raa_dmpvr2_1rail", raa_dmpvr2_1rail},
+ {"raa_dmpvr2_2rail", raa_dmpvr2_2rail},
+ {"raa_dmpvr2_3rail", raa_dmpvr2_3rail},
+ {"raa_dmpvr2_hv", raa_dmpvr2_hv},
{}
};
-MODULE_DEVICE_TABLE(i2c, isl68137_id);
+MODULE_DEVICE_TABLE(i2c, raa_dmpvr_id);
/* This is the driver that will be inserted */
static struct i2c_driver isl68137_driver = {
@@ -159,11 +241,11 @@ static struct i2c_driver isl68137_driver = {
},
.probe = isl68137_probe,
.remove = pmbus_do_remove,
- .id_table = isl68137_id,
+ .id_table = raa_dmpvr_id,
};
module_i2c_driver(isl68137_driver);
MODULE_AUTHOR("Maxim Sloyko <maxims@google.com>");
-MODULE_DESCRIPTION("PMBus driver for Intersil ISL68137");
+MODULE_DESCRIPTION("PMBus driver for Renesas digital multiphase voltage regulators");
MODULE_LICENSE("GPL");
diff --git a/drivers/hwmon/pmbus/lm25066.c b/drivers/hwmon/pmbus/lm25066.c
index 05fce86..9e4cf08 100644
--- a/drivers/hwmon/pmbus/lm25066.c
+++ b/drivers/hwmon/pmbus/lm25066.c
@@ -211,7 +211,8 @@ struct lm25066_data {
#define to_lm25066_data(x) container_of(x, struct lm25066_data, info)
-static int lm25066_read_word_data(struct i2c_client *client, int page, int reg)
+static int lm25066_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
{
const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
const struct lm25066_data *data = to_lm25066_data(info);
@@ -219,7 +220,7 @@ static int lm25066_read_word_data(struct i2c_client *client, int page, int reg)
switch (reg) {
case PMBUS_VIRT_READ_VMON:
- ret = pmbus_read_word_data(client, 0, LM25066_READ_VAUX);
+ ret = pmbus_read_word_data(client, 0, 0xff, LM25066_READ_VAUX);
if (ret < 0)
break;
/* Adjust returned value to match VIN coefficients */
@@ -244,33 +245,40 @@ static int lm25066_read_word_data(struct i2c_client *client, int page, int reg)
}
break;
case PMBUS_READ_IIN:
- ret = pmbus_read_word_data(client, 0, LM25066_MFR_READ_IIN);
+ ret = pmbus_read_word_data(client, 0, 0xff,
+ LM25066_MFR_READ_IIN);
break;
case PMBUS_READ_PIN:
- ret = pmbus_read_word_data(client, 0, LM25066_MFR_READ_PIN);
+ ret = pmbus_read_word_data(client, 0, 0xff,
+ LM25066_MFR_READ_PIN);
break;
case PMBUS_IIN_OC_WARN_LIMIT:
- ret = pmbus_read_word_data(client, 0,
+ ret = pmbus_read_word_data(client, 0, 0xff,
LM25066_MFR_IIN_OC_WARN_LIMIT);
break;
case PMBUS_PIN_OP_WARN_LIMIT:
- ret = pmbus_read_word_data(client, 0,
+ ret = pmbus_read_word_data(client, 0, 0xff,
LM25066_MFR_PIN_OP_WARN_LIMIT);
break;
case PMBUS_VIRT_READ_VIN_AVG:
- ret = pmbus_read_word_data(client, 0, LM25066_READ_AVG_VIN);
+ ret = pmbus_read_word_data(client, 0, 0xff,
+ LM25066_READ_AVG_VIN);
break;
case PMBUS_VIRT_READ_VOUT_AVG:
- ret = pmbus_read_word_data(client, 0, LM25066_READ_AVG_VOUT);
+ ret = pmbus_read_word_data(client, 0, 0xff,
+ LM25066_READ_AVG_VOUT);
break;
case PMBUS_VIRT_READ_IIN_AVG:
- ret = pmbus_read_word_data(client, 0, LM25066_READ_AVG_IIN);
+ ret = pmbus_read_word_data(client, 0, 0xff,
+ LM25066_READ_AVG_IIN);
break;
case PMBUS_VIRT_READ_PIN_AVG:
- ret = pmbus_read_word_data(client, 0, LM25066_READ_AVG_PIN);
+ ret = pmbus_read_word_data(client, 0, 0xff,
+ LM25066_READ_AVG_PIN);
break;
case PMBUS_VIRT_READ_PIN_MAX:
- ret = pmbus_read_word_data(client, 0, LM25066_READ_PIN_PEAK);
+ ret = pmbus_read_word_data(client, 0, 0xff,
+ LM25066_READ_PIN_PEAK);
break;
case PMBUS_VIRT_RESET_PIN_HISTORY:
ret = 0;
@@ -288,13 +296,14 @@ static int lm25066_read_word_data(struct i2c_client *client, int page, int reg)
return ret;
}
-static int lm25056_read_word_data(struct i2c_client *client, int page, int reg)
+static int lm25056_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
{
int ret;
switch (reg) {
case PMBUS_VIRT_VMON_UV_WARN_LIMIT:
- ret = pmbus_read_word_data(client, 0,
+ ret = pmbus_read_word_data(client, 0, 0xff,
LM25056_VAUX_UV_WARN_LIMIT);
if (ret < 0)
break;
@@ -302,7 +311,7 @@ static int lm25056_read_word_data(struct i2c_client *client, int page, int reg)
ret = DIV_ROUND_CLOSEST(ret * 293, 6140);
break;
case PMBUS_VIRT_VMON_OV_WARN_LIMIT:
- ret = pmbus_read_word_data(client, 0,
+ ret = pmbus_read_word_data(client, 0, 0xff,
LM25056_VAUX_OV_WARN_LIMIT);
if (ret < 0)
break;
@@ -310,7 +319,7 @@ static int lm25056_read_word_data(struct i2c_client *client, int page, int reg)
ret = DIV_ROUND_CLOSEST(ret * 293, 6140);
break;
default:
- ret = lm25066_read_word_data(client, page, reg);
+ ret = lm25066_read_word_data(client, page, phase, reg);
break;
}
return ret;
diff --git a/drivers/hwmon/pmbus/ltc2978.c b/drivers/hwmon/pmbus/ltc2978.c
index f01f488..7b0e6b3 100644
--- a/drivers/hwmon/pmbus/ltc2978.c
+++ b/drivers/hwmon/pmbus/ltc2978.c
@@ -19,8 +19,15 @@
#include <linux/regulator/driver.h>
#include "pmbus.h"
-enum chips { ltc2974, ltc2975, ltc2977, ltc2978, ltc2980, ltc3880, ltc3882,
- ltc3883, ltc3886, ltc3887, ltm2987, ltm4675, ltm4676, ltm4686 };
+enum chips {
+ /* Managers */
+ ltc2972, ltc2974, ltc2975, ltc2977, ltc2978, ltc2979, ltc2980,
+ /* Controllers */
+ ltc3880, ltc3882, ltc3883, ltc3884, ltc3886, ltc3887, ltc3889, ltc7880,
+ /* Modules */
+ ltm2987, ltm4664, ltm4675, ltm4676, ltm4677, ltm4678, ltm4680, ltm4686,
+ ltm4700,
+};
/* Common for all chips */
#define LTC2978_MFR_VOUT_PEAK 0xdd
@@ -43,9 +50,10 @@ enum chips { ltc2974, ltc2975, ltc2977, ltc2978, ltc2980, ltc3880, ltc3882,
#define LTC3880_MFR_CLEAR_PEAKS 0xe3
#define LTC3880_MFR_TEMPERATURE2_PEAK 0xf4
-/* LTC3883 and LTC3886 only */
+/* LTC3883, LTC3884, LTC3886, LTC3889 and LTC7880 only */
#define LTC3883_MFR_IIN_PEAK 0xe1
+
/* LTC2975 only */
#define LTC2975_MFR_IIN_PEAK 0xc4
#define LTC2975_MFR_IIN_MIN 0xc5
@@ -54,27 +62,41 @@ enum chips { ltc2974, ltc2975, ltc2977, ltc2978, ltc2980, ltc3880, ltc3882,
#define LTC2978_ID_MASK 0xfff0
+#define LTC2972_ID 0x0310
#define LTC2974_ID 0x0210
#define LTC2975_ID 0x0220
#define LTC2977_ID 0x0130
#define LTC2978_ID_REV1 0x0110 /* Early revision */
#define LTC2978_ID_REV2 0x0120
+#define LTC2979_ID_A 0x8060
+#define LTC2979_ID_B 0x8070
#define LTC2980_ID_A 0x8030 /* A/B for two die IDs */
#define LTC2980_ID_B 0x8040
#define LTC3880_ID 0x4020
#define LTC3882_ID 0x4200
#define LTC3882_ID_D1 0x4240 /* Dash 1 */
#define LTC3883_ID 0x4300
+#define LTC3884_ID 0x4C00
#define LTC3886_ID 0x4600
#define LTC3887_ID 0x4700
#define LTM2987_ID_A 0x8010 /* A/B for two die IDs */
#define LTM2987_ID_B 0x8020
+#define LTC3889_ID 0x4900
+#define LTC7880_ID 0x49E0
+#define LTM4664_ID 0x4120
#define LTM4675_ID 0x47a0
#define LTM4676_ID_REV1 0x4400
#define LTM4676_ID_REV2 0x4480
#define LTM4676A_ID 0x47e0
+#define LTM4677_ID_REV1 0x47B0
+#define LTM4677_ID_REV2 0x47D0
+#define LTM4678_ID_REV1 0x4100
+#define LTM4678_ID_REV2 0x4110
+#define LTM4680_ID 0x4140
#define LTM4686_ID 0x4770
+#define LTM4700_ID 0x4130
+#define LTC2972_NUM_PAGES 2
#define LTC2974_NUM_PAGES 4
#define LTC2978_NUM_PAGES 8
#define LTC3880_NUM_PAGES 2
@@ -82,8 +104,8 @@ enum chips { ltc2974, ltc2975, ltc2977, ltc2978, ltc2980, ltc3880, ltc3882,
#define LTC_POLL_TIMEOUT 100 /* in milli-seconds */
-#define LTC_NOT_BUSY BIT(5)
-#define LTC_NOT_PENDING BIT(4)
+#define LTC_NOT_BUSY BIT(6)
+#define LTC_NOT_PENDING BIT(5)
/*
* LTC2978 clears peak data whenever the CLEAR_FAULTS command is executed, which
@@ -151,7 +173,8 @@ static int ltc_wait_ready(struct i2c_client *client)
return -ETIMEDOUT;
}
-static int ltc_read_word_data(struct i2c_client *client, int page, int reg)
+static int ltc_read_word_data(struct i2c_client *client, int page, int phase,
+ int reg)
{
int ret;
@@ -159,7 +182,7 @@ static int ltc_read_word_data(struct i2c_client *client, int page, int reg)
if (ret < 0)
return ret;
- return pmbus_read_word_data(client, page, reg);
+ return pmbus_read_word_data(client, page, 0xff, reg);
}
static int ltc_read_byte_data(struct i2c_client *client, int page, int reg)
@@ -202,7 +225,7 @@ static int ltc_get_max(struct ltc2978_data *data, struct i2c_client *client,
{
int ret;
- ret = ltc_read_word_data(client, page, reg);
+ ret = ltc_read_word_data(client, page, 0xff, reg);
if (ret >= 0) {
if (lin11_to_val(ret) > lin11_to_val(*pmax))
*pmax = ret;
@@ -216,7 +239,7 @@ static int ltc_get_min(struct ltc2978_data *data, struct i2c_client *client,
{
int ret;
- ret = ltc_read_word_data(client, page, reg);
+ ret = ltc_read_word_data(client, page, 0xff, reg);
if (ret >= 0) {
if (lin11_to_val(ret) < lin11_to_val(*pmin))
*pmin = ret;
@@ -238,7 +261,8 @@ static int ltc2978_read_word_data_common(struct i2c_client *client, int page,
&data->vin_max);
break;
case PMBUS_VIRT_READ_VOUT_MAX:
- ret = ltc_read_word_data(client, page, LTC2978_MFR_VOUT_PEAK);
+ ret = ltc_read_word_data(client, page, 0xff,
+ LTC2978_MFR_VOUT_PEAK);
if (ret >= 0) {
/*
* VOUT is 16 bit unsigned with fixed exponent,
@@ -269,7 +293,8 @@ static int ltc2978_read_word_data_common(struct i2c_client *client, int page,
return ret;
}
-static int ltc2978_read_word_data(struct i2c_client *client, int page, int reg)
+static int ltc2978_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
{
const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
struct ltc2978_data *data = to_ltc2978_data(info);
@@ -281,7 +306,8 @@ static int ltc2978_read_word_data(struct i2c_client *client, int page, int reg)
&data->vin_min);
break;
case PMBUS_VIRT_READ_VOUT_MIN:
- ret = ltc_read_word_data(client, page, LTC2978_MFR_VOUT_MIN);
+ ret = ltc_read_word_data(client, page, phase,
+ LTC2978_MFR_VOUT_MIN);
if (ret >= 0) {
/*
* VOUT_MIN is known to not be supported on some lots
@@ -314,7 +340,8 @@ static int ltc2978_read_word_data(struct i2c_client *client, int page, int reg)
return ret;
}
-static int ltc2974_read_word_data(struct i2c_client *client, int page, int reg)
+static int ltc2974_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
{
const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
struct ltc2978_data *data = to_ltc2978_data(info);
@@ -333,13 +360,14 @@ static int ltc2974_read_word_data(struct i2c_client *client, int page, int reg)
ret = 0;
break;
default:
- ret = ltc2978_read_word_data(client, page, reg);
+ ret = ltc2978_read_word_data(client, page, phase, reg);
break;
}
return ret;
}
-static int ltc2975_read_word_data(struct i2c_client *client, int page, int reg)
+static int ltc2975_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
{
const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
struct ltc2978_data *data = to_ltc2978_data(info);
@@ -367,13 +395,14 @@ static int ltc2975_read_word_data(struct i2c_client *client, int page, int reg)
ret = 0;
break;
default:
- ret = ltc2978_read_word_data(client, page, reg);
+ ret = ltc2978_read_word_data(client, page, phase, reg);
break;
}
return ret;
}
-static int ltc3880_read_word_data(struct i2c_client *client, int page, int reg)
+static int ltc3880_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
{
const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
struct ltc2978_data *data = to_ltc2978_data(info);
@@ -405,7 +434,8 @@ static int ltc3880_read_word_data(struct i2c_client *client, int page, int reg)
return ret;
}
-static int ltc3883_read_word_data(struct i2c_client *client, int page, int reg)
+static int ltc3883_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
{
const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
struct ltc2978_data *data = to_ltc2978_data(info);
@@ -420,7 +450,7 @@ static int ltc3883_read_word_data(struct i2c_client *client, int page, int reg)
ret = 0;
break;
default:
- ret = ltc3880_read_word_data(client, page, reg);
+ ret = ltc3880_read_word_data(client, page, phase, reg);
break;
}
return ret;
@@ -492,20 +522,30 @@ static int ltc2978_write_word_data(struct i2c_client *client, int page,
}
static const struct i2c_device_id ltc2978_id[] = {
+ {"ltc2972", ltc2972},
{"ltc2974", ltc2974},
{"ltc2975", ltc2975},
{"ltc2977", ltc2977},
{"ltc2978", ltc2978},
+ {"ltc2979", ltc2979},
{"ltc2980", ltc2980},
{"ltc3880", ltc3880},
{"ltc3882", ltc3882},
{"ltc3883", ltc3883},
+ {"ltc3884", ltc3884},
{"ltc3886", ltc3886},
{"ltc3887", ltc3887},
+ {"ltc3889", ltc3889},
+ {"ltc7880", ltc7880},
{"ltm2987", ltm2987},
+ {"ltm4664", ltm4664},
{"ltm4675", ltm4675},
{"ltm4676", ltm4676},
+ {"ltm4677", ltm4677},
+ {"ltm4678", ltm4678},
+ {"ltm4680", ltm4680},
{"ltm4686", ltm4686},
+ {"ltm4700", ltm4700},
{}
};
MODULE_DEVICE_TABLE(i2c, ltc2978_id);
@@ -555,7 +595,9 @@ static int ltc2978_get_id(struct i2c_client *client)
chip_id &= LTC2978_ID_MASK;
- if (chip_id == LTC2974_ID)
+ if (chip_id == LTC2972_ID)
+ return ltc2972;
+ else if (chip_id == LTC2974_ID)
return ltc2974;
else if (chip_id == LTC2975_ID)
return ltc2975;
@@ -563,6 +605,8 @@ static int ltc2978_get_id(struct i2c_client *client)
return ltc2977;
else if (chip_id == LTC2978_ID_REV1 || chip_id == LTC2978_ID_REV2)
return ltc2978;
+ else if (chip_id == LTC2979_ID_A || chip_id == LTC2979_ID_B)
+ return ltc2979;
else if (chip_id == LTC2980_ID_A || chip_id == LTC2980_ID_B)
return ltc2980;
else if (chip_id == LTC3880_ID)
@@ -571,19 +615,35 @@ static int ltc2978_get_id(struct i2c_client *client)
return ltc3882;
else if (chip_id == LTC3883_ID)
return ltc3883;
+ else if (chip_id == LTC3884_ID)
+ return ltc3884;
else if (chip_id == LTC3886_ID)
return ltc3886;
else if (chip_id == LTC3887_ID)
return ltc3887;
+ else if (chip_id == LTC3889_ID)
+ return ltc3889;
+ else if (chip_id == LTC7880_ID)
+ return ltc7880;
else if (chip_id == LTM2987_ID_A || chip_id == LTM2987_ID_B)
return ltm2987;
+ else if (chip_id == LTM4664_ID)
+ return ltm4664;
else if (chip_id == LTM4675_ID)
return ltm4675;
else if (chip_id == LTM4676_ID_REV1 || chip_id == LTM4676_ID_REV2 ||
chip_id == LTM4676A_ID)
return ltm4676;
+ else if (chip_id == LTM4677_ID_REV1 || chip_id == LTM4677_ID_REV2)
+ return ltm4677;
+ else if (chip_id == LTM4678_ID_REV1 || chip_id == LTM4678_ID_REV2)
+ return ltm4678;
+ else if (chip_id == LTM4680_ID)
+ return ltm4680;
else if (chip_id == LTM4686_ID)
return ltm4686;
+ else if (chip_id == LTM4700_ID)
+ return ltm4700;
dev_err(&client->dev, "Unsupported chip ID 0x%x\n", chip_id);
return -ENODEV;
@@ -637,6 +697,19 @@ static int ltc2978_probe(struct i2c_client *client,
data->temp2_max = 0x7c00;
switch (data->id) {
+ case ltc2972:
+ info->read_word_data = ltc2975_read_word_data;
+ info->pages = LTC2972_NUM_PAGES;
+ info->func[0] = PMBUS_HAVE_IIN | PMBUS_HAVE_PIN
+ | PMBUS_HAVE_VIN | PMBUS_HAVE_STATUS_INPUT
+ | PMBUS_HAVE_TEMP2;
+ for (i = 0; i < info->pages; i++) {
+ info->func[i] |= PMBUS_HAVE_VOUT
+ | PMBUS_HAVE_STATUS_VOUT | PMBUS_HAVE_POUT
+ | PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP
+ | PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT;
+ }
+ break;
case ltc2974:
info->read_word_data = ltc2974_read_word_data;
info->pages = LTC2974_NUM_PAGES;
@@ -662,8 +735,10 @@ static int ltc2978_probe(struct i2c_client *client,
| PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT;
}
break;
+
case ltc2977:
case ltc2978:
+ case ltc2979:
case ltc2980:
case ltm2987:
info->read_word_data = ltc2978_read_word_data;
@@ -680,6 +755,7 @@ static int ltc2978_probe(struct i2c_client *client,
case ltc3887:
case ltm4675:
case ltm4676:
+ case ltm4677:
case ltm4686:
data->features |= FEAT_CLEAR_PEAKS | FEAT_NEEDS_POLLING;
info->read_word_data = ltc3880_read_word_data;
@@ -721,7 +797,14 @@ static int ltc2978_probe(struct i2c_client *client,
| PMBUS_HAVE_PIN | PMBUS_HAVE_POUT | PMBUS_HAVE_TEMP
| PMBUS_HAVE_TEMP2 | PMBUS_HAVE_STATUS_TEMP;
break;
+ case ltc3884:
case ltc3886:
+ case ltc3889:
+ case ltc7880:
+ case ltm4664:
+ case ltm4678:
+ case ltm4680:
+ case ltm4700:
data->features |= FEAT_CLEAR_PEAKS | FEAT_NEEDS_POLLING;
info->read_word_data = ltc3883_read_word_data;
info->pages = LTC3880_NUM_PAGES;
@@ -752,22 +835,33 @@ static int ltc2978_probe(struct i2c_client *client,
return pmbus_do_probe(client, id, info);
}
+
#ifdef CONFIG_OF
static const struct of_device_id ltc2978_of_match[] = {
+ { .compatible = "lltc,ltc2972" },
{ .compatible = "lltc,ltc2974" },
{ .compatible = "lltc,ltc2975" },
{ .compatible = "lltc,ltc2977" },
{ .compatible = "lltc,ltc2978" },
+ { .compatible = "lltc,ltc2979" },
{ .compatible = "lltc,ltc2980" },
{ .compatible = "lltc,ltc3880" },
{ .compatible = "lltc,ltc3882" },
{ .compatible = "lltc,ltc3883" },
+ { .compatible = "lltc,ltc3884" },
{ .compatible = "lltc,ltc3886" },
{ .compatible = "lltc,ltc3887" },
+ { .compatible = "lltc,ltc3889" },
+ { .compatible = "lltc,ltc7880" },
{ .compatible = "lltc,ltm2987" },
+ { .compatible = "lltc,ltm4664" },
{ .compatible = "lltc,ltm4675" },
{ .compatible = "lltc,ltm4676" },
+ { .compatible = "lltc,ltm4677" },
+ { .compatible = "lltc,ltm4678" },
+ { .compatible = "lltc,ltm4680" },
{ .compatible = "lltc,ltm4686" },
+ { .compatible = "lltc,ltm4700" },
{ }
};
MODULE_DEVICE_TABLE(of, ltc2978_of_match);
diff --git a/drivers/hwmon/pmbus/ltc3815.c b/drivers/hwmon/pmbus/ltc3815.c
index b83a18a..3036263 100644
--- a/drivers/hwmon/pmbus/ltc3815.c
+++ b/drivers/hwmon/pmbus/ltc3815.c
@@ -55,7 +55,7 @@ static int ltc3815_write_byte(struct i2c_client *client, int page, u8 reg)
* LTC3815 does not support the CLEAR_FAULTS command.
* Emulate it by clearing the status register.
*/
- ret = pmbus_read_word_data(client, 0, PMBUS_STATUS_WORD);
+ ret = pmbus_read_word_data(client, 0, 0xff, PMBUS_STATUS_WORD);
if (ret > 0) {
pmbus_write_word_data(client, 0, PMBUS_STATUS_WORD,
ret);
@@ -69,25 +69,31 @@ static int ltc3815_write_byte(struct i2c_client *client, int page, u8 reg)
return ret;
}
-static int ltc3815_read_word_data(struct i2c_client *client, int page, int reg)
+static int ltc3815_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
{
int ret;
switch (reg) {
case PMBUS_VIRT_READ_VIN_MAX:
- ret = pmbus_read_word_data(client, page, LTC3815_MFR_VIN_PEAK);
+ ret = pmbus_read_word_data(client, page, phase,
+ LTC3815_MFR_VIN_PEAK);
break;
case PMBUS_VIRT_READ_VOUT_MAX:
- ret = pmbus_read_word_data(client, page, LTC3815_MFR_VOUT_PEAK);
+ ret = pmbus_read_word_data(client, page, phase,
+ LTC3815_MFR_VOUT_PEAK);
break;
case PMBUS_VIRT_READ_TEMP_MAX:
- ret = pmbus_read_word_data(client, page, LTC3815_MFR_TEMP_PEAK);
+ ret = pmbus_read_word_data(client, page, phase,
+ LTC3815_MFR_TEMP_PEAK);
break;
case PMBUS_VIRT_READ_IOUT_MAX:
- ret = pmbus_read_word_data(client, page, LTC3815_MFR_IOUT_PEAK);
+ ret = pmbus_read_word_data(client, page, phase,
+ LTC3815_MFR_IOUT_PEAK);
break;
case PMBUS_VIRT_READ_IIN_MAX:
- ret = pmbus_read_word_data(client, page, LTC3815_MFR_IIN_PEAK);
+ ret = pmbus_read_word_data(client, page, phase,
+ LTC3815_MFR_IIN_PEAK);
break;
case PMBUS_VIRT_RESET_VOUT_HISTORY:
case PMBUS_VIRT_RESET_VIN_HISTORY:
diff --git a/drivers/hwmon/pmbus/max16064.c b/drivers/hwmon/pmbus/max16064.c
index b3e7b8d..288e93f 100644
--- a/drivers/hwmon/pmbus/max16064.c
+++ b/drivers/hwmon/pmbus/max16064.c
@@ -15,17 +15,18 @@
#define MAX16064_MFR_VOUT_PEAK 0xd4
#define MAX16064_MFR_TEMPERATURE_PEAK 0xd6
-static int max16064_read_word_data(struct i2c_client *client, int page, int reg)
+static int max16064_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
{
int ret;
switch (reg) {
case PMBUS_VIRT_READ_VOUT_MAX:
- ret = pmbus_read_word_data(client, page,
+ ret = pmbus_read_word_data(client, page, phase,
MAX16064_MFR_VOUT_PEAK);
break;
case PMBUS_VIRT_READ_TEMP_MAX:
- ret = pmbus_read_word_data(client, page,
+ ret = pmbus_read_word_data(client, page, phase,
MAX16064_MFR_TEMPERATURE_PEAK);
break;
case PMBUS_VIRT_RESET_VOUT_HISTORY:
diff --git a/drivers/hwmon/pmbus/max20730.c b/drivers/hwmon/pmbus/max20730.c
index 294e221..c0bb054 100644
--- a/drivers/hwmon/pmbus/max20730.c
+++ b/drivers/hwmon/pmbus/max20730.c
@@ -85,7 +85,8 @@ static u32 max_current[][5] = {
[max20743] = { 18900, 24100, 29200, 34100 },
};
-static int max20730_read_word_data(struct i2c_client *client, int page, int reg)
+static int max20730_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
{
const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
const struct max20730_data *data = to_max20730_data(info);
diff --git a/drivers/hwmon/pmbus/max31785.c b/drivers/hwmon/pmbus/max31785.c
index 254b0f9..d9aa5c8 100644
--- a/drivers/hwmon/pmbus/max31785.c
+++ b/drivers/hwmon/pmbus/max31785.c
@@ -72,7 +72,7 @@ static int max31785_read_long_data(struct i2c_client *client, int page,
cmdbuf[0] = reg;
- rc = pmbus_set_page(client, page);
+ rc = pmbus_set_page(client, page, 0xff);
if (rc < 0)
return rc;
@@ -110,7 +110,7 @@ static int max31785_get_pwm_mode(struct i2c_client *client, int page)
if (config < 0)
return config;
- command = pmbus_read_word_data(client, page, PMBUS_FAN_COMMAND_1);
+ command = pmbus_read_word_data(client, page, 0xff, PMBUS_FAN_COMMAND_1);
if (command < 0)
return command;
@@ -126,7 +126,7 @@ static int max31785_get_pwm_mode(struct i2c_client *client, int page)
}
static int max31785_read_word_data(struct i2c_client *client, int page,
- int reg)
+ int phase, int reg)
{
u32 val;
int rv;
diff --git a/drivers/hwmon/pmbus/max34440.c b/drivers/hwmon/pmbus/max34440.c
index 5c63a66..18b4e07 100644
--- a/drivers/hwmon/pmbus/max34440.c
+++ b/drivers/hwmon/pmbus/max34440.c
@@ -41,7 +41,8 @@ struct max34440_data {
#define to_max34440_data(x) container_of(x, struct max34440_data, info)
-static int max34440_read_word_data(struct i2c_client *client, int page, int reg)
+static int max34440_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
{
int ret;
const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
@@ -49,44 +50,44 @@ static int max34440_read_word_data(struct i2c_client *client, int page, int reg)
switch (reg) {
case PMBUS_VIRT_READ_VOUT_MIN:
- ret = pmbus_read_word_data(client, page,
+ ret = pmbus_read_word_data(client, page, phase,
MAX34440_MFR_VOUT_MIN);
break;
case PMBUS_VIRT_READ_VOUT_MAX:
- ret = pmbus_read_word_data(client, page,
+ ret = pmbus_read_word_data(client, page, phase,
MAX34440_MFR_VOUT_PEAK);
break;
case PMBUS_VIRT_READ_IOUT_AVG:
if (data->id != max34446 && data->id != max34451)
return -ENXIO;
- ret = pmbus_read_word_data(client, page,
+ ret = pmbus_read_word_data(client, page, phase,
MAX34446_MFR_IOUT_AVG);
break;
case PMBUS_VIRT_READ_IOUT_MAX:
- ret = pmbus_read_word_data(client, page,
+ ret = pmbus_read_word_data(client, page, phase,
MAX34440_MFR_IOUT_PEAK);
break;
case PMBUS_VIRT_READ_POUT_AVG:
if (data->id != max34446)
return -ENXIO;
- ret = pmbus_read_word_data(client, page,
+ ret = pmbus_read_word_data(client, page, phase,
MAX34446_MFR_POUT_AVG);
break;
case PMBUS_VIRT_READ_POUT_MAX:
if (data->id != max34446)
return -ENXIO;
- ret = pmbus_read_word_data(client, page,
+ ret = pmbus_read_word_data(client, page, phase,
MAX34446_MFR_POUT_PEAK);
break;
case PMBUS_VIRT_READ_TEMP_AVG:
if (data->id != max34446 && data->id != max34460 &&
data->id != max34461)
return -ENXIO;
- ret = pmbus_read_word_data(client, page,
+ ret = pmbus_read_word_data(client, page, phase,
MAX34446_MFR_TEMPERATURE_AVG);
break;
case PMBUS_VIRT_READ_TEMP_MAX:
- ret = pmbus_read_word_data(client, page,
+ ret = pmbus_read_word_data(client, page, phase,
MAX34440_MFR_TEMPERATURE_PEAK);
break;
case PMBUS_VIRT_RESET_POUT_HISTORY:
@@ -159,14 +160,14 @@ static int max34440_read_byte_data(struct i2c_client *client, int page, int reg)
int mfg_status;
if (page >= 0) {
- ret = pmbus_set_page(client, page);
+ ret = pmbus_set_page(client, page, 0xff);
if (ret < 0)
return ret;
}
switch (reg) {
case PMBUS_STATUS_IOUT:
- mfg_status = pmbus_read_word_data(client, 0,
+ mfg_status = pmbus_read_word_data(client, 0, 0xff,
PMBUS_STATUS_MFR_SPECIFIC);
if (mfg_status < 0)
return mfg_status;
@@ -176,7 +177,7 @@ static int max34440_read_byte_data(struct i2c_client *client, int page, int reg)
ret |= PB_IOUT_OC_FAULT;
break;
case PMBUS_STATUS_TEMPERATURE:
- mfg_status = pmbus_read_word_data(client, 0,
+ mfg_status = pmbus_read_word_data(client, 0, 0xff,
PMBUS_STATUS_MFR_SPECIFIC);
if (mfg_status < 0)
return mfg_status;
diff --git a/drivers/hwmon/pmbus/max8688.c b/drivers/hwmon/pmbus/max8688.c
index bc5f4cb..643ccfc 100644
--- a/drivers/hwmon/pmbus/max8688.c
+++ b/drivers/hwmon/pmbus/max8688.c
@@ -28,7 +28,8 @@
#define MAX8688_STATUS_OT_FAULT BIT(13)
#define MAX8688_STATUS_OT_WARNING BIT(14)
-static int max8688_read_word_data(struct i2c_client *client, int page, int reg)
+static int max8688_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
{
int ret;
@@ -37,13 +38,15 @@ static int max8688_read_word_data(struct i2c_client *client, int page, int reg)
switch (reg) {
case PMBUS_VIRT_READ_VOUT_MAX:
- ret = pmbus_read_word_data(client, 0, MAX8688_MFR_VOUT_PEAK);
+ ret = pmbus_read_word_data(client, 0, 0xff,
+ MAX8688_MFR_VOUT_PEAK);
break;
case PMBUS_VIRT_READ_IOUT_MAX:
- ret = pmbus_read_word_data(client, 0, MAX8688_MFR_IOUT_PEAK);
+ ret = pmbus_read_word_data(client, 0, 0xff,
+ MAX8688_MFR_IOUT_PEAK);
break;
case PMBUS_VIRT_READ_TEMP_MAX:
- ret = pmbus_read_word_data(client, 0,
+ ret = pmbus_read_word_data(client, 0, 0xff,
MAX8688_MFR_TEMPERATURE_PEAK);
break;
case PMBUS_VIRT_RESET_VOUT_HISTORY:
@@ -94,7 +97,7 @@ static int max8688_read_byte_data(struct i2c_client *client, int page, int reg)
switch (reg) {
case PMBUS_STATUS_VOUT:
- mfg_status = pmbus_read_word_data(client, 0,
+ mfg_status = pmbus_read_word_data(client, 0, 0xff,
MAX8688_MFG_STATUS);
if (mfg_status < 0)
return mfg_status;
@@ -108,7 +111,7 @@ static int max8688_read_byte_data(struct i2c_client *client, int page, int reg)
ret |= PB_VOLTAGE_OV_FAULT;
break;
case PMBUS_STATUS_IOUT:
- mfg_status = pmbus_read_word_data(client, 0,
+ mfg_status = pmbus_read_word_data(client, 0, 0xff,
MAX8688_MFG_STATUS);
if (mfg_status < 0)
return mfg_status;
@@ -120,7 +123,7 @@ static int max8688_read_byte_data(struct i2c_client *client, int page, int reg)
ret |= PB_IOUT_OC_FAULT;
break;
case PMBUS_STATUS_TEMPERATURE:
- mfg_status = pmbus_read_word_data(client, 0,
+ mfg_status = pmbus_read_word_data(client, 0, 0xff,
MAX8688_MFG_STATUS);
if (mfg_status < 0)
return mfg_status;
diff --git a/drivers/hwmon/pmbus/pmbus.c b/drivers/hwmon/pmbus/pmbus.c
index 51e8312b..6d384e8 100644
--- a/drivers/hwmon/pmbus/pmbus.c
+++ b/drivers/hwmon/pmbus/pmbus.c
@@ -102,10 +102,10 @@ static int pmbus_identify(struct i2c_client *client,
int page;
for (page = 1; page < PMBUS_PAGES; page++) {
- if (pmbus_set_page(client, page) < 0)
+ if (pmbus_set_page(client, page, 0xff) < 0)
break;
}
- pmbus_set_page(client, 0);
+ pmbus_set_page(client, 0, 0xff);
info->pages = page;
} else {
info->pages = 1;
diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h
index 13b34bd..18e06fc 100644
--- a/drivers/hwmon/pmbus/pmbus.h
+++ b/drivers/hwmon/pmbus/pmbus.h
@@ -119,6 +119,9 @@ enum pmbus_regs {
PMBUS_MFR_DATE = 0x9D,
PMBUS_MFR_SERIAL = 0x9E,
+ PMBUS_IC_DEVICE_ID = 0xAD,
+ PMBUS_IC_DEVICE_REV = 0xAE,
+
/*
* Virtual registers.
* Useful to support attributes which are not supported by standard PMBus
@@ -359,6 +362,7 @@ enum pmbus_sensor_classes {
};
#define PMBUS_PAGES 32 /* Per PMBus specification */
+#define PMBUS_PHASES 8 /* Maximum number of phases per page */
/* Functionality bit mask */
#define PMBUS_HAVE_VIN BIT(0)
@@ -385,13 +389,15 @@ enum pmbus_sensor_classes {
#define PMBUS_HAVE_PWM34 BIT(21)
#define PMBUS_HAVE_SAMPLES BIT(22)
-#define PMBUS_PAGE_VIRTUAL BIT(31)
+#define PMBUS_PHASE_VIRTUAL BIT(30) /* Phases on this page are virtual */
+#define PMBUS_PAGE_VIRTUAL BIT(31) /* Page is virtual */
enum pmbus_data_format { linear = 0, direct, vid };
enum vrm_version { vr11 = 0, vr12, vr13, imvp9, amd625mv };
struct pmbus_driver_info {
int pages; /* Total number of pages */
+ u8 phases[PMBUS_PAGES]; /* Number of phases per page */
enum pmbus_data_format format[PSC_NUM_CLASSES];
enum vrm_version vrm_version[PMBUS_PAGES]; /* vrm version per page */
/*
@@ -403,6 +409,7 @@ struct pmbus_driver_info {
int R[PSC_NUM_CLASSES]; /* exponent */
u32 func[PMBUS_PAGES]; /* Functionality, per page */
+ u32 pfunc[PMBUS_PHASES];/* Functionality, per phase */
/*
* The following functions map manufacturing specific register values
* to PMBus standard register values. Specify only if mapping is
@@ -415,7 +422,8 @@ struct pmbus_driver_info {
* the standard register.
*/
int (*read_byte_data)(struct i2c_client *client, int page, int reg);
- int (*read_word_data)(struct i2c_client *client, int page, int reg);
+ int (*read_word_data)(struct i2c_client *client, int page, int phase,
+ int reg);
int (*write_word_data)(struct i2c_client *client, int page, int reg,
u16 word);
int (*write_byte)(struct i2c_client *client, int page, u8 value);
@@ -454,9 +462,11 @@ extern const struct regulator_ops pmbus_regulator_ops;
/* Function declarations */
void pmbus_clear_cache(struct i2c_client *client);
-int pmbus_set_page(struct i2c_client *client, int page);
-int pmbus_read_word_data(struct i2c_client *client, int page, u8 reg);
-int pmbus_write_word_data(struct i2c_client *client, int page, u8 reg, u16 word);
+int pmbus_set_page(struct i2c_client *client, int page, int phase);
+int pmbus_read_word_data(struct i2c_client *client, int page, int phase,
+ u8 reg);
+int pmbus_write_word_data(struct i2c_client *client, int page, u8 reg,
+ u16 word);
int pmbus_read_byte_data(struct i2c_client *client, int page, u8 reg);
int pmbus_write_byte(struct i2c_client *client, int page, u8 value);
int pmbus_write_byte_data(struct i2c_client *client, int page, u8 reg,
diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index d9c17fe..8d321bf 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -49,6 +49,7 @@ struct pmbus_sensor {
char name[PMBUS_NAME_SIZE]; /* sysfs sensor name */
struct device_attribute attribute;
u8 page; /* page number */
+ u8 phase; /* phase number, 0xff for all phases */
u16 reg; /* register */
enum pmbus_sensor_classes class; /* sensor class */
bool update; /* runtime sensor update needed */
@@ -109,6 +110,7 @@ struct pmbus_data {
int (*read_status)(struct i2c_client *client, int page);
u8 currpage;
+ u8 currphase; /* current phase, 0xff for all */
};
struct pmbus_debugfs_entry {
@@ -146,15 +148,16 @@ void pmbus_clear_cache(struct i2c_client *client)
}
EXPORT_SYMBOL_GPL(pmbus_clear_cache);
-int pmbus_set_page(struct i2c_client *client, int page)
+int pmbus_set_page(struct i2c_client *client, int page, int phase)
{
struct pmbus_data *data = i2c_get_clientdata(client);
int rv;
- if (page < 0 || page == data->currpage)
+ if (page < 0)
return 0;
- if (!(data->info->func[page] & PMBUS_PAGE_VIRTUAL)) {
+ if (!(data->info->func[page] & PMBUS_PAGE_VIRTUAL) &&
+ data->info->pages > 1 && page != data->currpage) {
rv = i2c_smbus_write_byte_data(client, PMBUS_PAGE, page);
if (rv < 0)
return rv;
@@ -166,9 +169,17 @@ int pmbus_set_page(struct i2c_client *client, int page)
if (rv != page)
return -EIO;
}
-
data->currpage = page;
+ if (data->info->phases[page] && data->currphase != phase &&
+ !(data->info->func[page] & PMBUS_PHASE_VIRTUAL)) {
+ rv = i2c_smbus_write_byte_data(client, PMBUS_PHASE,
+ phase);
+ if (rv)
+ return rv;
+ }
+ data->currphase = phase;
+
return 0;
}
EXPORT_SYMBOL_GPL(pmbus_set_page);
@@ -177,7 +188,7 @@ int pmbus_write_byte(struct i2c_client *client, int page, u8 value)
{
int rv;
- rv = pmbus_set_page(client, page);
+ rv = pmbus_set_page(client, page, 0xff);
if (rv < 0)
return rv;
@@ -208,7 +219,7 @@ int pmbus_write_word_data(struct i2c_client *client, int page, u8 reg,
{
int rv;
- rv = pmbus_set_page(client, page);
+ rv = pmbus_set_page(client, page, 0xff);
if (rv < 0)
return rv;
@@ -286,11 +297,11 @@ int pmbus_update_fan(struct i2c_client *client, int page, int id,
}
EXPORT_SYMBOL_GPL(pmbus_update_fan);
-int pmbus_read_word_data(struct i2c_client *client, int page, u8 reg)
+int pmbus_read_word_data(struct i2c_client *client, int page, int phase, u8 reg)
{
int rv;
- rv = pmbus_set_page(client, page);
+ rv = pmbus_set_page(client, page, phase);
if (rv < 0)
return rv;
@@ -320,14 +331,15 @@ static int pmbus_read_virt_reg(struct i2c_client *client, int page, int reg)
* _pmbus_read_word_data() is similar to pmbus_read_word_data(), but checks if
* a device specific mapping function exists and calls it if necessary.
*/
-static int _pmbus_read_word_data(struct i2c_client *client, int page, int reg)
+static int _pmbus_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
{
struct pmbus_data *data = i2c_get_clientdata(client);
const struct pmbus_driver_info *info = data->info;
int status;
if (info->read_word_data) {
- status = info->read_word_data(client, page, reg);
+ status = info->read_word_data(client, page, phase, reg);
if (status != -ENODATA)
return status;
}
@@ -335,14 +347,20 @@ static int _pmbus_read_word_data(struct i2c_client *client, int page, int reg)
if (reg >= PMBUS_VIRT_BASE)
return pmbus_read_virt_reg(client, page, reg);
- return pmbus_read_word_data(client, page, reg);
+ return pmbus_read_word_data(client, page, phase, reg);
+}
+
+/* Same as above, but without phase parameter, for use in check functions */
+static int __pmbus_read_word_data(struct i2c_client *client, int page, int reg)
+{
+ return _pmbus_read_word_data(client, page, 0xff, reg);
}
int pmbus_read_byte_data(struct i2c_client *client, int page, u8 reg)
{
int rv;
- rv = pmbus_set_page(client, page);
+ rv = pmbus_set_page(client, page, 0xff);
if (rv < 0)
return rv;
@@ -354,7 +372,7 @@ int pmbus_write_byte_data(struct i2c_client *client, int page, u8 reg, u8 value)
{
int rv;
- rv = pmbus_set_page(client, page);
+ rv = pmbus_set_page(client, page, 0xff);
if (rv < 0)
return rv;
@@ -440,7 +458,7 @@ static int pmbus_get_fan_rate(struct i2c_client *client, int page, int id,
have_rpm = !!(config & pmbus_fan_rpm_mask[id]);
if (want_rpm == have_rpm)
- return pmbus_read_word_data(client, page,
+ return pmbus_read_word_data(client, page, 0xff,
pmbus_fan_command_registers[id]);
/* Can't sensibly map between RPM and PWM, just return zero */
@@ -530,7 +548,7 @@ EXPORT_SYMBOL_GPL(pmbus_check_byte_register);
bool pmbus_check_word_register(struct i2c_client *client, int page, int reg)
{
- return pmbus_check_register(client, _pmbus_read_word_data, page, reg);
+ return pmbus_check_register(client, __pmbus_read_word_data, page, reg);
}
EXPORT_SYMBOL_GPL(pmbus_check_word_register);
@@ -595,6 +613,7 @@ static struct pmbus_data *pmbus_update_device(struct device *dev)
sensor->data
= _pmbus_read_word_data(client,
sensor->page,
+ sensor->phase,
sensor->reg);
}
pmbus_clear_faults(client);
@@ -1076,7 +1095,8 @@ static int pmbus_add_boolean(struct pmbus_data *data,
static struct pmbus_sensor *pmbus_add_sensor(struct pmbus_data *data,
const char *name, const char *type,
- int seq, int page, int reg,
+ int seq, int page, int phase,
+ int reg,
enum pmbus_sensor_classes class,
bool update, bool readonly,
bool convert)
@@ -1100,6 +1120,7 @@ static struct pmbus_sensor *pmbus_add_sensor(struct pmbus_data *data,
readonly = true;
sensor->page = page;
+ sensor->phase = phase;
sensor->reg = reg;
sensor->class = class;
sensor->update = update;
@@ -1119,7 +1140,7 @@ static struct pmbus_sensor *pmbus_add_sensor(struct pmbus_data *data,
static int pmbus_add_label(struct pmbus_data *data,
const char *name, int seq,
- const char *lstring, int index)
+ const char *lstring, int index, int phase)
{
struct pmbus_label *label;
struct device_attribute *a;
@@ -1131,11 +1152,21 @@ static int pmbus_add_label(struct pmbus_data *data,
a = &label->attribute;
snprintf(label->name, sizeof(label->name), "%s%d_label", name, seq);
- if (!index)
- strncpy(label->label, lstring, sizeof(label->label) - 1);
- else
- snprintf(label->label, sizeof(label->label), "%s%d", lstring,
- index);
+ if (!index) {
+ if (phase == 0xff)
+ strncpy(label->label, lstring,
+ sizeof(label->label) - 1);
+ else
+ snprintf(label->label, sizeof(label->label), "%s.%d",
+ lstring, phase);
+ } else {
+ if (phase == 0xff)
+ snprintf(label->label, sizeof(label->label), "%s%d",
+ lstring, index);
+ else
+ snprintf(label->label, sizeof(label->label), "%s%d.%d",
+ lstring, index, phase);
+ }
pmbus_dev_attr_init(a, label->name, 0444, pmbus_show_label, NULL);
return pmbus_add_attribute(data, &a->attr);
@@ -1200,7 +1231,7 @@ static int pmbus_add_limit_attrs(struct i2c_client *client,
for (i = 0; i < nlimit; i++) {
if (pmbus_check_word_register(client, page, l->reg)) {
curr = pmbus_add_sensor(data, name, l->attr, index,
- page, l->reg, attr->class,
+ page, 0xff, l->reg, attr->class,
attr->update || l->update,
false, true);
if (!curr)
@@ -1227,7 +1258,7 @@ static int pmbus_add_sensor_attrs_one(struct i2c_client *client,
struct pmbus_data *data,
const struct pmbus_driver_info *info,
const char *name,
- int index, int page,
+ int index, int page, int phase,
const struct pmbus_sensor_attr *attr,
bool paged)
{
@@ -1237,15 +1268,16 @@ static int pmbus_add_sensor_attrs_one(struct i2c_client *client,
if (attr->label) {
ret = pmbus_add_label(data, name, index, attr->label,
- paged ? page + 1 : 0);
+ paged ? page + 1 : 0, phase);
if (ret)
return ret;
}
- base = pmbus_add_sensor(data, name, "input", index, page, attr->reg,
- attr->class, true, true, true);
+ base = pmbus_add_sensor(data, name, "input", index, page, phase,
+ attr->reg, attr->class, true, true, true);
if (!base)
return -ENOMEM;
- if (attr->sfunc) {
+ /* No limit and alarm attributes for phase specific sensors */
+ if (attr->sfunc && phase == 0xff) {
ret = pmbus_add_limit_attrs(client, data, info, name,
index, page, base, attr);
if (ret < 0)
@@ -1315,10 +1347,25 @@ static int pmbus_add_sensor_attrs(struct i2c_client *client,
continue;
ret = pmbus_add_sensor_attrs_one(client, data, info,
name, index, page,
- attrs, paged);
+ 0xff, attrs, paged);
if (ret)
return ret;
index++;
+ if (info->phases[page]) {
+ int phase;
+
+ for (phase = 0; phase < info->phases[page];
+ phase++) {
+ if (!(info->pfunc[phase] & attrs->func))
+ continue;
+ ret = pmbus_add_sensor_attrs_one(client,
+ data, info, name, index, page,
+ phase, attrs, paged);
+ if (ret)
+ return ret;
+ index++;
+ }
+ }
}
attrs++;
}
@@ -1822,7 +1869,7 @@ static int pmbus_add_fan_ctrl(struct i2c_client *client,
struct pmbus_sensor *sensor;
sensor = pmbus_add_sensor(data, "fan", "target", index, page,
- PMBUS_VIRT_FAN_TARGET_1 + id, PSC_FAN,
+ PMBUS_VIRT_FAN_TARGET_1 + id, 0xff, PSC_FAN,
false, false, true);
if (!sensor)
@@ -1833,14 +1880,14 @@ static int pmbus_add_fan_ctrl(struct i2c_client *client,
return 0;
sensor = pmbus_add_sensor(data, "pwm", NULL, index, page,
- PMBUS_VIRT_PWM_1 + id, PSC_PWM,
+ PMBUS_VIRT_PWM_1 + id, 0xff, PSC_PWM,
false, false, true);
if (!sensor)
return -ENOMEM;
sensor = pmbus_add_sensor(data, "pwm", "enable", index, page,
- PMBUS_VIRT_PWM_ENABLE_1 + id, PSC_PWM,
+ PMBUS_VIRT_PWM_ENABLE_1 + id, 0xff, PSC_PWM,
true, false, false);
if (!sensor)
@@ -1882,7 +1929,7 @@ static int pmbus_add_fan_attributes(struct i2c_client *client,
continue;
if (pmbus_add_sensor(data, "fan", "input", index,
- page, pmbus_fan_registers[f],
+ page, pmbus_fan_registers[f], 0xff,
PSC_FAN, true, true, true) == NULL)
return -ENOMEM;
@@ -1964,7 +2011,7 @@ static ssize_t pmbus_show_samples(struct device *dev,
struct i2c_client *client = to_i2c_client(dev->parent);
struct pmbus_samples_reg *reg = to_samples_reg(devattr);
- val = _pmbus_read_word_data(client, reg->page, reg->attr->reg);
+ val = _pmbus_read_word_data(client, reg->page, 0xff, reg->attr->reg);
if (val < 0)
return val;
@@ -2120,7 +2167,7 @@ static int pmbus_read_status_byte(struct i2c_client *client, int page)
static int pmbus_read_status_word(struct i2c_client *client, int page)
{
- return _pmbus_read_word_data(client, page, PMBUS_STATUS_WORD);
+ return _pmbus_read_word_data(client, page, 0xff, PMBUS_STATUS_WORD);
}
static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
@@ -2482,6 +2529,8 @@ int pmbus_do_probe(struct i2c_client *client, const struct i2c_device_id *id,
if (pdata)
data->flags = pdata->flags;
data->info = info;
+ data->currpage = 0xff;
+ data->currphase = 0xfe;
ret = pmbus_init_common(client, data, info);
if (ret < 0)
diff --git a/drivers/hwmon/pmbus/tps53679.c b/drivers/hwmon/pmbus/tps53679.c
index 9c22e90..157c99f 100644
--- a/drivers/hwmon/pmbus/tps53679.c
+++ b/drivers/hwmon/pmbus/tps53679.c
@@ -6,13 +6,21 @@
* Copyright (c) 2017 Vadim Pasternak <vadimp@mellanox.com>
*/
+#include <linux/bits.h>
#include <linux/err.h>
#include <linux/i2c.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/of_device.h>
#include "pmbus.h"
+enum chips {
+ tps53647, tps53667, tps53679, tps53681, tps53688
+};
+
+#define TPS53647_PAGE_NUM 1
+
#define TPS53679_PROT_VR12_5MV 0x01 /* VR12.0 mode, 5-mV DAC */
#define TPS53679_PROT_VR12_5_10MV 0x02 /* VR12.5 mode, 10-mV DAC */
#define TPS53679_PROT_VR13_10MV 0x04 /* VR13.0 mode, 10-mV DAC */
@@ -20,13 +28,19 @@
#define TPS53679_PROT_VR13_5MV 0x07 /* VR13.0 mode, 5-mV DAC */
#define TPS53679_PAGE_NUM 2
-static int tps53679_identify(struct i2c_client *client,
- struct pmbus_driver_info *info)
+#define TPS53681_DEVICE_ID 0x81
+
+#define TPS53681_PMBUS_REVISION 0x33
+
+#define TPS53681_MFR_SPECIFIC_20 0xe4 /* Number of phases, per page */
+
+static int tps53679_identify_mode(struct i2c_client *client,
+ struct pmbus_driver_info *info)
{
u8 vout_params;
int i, ret;
- for (i = 0; i < TPS53679_PAGE_NUM; i++) {
+ for (i = 0; i < info->pages; i++) {
/* Read the register with VOUT scaling value.*/
ret = pmbus_read_byte_data(client, i, PMBUS_VOUT_MODE);
if (ret < 0)
@@ -52,48 +66,180 @@ static int tps53679_identify(struct i2c_client *client,
return 0;
}
+static int tps53679_identify_phases(struct i2c_client *client,
+ struct pmbus_driver_info *info)
+{
+ int ret;
+
+ /* On TPS53681, only channel A provides per-phase output current */
+ ret = pmbus_read_byte_data(client, 0, TPS53681_MFR_SPECIFIC_20);
+ if (ret < 0)
+ return ret;
+ info->phases[0] = (ret & 0x07) + 1;
+
+ return 0;
+}
+
+static int tps53679_identify_chip(struct i2c_client *client,
+ u8 revision, u16 id)
+{
+ u8 buf[I2C_SMBUS_BLOCK_MAX];
+ int ret;
+
+ ret = pmbus_read_byte_data(client, 0, PMBUS_REVISION);
+ if (ret < 0)
+ return ret;
+ if (ret != revision) {
+ dev_err(&client->dev, "Unexpected PMBus revision 0x%x\n", ret);
+ return -ENODEV;
+ }
+
+ ret = i2c_smbus_read_block_data(client, PMBUS_IC_DEVICE_ID, buf);
+ if (ret < 0)
+ return ret;
+ if (ret != 1 || buf[0] != id) {
+ dev_err(&client->dev, "Unexpected device ID 0x%x\n", buf[0]);
+ return -ENODEV;
+ }
+ return 0;
+}
+
+/*
+ * Common identification function for chips with multi-phase support.
+ * Since those chips have special configuration registers, we want to have
+ * some level of reassurance that we are really talking with the chip
+ * being probed. Check PMBus revision and chip ID.
+ */
+static int tps53679_identify_multiphase(struct i2c_client *client,
+ struct pmbus_driver_info *info,
+ int pmbus_rev, int device_id)
+{
+ int ret;
+
+ ret = tps53679_identify_chip(client, pmbus_rev, device_id);
+ if (ret < 0)
+ return ret;
+
+ ret = tps53679_identify_mode(client, info);
+ if (ret < 0)
+ return ret;
+
+ return tps53679_identify_phases(client, info);
+}
+
+static int tps53679_identify(struct i2c_client *client,
+ struct pmbus_driver_info *info)
+{
+ return tps53679_identify_mode(client, info);
+}
+
+static int tps53681_identify(struct i2c_client *client,
+ struct pmbus_driver_info *info)
+{
+ return tps53679_identify_multiphase(client, info,
+ TPS53681_PMBUS_REVISION,
+ TPS53681_DEVICE_ID);
+}
+
+static int tps53681_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
+{
+ /*
+ * For reading the total output current (READ_IOUT) for all phases,
+ * the chip datasheet is a bit vague. It says "PHASE must be set to
+ * FFh to access all phases simultaneously. PHASE may also be set to
+ * 80h readack (!) the total phase current".
+ * Experiments show that the command does _not_ report the total
+ * current for all phases if the phase is set to 0xff. Instead, it
+ * appears to report the current of one of the phases. Override phase
+ * parameter with 0x80 when reading the total output current on page 0.
+ */
+ if (reg == PMBUS_READ_IOUT && page == 0 && phase == 0xff)
+ return pmbus_read_word_data(client, page, 0x80, reg);
+ return -ENODATA;
+}
+
static struct pmbus_driver_info tps53679_info = {
- .pages = TPS53679_PAGE_NUM,
.format[PSC_VOLTAGE_IN] = linear,
.format[PSC_VOLTAGE_OUT] = vid,
.format[PSC_TEMPERATURE] = linear,
.format[PSC_CURRENT_OUT] = linear,
.format[PSC_POWER] = linear,
- .func[0] = PMBUS_HAVE_VIN | PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT |
+ .func[0] = PMBUS_HAVE_VIN | PMBUS_HAVE_IIN | PMBUS_HAVE_PIN |
+ PMBUS_HAVE_STATUS_INPUT |
+ PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT |
PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT |
PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP |
PMBUS_HAVE_POUT,
- .func[1] = PMBUS_HAVE_VIN | PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT |
+ .func[1] = PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT |
PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT |
PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP |
PMBUS_HAVE_POUT,
- .identify = tps53679_identify,
+ .pfunc[0] = PMBUS_HAVE_IOUT,
+ .pfunc[1] = PMBUS_HAVE_IOUT,
+ .pfunc[2] = PMBUS_HAVE_IOUT,
+ .pfunc[3] = PMBUS_HAVE_IOUT,
+ .pfunc[4] = PMBUS_HAVE_IOUT,
+ .pfunc[5] = PMBUS_HAVE_IOUT,
};
static int tps53679_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
+ struct device *dev = &client->dev;
struct pmbus_driver_info *info;
+ enum chips chip_id;
- info = devm_kmemdup(&client->dev, &tps53679_info, sizeof(*info),
- GFP_KERNEL);
+ if (dev->of_node)
+ chip_id = (enum chips)of_device_get_match_data(dev);
+ else
+ chip_id = id->driver_data;
+
+ info = devm_kmemdup(dev, &tps53679_info, sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
+ switch (chip_id) {
+ case tps53647:
+ case tps53667:
+ info->pages = TPS53647_PAGE_NUM;
+ info->identify = tps53679_identify;
+ break;
+ case tps53679:
+ case tps53688:
+ info->pages = TPS53679_PAGE_NUM;
+ info->identify = tps53679_identify;
+ break;
+ case tps53681:
+ info->pages = TPS53679_PAGE_NUM;
+ info->phases[0] = 6;
+ info->identify = tps53681_identify;
+ info->read_word_data = tps53681_read_word_data;
+ break;
+ default:
+ return -ENODEV;
+ }
+
return pmbus_do_probe(client, id, info);
}
static const struct i2c_device_id tps53679_id[] = {
- {"tps53679", 0},
- {"tps53688", 0},
+ {"tps53647", tps53647},
+ {"tps53667", tps53667},
+ {"tps53679", tps53679},
+ {"tps53681", tps53681},
+ {"tps53688", tps53688},
{}
};
MODULE_DEVICE_TABLE(i2c, tps53679_id);
static const struct of_device_id __maybe_unused tps53679_of_match[] = {
- {.compatible = "ti,tps53679"},
- {.compatible = "ti,tps53688"},
+ {.compatible = "ti,tps53647", .data = (void *)tps53647},
+ {.compatible = "ti,tps53667", .data = (void *)tps53667},
+ {.compatible = "ti,tps53679", .data = (void *)tps53679},
+ {.compatible = "ti,tps53681", .data = (void *)tps53681},
+ {.compatible = "ti,tps53688", .data = (void *)tps53688},
{}
};
MODULE_DEVICE_TABLE(of, tps53679_of_match);
diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c
index 23ea341..81f4c4f 100644
--- a/drivers/hwmon/pmbus/ucd9000.c
+++ b/drivers/hwmon/pmbus/ucd9000.c
@@ -370,7 +370,7 @@ static void ucd9000_probe_gpio(struct i2c_client *client,
#ifdef CONFIG_DEBUG_FS
static int ucd9000_get_mfr_status(struct i2c_client *client, u8 *buffer)
{
- int ret = pmbus_set_page(client, 0);
+ int ret = pmbus_set_page(client, 0, 0xff);
if (ret < 0)
return ret;
diff --git a/drivers/hwmon/pmbus/xdpe12284.c b/drivers/hwmon/pmbus/xdpe12284.c
index 3d47806..d5103fc 100644
--- a/drivers/hwmon/pmbus/xdpe12284.c
+++ b/drivers/hwmon/pmbus/xdpe12284.c
@@ -18,6 +18,60 @@
#define XDPE122_AMD_625MV 0x10 /* AMD mode 6.25mV */
#define XDPE122_PAGE_NUM 2
+static int xdpe122_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
+{
+ const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+ long val;
+ s16 exponent;
+ s32 mantissa;
+ int ret;
+
+ switch (reg) {
+ case PMBUS_VOUT_OV_FAULT_LIMIT:
+ case PMBUS_VOUT_UV_FAULT_LIMIT:
+ ret = pmbus_read_word_data(client, page, phase, reg);
+ if (ret < 0)
+ return ret;
+
+ /* Convert register value to LINEAR11 data. */
+ exponent = ((s16)ret) >> 11;
+ mantissa = ((s16)((ret & GENMASK(10, 0)) << 5)) >> 5;
+ val = mantissa * 1000L;
+ if (exponent >= 0)
+ val <<= exponent;
+ else
+ val >>= -exponent;
+
+ /* Convert data to VID register. */
+ switch (info->vrm_version[page]) {
+ case vr13:
+ if (val >= 500)
+ return 1 + DIV_ROUND_CLOSEST(val - 500, 10);
+ return 0;
+ case vr12:
+ if (val >= 250)
+ return 1 + DIV_ROUND_CLOSEST(val - 250, 5);
+ return 0;
+ case imvp9:
+ if (val >= 200)
+ return 1 + DIV_ROUND_CLOSEST(val - 200, 10);
+ return 0;
+ case amd625mv:
+ if (val >= 200 && val <= 1550)
+ return DIV_ROUND_CLOSEST((1550 - val) * 100,
+ 625);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ default:
+ return -ENODATA;
+ }
+
+ return 0;
+}
+
static int xdpe122_identify(struct i2c_client *client,
struct pmbus_driver_info *info)
{
@@ -70,6 +124,7 @@ static struct pmbus_driver_info xdpe122_info = {
PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP |
PMBUS_HAVE_POUT | PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT,
.identify = xdpe122_identify,
+ .read_word_data = xdpe122_read_word_data,
};
static int xdpe122_probe(struct i2c_client *client,
@@ -94,8 +149,8 @@ static const struct i2c_device_id xdpe122_id[] = {
MODULE_DEVICE_TABLE(i2c, xdpe122_id);
static const struct of_device_id __maybe_unused xdpe122_of_match[] = {
- {.compatible = "infineon, xdpe12254"},
- {.compatible = "infineon, xdpe12284"},
+ {.compatible = "infineon,xdpe12254"},
+ {.compatible = "infineon,xdpe12284"},
{}
};
MODULE_DEVICE_TABLE(of, xdpe122_of_match);
diff --git a/drivers/hwmon/pmbus/zl6100.c b/drivers/hwmon/pmbus/zl6100.c
index 190b898..3a827d0 100644
--- a/drivers/hwmon/pmbus/zl6100.c
+++ b/drivers/hwmon/pmbus/zl6100.c
@@ -125,7 +125,8 @@ static inline void zl6100_wait(const struct zl6100_data *data)
}
}
-static int zl6100_read_word_data(struct i2c_client *client, int page, int reg)
+static int zl6100_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
{
const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
struct zl6100_data *data = to_zl6100_data(info);
@@ -167,7 +168,7 @@ static int zl6100_read_word_data(struct i2c_client *client, int page, int reg)
}
zl6100_wait(data);
- ret = pmbus_read_word_data(client, page, vreg);
+ ret = pmbus_read_word_data(client, page, phase, vreg);
data->access = ktime_get();
if (ret < 0)
return ret;
diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c
index 7ffadc2..5a51201 100644
--- a/drivers/hwmon/w83627ehf.c
+++ b/drivers/hwmon/w83627ehf.c
@@ -1346,8 +1346,13 @@ w83627ehf_is_visible(const void *drvdata, enum hwmon_sensor_types type,
/* channel 0.., name 1.. */
if (!(data->have_temp & (1 << channel)))
return 0;
- if (attr == hwmon_temp_input || attr == hwmon_temp_label)
+ if (attr == hwmon_temp_input)
return 0444;
+ if (attr == hwmon_temp_label) {
+ if (data->temp_label)
+ return 0444;
+ return 0;
+ }
if (channel == 2 && data->temp3_val_only)
return 0;
if (attr == hwmon_temp_max) {
diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c
index 8e48c74..255f8f4 100644
--- a/drivers/hwtracing/intel_th/msu.c
+++ b/drivers/hwtracing/intel_th/msu.c
@@ -718,9 +718,6 @@ static int msc_win_set_lockout(struct msc_window *win,
if (old != expect) {
ret = -EINVAL;
- dev_warn_ratelimited(msc_dev(win->msc),
- "expected lockout state %d, got %d\n",
- expect, old);
goto unlock;
}
@@ -741,6 +738,10 @@ static int msc_win_set_lockout(struct msc_window *win,
/* from intel_th_msc_window_unlock(), don't warn if not locked */
if (expect == WIN_LOCKED && old == new)
return 0;
+
+ dev_warn_ratelimited(msc_dev(win->msc),
+ "expected lockout state %d, got %d\n",
+ expect, old);
}
return ret;
@@ -760,7 +761,7 @@ static int msc_configure(struct msc *msc)
lockdep_assert_held(&msc->buf_mutex);
if (msc->mode > MSC_MODE_MULTI)
- return -ENOTSUPP;
+ return -EINVAL;
if (msc->mode == MSC_MODE_MULTI) {
if (msc_win_set_lockout(msc->cur_win, WIN_READY, WIN_INUSE))
@@ -1294,7 +1295,7 @@ static int msc_buffer_alloc(struct msc *msc, unsigned long *nr_pages,
} else if (msc->mode == MSC_MODE_MULTI) {
ret = msc_buffer_multi_alloc(msc, nr_pages, nr_wins);
} else {
- ret = -ENOTSUPP;
+ ret = -EINVAL;
}
if (!ret) {
@@ -1530,7 +1531,7 @@ static ssize_t intel_th_msc_read(struct file *file, char __user *buf,
if (ret >= 0)
*ppos = iter->offset;
} else {
- ret = -ENOTSUPP;
+ ret = -EINVAL;
}
put_count:
diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c
index e9d90b5..86aa6a4 100644
--- a/drivers/hwtracing/intel_th/pci.c
+++ b/drivers/hwtracing/intel_th/pci.c
@@ -235,6 +235,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
.driver_data = (kernel_ulong_t)&intel_th_2x,
},
{
+ /* Elkhart Lake CPU */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4529),
+ .driver_data = (kernel_ulong_t)&intel_th_2x,
+ },
+ {
/* Elkhart Lake */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4b26),
.driver_data = (kernel_ulong_t)&intel_th_2x,
diff --git a/drivers/hwtracing/stm/p_sys-t.c b/drivers/hwtracing/stm/p_sys-t.c
index b178a54..360b5c0 100644
--- a/drivers/hwtracing/stm/p_sys-t.c
+++ b/drivers/hwtracing/stm/p_sys-t.c
@@ -238,7 +238,7 @@ static struct configfs_attribute *sys_t_policy_attrs[] = {
static inline bool sys_t_need_ts(struct sys_t_output *op)
{
if (op->node.ts_interval &&
- time_after(op->ts_jiffies + op->node.ts_interval, jiffies)) {
+ time_after(jiffies, op->ts_jiffies + op->node.ts_interval)) {
op->ts_jiffies = jiffies;
return true;
@@ -250,8 +250,8 @@ static inline bool sys_t_need_ts(struct sys_t_output *op)
static bool sys_t_need_clock_sync(struct sys_t_output *op)
{
if (op->node.clocksync_interval &&
- time_after(op->clocksync_jiffies + op->node.clocksync_interval,
- jiffies)) {
+ time_after(jiffies,
+ op->clocksync_jiffies + op->node.clocksync_interval)) {
op->clocksync_jiffies = jiffies;
return true;
diff --git a/drivers/i2c/busses/i2c-altera.c b/drivers/i2c/busses/i2c-altera.c
index 5255d37..1de23b4 100644
--- a/drivers/i2c/busses/i2c-altera.c
+++ b/drivers/i2c/busses/i2c-altera.c
@@ -171,7 +171,7 @@ static void altr_i2c_init(struct altr_i2c_dev *idev)
/* SCL Low Time */
writel(t_low, idev->base + ALTR_I2C_SCL_LOW);
/* SDA Hold Time, 300ns */
- writel(div_u64(300 * clk_mhz, 1000), idev->base + ALTR_I2C_SDA_HOLD);
+ writel(3 * clk_mhz / 10, idev->base + ALTR_I2C_SDA_HOLD);
/* Mask all master interrupt bits */
altr_i2c_int_enable(idev, ALTR_I2C_ALL_IRQ, false);
diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c
index 050adda..05b35ac 100644
--- a/drivers/i2c/busses/i2c-designware-pcidrv.c
+++ b/drivers/i2c/busses/i2c-designware-pcidrv.c
@@ -313,6 +313,7 @@ static void i2c_dw_pci_remove(struct pci_dev *pdev)
pm_runtime_get_noresume(&pdev->dev);
i2c_del_adapter(&dev->adapter);
+ devm_free_irq(&pdev->dev, dev->irq, dev);
pci_free_irq_vectors(pdev);
}
diff --git a/drivers/i2c/busses/i2c-gpio.c b/drivers/i2c/busses/i2c-gpio.c
index 3a9e840..a4a6825 100644
--- a/drivers/i2c/busses/i2c-gpio.c
+++ b/drivers/i2c/busses/i2c-gpio.c
@@ -348,7 +348,7 @@ static struct gpio_desc *i2c_gpio_get_desc(struct device *dev,
if (ret == -ENOENT)
retdesc = ERR_PTR(-EPROBE_DEFER);
- if (ret != -EPROBE_DEFER)
+ if (PTR_ERR(retdesc) != -EPROBE_DEFER)
dev_err(dev, "error trying to get descriptor: %d\n", ret);
return retdesc;
diff --git a/drivers/i2c/busses/i2c-hix5hd2.c b/drivers/i2c/busses/i2c-hix5hd2.c
index 8497c7a..224f830 100644
--- a/drivers/i2c/busses/i2c-hix5hd2.c
+++ b/drivers/i2c/busses/i2c-hix5hd2.c
@@ -477,6 +477,7 @@ static int hix5hd2_i2c_remove(struct platform_device *pdev)
i2c_del_adapter(&priv->adap);
pm_runtime_disable(priv->dev);
pm_runtime_set_suspended(priv->dev);
+ clk_disable_unprepare(priv->clk);
return 0;
}
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index ca4f096..a9c03f5 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -132,11 +132,6 @@
#define TCOBASE 0x050
#define TCOCTL 0x054
-#define ACPIBASE 0x040
-#define ACPIBASE_SMI_OFF 0x030
-#define ACPICTRL 0x044
-#define ACPICTRL_EN 0x080
-
#define SBREG_BAR 0x10
#define SBREG_SMBCTRL 0xc6000c
#define SBREG_SMBCTRL_DNV 0xcf000c
@@ -1553,7 +1548,7 @@ i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev,
pci_bus_write_config_byte(pci_dev->bus, devfn, 0xe1, hidden);
spin_unlock(&p2sb_spinlock);
- res = &tco_res[ICH_RES_MEM_OFF];
+ res = &tco_res[1];
if (pci_dev->device == PCI_DEVICE_ID_INTEL_DNV_SMBUS)
res->start = (resource_size_t)base64_addr + SBREG_SMBCTRL_DNV;
else
@@ -1563,7 +1558,7 @@ i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev,
res->flags = IORESOURCE_MEM;
return platform_device_register_resndata(&pci_dev->dev, "iTCO_wdt", -1,
- tco_res, 3, &spt_tco_platform_data,
+ tco_res, 2, &spt_tco_platform_data,
sizeof(spt_tco_platform_data));
}
@@ -1576,17 +1571,16 @@ static struct platform_device *
i801_add_tco_cnl(struct i801_priv *priv, struct pci_dev *pci_dev,
struct resource *tco_res)
{
- return platform_device_register_resndata(&pci_dev->dev, "iTCO_wdt", -1,
- tco_res, 2, &cnl_tco_platform_data,
- sizeof(cnl_tco_platform_data));
+ return platform_device_register_resndata(&pci_dev->dev,
+ "iTCO_wdt", -1, tco_res, 1, &cnl_tco_platform_data,
+ sizeof(cnl_tco_platform_data));
}
static void i801_add_tco(struct i801_priv *priv)
{
- u32 base_addr, tco_base, tco_ctl, ctrl_val;
struct pci_dev *pci_dev = priv->pci_dev;
- struct resource tco_res[3], *res;
- unsigned int devfn;
+ struct resource tco_res[2], *res;
+ u32 tco_base, tco_ctl;
/* If we have ACPI based watchdog use that instead */
if (acpi_has_watchdog())
@@ -1601,30 +1595,15 @@ static void i801_add_tco(struct i801_priv *priv)
return;
memset(tco_res, 0, sizeof(tco_res));
-
- res = &tco_res[ICH_RES_IO_TCO];
+ /*
+ * Always populate the main iTCO IO resource here. The second entry
+ * for NO_REBOOT MMIO is filled by the SPT specific function.
+ */
+ res = &tco_res[0];
res->start = tco_base & ~1;
res->end = res->start + 32 - 1;
res->flags = IORESOURCE_IO;
- /*
- * Power Management registers.
- */
- devfn = PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 2);
- pci_bus_read_config_dword(pci_dev->bus, devfn, ACPIBASE, &base_addr);
-
- res = &tco_res[ICH_RES_IO_SMI];
- res->start = (base_addr & ~1) + ACPIBASE_SMI_OFF;
- res->end = res->start + 3;
- res->flags = IORESOURCE_IO;
-
- /*
- * Enable the ACPI I/O space.
- */
- pci_bus_read_config_dword(pci_dev->bus, devfn, ACPICTRL, &ctrl_val);
- ctrl_val |= ACPICTRL_EN;
- pci_bus_write_config_dword(pci_dev->bus, devfn, ACPICTRL, ctrl_val);
-
if (priv->features & FEATURE_TCO_CNL)
priv->tco_pdev = i801_add_tco_cnl(priv, pci_dev, tco_res);
else
diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c
index 16a67a6..b426fc9 100644
--- a/drivers/i2c/busses/i2c-jz4780.c
+++ b/drivers/i2c/busses/i2c-jz4780.c
@@ -78,25 +78,6 @@
#define X1000_I2C_DC_STOP BIT(9)
-static const char * const jz4780_i2c_abrt_src[] = {
- "ABRT_7B_ADDR_NOACK",
- "ABRT_10ADDR1_NOACK",
- "ABRT_10ADDR2_NOACK",
- "ABRT_XDATA_NOACK",
- "ABRT_GCALL_NOACK",
- "ABRT_GCALL_READ",
- "ABRT_HS_ACKD",
- "SBYTE_ACKDET",
- "ABRT_HS_NORSTRT",
- "SBYTE_NORSTRT",
- "ABRT_10B_RD_NORSTRT",
- "ABRT_MASTER_DIS",
- "ARB_LOST",
- "SLVFLUSH_TXFIFO",
- "SLV_ARBLOST",
- "SLVRD_INTX",
-};
-
#define JZ4780_I2C_INTST_IGC BIT(11)
#define JZ4780_I2C_INTST_ISTT BIT(10)
#define JZ4780_I2C_INTST_ISTP BIT(9)
@@ -576,21 +557,8 @@ static irqreturn_t jz4780_i2c_irq(int irqno, void *dev_id)
static void jz4780_i2c_txabrt(struct jz4780_i2c *i2c, int src)
{
- int i;
-
- dev_err(&i2c->adap.dev, "txabrt: 0x%08x\n", src);
- dev_err(&i2c->adap.dev, "device addr=%x\n",
- jz4780_i2c_readw(i2c, JZ4780_I2C_TAR));
- dev_err(&i2c->adap.dev, "send cmd count:%d %d\n",
- i2c->cmd, i2c->cmd_buf[i2c->cmd]);
- dev_err(&i2c->adap.dev, "receive data count:%d %d\n",
- i2c->cmd, i2c->data_buf[i2c->cmd]);
-
- for (i = 0; i < 16; i++) {
- if (src & BIT(i))
- dev_dbg(&i2c->adap.dev, "I2C TXABRT[%d]=%s\n",
- i, jz4780_i2c_abrt_src[i]);
- }
+ dev_dbg(&i2c->adap.dev, "txabrt: 0x%08x, cmd: %d, send: %d, recv: %d\n",
+ src, i2c->cmd, i2c->cmd_buf[i2c->cmd], i2c->data_buf[i2c->cmd]);
}
static inline int jz4780_i2c_xfer_read(struct jz4780_i2c *i2c,
diff --git a/drivers/i2c/busses/i2c-nvidia-gpu.c b/drivers/i2c/busses/i2c-nvidia-gpu.c
index 62e18b4..f5d25ce 100644
--- a/drivers/i2c/busses/i2c-nvidia-gpu.c
+++ b/drivers/i2c/busses/i2c-nvidia-gpu.c
@@ -8,6 +8,7 @@
#include <linux/delay.h>
#include <linux/i2c.h>
#include <linux/interrupt.h>
+#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
@@ -75,20 +76,15 @@ static void gpu_enable_i2c_bus(struct gpu_i2c_dev *i2cd)
static int gpu_i2c_check_status(struct gpu_i2c_dev *i2cd)
{
- unsigned long target = jiffies + msecs_to_jiffies(1000);
u32 val;
+ int ret;
- do {
- val = readl(i2cd->regs + I2C_MST_CNTL);
- if (!(val & I2C_MST_CNTL_CYCLE_TRIGGER))
- break;
- if ((val & I2C_MST_CNTL_STATUS) !=
- I2C_MST_CNTL_STATUS_BUS_BUSY)
- break;
- usleep_range(500, 600);
- } while (time_is_after_jiffies(target));
+ ret = readl_poll_timeout(i2cd->regs + I2C_MST_CNTL, val,
+ !(val & I2C_MST_CNTL_CYCLE_TRIGGER) ||
+ (val & I2C_MST_CNTL_STATUS) != I2C_MST_CNTL_STATUS_BUS_BUSY,
+ 500, 1000 * USEC_PER_MSEC);
- if (time_is_before_jiffies(target)) {
+ if (ret) {
dev_err(i2cd->dev, "i2c timeout error %x\n", val);
return -ETIMEDOUT;
}
diff --git a/drivers/i2c/busses/i2c-pca-platform.c b/drivers/i2c/busses/i2c-pca-platform.c
index a7a8184..635dd69 100644
--- a/drivers/i2c/busses/i2c-pca-platform.c
+++ b/drivers/i2c/busses/i2c-pca-platform.c
@@ -140,7 +140,7 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
int ret = 0;
int irq;
- irq = platform_get_irq(pdev, 0);
+ irq = platform_get_irq_optional(pdev, 0);
/* If irq is 0, we do polling. */
if (irq < 0)
irq = 0;
diff --git a/drivers/i2c/busses/i2c-st.c b/drivers/i2c/busses/i2c-st.c
index 54e1fc8..f7f7b5b 100644
--- a/drivers/i2c/busses/i2c-st.c
+++ b/drivers/i2c/busses/i2c-st.c
@@ -434,6 +434,7 @@ static void st_i2c_wr_fill_tx_fifo(struct st_i2c_dev *i2c_dev)
/**
* st_i2c_rd_fill_tx_fifo() - Fill the Tx FIFO in read mode
* @i2c_dev: Controller's private data
+ * @max: Maximum amount of data to fill into the Tx FIFO
*
* This functions fills the Tx FIFO with fixed pattern when
* in read mode to trigger clock.
diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
index 8f3dbc9..8b0ff78 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -394,9 +394,17 @@ EXPORT_SYMBOL_GPL(i2c_acpi_find_adapter_by_handle);
static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev)
{
struct device *dev;
+ struct i2c_client *client;
dev = bus_find_device_by_acpi_dev(&i2c_bus_type, adev);
- return dev ? i2c_verify_client(dev) : NULL;
+ if (!dev)
+ return NULL;
+
+ client = i2c_verify_client(dev);
+ if (!client)
+ put_device(dev);
+
+ return client;
}
static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value,
diff --git a/drivers/i3c/device.c b/drivers/i3c/device.c
index 9e2e140..bb8e60d 100644
--- a/drivers/i3c/device.c
+++ b/drivers/i3c/device.c
@@ -213,40 +213,34 @@ i3c_device_match_id(struct i3c_device *i3cdev,
{
struct i3c_device_info devinfo;
const struct i3c_device_id *id;
+ u16 manuf, part, ext_info;
+ bool rndpid;
i3c_device_get_info(i3cdev, &devinfo);
- /*
- * The lower 32bits of the provisional ID is just filled with a random
- * value, try to match using DCR info.
- */
- if (!I3C_PID_RND_LOWER_32BITS(devinfo.pid)) {
- u16 manuf = I3C_PID_MANUF_ID(devinfo.pid);
- u16 part = I3C_PID_PART_ID(devinfo.pid);
- u16 ext_info = I3C_PID_EXTRA_INFO(devinfo.pid);
+ manuf = I3C_PID_MANUF_ID(devinfo.pid);
+ part = I3C_PID_PART_ID(devinfo.pid);
+ ext_info = I3C_PID_EXTRA_INFO(devinfo.pid);
+ rndpid = I3C_PID_RND_LOWER_32BITS(devinfo.pid);
- /* First try to match by manufacturer/part ID. */
- for (id = id_table; id->match_flags != 0; id++) {
- if ((id->match_flags & I3C_MATCH_MANUF_AND_PART) !=
- I3C_MATCH_MANUF_AND_PART)
- continue;
-
- if (manuf != id->manuf_id || part != id->part_id)
- continue;
-
- if ((id->match_flags & I3C_MATCH_EXTRA_INFO) &&
- ext_info != id->extra_info)
- continue;
-
- return id;
- }
- }
-
- /* Fallback to DCR match. */
for (id = id_table; id->match_flags != 0; id++) {
if ((id->match_flags & I3C_MATCH_DCR) &&
- id->dcr == devinfo.dcr)
- return id;
+ id->dcr != devinfo.dcr)
+ continue;
+
+ if ((id->match_flags & I3C_MATCH_MANUF) &&
+ id->manuf_id != manuf)
+ continue;
+
+ if ((id->match_flags & I3C_MATCH_PART) &&
+ (rndpid || id->part_id != part))
+ continue;
+
+ if ((id->match_flags & I3C_MATCH_EXTRA_INFO) &&
+ (rndpid || id->extra_info != ext_info))
+ continue;
+
+ return id;
}
return NULL;
diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c
index 7f8f896..d79cd6d 100644
--- a/drivers/i3c/master.c
+++ b/drivers/i3c/master.c
@@ -241,12 +241,34 @@ static ssize_t hdrcap_show(struct device *dev,
}
static DEVICE_ATTR_RO(hdrcap);
+static ssize_t modalias_show(struct device *dev,
+ struct device_attribute *da, char *buf)
+{
+ struct i3c_device *i3c = dev_to_i3cdev(dev);
+ struct i3c_device_info devinfo;
+ u16 manuf, part, ext;
+
+ i3c_device_get_info(i3c, &devinfo);
+ manuf = I3C_PID_MANUF_ID(devinfo.pid);
+ part = I3C_PID_PART_ID(devinfo.pid);
+ ext = I3C_PID_EXTRA_INFO(devinfo.pid);
+
+ if (I3C_PID_RND_LOWER_32BITS(devinfo.pid))
+ return sprintf(buf, "i3c:dcr%02Xmanuf%04X", devinfo.dcr,
+ manuf);
+
+ return sprintf(buf, "i3c:dcr%02Xmanuf%04Xpart%04Xext%04X",
+ devinfo.dcr, manuf, part, ext);
+}
+static DEVICE_ATTR_RO(modalias);
+
static struct attribute *i3c_device_attrs[] = {
&dev_attr_bcr.attr,
&dev_attr_dcr.attr,
&dev_attr_pid.attr,
&dev_attr_dynamic_address.attr,
&dev_attr_hdrcap.attr,
+ &dev_attr_modalias.attr,
NULL,
};
ATTRIBUTE_GROUPS(i3c_device);
@@ -267,7 +289,7 @@ static int i3c_device_uevent(struct device *dev, struct kobj_uevent_env *env)
devinfo.dcr, manuf);
return add_uevent_var(env,
- "MODALIAS=i3c:dcr%02Xmanuf%04Xpart%04xext%04x",
+ "MODALIAS=i3c:dcr%02Xmanuf%04Xpart%04Xext%04X",
devinfo.dcr, manuf, part, ext);
}
@@ -1953,7 +1975,7 @@ of_i3c_master_add_i2c_boardinfo(struct i3c_master_controller *master,
* DEFSLVS command.
*/
if (boardinfo->base.flags & I2C_CLIENT_TEN) {
- dev_err(&master->dev, "I2C device with 10 bit address not supported.");
+ dev_err(dev, "I2C device with 10 bit address not supported.");
return -ENOTSUPP;
}
@@ -2138,7 +2160,7 @@ static int i3c_master_i2c_adapter_init(struct i3c_master_controller *master)
* correctly even if one or more i2c devices are not registered.
*/
i3c_bus_for_each_i2cdev(&master->bus, i2cdev)
- i2cdev->dev = i2c_new_device(adap, &i2cdev->boardinfo->base);
+ i2cdev->dev = i2c_new_client_device(adap, &i2cdev->boardinfo->base);
return 0;
}
diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c
index bd26c3b..5c5306c 100644
--- a/drivers/i3c/master/dw-i3c-master.c
+++ b/drivers/i3c/master/dw-i3c-master.c
@@ -221,7 +221,7 @@ struct dw_i3c_xfer {
struct completion comp;
int ret;
unsigned int ncmds;
- struct dw_i3c_cmd cmds[0];
+ struct dw_i3c_cmd cmds[];
};
struct dw_i3c_master {
diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c
index 5471279..3fee8bd 100644
--- a/drivers/i3c/master/i3c-master-cdns.c
+++ b/drivers/i3c/master/i3c-master-cdns.c
@@ -388,7 +388,7 @@ struct cdns_i3c_xfer {
struct completion comp;
int ret;
unsigned int ncmds;
- struct cdns_i3c_cmd cmds[0];
+ struct cdns_i3c_cmd cmds[];
};
struct cdns_i3c_data {
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index 1bb99b5..05c2698 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -361,7 +361,7 @@ static const struct block_device_operations ide_gd_ops = {
.release = ide_gd_release,
.ioctl = ide_gd_ioctl,
#ifdef CONFIG_COMPAT
- .ioctl = ide_gd_compat_ioctl,
+ .compat_ioctl = ide_gd_compat_ioctl,
#endif
.getgeo = ide_gd_getgeo,
.check_events = ide_gd_check_events,
diff --git a/drivers/iio/accel/adxl372.c b/drivers/iio/accel/adxl372.c
index 67b8817..60daf04 100644
--- a/drivers/iio/accel/adxl372.c
+++ b/drivers/iio/accel/adxl372.c
@@ -237,6 +237,7 @@ static const struct adxl372_axis_lookup adxl372_axis_lookup_table[] = {
.realbits = 12, \
.storagebits = 16, \
.shift = 4, \
+ .endianness = IIO_BE, \
}, \
}
diff --git a/drivers/iio/accel/st_accel_i2c.c b/drivers/iio/accel/st_accel_i2c.c
index 633955d..849cf74 100644
--- a/drivers/iio/accel/st_accel_i2c.c
+++ b/drivers/iio/accel/st_accel_i2c.c
@@ -110,7 +110,7 @@ MODULE_DEVICE_TABLE(of, st_accel_of_match);
#ifdef CONFIG_ACPI
static const struct acpi_device_id st_accel_acpi_match[] = {
- {"SMO8840", (kernel_ulong_t)LNG2DM_ACCEL_DEV_NAME},
+ {"SMO8840", (kernel_ulong_t)LIS2DH12_ACCEL_DEV_NAME},
{"SMO8A90", (kernel_ulong_t)LNG2DM_ACCEL_DEV_NAME},
{ },
};
diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c
index a5c7771..9d96f7d 100644
--- a/drivers/iio/adc/at91-sama5d2_adc.c
+++ b/drivers/iio/adc/at91-sama5d2_adc.c
@@ -723,6 +723,7 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state)
for_each_set_bit(bit, indio->active_scan_mask, indio->num_channels) {
struct iio_chan_spec const *chan = at91_adc_chan_get(indio, bit);
+ u32 cor;
if (!chan)
continue;
@@ -732,6 +733,20 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state)
continue;
if (state) {
+ cor = at91_adc_readl(st, AT91_SAMA5D2_COR);
+
+ if (chan->differential)
+ cor |= (BIT(chan->channel) |
+ BIT(chan->channel2)) <<
+ AT91_SAMA5D2_COR_DIFF_OFFSET;
+ else
+ cor &= ~(BIT(chan->channel) <<
+ AT91_SAMA5D2_COR_DIFF_OFFSET);
+
+ at91_adc_writel(st, AT91_SAMA5D2_COR, cor);
+ }
+
+ if (state) {
at91_adc_writel(st, AT91_SAMA5D2_CHER,
BIT(chan->channel));
/* enable irq only if not using DMA */
diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c
index 2aad2cd..76a60d9 100644
--- a/drivers/iio/adc/stm32-dfsdm-adc.c
+++ b/drivers/iio/adc/stm32-dfsdm-adc.c
@@ -842,31 +842,6 @@ static inline void stm32_dfsdm_process_data(struct stm32_dfsdm_adc *adc,
}
}
-static irqreturn_t stm32_dfsdm_adc_trigger_handler(int irq, void *p)
-{
- struct iio_poll_func *pf = p;
- struct iio_dev *indio_dev = pf->indio_dev;
- struct stm32_dfsdm_adc *adc = iio_priv(indio_dev);
- int available = stm32_dfsdm_adc_dma_residue(adc);
-
- while (available >= indio_dev->scan_bytes) {
- s32 *buffer = (s32 *)&adc->rx_buf[adc->bufi];
-
- stm32_dfsdm_process_data(adc, buffer);
-
- iio_push_to_buffers_with_timestamp(indio_dev, buffer,
- pf->timestamp);
- available -= indio_dev->scan_bytes;
- adc->bufi += indio_dev->scan_bytes;
- if (adc->bufi >= adc->buf_sz)
- adc->bufi = 0;
- }
-
- iio_trigger_notify_done(indio_dev->trig);
-
- return IRQ_HANDLED;
-}
-
static void stm32_dfsdm_dma_buffer_done(void *data)
{
struct iio_dev *indio_dev = data;
@@ -874,11 +849,6 @@ static void stm32_dfsdm_dma_buffer_done(void *data)
int available = stm32_dfsdm_adc_dma_residue(adc);
size_t old_pos;
- if (indio_dev->currentmode & INDIO_BUFFER_TRIGGERED) {
- iio_trigger_poll_chained(indio_dev->trig);
- return;
- }
-
/*
* FIXME: In Kernel interface does not support cyclic DMA buffer,and
* offers only an interface to push data samples per samples.
@@ -906,7 +876,15 @@ static void stm32_dfsdm_dma_buffer_done(void *data)
adc->bufi = 0;
old_pos = 0;
}
- /* regular iio buffer without trigger */
+ /*
+ * In DMA mode the trigger services of IIO are not used
+ * (e.g. no call to iio_trigger_poll).
+ * Calling irq handler associated to the hardware trigger is not
+ * relevant as the conversions have already been done. Data
+ * transfers are performed directly in DMA callback instead.
+ * This implementation avoids to call trigger irq handler that
+ * may sleep, in an atomic context (DMA irq handler context).
+ */
if (adc->dev_data->type == DFSDM_IIO)
iio_push_to_buffers(indio_dev, buffer);
}
@@ -1536,8 +1514,7 @@ static int stm32_dfsdm_adc_init(struct iio_dev *indio_dev)
}
ret = iio_triggered_buffer_setup(indio_dev,
- &iio_pollfunc_store_time,
- &stm32_dfsdm_adc_trigger_handler,
+ &iio_pollfunc_store_time, NULL,
&stm32_dfsdm_buffer_setup_ops);
if (ret) {
stm32_dfsdm_dma_release(indio_dev);
diff --git a/drivers/iio/chemical/Kconfig b/drivers/iio/chemical/Kconfig
index 0b91de4..a7e65a5 100644
--- a/drivers/iio/chemical/Kconfig
+++ b/drivers/iio/chemical/Kconfig
@@ -91,6 +91,8 @@
tristate "SPS30 particulate matter sensor"
depends on I2C
select CRC8
+ select IIO_BUFFER
+ select IIO_TRIGGERED_BUFFER
help
Say Y here to build support for the Sensirion SPS30 particulate
matter sensor.
diff --git a/drivers/iio/light/vcnl4000.c b/drivers/iio/light/vcnl4000.c
index b0e241a..e5b00a6 100644
--- a/drivers/iio/light/vcnl4000.c
+++ b/drivers/iio/light/vcnl4000.c
@@ -167,16 +167,17 @@ static int vcnl4200_init(struct vcnl4000_data *data)
data->vcnl4200_ps.reg = VCNL4200_PS_DATA;
switch (id) {
case VCNL4200_PROD_ID:
- /* Integration time is 50ms, but the experiments */
- /* show 54ms in total. */
- data->vcnl4200_al.sampling_rate = ktime_set(0, 54000 * 1000);
- data->vcnl4200_ps.sampling_rate = ktime_set(0, 4200 * 1000);
+ /* Default wait time is 50ms, add 20% tolerance. */
+ data->vcnl4200_al.sampling_rate = ktime_set(0, 60000 * 1000);
+ /* Default wait time is 4.8ms, add 20% tolerance. */
+ data->vcnl4200_ps.sampling_rate = ktime_set(0, 5760 * 1000);
data->al_scale = 24000;
break;
case VCNL4040_PROD_ID:
- /* Integration time is 80ms, add 10ms. */
- data->vcnl4200_al.sampling_rate = ktime_set(0, 100000 * 1000);
- data->vcnl4200_ps.sampling_rate = ktime_set(0, 100000 * 1000);
+ /* Default wait time is 80ms, add 20% tolerance. */
+ data->vcnl4200_al.sampling_rate = ktime_set(0, 96000 * 1000);
+ /* Default wait time is 5ms, add 20% tolerance. */
+ data->vcnl4200_ps.sampling_rate = ktime_set(0, 6000 * 1000);
data->al_scale = 120000;
break;
}
diff --git a/drivers/iio/magnetometer/ak8974.c b/drivers/iio/magnetometer/ak8974.c
index fc7e910..d329967 100644
--- a/drivers/iio/magnetometer/ak8974.c
+++ b/drivers/iio/magnetometer/ak8974.c
@@ -564,7 +564,7 @@ static int ak8974_read_raw(struct iio_dev *indio_dev,
* We read all axes and discard all but one, for optimized
* reading, use the triggered buffer.
*/
- *val = le16_to_cpu(hw_values[chan->address]);
+ *val = (s16)le16_to_cpu(hw_values[chan->address]);
ret = IIO_VAL_INT;
}
diff --git a/drivers/iio/proximity/ping.c b/drivers/iio/proximity/ping.c
index 34aff10..12b893c 100644
--- a/drivers/iio/proximity/ping.c
+++ b/drivers/iio/proximity/ping.c
@@ -269,7 +269,7 @@ static const struct iio_chan_spec ping_chan_spec[] = {
static const struct of_device_id of_ping_match[] = {
{ .compatible = "parallax,ping", .data = &pa_ping_cfg},
- { .compatible = "parallax,laserping", .data = &pa_ping_cfg},
+ { .compatible = "parallax,laserping", .data = &pa_laser_ping_cfg},
{},
};
diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c
index 2e0d32a..2f82e8c 100644
--- a/drivers/iio/trigger/stm32-timer-trigger.c
+++ b/drivers/iio/trigger/stm32-timer-trigger.c
@@ -161,7 +161,8 @@ static int stm32_timer_start(struct stm32_timer_trigger *priv,
return 0;
}
-static void stm32_timer_stop(struct stm32_timer_trigger *priv)
+static void stm32_timer_stop(struct stm32_timer_trigger *priv,
+ struct iio_trigger *trig)
{
u32 ccer, cr1;
@@ -179,6 +180,12 @@ static void stm32_timer_stop(struct stm32_timer_trigger *priv)
regmap_write(priv->regmap, TIM_PSC, 0);
regmap_write(priv->regmap, TIM_ARR, 0);
+ /* Force disable master mode */
+ if (stm32_timer_is_trgo2_name(trig->name))
+ regmap_update_bits(priv->regmap, TIM_CR2, TIM_CR2_MMS2, 0);
+ else
+ regmap_update_bits(priv->regmap, TIM_CR2, TIM_CR2_MMS, 0);
+
/* Make sure that registers are updated */
regmap_update_bits(priv->regmap, TIM_EGR, TIM_EGR_UG, TIM_EGR_UG);
}
@@ -197,7 +204,7 @@ static ssize_t stm32_tt_store_frequency(struct device *dev,
return ret;
if (freq == 0) {
- stm32_timer_stop(priv);
+ stm32_timer_stop(priv, trig);
} else {
ret = stm32_timer_start(priv, trig, freq);
if (ret)
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 68cc1b2..15e99a8 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1191,6 +1191,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
/* Sharing an ib_cm_id with different handlers is not
* supported */
spin_unlock_irqrestore(&cm.lock, flags);
+ ib_destroy_cm_id(cm_id);
return ERR_PTR(-EINVAL);
}
refcount_inc(&cm_id_priv->refcount);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 72f0321..2dec3a0 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -3212,19 +3212,26 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
+ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
if (id_priv->state == RDMA_CM_IDLE) {
ret = cma_bind_addr(id, src_addr, dst_addr);
- if (ret)
+ if (ret) {
+ memset(cma_dst_addr(id_priv), 0,
+ rdma_addr_size(dst_addr));
return ret;
+ }
}
- if (cma_family(id_priv) != dst_addr->sa_family)
+ if (cma_family(id_priv) != dst_addr->sa_family) {
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
return -EINVAL;
+ }
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
return -EINVAL;
+ }
- memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
if (cma_any_addr(dst_addr)) {
ret = cma_resolve_loopback(id_priv);
} else {
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index b1457b3..cf42acc 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -338,6 +338,20 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
qp->pd = pd;
qp->uobject = uobj;
qp->real_qp = qp;
+
+ qp->qp_type = attr->qp_type;
+ qp->rwq_ind_tbl = attr->rwq_ind_tbl;
+ qp->send_cq = attr->send_cq;
+ qp->recv_cq = attr->recv_cq;
+ qp->srq = attr->srq;
+ qp->rwq_ind_tbl = attr->rwq_ind_tbl;
+ qp->event_handler = attr->event_handler;
+
+ atomic_set(&qp->usecnt, 0);
+ spin_lock_init(&qp->mr_lock);
+ INIT_LIST_HEAD(&qp->rdma_mrs);
+ INIT_LIST_HEAD(&qp->sig_mrs);
+
/*
* We don't track XRC QPs for now, because they don't have PD
* and more importantly they are created internaly by driver,
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index f6c2552..d0b3d35 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -896,7 +896,9 @@ static int add_one_compat_dev(struct ib_device *device,
cdev->dev.parent = device->dev.parent;
rdma_init_coredev(cdev, device, read_pnet(&rnet->net));
cdev->dev.release = compatdev_release;
- dev_set_name(&cdev->dev, "%s", dev_name(&device->dev));
+ ret = dev_set_name(&cdev->dev, "%s", dev_name(&device->dev));
+ if (ret)
+ goto add_err;
ret = device_add(&cdev->dev);
if (ret)
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index ade7182..da8adad 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -159,8 +159,10 @@ static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
{
struct list_head *e, *tmp;
- list_for_each_safe(e, tmp, &cm_id_priv->work_free_list)
+ list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) {
+ list_del(e);
kfree(list_entry(e, struct iwcm_work, free_list));
+ }
}
static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 37b433aa..9eec26d1 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -918,6 +918,10 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
IB_DEVICE_NAME_MAX);
+ if (strlen(name) == 0) {
+ err = -EINVAL;
+ goto done;
+ }
err = ib_device_rename(device, name);
goto done;
}
@@ -1514,7 +1518,7 @@ static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
sizeof(ibdev_name));
- if (strchr(ibdev_name, '%'))
+ if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
return -EINVAL;
nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
@@ -1757,6 +1761,8 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
if (ret)
goto err_msg;
} else {
+ if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
+ goto err_msg;
qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index 4fad732..06e5b67 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -273,6 +273,23 @@ static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
return 1;
}
+static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
+ u32 sg_cnt, enum dma_data_direction dir)
+{
+ if (is_pci_p2pdma_page(sg_page(sg)))
+ pci_p2pdma_unmap_sg(dev->dma_device, sg, sg_cnt, dir);
+ else
+ ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
+}
+
+static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg,
+ u32 sg_cnt, enum dma_data_direction dir)
+{
+ if (is_pci_p2pdma_page(sg_page(sg)))
+ return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir);
+ return ib_dma_map_sg(dev, sg, sg_cnt, dir);
+}
+
/**
* rdma_rw_ctx_init - initialize a RDMA READ/WRITE context
* @ctx: context to initialize
@@ -295,11 +312,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
struct ib_device *dev = qp->pd->device;
int ret;
- if (is_pci_p2pdma_page(sg_page(sg)))
- ret = pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir);
- else
- ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);
-
+ ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir);
if (!ret)
return -ENOMEM;
sg_cnt = ret;
@@ -338,7 +351,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
return ret;
out_unmap_sg:
- ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
+ rdma_rw_unmap_sg(dev, sg, sg_cnt, dir);
return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_init);
@@ -588,11 +601,7 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
break;
}
- if (is_pci_p2pdma_page(sg_page(sg)))
- pci_p2pdma_unmap_sg(qp->pd->device->dma_device, sg,
- sg_cnt, dir);
- else
- ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
+ rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy);
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 6eb6d27..75e7ec0 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -339,27 +339,20 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp,
if (!new_pps)
return NULL;
- if (qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) {
- if (!qp_pps) {
- new_pps->main.port_num = qp_attr->port_num;
- new_pps->main.pkey_index = qp_attr->pkey_index;
- } else {
- new_pps->main.port_num = (qp_attr_mask & IB_QP_PORT) ?
- qp_attr->port_num :
- qp_pps->main.port_num;
-
- new_pps->main.pkey_index =
- (qp_attr_mask & IB_QP_PKEY_INDEX) ?
- qp_attr->pkey_index :
- qp_pps->main.pkey_index;
- }
- new_pps->main.state = IB_PORT_PKEY_VALID;
- } else if (qp_pps) {
+ if (qp_attr_mask & IB_QP_PORT)
+ new_pps->main.port_num = qp_attr->port_num;
+ else if (qp_pps)
new_pps->main.port_num = qp_pps->main.port_num;
+
+ if (qp_attr_mask & IB_QP_PKEY_INDEX)
+ new_pps->main.pkey_index = qp_attr->pkey_index;
+ else if (qp_pps)
new_pps->main.pkey_index = qp_pps->main.pkey_index;
- if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID)
- new_pps->main.state = IB_PORT_PKEY_VALID;
- }
+
+ if (((qp_attr_mask & IB_QP_PKEY_INDEX) &&
+ (qp_attr_mask & IB_QP_PORT)) ||
+ (qp_pps && qp_pps->main.state != IB_PORT_PKEY_NOT_VALID))
+ new_pps->main.state = IB_PORT_PKEY_VALID;
if (qp_attr_mask & IB_QP_ALT_PATH) {
new_pps->alt.port_num = qp_attr->alt_port_num;
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index b8c657b..3b1e627 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -181,14 +181,28 @@ ib_umem_odp_alloc_child(struct ib_umem_odp *root, unsigned long addr,
odp_data->page_shift = PAGE_SHIFT;
odp_data->notifier.ops = ops;
+ /*
+ * A mmget must be held when registering a notifier, the owming_mm only
+ * has a mm_grab at this point.
+ */
+ if (!mmget_not_zero(umem->owning_mm)) {
+ ret = -EFAULT;
+ goto out_free;
+ }
+
odp_data->tgid = get_pid(root->tgid);
ret = ib_init_umem_odp(odp_data, ops);
- if (ret) {
- put_pid(odp_data->tgid);
- kfree(odp_data);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto out_tgid;
+ mmput(umem->owning_mm);
return odp_data;
+
+out_tgid:
+ put_pid(odp_data->tgid);
+ mmput(umem->owning_mm);
+out_free:
+ kfree(odp_data);
+ return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_umem_odp_alloc_child);
@@ -261,8 +275,8 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
mmu_interval_notifier_remove(&umem_odp->notifier);
kvfree(umem_odp->dma_list);
kvfree(umem_odp->page_list);
- put_pid(umem_odp->tgid);
}
+ put_pid(umem_odp->tgid);
kfree(umem_odp);
}
EXPORT_SYMBOL(ib_umem_odp_release);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index d1407fa..da229eab 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -1129,17 +1129,30 @@ static const struct file_operations umad_sm_fops = {
.llseek = no_llseek,
};
+static struct ib_umad_port *get_port(struct ib_device *ibdev,
+ struct ib_umad_device *umad_dev,
+ unsigned int port)
+{
+ if (!umad_dev)
+ return ERR_PTR(-EOPNOTSUPP);
+ if (!rdma_is_port_valid(ibdev, port))
+ return ERR_PTR(-EINVAL);
+ if (!rdma_cap_ib_mad(ibdev, port))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ return &umad_dev->ports[port - rdma_start_port(ibdev)];
+}
+
static int ib_umad_get_nl_info(struct ib_device *ibdev, void *client_data,
struct ib_client_nl_info *res)
{
- struct ib_umad_device *umad_dev = client_data;
+ struct ib_umad_port *port = get_port(ibdev, client_data, res->port);
- if (!rdma_is_port_valid(ibdev, res->port))
- return -EINVAL;
+ if (IS_ERR(port))
+ return PTR_ERR(port);
res->abi = IB_USER_MAD_ABI_VERSION;
- res->cdev = &umad_dev->ports[res->port - rdma_start_port(ibdev)].dev;
-
+ res->cdev = &port->dev;
return 0;
}
@@ -1154,15 +1167,13 @@ MODULE_ALIAS_RDMA_CLIENT("umad");
static int ib_issm_get_nl_info(struct ib_device *ibdev, void *client_data,
struct ib_client_nl_info *res)
{
- struct ib_umad_device *umad_dev =
- ib_get_client_data(ibdev, &umad_client);
+ struct ib_umad_port *port = get_port(ibdev, client_data, res->port);
- if (!rdma_is_port_valid(ibdev, res->port))
- return -EINVAL;
+ if (IS_ERR(port))
+ return PTR_ERR(port);
res->abi = IB_USER_MAD_ABI_VERSION;
- res->cdev = &umad_dev->ports[res->port - rdma_start_port(ibdev)].sm_dev;
-
+ res->cdev = &port->sm_dev;
return 0;
}
@@ -1312,6 +1323,9 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
struct ib_umad_file *file;
int id;
+ cdev_device_del(&port->sm_cdev, &port->sm_dev);
+ cdev_device_del(&port->cdev, &port->dev);
+
mutex_lock(&port->file_mutex);
/* Mark ib_dev NULL and block ioctl or other file ops to progress
@@ -1331,8 +1345,6 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
mutex_unlock(&port->file_mutex);
- cdev_device_del(&port->sm_cdev, &port->sm_dev);
- cdev_device_del(&port->cdev, &port->dev);
ida_free(&umad_ida, port->dev_num);
/* balances device_initialize() */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index c8693f5..060b4eb 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1445,16 +1445,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
if (ret)
goto err_cb;
- qp->pd = pd;
- qp->send_cq = attr.send_cq;
- qp->recv_cq = attr.recv_cq;
- qp->srq = attr.srq;
- qp->rwq_ind_tbl = ind_tbl;
- qp->event_handler = attr.event_handler;
- qp->qp_type = attr.qp_type;
- atomic_set(&qp->usecnt, 0);
atomic_inc(&pd->usecnt);
- qp->port = 0;
if (attr.send_cq)
atomic_inc(&attr.send_cq->usecnt);
if (attr.recv_cq)
@@ -2745,12 +2736,6 @@ static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs,
return 0;
}
-static size_t kern_spec_filter_sz(const struct ib_uverbs_flow_spec_hdr *spec)
-{
- /* Returns user space filter size, includes padding */
- return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2;
-}
-
static ssize_t spec_filter_size(const void *kern_spec_filter, u16 kern_filter_size,
u16 ib_real_filter_sz)
{
@@ -2894,11 +2879,16 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec)
{
- ssize_t kern_filter_sz;
+ size_t kern_filter_sz;
void *kern_spec_mask;
void *kern_spec_val;
- kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
+ if (check_sub_overflow((size_t)kern_spec->hdr.size,
+ sizeof(struct ib_uverbs_flow_spec_hdr),
+ &kern_filter_sz))
+ return -EINVAL;
+
+ kern_filter_sz /= 2;
kern_spec_val = (void *)kern_spec +
sizeof(struct ib_uverbs_flow_spec_hdr);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 994d874..3abfc63 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -220,6 +220,7 @@ void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue)
list_for_each_entry_safe(entry, tmp, &event_queue->event_list, list) {
if (entry->counter)
list_del(&entry->obj_list);
+ list_del(&entry->list);
kfree(entry);
}
spin_unlock_irq(&event_queue->lock);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 3ebae3b..e62c9df 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1185,16 +1185,6 @@ struct ib_qp *ib_create_qp_user(struct ib_pd *pd,
if (ret)
goto err;
- qp->qp_type = qp_init_attr->qp_type;
- qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl;
-
- atomic_set(&qp->usecnt, 0);
- qp->mrs_used = 0;
- spin_lock_init(&qp->mr_lock);
- INIT_LIST_HEAD(&qp->rdma_mrs);
- INIT_LIST_HEAD(&qp->sig_mrs);
- qp->port = 0;
-
if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) {
struct ib_qp *xrc_qp =
create_xrc_qp_user(qp, qp_init_attr, udata);
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index ee1182f..d69dece3 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -3036,6 +3036,10 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
}
+ /* As per draft-hilland-iwarp-verbs-v1.0, sec 6.2.3,
+ * when entering the TERM state the RNIC MUST initiate a CLOSE.
+ */
+ c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
c4iw_put_ep(&ep->com);
} else
pr_warn("TERM received tid %u no ep/qp\n", tid);
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index bbcac53..89ac2f9 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1948,10 +1948,10 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
qhp->attr.layer_etype = attrs->layer_etype;
qhp->attr.ecode = attrs->ecode;
ep = qhp->ep;
- c4iw_get_ep(&ep->com);
- disconnect = 1;
if (!internal) {
+ c4iw_get_ep(&ep->com);
terminate = 1;
+ disconnect = 1;
} else {
terminate = qhp->attr.send_term;
ret = rdma_fini(rhp, qhp, ep);
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index c142b23..1aeea5d 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -479,6 +479,8 @@ static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd,
rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu);
}
+ free_cpumask_var(available_cpus);
+ free_cpumask_var(non_intr_cpus);
return 0;
fail:
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index bef6946..2591158 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -200,23 +200,24 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
fd = kzalloc(sizeof(*fd), GFP_KERNEL);
- if (fd) {
- fd->rec_cpu_num = -1; /* no cpu affinity by default */
- fd->mm = current->mm;
- mmgrab(fd->mm);
- fd->dd = dd;
- kobject_get(&fd->dd->kobj);
- fp->private_data = fd;
- } else {
- fp->private_data = NULL;
-
- if (atomic_dec_and_test(&dd->user_refcount))
- complete(&dd->user_comp);
-
- return -ENOMEM;
- }
-
+ if (!fd || init_srcu_struct(&fd->pq_srcu))
+ goto nomem;
+ spin_lock_init(&fd->pq_rcu_lock);
+ spin_lock_init(&fd->tid_lock);
+ spin_lock_init(&fd->invalid_lock);
+ fd->rec_cpu_num = -1; /* no cpu affinity by default */
+ fd->mm = current->mm;
+ mmgrab(fd->mm);
+ fd->dd = dd;
+ kobject_get(&fd->dd->kobj);
+ fp->private_data = fd;
return 0;
+nomem:
+ kfree(fd);
+ fp->private_data = NULL;
+ if (atomic_dec_and_test(&dd->user_refcount))
+ complete(&dd->user_comp);
+ return -ENOMEM;
}
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
@@ -301,21 +302,30 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
{
struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
- struct hfi1_user_sdma_pkt_q *pq = fd->pq;
+ struct hfi1_user_sdma_pkt_q *pq;
struct hfi1_user_sdma_comp_q *cq = fd->cq;
int done = 0, reqs = 0;
unsigned long dim = from->nr_segs;
+ int idx;
- if (!cq || !pq)
+ idx = srcu_read_lock(&fd->pq_srcu);
+ pq = srcu_dereference(fd->pq, &fd->pq_srcu);
+ if (!cq || !pq) {
+ srcu_read_unlock(&fd->pq_srcu, idx);
return -EIO;
+ }
- if (!iter_is_iovec(from) || !dim)
+ if (!iter_is_iovec(from) || !dim) {
+ srcu_read_unlock(&fd->pq_srcu, idx);
return -EINVAL;
+ }
trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim);
- if (atomic_read(&pq->n_reqs) == pq->n_max_reqs)
+ if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) {
+ srcu_read_unlock(&fd->pq_srcu, idx);
return -ENOSPC;
+ }
while (dim) {
int ret;
@@ -333,6 +343,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
reqs++;
}
+ srcu_read_unlock(&fd->pq_srcu, idx);
return reqs;
}
@@ -707,6 +718,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
if (atomic_dec_and_test(&dd->user_refcount))
complete(&dd->user_comp);
+ cleanup_srcu_struct(&fdata->pq_srcu);
kfree(fdata);
return 0;
}
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 6365e8f..cae12f4 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1444,10 +1444,13 @@ struct mmu_rb_handler;
/* Private data for file operations */
struct hfi1_filedata {
+ struct srcu_struct pq_srcu;
struct hfi1_devdata *dd;
struct hfi1_ctxtdata *uctxt;
struct hfi1_user_sdma_comp_q *cq;
- struct hfi1_user_sdma_pkt_q *pq;
+ /* update side lock for SRCU */
+ spinlock_t pq_rcu_lock;
+ struct hfi1_user_sdma_pkt_q __rcu *pq;
u16 subctxt;
/* for cpu affinity; -1 if none */
int rec_cpu_num;
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index f05742a..4da03f8 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -87,9 +87,6 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
{
int ret = 0;
- spin_lock_init(&fd->tid_lock);
- spin_lock_init(&fd->invalid_lock);
-
fd->entry_to_rb = kcalloc(uctxt->expected_count,
sizeof(struct rb_node *),
GFP_KERNEL);
@@ -142,10 +139,12 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
+ mutex_lock(&uctxt->exp_mutex);
if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
+ mutex_unlock(&uctxt->exp_mutex);
kfree(fd->invalid_tids);
fd->invalid_tids = NULL;
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index fd754a1..13e4203 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -141,6 +141,7 @@ static int defer_packet_queue(
*/
xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
if (list_empty(&pq->busy.list)) {
+ pq->busy.lock = &sde->waitlock;
iowait_get_priority(&pq->busy);
iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
}
@@ -155,6 +156,7 @@ static void activate_packet_queue(struct iowait *wait, int reason)
{
struct hfi1_user_sdma_pkt_q *pq =
container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
+ pq->busy.lock = NULL;
xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
wake_up(&wait->wait_dma);
};
@@ -179,7 +181,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
pq = kzalloc(sizeof(*pq), GFP_KERNEL);
if (!pq)
return -ENOMEM;
-
pq->dd = dd;
pq->ctxt = uctxt->ctxt;
pq->subctxt = fd->subctxt;
@@ -236,7 +237,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
goto pq_mmu_fail;
}
- fd->pq = pq;
+ rcu_assign_pointer(fd->pq, pq);
fd->cq = cq;
return 0;
@@ -257,6 +258,21 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
return ret;
}
+static void flush_pq_iowait(struct hfi1_user_sdma_pkt_q *pq)
+{
+ unsigned long flags;
+ seqlock_t *lock = pq->busy.lock;
+
+ if (!lock)
+ return;
+ write_seqlock_irqsave(lock, flags);
+ if (!list_empty(&pq->busy.list)) {
+ list_del_init(&pq->busy.list);
+ pq->busy.lock = NULL;
+ }
+ write_sequnlock_irqrestore(lock, flags);
+}
+
int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
struct hfi1_ctxtdata *uctxt)
{
@@ -264,8 +280,14 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt);
- pq = fd->pq;
+ spin_lock(&fd->pq_rcu_lock);
+ pq = srcu_dereference_check(fd->pq, &fd->pq_srcu,
+ lockdep_is_held(&fd->pq_rcu_lock));
if (pq) {
+ rcu_assign_pointer(fd->pq, NULL);
+ spin_unlock(&fd->pq_rcu_lock);
+ synchronize_srcu(&fd->pq_srcu);
+ /* at this point there can be no more new requests */
if (pq->handler)
hfi1_mmu_rb_unregister(pq->handler);
iowait_sdma_drain(&pq->busy);
@@ -276,8 +298,10 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
kfree(pq->reqs);
kfree(pq->req_in_use);
kmem_cache_destroy(pq->txreq_cache);
+ flush_pq_iowait(pq);
kfree(pq);
- fd->pq = NULL;
+ } else {
+ spin_unlock(&fd->pq_rcu_lock);
}
if (fd->cq) {
vfree(fd->cq->comps);
@@ -321,7 +345,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
{
int ret = 0, i;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct hfi1_user_sdma_pkt_q *pq = fd->pq;
+ struct hfi1_user_sdma_pkt_q *pq =
+ srcu_dereference(fd->pq, &fd->pq_srcu);
struct hfi1_user_sdma_comp_q *cq = fd->cq;
struct hfi1_devdata *dd = pq->dd;
unsigned long idx = 0;
@@ -580,11 +605,12 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
if (ret < 0) {
if (ret != -EBUSY)
goto free_req;
- wait_event_interruptible_timeout(
+ if (wait_event_interruptible_timeout(
pq->busy.wait_dma,
- (pq->state == SDMA_PKT_Q_ACTIVE),
+ pq->state == SDMA_PKT_Q_ACTIVE,
msecs_to_jiffies(
- SDMA_IOWAIT_TIMEOUT));
+ SDMA_IOWAIT_TIMEOUT)) <= 0)
+ flush_pq_iowait(pq);
}
}
*count += idx;
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 089e201..2f6323a 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -515,10 +515,11 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet,
opa_get_lid(packet->dlid, 9B));
if (!mcast)
goto drop;
+ rcu_read_lock();
list_for_each_entry_rcu(p, &mcast->qp_list, list) {
packet->qp = p->qp;
if (hfi1_do_pkey_check(packet))
- goto drop;
+ goto unlock_drop;
spin_lock_irqsave(&packet->qp->r_lock, flags);
packet_handler = qp_ok(packet);
if (likely(packet_handler))
@@ -527,6 +528,7 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet,
ibp->rvp.n_pkt_drops++;
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
}
+ rcu_read_unlock();
/*
* Notify rvt_multicast_detach() if it is waiting for us
* to finish.
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 367a71b..3dec3de 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -330,6 +330,22 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
dump_cqe(dev, cqe);
}
+static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
+ u16 tail, u16 head)
+{
+ u16 idx;
+
+ do {
+ idx = tail & (qp->sq.wqe_cnt - 1);
+ if (idx == head)
+ break;
+
+ tail = qp->sq.w_list[idx].next;
+ } while (1);
+ tail = qp->sq.w_list[idx].next;
+ qp->sq.last_poll = tail;
+}
+
static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
mlx5_frag_buf_free(dev->mdev, &buf->frag_buf);
@@ -368,7 +384,7 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
}
static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc,
- int *npolled, int is_send)
+ int *npolled, bool is_send)
{
struct mlx5_ib_wq *wq;
unsigned int cur;
@@ -383,10 +399,16 @@ static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc,
return;
for (i = 0; i < cur && np < num_entries; i++) {
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ unsigned int idx;
+
+ idx = (is_send) ? wq->last_poll : wq->tail;
+ idx &= (wq->wqe_cnt - 1);
+ wc->wr_id = wq->wrid[idx];
wc->status = IB_WC_WR_FLUSH_ERR;
wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
wq->tail++;
+ if (is_send)
+ wq->last_poll = wq->w_list[idx].next;
np++;
wc->qp = &qp->ibqp;
wc++;
@@ -473,6 +495,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
idx = wqe_ctr & (wq->wqe_cnt - 1);
handle_good_req(wc, cqe64, wq, idx);
+ handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
wc->wr_id = wq->wrid[idx];
wq->tail = wq->wqe_head[idx] + 1;
wc->status = IB_WC_SUCCESS;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index d7efc9f..46e1ab7 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -2319,14 +2319,12 @@ static int deliver_event(struct devx_event_subscription *event_sub,
if (ev_file->omit_data) {
spin_lock_irqsave(&ev_file->lock, flags);
- if (!list_empty(&event_sub->event_list)) {
+ if (!list_empty(&event_sub->event_list) ||
+ ev_file->is_destroyed) {
spin_unlock_irqrestore(&ev_file->lock, flags);
return 0;
}
- /* is_destroyed is ignored here because we don't have any memory
- * allocation to clean up for the omit_data case
- */
list_add_tail(&event_sub->event_list, &ev_file->event_list);
spin_unlock_irqrestore(&ev_file->lock, flags);
wake_up_interruptible(&ev_file->poll_wait);
@@ -2473,11 +2471,11 @@ static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
return -ERESTARTSYS;
}
- if (list_empty(&ev_queue->event_list) &&
- ev_queue->is_destroyed)
- return -EIO;
-
spin_lock_irq(&ev_queue->lock);
+ if (ev_queue->is_destroyed) {
+ spin_unlock_irq(&ev_queue->lock);
+ return -EIO;
+ }
}
event = list_entry(ev_queue->event_list.next,
@@ -2551,10 +2549,6 @@ static ssize_t devx_async_event_read(struct file *filp, char __user *buf,
return -EOVERFLOW;
}
- if (ev_file->is_destroyed) {
- spin_unlock_irq(&ev_file->lock);
- return -EIO;
- }
while (list_empty(&ev_file->event_list)) {
spin_unlock_irq(&ev_file->lock);
@@ -2667,8 +2661,10 @@ static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj,
spin_lock_irq(&comp_ev_file->ev_queue.lock);
list_for_each_entry_safe(entry, tmp,
- &comp_ev_file->ev_queue.event_list, list)
+ &comp_ev_file->ev_queue.event_list, list) {
+ list_del(&entry->list);
kvfree(entry);
+ }
spin_unlock_irq(&comp_ev_file->ev_queue.lock);
return 0;
};
@@ -2680,11 +2676,29 @@ static int devx_async_event_destroy_uobj(struct ib_uobject *uobj,
container_of(uobj, struct devx_async_event_file,
uobj);
struct devx_event_subscription *event_sub, *event_sub_tmp;
- struct devx_async_event_data *entry, *tmp;
struct mlx5_ib_dev *dev = ev_file->dev;
spin_lock_irq(&ev_file->lock);
ev_file->is_destroyed = 1;
+
+ /* free the pending events allocation */
+ if (ev_file->omit_data) {
+ struct devx_event_subscription *event_sub, *tmp;
+
+ list_for_each_entry_safe(event_sub, tmp, &ev_file->event_list,
+ event_list)
+ list_del_init(&event_sub->event_list);
+
+ } else {
+ struct devx_async_event_data *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &ev_file->event_list,
+ list) {
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ }
+
spin_unlock_irq(&ev_file->lock);
wake_up_interruptible(&ev_file->poll_wait);
@@ -2699,15 +2713,6 @@ static int devx_async_event_destroy_uobj(struct ib_uobject *uobj,
}
mutex_unlock(&dev->devx_event_table.event_xa_lock);
- /* free the pending events allocation */
- if (!ev_file->omit_data) {
- spin_lock_irq(&ev_file->lock);
- list_for_each_entry_safe(entry, tmp,
- &ev_file->event_list, list)
- kfree(entry); /* read can't come any more */
- spin_unlock_irq(&ev_file->lock);
- }
-
put_device(&dev->ib_dev.dev);
return 0;
};
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index e874d68..ffa7c21 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2283,8 +2283,8 @@ static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev,
static u64 mlx5_entry_to_mmap_offset(struct mlx5_user_mmap_entry *entry)
{
- u16 cmd = entry->rdma_entry.start_pgoff >> 16;
- u16 index = entry->rdma_entry.start_pgoff & 0xFFFF;
+ u64 cmd = (entry->rdma_entry.start_pgoff >> 16) & 0xFFFF;
+ u64 index = entry->rdma_entry.start_pgoff & 0xFFFF;
return (((index >> 8) << 16) | (cmd << MLX5_IB_MMAP_CMD_SHIFT) |
(index & 0xFF)) << PAGE_SHIFT;
@@ -5722,9 +5722,10 @@ mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
const struct mlx5_ib_counters *cnts =
get_counters(dev, counter->port - 1);
- /* Q counters are in the beginning of all counters */
return rdma_alloc_hw_stats_struct(cnts->names,
- cnts->num_q_counters,
+ cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
@@ -6545,7 +6546,7 @@ static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev)
doorbell_bar_offset);
bar_size = (1ULL << log_doorbell_bar_size) * 4096;
var_table->stride_size = 1ULL << log_doorbell_stride;
- var_table->num_var_hw_entries = bar_size / var_table->stride_size;
+ var_table->num_var_hw_entries = div64_u64(bar_size, var_table->stride_size);
mutex_init(&var_table->bitmap_lock);
var_table->bitmap = bitmap_zalloc(var_table->num_var_hw_entries,
GFP_KERNEL);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index d9bffcc..f3bdbd5 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -288,6 +288,7 @@ struct mlx5_ib_wq {
unsigned head;
unsigned tail;
u16 cur_post;
+ u16 last_poll;
void *cur_edge;
};
@@ -636,6 +637,7 @@ struct mlx5_ib_mr {
/* For ODP and implicit */
atomic_t num_deferred_work;
+ wait_queue_head_t q_deferred_work;
struct xarray implicit_children;
union {
struct rcu_head rcu;
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 4216814..bf50cd9 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -235,7 +235,8 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt)
mr->parent = NULL;
mlx5_mr_cache_free(mr->dev, mr);
ib_umem_odp_release(odp);
- atomic_dec(&imr->num_deferred_work);
+ if (atomic_dec_and_test(&imr->num_deferred_work))
+ wake_up(&imr->q_deferred_work);
}
static void free_implicit_child_mr_work(struct work_struct *work)
@@ -554,6 +555,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
imr->umem = &umem_odp->umem;
imr->is_odp_implicit = true;
atomic_set(&imr->num_deferred_work, 0);
+ init_waitqueue_head(&imr->q_deferred_work);
xa_init(&imr->implicit_children);
err = mlx5_ib_update_xlt(imr, 0,
@@ -611,10 +613,7 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
* under xa_lock while the child is in the xarray. Thus at this point
* it is only decreasing, and all work holding it is now on the wq.
*/
- if (atomic_read(&imr->num_deferred_work)) {
- flush_workqueue(system_unbound_wq);
- WARN_ON(atomic_read(&imr->num_deferred_work));
- }
+ wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work));
/*
* Fence the imr before we destroy the children. This allows us to
@@ -645,10 +644,7 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr)
/* Wait for all running page-fault handlers to finish. */
synchronize_srcu(&mr->dev->odp_srcu);
- if (atomic_read(&mr->num_deferred_work)) {
- flush_workqueue(system_unbound_wq);
- WARN_ON(atomic_read(&mr->num_deferred_work));
- }
+ wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work));
dma_fence_odp_mr(mr);
}
@@ -1720,7 +1716,8 @@ static void destroy_prefetch_work(struct prefetch_mr_work *work)
u32 i;
for (i = 0; i < work->num_sge; ++i)
- atomic_dec(&work->frags[i].mr->num_deferred_work);
+ if (atomic_dec_and_test(&work->frags[i].mr->num_deferred_work))
+ wake_up(&work->frags[i].mr->q_deferred_work);
kvfree(work);
}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index a4f8e70..8fe149e 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -3441,9 +3441,6 @@ static int __mlx5_ib_qp_set_counter(struct ib_qp *qp,
struct mlx5_ib_qp_base *base;
u32 set_id;
- if (!MLX5_CAP_GEN(dev->mdev, rts2rts_qp_counters_set_id))
- return 0;
-
if (counter)
set_id = counter->id;
else
@@ -3778,6 +3775,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
qp->sq.cur_post = 0;
if (qp->sq.wqe_cnt)
qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
+ qp->sq.last_poll = 0;
qp->db.db[MLX5_RCV_DBR] = 0;
qp->db.db[MLX5_SND_DBR] = 0;
}
@@ -6207,6 +6205,10 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
if (udata->outlen && udata->outlen < min_resp_len)
return ERR_PTR(-EINVAL);
+ if (!capable(CAP_SYS_RAWIO) &&
+ init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP)
+ return ERR_PTR(-EPERM);
+
dev = to_mdev(pd->device);
switch (init_attr->wq_type) {
case IB_WQT_RQ:
@@ -6576,6 +6578,7 @@ void mlx5_ib_drain_rq(struct ib_qp *qp)
*/
int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter)
{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct mlx5_ib_qp *mqp = to_mqp(qp);
int err = 0;
@@ -6585,6 +6588,11 @@ int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter)
goto out;
}
+ if (!MLX5_CAP_GEN(dev->mdev, rts2rts_qp_counters_set_id)) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
if (mqp->state == IB_QPS_RTS) {
err = __mlx5_ib_qp_set_counter(qp, counter);
if (!err)
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 33778d4..5ef93f8 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -329,8 +329,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
if (mcast == NULL)
goto drop;
this_cpu_inc(ibp->pmastats->n_multicast_rcv);
+ rcu_read_lock();
list_for_each_entry_rcu(p, &mcast->qp_list, list)
qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
+ rcu_read_unlock();
/*
* Notify rvt_multicast_detach() if it is waiting for us
* to finish.
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 13d7f66..5724cbb 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -327,7 +327,7 @@ void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
if (cq->ip)
kref_put(&cq->ip->ref, rvt_release_mmap_info);
else
- vfree(cq->queue);
+ vfree(cq->kqueue);
}
/**
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 3cdf75d..7858d49 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -61,6 +61,8 @@
#define RVT_RWQ_COUNT_THRESHOLD 16
static void rvt_rc_timeout(struct timer_list *t);
+static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ enum ib_qp_type type);
/*
* Convert the AETH RNR timeout code into the number of microseconds.
@@ -452,40 +454,41 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi)
}
/**
- * free_all_qps - check for QPs still in use
+ * rvt_free_qp_cb - callback function to reset a qp
+ * @qp: the qp to reset
+ * @v: a 64-bit value
+ *
+ * This function resets the qp and removes it from the
+ * qp hash table.
+ */
+static void rvt_free_qp_cb(struct rvt_qp *qp, u64 v)
+{
+ unsigned int *qp_inuse = (unsigned int *)v;
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+ /* Reset the qp and remove it from the qp hash list */
+ rvt_reset_qp(rdi, qp, qp->ibqp.qp_type);
+
+ /* Increment the qp_inuse count */
+ (*qp_inuse)++;
+}
+
+/**
+ * rvt_free_all_qps - check for QPs still in use
* @rdi: rvt device info structure
*
* There should not be any QPs still in use.
* Free memory for table.
+ * Return the number of QPs still in use.
*/
static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi)
{
- unsigned long flags;
- struct rvt_qp *qp;
- unsigned n, qp_inuse = 0;
- spinlock_t *ql; /* work around too long line below */
-
- if (rdi->driver_f.free_all_qps)
- qp_inuse = rdi->driver_f.free_all_qps(rdi);
+ unsigned int qp_inuse = 0;
qp_inuse += rvt_mcast_tree_empty(rdi);
- if (!rdi->qp_dev)
- return qp_inuse;
+ rvt_qp_iter(rdi, (u64)&qp_inuse, rvt_free_qp_cb);
- ql = &rdi->qp_dev->qpt_lock;
- spin_lock_irqsave(ql, flags);
- for (n = 0; n < rdi->qp_dev->qp_table_size; n++) {
- qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n],
- lockdep_is_held(ql));
- RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL);
-
- for (; qp; qp = rcu_dereference_protected(qp->next,
- lockdep_is_held(ql)))
- qp_inuse++;
- }
- spin_unlock_irqrestore(ql, flags);
- synchronize_rcu();
return qp_inuse;
}
@@ -902,14 +905,14 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
}
/**
- * rvt_reset_qp - initialize the QP state to the reset state
+ * _rvt_reset_qp - initialize the QP state to the reset state
* @qp: the QP to reset
* @type: the QP type
*
* r_lock, s_hlock, and s_lock are required to be held by the caller
*/
-static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
- enum ib_qp_type type)
+static void _rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ enum ib_qp_type type)
__must_hold(&qp->s_lock)
__must_hold(&qp->s_hlock)
__must_hold(&qp->r_lock)
@@ -955,6 +958,27 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
lockdep_assert_held(&qp->s_lock);
}
+/**
+ * rvt_reset_qp - initialize the QP state to the reset state
+ * @rdi: the device info
+ * @qp: the QP to reset
+ * @type: the QP type
+ *
+ * This is the wrapper function to acquire the r_lock, s_hlock, and s_lock
+ * before calling _rvt_reset_qp().
+ */
+static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ enum ib_qp_type type)
+{
+ spin_lock_irq(&qp->r_lock);
+ spin_lock(&qp->s_hlock);
+ spin_lock(&qp->s_lock);
+ _rvt_reset_qp(rdi, qp, type);
+ spin_unlock(&qp->s_lock);
+ spin_unlock(&qp->s_hlock);
+ spin_unlock_irq(&qp->r_lock);
+}
+
/** rvt_free_qpn - Free a qpn from the bit map
* @qpt: QP table
* @qpn: queue pair number to free
@@ -1546,7 +1570,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
switch (new_state) {
case IB_QPS_RESET:
if (qp->state != IB_QPS_RESET)
- rvt_reset_qp(rdi, qp, ibqp->qp_type);
+ _rvt_reset_qp(rdi, qp, ibqp->qp_type);
break;
case IB_QPS_RTR:
@@ -1695,13 +1719,7 @@ int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
- spin_lock_irq(&qp->r_lock);
- spin_lock(&qp->s_hlock);
- spin_lock(&qp->s_lock);
rvt_reset_qp(rdi, qp, ibqp->qp_type);
- spin_unlock(&qp->s_lock);
- spin_unlock(&qp->s_hlock);
- spin_unlock_irq(&qp->r_lock);
wait_event(qp->wait, !atomic_read(&qp->refcount));
/* qpn is now available for use again */
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 116cafc..4bc8870 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -329,7 +329,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
qp->comp.psn = pkt->psn;
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_run_task(&qp->req.task, 1);
+ rxe_run_task(&qp->req.task, 0);
}
}
return COMPST_ERROR_RETRY;
@@ -463,7 +463,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
*/
if (qp->req.wait_fence) {
qp->req.wait_fence = 0;
- rxe_run_task(&qp->req.task, 1);
+ rxe_run_task(&qp->req.task, 0);
}
}
@@ -479,7 +479,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp,
if (qp->req.need_rd_atomic) {
qp->comp.timeout_retry = 0;
qp->req.need_rd_atomic = 0;
- rxe_run_task(&qp->req.task, 1);
+ rxe_run_task(&qp->req.task, 0);
}
}
@@ -725,7 +725,7 @@ int rxe_completer(void *arg)
RXE_CNT_COMP_RETRY);
qp->req.need_retry = 1;
qp->comp.started_retry = 1;
- rxe_run_task(&qp->req.task, 1);
+ rxe_run_task(&qp->req.task, 0);
}
if (pkt) {
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 0c3f058..c5651a9 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -1225,10 +1225,9 @@ static void siw_cm_llp_data_ready(struct sock *sk)
read_lock(&sk->sk_callback_lock);
cep = sk_to_cep(sk);
- if (!cep) {
- WARN_ON(1);
+ if (!cep)
goto out;
- }
+
siw_dbg_cep(cep, "state: %d\n", cep->state);
switch (cep->state) {
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index 96ed349..5cd40fb9 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -388,6 +388,9 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
{ .max_segment_size = SZ_2G };
base_dev->num_comp_vectors = num_possible_cpus();
+ xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1);
+ xa_init_flags(&sdev->mem_xa, XA_FLAGS_ALLOC1);
+
ib_set_device_ops(base_dev, &siw_device_ops);
rv = ib_device_set_netdev(base_dev, netdev, 1);
if (rv)
@@ -415,9 +418,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
sdev->attrs.max_srq_wr = SIW_MAX_SRQ_WR;
sdev->attrs.max_srq_sge = SIW_MAX_SGE;
- xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1);
- xa_init_flags(&sdev->mem_xa, XA_FLAGS_ALLOC1);
-
INIT_LIST_HEAD(&sdev->cep_list);
INIT_LIST_HEAD(&sdev->qp_list);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index b273e42..a1a0352 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -2575,6 +2575,17 @@ isert_wait4logout(struct isert_conn *isert_conn)
}
}
+static void
+isert_wait4cmds(struct iscsi_conn *conn)
+{
+ isert_info("iscsi_conn %p\n", conn);
+
+ if (conn->sess) {
+ target_sess_cmd_list_set_waiting(conn->sess->se_sess);
+ target_wait_for_sess_cmds(conn->sess->se_sess);
+ }
+}
+
/**
* isert_put_unsol_pending_cmds() - Drop commands waiting for
* unsolicitate dataout
@@ -2622,6 +2633,7 @@ static void isert_wait_conn(struct iscsi_conn *conn)
ib_drain_qp(isert_conn->qp);
isert_put_unsol_pending_cmds(conn);
+ isert_wait4cmds(conn);
isert_wait4logout(isert_conn);
queue_work(isert_release_wq, &isert_conn->release_work);
diff --git a/drivers/input/input.c b/drivers/input/input.c
index fce43e6..3cfd2c1 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -190,6 +190,7 @@ static void input_repeat_key(struct timer_list *t)
input_value_sync
};
+ input_set_timestamp(dev, ktime_get());
input_pass_values(dev, vals, ARRAY_SIZE(vals));
if (dev->rep[REP_PERIOD])
diff --git a/drivers/input/keyboard/goldfish_events.c b/drivers/input/keyboard/goldfish_events.c
index bc8c85a..57d435f 100644
--- a/drivers/input/keyboard/goldfish_events.c
+++ b/drivers/input/keyboard/goldfish_events.c
@@ -30,7 +30,7 @@ struct event_dev {
struct input_dev *input;
int irq;
void __iomem *addr;
- char name[0];
+ char name[];
};
static irqreturn_t events_interrupt(int irq, void *dev_id)
diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index 1f56d53..53c9ff3 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -55,7 +55,7 @@ struct gpio_keys_drvdata {
struct input_dev *input;
struct mutex disable_lock;
unsigned short *keymap;
- struct gpio_button_data data[0];
+ struct gpio_button_data data[];
};
/*
diff --git a/drivers/input/keyboard/gpio_keys_polled.c b/drivers/input/keyboard/gpio_keys_polled.c
index 6eb0a2f..c3937d2 100644
--- a/drivers/input/keyboard/gpio_keys_polled.c
+++ b/drivers/input/keyboard/gpio_keys_polled.c
@@ -38,7 +38,7 @@ struct gpio_keys_polled_dev {
const struct gpio_keys_platform_data *pdata;
unsigned long rel_axis_seen[BITS_TO_LONGS(REL_CNT)];
unsigned long abs_axis_seen[BITS_TO_LONGS(ABS_CNT)];
- struct gpio_keys_button_data data[0];
+ struct gpio_keys_button_data data[];
};
static void gpio_keys_button_event(struct input_dev *input,
diff --git a/drivers/input/keyboard/tca6416-keypad.c b/drivers/input/keyboard/tca6416-keypad.c
index 2a14769..2175876 100644
--- a/drivers/input/keyboard/tca6416-keypad.c
+++ b/drivers/input/keyboard/tca6416-keypad.c
@@ -33,7 +33,7 @@ MODULE_DEVICE_TABLE(i2c, tca6416_id);
struct tca6416_drv_data {
struct input_dev *input;
- struct tca6416_button data[0];
+ struct tca6416_button data[];
};
struct tca6416_keypad_chip {
@@ -48,7 +48,7 @@ struct tca6416_keypad_chip {
int irqnum;
u16 pinmask;
bool use_polling;
- struct tca6416_button buttons[0];
+ struct tca6416_button buttons[];
};
static int tca6416_write_reg(struct tca6416_keypad_chip *chip, int reg, u16 val)
diff --git a/drivers/input/keyboard/tm2-touchkey.c b/drivers/input/keyboard/tm2-touchkey.c
index 14b55ba..fb078e0 100644
--- a/drivers/input/keyboard/tm2-touchkey.c
+++ b/drivers/input/keyboard/tm2-touchkey.c
@@ -75,6 +75,14 @@ static struct touchkey_variant aries_touchkey_variant = {
.cmd_led_off = ARIES_TOUCHKEY_CMD_LED_OFF,
};
+static const struct touchkey_variant tc360_touchkey_variant = {
+ .keycode_reg = 0x00,
+ .base_reg = 0x00,
+ .fixed_regulator = true,
+ .cmd_led_on = TM2_TOUCHKEY_CMD_LED_ON,
+ .cmd_led_off = TM2_TOUCHKEY_CMD_LED_OFF,
+};
+
static int tm2_touchkey_led_brightness_set(struct led_classdev *led_dev,
enum led_brightness brightness)
{
@@ -327,6 +335,9 @@ static const struct of_device_id tm2_touchkey_of_match[] = {
}, {
.compatible = "cypress,aries-touchkey",
.data = &aries_touchkey_variant,
+ }, {
+ .compatible = "coreriver,tc360-touchkey",
+ .data = &tc360_touchkey_variant,
},
{ },
};
diff --git a/drivers/input/mouse/cyapa_gen5.c b/drivers/input/mouse/cyapa_gen5.c
index 14239fb..7f012bf 100644
--- a/drivers/input/mouse/cyapa_gen5.c
+++ b/drivers/input/mouse/cyapa_gen5.c
@@ -250,7 +250,7 @@ struct cyapa_tsg_bin_image_data_record {
struct cyapa_tsg_bin_image {
struct cyapa_tsg_bin_image_head image_head;
- struct cyapa_tsg_bin_image_data_record records[0];
+ struct cyapa_tsg_bin_image_data_record records[];
} __packed;
struct pip_bl_packet_start {
@@ -271,7 +271,7 @@ struct pip_bl_cmd_head {
u8 report_id; /* Bootloader output report id, must be 40h */
u8 rsvd; /* Reserved, must be 0 */
struct pip_bl_packet_start packet_start;
- u8 data[0]; /* Command data variable based on commands */
+ u8 data[]; /* Command data variable based on commands */
} __packed;
/* Initiate bootload command data structure. */
@@ -300,7 +300,7 @@ struct tsg_bl_metadata_row_params {
struct tsg_bl_flash_row_head {
u8 flash_array_id;
__le16 flash_row_id;
- u8 flash_data[0];
+ u8 flash_data[];
} __packed;
struct pip_app_cmd_head {
@@ -314,7 +314,7 @@ struct pip_app_cmd_head {
* Bit 6-0: command code.
*/
u8 cmd_code;
- u8 parameter_data[0]; /* Parameter data variable based on cmd_code */
+ u8 parameter_data[]; /* Parameter data variable based on cmd_code */
} __packed;
/* Application get/set parameter command data structure */
diff --git a/drivers/input/mouse/psmouse-smbus.c b/drivers/input/mouse/psmouse-smbus.c
index 027efdd..a472489 100644
--- a/drivers/input/mouse/psmouse-smbus.c
+++ b/drivers/input/mouse/psmouse-smbus.c
@@ -190,6 +190,7 @@ static int psmouse_smbus_create_companion(struct device *dev, void *data)
struct psmouse_smbus_dev *smbdev = data;
unsigned short addr_list[] = { smbdev->board.addr, I2C_CLIENT_END };
struct i2c_adapter *adapter;
+ struct i2c_client *client;
adapter = i2c_verify_adapter(dev);
if (!adapter)
@@ -198,12 +199,13 @@ static int psmouse_smbus_create_companion(struct device *dev, void *data)
if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_HOST_NOTIFY))
return 0;
- smbdev->client = i2c_new_probed_device(adapter, &smbdev->board,
- addr_list, NULL);
- if (!smbdev->client)
+ client = i2c_new_scanned_device(adapter, &smbdev->board,
+ addr_list, NULL);
+ if (IS_ERR(client))
return 0;
/* We have our(?) device, stop iterating i2c bus. */
+ smbdev->client = client;
return 1;
}
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index 1ae6f8b..4d20362 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -146,7 +146,6 @@ static const char * const topbuttonpad_pnp_ids[] = {
"LEN0042", /* Yoga */
"LEN0045",
"LEN0047",
- "LEN0049",
"LEN2000", /* S540 */
"LEN2001", /* Edge E431 */
"LEN2002", /* Edge E531 */
@@ -166,9 +165,11 @@ static const char * const smbus_pnp_ids[] = {
/* all of the topbuttonpad_pnp_ids are valid, we just add some extras */
"LEN0048", /* X1 Carbon 3 */
"LEN0046", /* X250 */
+ "LEN0049", /* Yoga 11e */
"LEN004a", /* W541 */
"LEN005b", /* P50 */
"LEN005e", /* T560 */
+ "LEN006c", /* T470s */
"LEN0071", /* T480 */
"LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */
"LEN0073", /* X1 Carbon G5 (Elantech) */
@@ -179,11 +180,13 @@ static const char * const smbus_pnp_ids[] = {
"LEN0097", /* X280 -> ALPS trackpoint */
"LEN009b", /* T580 */
"LEN200f", /* T450s */
+ "LEN2044", /* L470 */
"LEN2054", /* E480 */
"LEN2055", /* E580 */
"SYN3052", /* HP EliteBook 840 G4 */
"SYN3221", /* HP 15-ay000 */
"SYN323d", /* HP Spectre X360 13-w013dx */
+ "SYN3257", /* HP Envy 13-ad105ng */
NULL
};
diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c
index 6adea8a..ffa39ab 100644
--- a/drivers/input/rmi4/rmi_f11.c
+++ b/drivers/input/rmi4/rmi_f11.c
@@ -1203,8 +1203,8 @@ static int rmi_f11_initialize(struct rmi_function *fn)
* If distance threshold values are set, switch to reduced reporting
* mode so they actually get used by the controller.
*/
- if (ctrl->ctrl0_11[RMI_F11_DELTA_X_THRESHOLD] ||
- ctrl->ctrl0_11[RMI_F11_DELTA_Y_THRESHOLD]) {
+ if (sensor->axis_align.delta_x_threshold ||
+ sensor->axis_align.delta_y_threshold) {
ctrl->ctrl0_11[0] &= ~RMI_F11_REPORT_MODE_MASK;
ctrl->ctrl0_11[0] |= RMI_F11_REPORT_MODE_REDUCED;
}
diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c
index 4a17096..199cf3d 100644
--- a/drivers/input/touchscreen/ili210x.c
+++ b/drivers/input/touchscreen/ili210x.c
@@ -167,6 +167,36 @@ static const struct ili2xxx_chip ili211x_chip = {
.resolution = 2048,
};
+static bool ili212x_touchdata_to_coords(const u8 *touchdata,
+ unsigned int finger,
+ unsigned int *x, unsigned int *y)
+{
+ u16 val;
+
+ val = get_unaligned_be16(touchdata + 3 + (finger * 5) + 0);
+ if (!(val & BIT(15))) /* Touch indication */
+ return false;
+
+ *x = val & 0x3fff;
+ *y = get_unaligned_be16(touchdata + 3 + (finger * 5) + 2);
+
+ return true;
+}
+
+static bool ili212x_check_continue_polling(const u8 *data, bool touch)
+{
+ return touch;
+}
+
+static const struct ili2xxx_chip ili212x_chip = {
+ .read_reg = ili210x_read_reg,
+ .get_touch_data = ili210x_read_touch_data,
+ .parse_touch_data = ili212x_touchdata_to_coords,
+ .continue_polling = ili212x_check_continue_polling,
+ .max_touches = 10,
+ .has_calibrate_reg = true,
+};
+
static int ili251x_read_reg(struct i2c_client *client,
u8 reg, void *buf, size_t len)
{
@@ -321,7 +351,7 @@ static umode_t ili210x_calibrate_visible(struct kobject *kobj,
struct i2c_client *client = to_i2c_client(dev);
struct ili210x *priv = i2c_get_clientdata(client);
- return priv->chip->has_calibrate_reg;
+ return priv->chip->has_calibrate_reg ? attr->mode : 0;
}
static const struct attribute_group ili210x_attr_group = {
@@ -447,6 +477,7 @@ static int ili210x_i2c_probe(struct i2c_client *client,
static const struct i2c_device_id ili210x_i2c_id[] = {
{ "ili210x", (long)&ili210x_chip },
{ "ili2117", (long)&ili211x_chip },
+ { "ili2120", (long)&ili212x_chip },
{ "ili251x", (long)&ili251x_chip },
{ }
};
@@ -455,6 +486,7 @@ MODULE_DEVICE_TABLE(i2c, ili210x_i2c_id);
static const struct of_device_id ili210x_dt_ids[] = {
{ .compatible = "ilitek,ili210x", .data = &ili210x_chip },
{ .compatible = "ilitek,ili2117", .data = &ili211x_chip },
+ { .compatible = "ilitek,ili2120", .data = &ili212x_chip },
{ .compatible = "ilitek,ili251x", .data = &ili251x_chip },
{ }
};
diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c
index 6ed9f22..fe24543 100644
--- a/drivers/input/touchscreen/raydium_i2c_ts.c
+++ b/drivers/input/touchscreen/raydium_i2c_ts.c
@@ -432,7 +432,7 @@ static int raydium_i2c_write_object(struct i2c_client *client,
return 0;
}
-static bool raydium_i2c_boot_trigger(struct i2c_client *client)
+static int raydium_i2c_boot_trigger(struct i2c_client *client)
{
static const u8 cmd[7][6] = {
{ 0x08, 0x0C, 0x09, 0x00, 0x50, 0xD7 },
@@ -457,10 +457,10 @@ static bool raydium_i2c_boot_trigger(struct i2c_client *client)
}
}
- return false;
+ return 0;
}
-static bool raydium_i2c_fw_trigger(struct i2c_client *client)
+static int raydium_i2c_fw_trigger(struct i2c_client *client)
{
static const u8 cmd[5][11] = {
{ 0, 0x09, 0x71, 0x0C, 0x09, 0x00, 0x50, 0xD7, 0, 0, 0 },
@@ -483,7 +483,7 @@ static bool raydium_i2c_fw_trigger(struct i2c_client *client)
}
}
- return false;
+ return 0;
}
static int raydium_i2c_check_path(struct i2c_client *client)
diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
index f277e46..2c6515e 100644
--- a/drivers/interconnect/core.c
+++ b/drivers/interconnect/core.c
@@ -445,6 +445,11 @@ struct icc_path *of_icc_get(struct device *dev, const char *name)
path->name = kasprintf(GFP_KERNEL, "%s-%s",
src_node->name, dst_node->name);
+ if (!path->name) {
+ kfree(path);
+ return ERR_PTR(-ENOMEM);
+ }
+
return path;
}
EXPORT_SYMBOL_GPL(of_icc_get);
@@ -579,6 +584,10 @@ struct icc_path *icc_get(struct device *dev, const int src_id, const int dst_id)
}
path->name = kasprintf(GFP_KERNEL, "%s-%s", src->name, dst->name);
+ if (!path->name) {
+ kfree(path);
+ path = ERR_PTR(-ENOMEM);
+ }
out:
mutex_unlock(&icc_lock);
return path;
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 2104fb8..9f33fdb 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -14,8 +14,8 @@
obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o amd_iommu_quirks.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd_iommu_debugfs.o
obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
-obj-$(CONFIG_ARM_SMMU) += arm-smmu-mod.o
-arm-smmu-mod-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-qcom.o
+obj-$(CONFIG_ARM_SMMU) += arm_smmu.o
+arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-qcom.o
obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index aac132b..20cce36 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3826,7 +3826,7 @@ int amd_iommu_activate_guest_mode(void *data)
entry->lo.fields_vapic.ga_tag = ir_data->ga_tag;
return modify_irte_ga(ir_data->irq_2_irte.devid,
- ir_data->irq_2_irte.index, entry, NULL);
+ ir_data->irq_2_irte.index, entry, ir_data);
}
EXPORT_SYMBOL(amd_iommu_activate_guest_mode);
@@ -3852,7 +3852,7 @@ int amd_iommu_deactivate_guest_mode(void *data)
APICID_TO_IRTE_DEST_HI(cfg->dest_apicid);
return modify_irte_ga(ir_data->irq_2_irte.devid,
- ir_data->irq_2_irte.index, entry, NULL);
+ ir_data->irq_2_irte.index, entry, ir_data);
}
EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode);
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 2759a8d..6be3853 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -2523,6 +2523,7 @@ static int __init early_amd_iommu_init(void)
struct acpi_table_header *ivrs_base;
acpi_status status;
int i, remap_cache_sz, ret = 0;
+ u32 pci_id;
if (!amd_iommu_detected)
return -ENODEV;
@@ -2610,6 +2611,16 @@ static int __init early_amd_iommu_init(void)
if (ret)
goto out;
+ /* Disable IOMMU if there's Stoney Ridge graphics */
+ for (i = 0; i < 32; i++) {
+ pci_id = read_pci_config(0, i, 0, 0);
+ if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
+ pr_info("Disable IOMMU on Stoney Ridge\n");
+ amd_iommu_disabled = true;
+ break;
+ }
+ }
+
/* Disable any previously enabled IOMMUs */
if (!is_kdump_kernel() || amd_iommu_disabled)
disable_iommus();
@@ -2718,7 +2729,7 @@ static int __init state_next(void)
ret = early_amd_iommu_init();
init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) {
- pr_info("AMD IOMMU disabled on kernel command-line\n");
+ pr_info("AMD IOMMU disabled\n");
init_state = IOMMU_CMDLINE_DISABLED;
ret = -EINVAL;
}
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index a2e96a5..ba128d1 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -177,15 +177,15 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
start -= iova_offset(iovad, start);
num_pages = iova_align(iovad, end - start) >> iova_shift(iovad);
- msi_page = kcalloc(num_pages, sizeof(*msi_page), GFP_KERNEL);
- if (!msi_page)
- return -ENOMEM;
-
for (i = 0; i < num_pages; i++) {
- msi_page[i].phys = start;
- msi_page[i].iova = start;
- INIT_LIST_HEAD(&msi_page[i].list);
- list_add(&msi_page[i].list, &cookie->msi_page_list);
+ msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL);
+ if (!msi_page)
+ return -ENOMEM;
+
+ msi_page->phys = start;
+ msi_page->iova = start;
+ INIT_LIST_HEAD(&msi_page->list);
+ list_add(&msi_page->list, &cookie->msi_page_list);
start += iovad->granule;
}
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 071bb42..f77dae7 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/numa.h>
+#include <linux/limits.h>
#include <asm/irq_remapping.h>
#include <asm/iommu_table.h>
@@ -128,6 +129,13 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
BUG_ON(dev->is_virtfn);
+ /*
+ * Ignore devices that have a domain number higher than what can
+ * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
+ */
+ if (pci_domain_nr(dev->bus) > U16_MAX)
+ return NULL;
+
/* Only generate path[] for device addition event */
if (event == BUS_NOTIFY_ADD_DEVICE)
for (tmp = dev; tmp; tmp = tmp->bus->self)
@@ -363,7 +371,8 @@ dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
{
struct dmar_drhd_unit *dmaru;
- list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list)
+ list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
+ dmar_rcu_check())
if (dmaru->segment == drhd->segment &&
dmaru->reg_base_addr == drhd->address)
return dmaru;
@@ -440,12 +449,13 @@ static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
/* Check for NUL termination within the designated length */
if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
- WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND,
+ pr_warn(FW_BUG
"Your BIOS is broken; ANDD object name is not NUL-terminated\n"
"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
dmi_get_system_info(DMI_BIOS_VENDOR),
dmi_get_system_info(DMI_BIOS_VERSION),
dmi_get_system_info(DMI_PRODUCT_VERSION));
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
return -EINVAL;
}
pr_info("ANDD device: %x name: %s\n", andd->device_number,
@@ -471,14 +481,14 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
return 0;
}
}
- WARN_TAINT(
- 1, TAINT_FIRMWARE_WORKAROUND,
+ pr_warn(FW_BUG
"Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
- drhd->reg_base_addr,
+ rhsa->base_address,
dmi_get_system_info(DMI_BIOS_VENDOR),
dmi_get_system_info(DMI_BIOS_VERSION),
dmi_get_system_info(DMI_PRODUCT_VERSION));
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
return 0;
}
@@ -827,14 +837,14 @@ int __init dmar_table_init(void)
static void warn_invalid_dmar(u64 addr, const char *message)
{
- WARN_TAINT_ONCE(
- 1, TAINT_FIRMWARE_WORKAROUND,
+ pr_warn_once(FW_BUG
"Your BIOS is broken; DMAR reported at address %llx%s!\n"
"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
addr, message,
dmi_get_system_info(DMI_BIOS_VENDOR),
dmi_get_system_info(DMI_BIOS_VERSION),
dmi_get_system_info(DMI_PRODUCT_VERSION));
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
}
static int __ref
diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c
index c1257be..3eb1fe2 100644
--- a/drivers/iommu/intel-iommu-debugfs.c
+++ b/drivers/iommu/intel-iommu-debugfs.c
@@ -33,38 +33,42 @@ struct iommu_regset {
#define IOMMU_REGSET_ENTRY(_reg_) \
{ DMAR_##_reg_##_REG, __stringify(_reg_) }
-static const struct iommu_regset iommu_regs[] = {
+
+static const struct iommu_regset iommu_regs_32[] = {
IOMMU_REGSET_ENTRY(VER),
- IOMMU_REGSET_ENTRY(CAP),
- IOMMU_REGSET_ENTRY(ECAP),
IOMMU_REGSET_ENTRY(GCMD),
IOMMU_REGSET_ENTRY(GSTS),
- IOMMU_REGSET_ENTRY(RTADDR),
- IOMMU_REGSET_ENTRY(CCMD),
IOMMU_REGSET_ENTRY(FSTS),
IOMMU_REGSET_ENTRY(FECTL),
IOMMU_REGSET_ENTRY(FEDATA),
IOMMU_REGSET_ENTRY(FEADDR),
IOMMU_REGSET_ENTRY(FEUADDR),
- IOMMU_REGSET_ENTRY(AFLOG),
IOMMU_REGSET_ENTRY(PMEN),
IOMMU_REGSET_ENTRY(PLMBASE),
IOMMU_REGSET_ENTRY(PLMLIMIT),
- IOMMU_REGSET_ENTRY(PHMBASE),
- IOMMU_REGSET_ENTRY(PHMLIMIT),
- IOMMU_REGSET_ENTRY(IQH),
- IOMMU_REGSET_ENTRY(IQT),
- IOMMU_REGSET_ENTRY(IQA),
IOMMU_REGSET_ENTRY(ICS),
- IOMMU_REGSET_ENTRY(IRTA),
- IOMMU_REGSET_ENTRY(PQH),
- IOMMU_REGSET_ENTRY(PQT),
- IOMMU_REGSET_ENTRY(PQA),
IOMMU_REGSET_ENTRY(PRS),
IOMMU_REGSET_ENTRY(PECTL),
IOMMU_REGSET_ENTRY(PEDATA),
IOMMU_REGSET_ENTRY(PEADDR),
IOMMU_REGSET_ENTRY(PEUADDR),
+};
+
+static const struct iommu_regset iommu_regs_64[] = {
+ IOMMU_REGSET_ENTRY(CAP),
+ IOMMU_REGSET_ENTRY(ECAP),
+ IOMMU_REGSET_ENTRY(RTADDR),
+ IOMMU_REGSET_ENTRY(CCMD),
+ IOMMU_REGSET_ENTRY(AFLOG),
+ IOMMU_REGSET_ENTRY(PHMBASE),
+ IOMMU_REGSET_ENTRY(PHMLIMIT),
+ IOMMU_REGSET_ENTRY(IQH),
+ IOMMU_REGSET_ENTRY(IQT),
+ IOMMU_REGSET_ENTRY(IQA),
+ IOMMU_REGSET_ENTRY(IRTA),
+ IOMMU_REGSET_ENTRY(PQH),
+ IOMMU_REGSET_ENTRY(PQT),
+ IOMMU_REGSET_ENTRY(PQA),
IOMMU_REGSET_ENTRY(MTRRCAP),
IOMMU_REGSET_ENTRY(MTRRDEF),
IOMMU_REGSET_ENTRY(MTRR_FIX64K_00000),
@@ -127,10 +131,16 @@ static int iommu_regset_show(struct seq_file *m, void *unused)
* by adding the offset to the pointer (virtual address).
*/
raw_spin_lock_irqsave(&iommu->register_lock, flag);
- for (i = 0 ; i < ARRAY_SIZE(iommu_regs); i++) {
- value = dmar_readq(iommu->reg + iommu_regs[i].offset);
+ for (i = 0 ; i < ARRAY_SIZE(iommu_regs_32); i++) {
+ value = dmar_readl(iommu->reg + iommu_regs_32[i].offset);
seq_printf(m, "%-16s\t0x%02x\t\t0x%016llx\n",
- iommu_regs[i].regs, iommu_regs[i].offset,
+ iommu_regs_32[i].regs, iommu_regs_32[i].offset,
+ value);
+ }
+ for (i = 0 ; i < ARRAY_SIZE(iommu_regs_64); i++) {
+ value = dmar_readq(iommu->reg + iommu_regs_64[i].offset);
+ seq_printf(m, "%-16s\t0x%02x\t\t0x%016llx\n",
+ iommu_regs_64[i].regs, iommu_regs_64[i].offset,
value);
}
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
@@ -272,9 +282,16 @@ static int dmar_translation_struct_show(struct seq_file *m, void *unused)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
+ u32 sts;
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
+ sts = dmar_readl(iommu->reg + DMAR_GSTS_REG);
+ if (!(sts & DMA_GSTS_TES)) {
+ seq_printf(m, "DMA Remapping is not enabled on %s\n",
+ iommu->name);
+ continue;
+ }
root_tbl_walk(m, iommu);
seq_putc(m, '\n');
}
@@ -415,6 +432,7 @@ static int ir_translation_struct_show(struct seq_file *m, void *unused)
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
u64 irta;
+ u32 sts;
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
@@ -424,7 +442,8 @@ static int ir_translation_struct_show(struct seq_file *m, void *unused)
seq_printf(m, "Remapped Interrupt supported on IOMMU: %s\n",
iommu->name);
- if (iommu->ir_table) {
+ sts = dmar_readl(iommu->reg + DMAR_GSTS_REG);
+ if (iommu->ir_table && (sts & DMA_GSTS_IRES)) {
irta = virt_to_phys(iommu->ir_table->base);
seq_printf(m, " IR table address:%llx\n", irta);
ir_tbl_remap_entry_show(m, iommu);
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 9dc3767..4be5494 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -762,6 +762,11 @@ static int iommu_dummy(struct device *dev)
return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
}
+static bool attach_deferred(struct device *dev)
+{
+ return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
+}
+
/**
* is_downstream_to_pci_bridge - test if a device belongs to the PCI
* sub-hierarchy of a candidate PCI-PCI bridge
@@ -2510,8 +2515,7 @@ struct dmar_domain *find_domain(struct device *dev)
{
struct device_domain_info *info;
- if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO ||
- dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO))
+ if (unlikely(attach_deferred(dev) || iommu_dummy(dev)))
return NULL;
if (dev_is_pci(dev))
@@ -2525,18 +2529,14 @@ struct dmar_domain *find_domain(struct device *dev)
return NULL;
}
-static struct dmar_domain *deferred_attach_domain(struct device *dev)
+static void do_deferred_attach(struct device *dev)
{
- if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) {
- struct iommu_domain *domain;
+ struct iommu_domain *domain;
- dev->archdata.iommu = NULL;
- domain = iommu_get_domain_for_dev(dev);
- if (domain)
- intel_iommu_attach_device(domain, dev);
- }
-
- return find_domain(dev);
+ dev->archdata.iommu = NULL;
+ domain = iommu_get_domain_for_dev(dev);
+ if (domain)
+ intel_iommu_attach_device(domain, dev);
}
static inline struct device_domain_info *
@@ -2916,7 +2916,7 @@ static int identity_mapping(struct device *dev)
struct device_domain_info *info;
info = dev->archdata.iommu;
- if (info && info != DUMMY_DEVICE_DOMAIN_INFO && info != DEFER_DEVICE_DOMAIN_INFO)
+ if (info)
return (info->domain == si_domain);
return 0;
@@ -3587,6 +3587,9 @@ static bool iommu_need_mapping(struct device *dev)
if (iommu_dummy(dev))
return false;
+ if (unlikely(attach_deferred(dev)))
+ do_deferred_attach(dev);
+
ret = identity_mapping(dev);
if (ret) {
u64 dma_mask = *dev->dma_mask;
@@ -3635,7 +3638,7 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
BUG_ON(dir == DMA_NONE);
- domain = deferred_attach_domain(dev);
+ domain = find_domain(dev);
if (!domain)
return DMA_MAPPING_ERROR;
@@ -3855,7 +3858,7 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
if (!iommu_need_mapping(dev))
return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
- domain = deferred_attach_domain(dev);
+ domain = find_domain(dev);
if (!domain)
return 0;
@@ -3950,7 +3953,11 @@ bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size,
int prot = 0;
int ret;
- domain = deferred_attach_domain(dev);
+ if (unlikely(attach_deferred(dev)))
+ do_deferred_attach(dev);
+
+ domain = find_domain(dev);
+
if (WARN_ON(dir == DMA_NONE || !domain))
return DMA_MAPPING_ERROR;
@@ -4254,10 +4261,11 @@ static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
/* we know that the this iommu should be at offset 0xa000 from vtbar */
drhd = dmar_find_matched_drhd_unit(pdev);
- if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
- TAINT_FIRMWARE_WORKAROUND,
- "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
+ if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
+ pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
+ }
}
DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
@@ -4453,14 +4461,16 @@ int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
struct dmar_rmrr_unit *rmrru;
rmrr = (struct acpi_dmar_reserved_memory *)header;
- if (rmrr_sanity_check(rmrr))
- WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND,
+ if (rmrr_sanity_check(rmrr)) {
+ pr_warn(FW_BUG
"Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
rmrr->base_address, rmrr->end_address,
dmi_get_system_info(DMI_BIOS_VENDOR),
dmi_get_system_info(DMI_BIOS_VERSION),
dmi_get_system_info(DMI_PRODUCT_VERSION));
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
+ }
rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
if (!rmrru)
@@ -5123,6 +5133,9 @@ int __init intel_iommu_init(void)
down_write(&dmar_global_lock);
+ if (!no_iommu)
+ intel_iommu_debugfs_init();
+
if (no_iommu || dmar_disabled) {
/*
* We exit the function here to ensure IOMMU's remapping and
@@ -5186,6 +5199,7 @@ int __init intel_iommu_init(void)
init_iommu_pm_ops();
+ down_read(&dmar_global_lock);
for_each_active_iommu(iommu, drhd) {
iommu_device_sysfs_add(&iommu->iommu, NULL,
intel_iommu_groups,
@@ -5193,6 +5207,7 @@ int __init intel_iommu_init(void)
iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
iommu_device_register(&iommu->iommu);
}
+ up_read(&dmar_global_lock);
bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
if (si_domain && !hw_pass_through)
@@ -5203,7 +5218,6 @@ int __init intel_iommu_init(void)
down_read(&dmar_global_lock);
if (probe_acpi_namespace_devices())
pr_warn("ACPI name space devices didn't probe correctly\n");
- up_read(&dmar_global_lock);
/* Finally, we enable the DMA remapping hardware. */
for_each_iommu(iommu, drhd) {
@@ -5212,10 +5226,11 @@ int __init intel_iommu_init(void)
iommu_disable_protect_mem_regions(iommu);
}
+ up_read(&dmar_global_lock);
+
pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
intel_iommu_enabled = 1;
- intel_iommu_debugfs_init();
return 0;
@@ -5693,8 +5708,10 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
u64 phys = 0;
pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
- if (pte)
- phys = dma_pte_addr(pte);
+ if (pte && dma_pte_present(pte))
+ phys = dma_pte_addr(pte) +
+ (iova & (BIT_MASK(level_to_offset_bits(level) +
+ VTD_PAGE_SHIFT) - 1));
return phys;
}
@@ -6133,7 +6150,7 @@ intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
struct device *dev)
{
- return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
+ return attach_deferred(dev);
}
static int
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 983b084..04fbd4b 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -468,7 +468,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
arm_lpae_iopte *ptep = data->pgd;
int ret, lvl = data->start_level;
arm_lpae_iopte prot;
- long iaext = (long)iova >> cfg->ias;
+ long iaext = (s64)iova >> cfg->ias;
/* If no access, then nothing to do */
if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
@@ -645,7 +645,7 @@ static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
struct io_pgtable_cfg *cfg = &data->iop.cfg;
arm_lpae_iopte *ptep = data->pgd;
- long iaext = (long)iova >> cfg->ias;
+ long iaext = (s64)iova >> cfg->ias;
if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
return 0;
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index 39759db..4328da0 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -344,21 +344,19 @@ static void qcom_iommu_domain_free(struct iommu_domain *domain)
{
struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
- if (WARN_ON(qcom_domain->iommu)) /* forgot to detach? */
- return;
-
iommu_put_dma_cookie(domain);
- /* NOTE: unmap can be called after client device is powered off,
- * for example, with GPUs or anything involving dma-buf. So we
- * cannot rely on the device_link. Make sure the IOMMU is on to
- * avoid unclocked accesses in the TLB inv path:
- */
- pm_runtime_get_sync(qcom_domain->iommu->dev);
-
- free_io_pgtable_ops(qcom_domain->pgtbl_ops);
-
- pm_runtime_put_sync(qcom_domain->iommu->dev);
+ if (qcom_domain->iommu) {
+ /*
+ * NOTE: unmap can be called after client device is powered
+ * off, for example, with GPUs or anything involving dma-buf.
+ * So we cannot rely on the device_link. Make sure the IOMMU
+ * is on to avoid unclocked accesses in the TLB inv path:
+ */
+ pm_runtime_get_sync(qcom_domain->iommu->dev);
+ free_io_pgtable_ops(qcom_domain->pgtbl_ops);
+ pm_runtime_put_sync(qcom_domain->iommu->dev);
+ }
kfree(qcom_domain);
}
@@ -404,7 +402,7 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de
struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
unsigned i;
- if (!qcom_domain->iommu)
+ if (WARN_ON(!qcom_domain->iommu))
return;
pm_runtime_get_sync(qcom_iommu->dev);
@@ -417,8 +415,6 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de
ctx->domain = NULL;
}
pm_runtime_put_sync(qcom_iommu->dev);
-
- qcom_domain->iommu = NULL;
}
static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova,
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index c1f7af9..1eec9d4 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -34,6 +34,7 @@
#define GICD_INT_NMI_PRI (GICD_INT_DEF_PRI & ~0x80)
#define FLAGS_WORKAROUND_GICR_WAKER_MSM8996 (1ULL << 0)
+#define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539 (1ULL << 1)
struct redist_region {
void __iomem *redist_base;
@@ -1464,6 +1465,15 @@ static bool gic_enable_quirk_msm8996(void *data)
return true;
}
+static bool gic_enable_quirk_cavium_38539(void *data)
+{
+ struct gic_chip_data *d = data;
+
+ d->flags |= FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539;
+
+ return true;
+}
+
static bool gic_enable_quirk_hip06_07(void *data)
{
struct gic_chip_data *d = data;
@@ -1503,6 +1513,19 @@ static const struct gic_quirk gic_quirks[] = {
.init = gic_enable_quirk_hip06_07,
},
{
+ /*
+ * Reserved register accesses generate a Synchronous
+ * External Abort. This erratum applies to:
+ * - ThunderX: CN88xx
+ * - OCTEON TX: CN83xx, CN81xx
+ * - OCTEON TX2: CN93xx, CN96xx, CN98xx, CNF95xx*
+ */
+ .desc = "GICv3: Cavium erratum 38539",
+ .iidr = 0xa000034c,
+ .mask = 0xe8f00fff,
+ .init = gic_enable_quirk_cavium_38539,
+ },
+ {
}
};
@@ -1577,7 +1600,12 @@ static int __init gic_init_bases(void __iomem *dist_base,
pr_info("%d SPIs implemented\n", GIC_LINE_NR - 32);
pr_info("%d Extended SPIs implemented\n", GIC_ESPI_NR);
- gic_data.rdists.gicd_typer2 = readl_relaxed(gic_data.dist_base + GICD_TYPER2);
+ /*
+ * ThunderX1 explodes on reading GICD_TYPER2, in violation of the
+ * architecture spec (which says that reserved registers are RES0).
+ */
+ if (!(gic_data.flags & FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539))
+ gic_data.rdists.gicd_typer2 = readl_relaxed(gic_data.dist_base + GICD_TYPER2);
gic_data.domain = irq_domain_create_tree(handle, &gic_irq_domain_ops,
&gic_data);
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 7543e39..db38a68 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -380,12 +380,11 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
goto err_dev;
}
- tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node);
+ tqueue = blk_alloc_queue(tt->make_rq, dev->q->node);
if (!tqueue) {
ret = -ENOMEM;
goto err_disk;
}
- blk_queue_make_request(tqueue, tt->make_rq);
strlcpy(tdisk->disk_name, create->tgtname, sizeof(tdisk->disk_name));
tdisk->flags = GENHD_FL_EXT_DEVT;
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
index 7d8958d..6387302 100644
--- a/drivers/lightnvm/pblk-sysfs.c
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -37,7 +37,7 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
active = 0;
up(&rlun->wr_sem);
}
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"pblk: pos:%d, ch:%d, lun:%d - %d\n",
i,
rlun->bppa.a.ch,
@@ -120,7 +120,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf;
struct nvm_addrf_12 *gppaf = (struct nvm_addrf_12 *)&geo->addrf;
- sz = snprintf(page, PAGE_SIZE,
+ sz = scnprintf(page, PAGE_SIZE,
"g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
pblk->addrf_len,
ppaf->blk_offset, ppaf->blk_len,
@@ -130,7 +130,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
ppaf->pln_offset, ppaf->pln_len,
ppaf->sec_offset, ppaf->sec_len);
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
gppaf->blk_offset, gppaf->blk_len,
gppaf->pg_offset, gppaf->pg_len,
@@ -142,7 +142,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
struct nvm_addrf *ppaf = &pblk->addrf;
struct nvm_addrf *gppaf = &geo->addrf;
- sz = snprintf(page, PAGE_SIZE,
+ sz = scnprintf(page, PAGE_SIZE,
"pblk:(s:%d)ch:%d/%d,lun:%d/%d,chk:%d/%d/sec:%d/%d\n",
pblk->addrf_len,
ppaf->ch_offset, ppaf->ch_len,
@@ -150,7 +150,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
ppaf->chk_offset, ppaf->chk_len,
ppaf->sec_offset, ppaf->sec_len);
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"device:ch:%d/%d,lun:%d/%d,chk:%d/%d,sec:%d/%d\n",
gppaf->ch_offset, gppaf->ch_len,
gppaf->lun_offset, gppaf->lun_len,
@@ -278,11 +278,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
pblk_err(pblk, "corrupted free line list:%d/%d\n",
nr_free_lines, free_line_cnt);
- sz = snprintf(page, PAGE_SIZE - sz,
+ sz = scnprintf(page, PAGE_SIZE - sz,
"line: nluns:%d, nblks:%d, nsecs:%d\n",
geo->all_luns, lm->blk_per_line, lm->sec_per_line);
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n",
cur_data, cur_log,
nr_free_lines,
@@ -292,12 +292,12 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
d_line_cnt, l_line_cnt,
l_mg->nr_lines);
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr: %d, queue:%d\n",
gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr,
atomic_read(&pblk->gc.read_inflight_gc));
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
cur_data, cur_sec, msecs, vsc, sec_in_line,
map_weight, lm->sec_per_line,
@@ -313,19 +313,19 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
struct pblk_line_meta *lm = &pblk->lm;
ssize_t sz = 0;
- sz = snprintf(page, PAGE_SIZE - sz,
+ sz = scnprintf(page, PAGE_SIZE - sz,
"smeta - len:%d, secs:%d\n",
lm->smeta_len, lm->smeta_sec);
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"emeta - len:%d, sec:%d, bb_start:%d\n",
lm->emeta_len[0], lm->emeta_sec[0],
lm->emeta_bb);
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"bitmap lengths: sec:%d, blk:%d, lun:%d\n",
lm->sec_bitmap_len,
lm->blk_bitmap_len,
lm->lun_bitmap_len);
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"blk_line:%d, sec_line:%d, sec_blk:%d\n",
lm->blk_per_line,
lm->sec_per_line,
@@ -344,12 +344,12 @@ static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad,
{
int sz;
- sz = snprintf(page, PAGE_SIZE,
+ sz = scnprintf(page, PAGE_SIZE,
"user:%lld gc:%lld pad:%lld WA:",
user, gc, pad);
if (!user) {
- sz += snprintf(page + sz, PAGE_SIZE - sz, "NaN\n");
+ sz += scnprintf(page + sz, PAGE_SIZE - sz, "NaN\n");
} else {
u64 wa_int;
u32 wa_frac;
@@ -358,7 +358,7 @@ static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad,
wa_int = div64_u64(wa_int, user);
wa_int = div_u64_rem(wa_int, 100000, &wa_frac);
- sz += snprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n",
+ sz += scnprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n",
wa_int, wa_frac);
}
@@ -401,9 +401,9 @@ static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page)
total = atomic64_read(&pblk->nr_flush) - pblk->nr_flush_rst;
if (!total) {
for (i = 0; i < (buckets + 1); i++)
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"%d:0 ", i);
- sz += snprintf(page + sz, PAGE_SIZE - sz, "\n");
+ sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n");
return sz;
}
@@ -411,7 +411,7 @@ static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page)
for (i = 0; i < buckets; i++)
total_buckets += atomic64_read(&pblk->pad_dist[i]);
- sz += snprintf(page + sz, PAGE_SIZE - sz, "0:%lld%% ",
+ sz += scnprintf(page + sz, PAGE_SIZE - sz, "0:%lld%% ",
bucket_percentage(total - total_buckets, total));
for (i = 0; i < buckets; i++) {
@@ -419,10 +419,10 @@ static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page)
p = bucket_percentage(atomic64_read(&pblk->pad_dist[i]),
total);
- sz += snprintf(page + sz, PAGE_SIZE - sz, "%d:%lld%% ",
+ sz += scnprintf(page + sz, PAGE_SIZE - sz, "%d:%lld%% ",
i + 1, p);
}
- sz += snprintf(page + sz, PAGE_SIZE - sz, "\n");
+ sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n");
return sz;
}
diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c
index 8c74457..a0d87ed 100644
--- a/drivers/macintosh/therm_windtunnel.c
+++ b/drivers/macintosh/therm_windtunnel.c
@@ -300,9 +300,11 @@ static int control_loop(void *dummy)
/* i2c probing and setup */
/************************************************************************/
-static int
-do_attach( struct i2c_adapter *adapter )
+static void do_attach(struct i2c_adapter *adapter)
{
+ struct i2c_board_info info = { };
+ struct device_node *np;
+
/* scan 0x48-0x4f (DS1775) and 0x2c-2x2f (ADM1030) */
static const unsigned short scan_ds1775[] = {
0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
@@ -313,25 +315,24 @@ do_attach( struct i2c_adapter *adapter )
I2C_CLIENT_END
};
- if( strncmp(adapter->name, "uni-n", 5) )
- return 0;
+ if (x.running || strncmp(adapter->name, "uni-n", 5))
+ return;
- if( !x.running ) {
- struct i2c_board_info info;
-
- memset(&info, 0, sizeof(struct i2c_board_info));
- strlcpy(info.type, "therm_ds1775", I2C_NAME_SIZE);
+ np = of_find_compatible_node(adapter->dev.of_node, NULL, "MAC,ds1775");
+ if (np) {
+ of_node_put(np);
+ } else {
+ strlcpy(info.type, "MAC,ds1775", I2C_NAME_SIZE);
i2c_new_probed_device(adapter, &info, scan_ds1775, NULL);
-
- strlcpy(info.type, "therm_adm1030", I2C_NAME_SIZE);
- i2c_new_probed_device(adapter, &info, scan_adm1030, NULL);
-
- if( x.thermostat && x.fan ) {
- x.running = 1;
- x.poll_task = kthread_run(control_loop, NULL, "g4fand");
- }
}
- return 0;
+
+ np = of_find_compatible_node(adapter->dev.of_node, NULL, "MAC,adm1030");
+ if (np) {
+ of_node_put(np);
+ } else {
+ strlcpy(info.type, "MAC,adm1030", I2C_NAME_SIZE);
+ i2c_new_probed_device(adapter, &info, scan_adm1030, NULL);
+ }
}
static int
@@ -404,8 +405,8 @@ attach_thermostat( struct i2c_client *cl )
enum chip { ds1775, adm1030 };
static const struct i2c_device_id therm_windtunnel_id[] = {
- { "therm_ds1775", ds1775 },
- { "therm_adm1030", adm1030 },
+ { "MAC,ds1775", ds1775 },
+ { "MAC,adm1030", adm1030 },
{ }
};
MODULE_DEVICE_TABLE(i2c, therm_windtunnel_id);
@@ -414,6 +415,7 @@ static int
do_probe(struct i2c_client *cl, const struct i2c_device_id *id)
{
struct i2c_adapter *adapter = cl->adapter;
+ int ret = 0;
if( !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WORD_DATA
| I2C_FUNC_SMBUS_WRITE_BYTE) )
@@ -421,11 +423,19 @@ do_probe(struct i2c_client *cl, const struct i2c_device_id *id)
switch (id->driver_data) {
case adm1030:
- return attach_fan( cl );
+ ret = attach_fan(cl);
+ break;
case ds1775:
- return attach_thermostat(cl);
+ ret = attach_thermostat(cl);
+ break;
}
- return 0;
+
+ if (!x.running && x.thermostat && x.fan) {
+ x.running = 1;
+ x.poll_task = kthread_run(control_loop, NULL, "g4fand");
+ }
+
+ return ret;
}
static struct i2c_driver g4fan_driver = {
diff --git a/drivers/macintosh/windfarm_ad7417_sensor.c b/drivers/macintosh/windfarm_ad7417_sensor.c
index 1256059..e7dec32 100644
--- a/drivers/macintosh/windfarm_ad7417_sensor.c
+++ b/drivers/macintosh/windfarm_ad7417_sensor.c
@@ -312,9 +312,16 @@ static const struct i2c_device_id wf_ad7417_id[] = {
};
MODULE_DEVICE_TABLE(i2c, wf_ad7417_id);
+static const struct of_device_id wf_ad7417_of_id[] = {
+ { .compatible = "ad7417", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, wf_ad7417_of_id);
+
static struct i2c_driver wf_ad7417_driver = {
.driver = {
.name = "wf_ad7417",
+ .of_match_table = wf_ad7417_of_id,
},
.probe = wf_ad7417_probe,
.remove = wf_ad7417_remove,
diff --git a/drivers/macintosh/windfarm_fcu_controls.c b/drivers/macintosh/windfarm_fcu_controls.c
index 67daeec..2470e5a 100644
--- a/drivers/macintosh/windfarm_fcu_controls.c
+++ b/drivers/macintosh/windfarm_fcu_controls.c
@@ -580,9 +580,16 @@ static const struct i2c_device_id wf_fcu_id[] = {
};
MODULE_DEVICE_TABLE(i2c, wf_fcu_id);
+static const struct of_device_id wf_fcu_of_id[] = {
+ { .compatible = "fcu", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, wf_fcu_of_id);
+
static struct i2c_driver wf_fcu_driver = {
.driver = {
.name = "wf_fcu",
+ .of_match_table = wf_fcu_of_id,
},
.probe = wf_fcu_probe,
.remove = wf_fcu_remove,
diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c
index 282c28a..1e5fa09 100644
--- a/drivers/macintosh/windfarm_lm75_sensor.c
+++ b/drivers/macintosh/windfarm_lm75_sensor.c
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/wait.h>
#include <linux/i2c.h>
+#include <linux/of_device.h>
#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/io.h>
@@ -91,9 +92,14 @@ static int wf_lm75_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
struct wf_lm75_sensor *lm;
- int rc, ds1775 = id->driver_data;
+ int rc, ds1775;
const char *name, *loc;
+ if (id)
+ ds1775 = id->driver_data;
+ else
+ ds1775 = !!of_device_get_match_data(&client->dev);
+
DBG("wf_lm75: creating %s device at address 0x%02x\n",
ds1775 ? "ds1775" : "lm75", client->addr);
@@ -164,9 +170,17 @@ static const struct i2c_device_id wf_lm75_id[] = {
};
MODULE_DEVICE_TABLE(i2c, wf_lm75_id);
+static const struct of_device_id wf_lm75_of_id[] = {
+ { .compatible = "lm75", .data = (void *)0},
+ { .compatible = "ds1775", .data = (void *)1 },
+ { }
+};
+MODULE_DEVICE_TABLE(of, wf_lm75_of_id);
+
static struct i2c_driver wf_lm75_driver = {
.driver = {
.name = "wf_lm75",
+ .of_match_table = wf_lm75_of_id,
},
.probe = wf_lm75_probe,
.remove = wf_lm75_remove,
diff --git a/drivers/macintosh/windfarm_lm87_sensor.c b/drivers/macintosh/windfarm_lm87_sensor.c
index b03a33b..d011899 100644
--- a/drivers/macintosh/windfarm_lm87_sensor.c
+++ b/drivers/macintosh/windfarm_lm87_sensor.c
@@ -166,9 +166,16 @@ static const struct i2c_device_id wf_lm87_id[] = {
};
MODULE_DEVICE_TABLE(i2c, wf_lm87_id);
+static const struct of_device_id wf_lm87_of_id[] = {
+ { .compatible = "lm87cimt", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, wf_lm87_of_id);
+
static struct i2c_driver wf_lm87_driver = {
.driver = {
.name = "wf_lm87",
+ .of_match_table = wf_lm87_of_id,
},
.probe = wf_lm87_probe,
.remove = wf_lm87_remove,
diff --git a/drivers/macintosh/windfarm_max6690_sensor.c b/drivers/macintosh/windfarm_max6690_sensor.c
index e666cc0..1e7b03d 100644
--- a/drivers/macintosh/windfarm_max6690_sensor.c
+++ b/drivers/macintosh/windfarm_max6690_sensor.c
@@ -120,9 +120,16 @@ static const struct i2c_device_id wf_max6690_id[] = {
};
MODULE_DEVICE_TABLE(i2c, wf_max6690_id);
+static const struct of_device_id wf_max6690_of_id[] = {
+ { .compatible = "max6690", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, wf_max6690_of_id);
+
static struct i2c_driver wf_max6690_driver = {
.driver = {
.name = "wf_max6690",
+ .of_match_table = wf_max6690_of_id,
},
.probe = wf_max6690_probe,
.remove = wf_max6690_remove,
diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c
index c84ec49..cb75dc0 100644
--- a/drivers/macintosh/windfarm_smu_sat.c
+++ b/drivers/macintosh/windfarm_smu_sat.c
@@ -341,9 +341,16 @@ static const struct i2c_device_id wf_sat_id[] = {
};
MODULE_DEVICE_TABLE(i2c, wf_sat_id);
+static const struct of_device_id wf_sat_of_id[] = {
+ { .compatible = "smu-sat", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, wf_sat_of_id);
+
static struct i2c_driver wf_sat_driver = {
.driver = {
.name = "wf_smu_sat",
+ .of_match_table = wf_sat_of_id,
},
.probe = wf_sat_probe,
.remove = wf_sat_remove,
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index fa872df..72856e5 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -101,64 +101,6 @@
#define insert_lock(s, b) ((b)->level <= (s)->lock)
-/*
- * These macros are for recursing down the btree - they handle the details of
- * locking and looking up nodes in the cache for you. They're best treated as
- * mere syntax when reading code that uses them.
- *
- * op->lock determines whether we take a read or a write lock at a given depth.
- * If you've got a read lock and find that you need a write lock (i.e. you're
- * going to have to split), set op->lock and return -EINTR; btree_root() will
- * call you again and you'll have the correct lock.
- */
-
-/**
- * btree - recurse down the btree on a specified key
- * @fn: function to call, which will be passed the child node
- * @key: key to recurse on
- * @b: parent btree node
- * @op: pointer to struct btree_op
- */
-#define btree(fn, key, b, op, ...) \
-({ \
- int _r, l = (b)->level - 1; \
- bool _w = l <= (op)->lock; \
- struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \
- _w, b); \
- if (!IS_ERR(_child)) { \
- _r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \
- rw_unlock(_w, _child); \
- } else \
- _r = PTR_ERR(_child); \
- _r; \
-})
-
-/**
- * btree_root - call a function on the root of the btree
- * @fn: function to call, which will be passed the child node
- * @c: cache set
- * @op: pointer to struct btree_op
- */
-#define btree_root(fn, c, op, ...) \
-({ \
- int _r = -EINTR; \
- do { \
- struct btree *_b = (c)->root; \
- bool _w = insert_lock(op, _b); \
- rw_lock(_w, _b, _b->level); \
- if (_b == (c)->root && \
- _w == insert_lock(op, _b)) { \
- _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
- } \
- rw_unlock(_w, _b); \
- bch_cannibalize_unlock(c); \
- if (_r == -EINTR) \
- schedule(); \
- } while (_r == -EINTR); \
- \
- finish_wait(&(c)->btree_cache_wait, &(op)->wait); \
- _r; \
-})
static inline struct bset *write_block(struct btree *b)
{
@@ -1848,7 +1790,7 @@ static void bch_btree_gc(struct cache_set *c)
/* if CACHE_SET_IO_DISABLE set, gc thread should stop too */
do {
- ret = btree_root(gc_root, c, &op, &writes, &stats);
+ ret = bcache_btree_root(gc_root, c, &op, &writes, &stats);
closure_sync(&writes);
cond_resched();
@@ -1946,7 +1888,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
}
if (p)
- ret = btree(check_recurse, p, b, op);
+ ret = bcache_btree(check_recurse, p, b, op);
p = k;
} while (p && !ret);
@@ -1955,13 +1897,176 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
return ret;
}
+
+static int bch_btree_check_thread(void *arg)
+{
+ int ret;
+ struct btree_check_info *info = arg;
+ struct btree_check_state *check_state = info->state;
+ struct cache_set *c = check_state->c;
+ struct btree_iter iter;
+ struct bkey *k, *p;
+ int cur_idx, prev_idx, skip_nr;
+ int i, n;
+
+ k = p = NULL;
+ i = n = 0;
+ cur_idx = prev_idx = 0;
+ ret = 0;
+
+ /* root node keys are checked before thread created */
+ bch_btree_iter_init(&c->root->keys, &iter, NULL);
+ k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
+ BUG_ON(!k);
+
+ p = k;
+ while (k) {
+ /*
+ * Fetch a root node key index, skip the keys which
+ * should be fetched by other threads, then check the
+ * sub-tree indexed by the fetched key.
+ */
+ spin_lock(&check_state->idx_lock);
+ cur_idx = check_state->key_idx;
+ check_state->key_idx++;
+ spin_unlock(&check_state->idx_lock);
+
+ skip_nr = cur_idx - prev_idx;
+
+ while (skip_nr) {
+ k = bch_btree_iter_next_filter(&iter,
+ &c->root->keys,
+ bch_ptr_bad);
+ if (k)
+ p = k;
+ else {
+ /*
+ * No more keys to check in root node,
+ * current checking threads are enough,
+ * stop creating more.
+ */
+ atomic_set(&check_state->enough, 1);
+ /* Update check_state->enough earlier */
+ smp_mb__after_atomic();
+ goto out;
+ }
+ skip_nr--;
+ cond_resched();
+ }
+
+ if (p) {
+ struct btree_op op;
+
+ btree_node_prefetch(c->root, p);
+ c->gc_stats.nodes++;
+ bch_btree_op_init(&op, 0);
+ ret = bcache_btree(check_recurse, p, c->root, &op);
+ if (ret)
+ goto out;
+ }
+ p = NULL;
+ prev_idx = cur_idx;
+ cond_resched();
+ }
+
+out:
+ info->result = ret;
+ /* update check_state->started among all CPUs */
+ smp_mb__before_atomic();
+ if (atomic_dec_and_test(&check_state->started))
+ wake_up(&check_state->wait);
+
+ return ret;
+}
+
+
+
+static int bch_btree_chkthread_nr(void)
+{
+ int n = num_online_cpus()/2;
+
+ if (n == 0)
+ n = 1;
+ else if (n > BCH_BTR_CHKTHREAD_MAX)
+ n = BCH_BTR_CHKTHREAD_MAX;
+
+ return n;
+}
+
int bch_btree_check(struct cache_set *c)
{
- struct btree_op op;
+ int ret = 0;
+ int i;
+ struct bkey *k = NULL;
+ struct btree_iter iter;
+ struct btree_check_state *check_state;
+ char name[32];
- bch_btree_op_init(&op, SHRT_MAX);
+ /* check and mark root node keys */
+ for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid)
+ bch_initial_mark_key(c, c->root->level, k);
- return btree_root(check_recurse, c, &op);
+ bch_initial_mark_key(c, c->root->level + 1, &c->root->key);
+
+ if (c->root->level == 0)
+ return 0;
+
+ check_state = kzalloc(sizeof(struct btree_check_state), GFP_KERNEL);
+ if (!check_state)
+ return -ENOMEM;
+
+ check_state->c = c;
+ check_state->total_threads = bch_btree_chkthread_nr();
+ check_state->key_idx = 0;
+ spin_lock_init(&check_state->idx_lock);
+ atomic_set(&check_state->started, 0);
+ atomic_set(&check_state->enough, 0);
+ init_waitqueue_head(&check_state->wait);
+
+ /*
+ * Run multiple threads to check btree nodes in parallel,
+ * if check_state->enough is non-zero, it means current
+ * running check threads are enough, unncessary to create
+ * more.
+ */
+ for (i = 0; i < check_state->total_threads; i++) {
+ /* fetch latest check_state->enough earlier */
+ smp_mb__before_atomic();
+ if (atomic_read(&check_state->enough))
+ break;
+
+ check_state->infos[i].result = 0;
+ check_state->infos[i].state = check_state;
+ snprintf(name, sizeof(name), "bch_btrchk[%u]", i);
+ atomic_inc(&check_state->started);
+
+ check_state->infos[i].thread =
+ kthread_run(bch_btree_check_thread,
+ &check_state->infos[i],
+ name);
+ if (IS_ERR(check_state->infos[i].thread)) {
+ pr_err("fails to run thread bch_btrchk[%d]", i);
+ for (--i; i >= 0; i--)
+ kthread_stop(check_state->infos[i].thread);
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ wait_event_interruptible(check_state->wait,
+ atomic_read(&check_state->started) == 0 ||
+ test_bit(CACHE_SET_IO_DISABLE, &c->flags));
+
+ for (i = 0; i < check_state->total_threads; i++) {
+ if (check_state->infos[i].result) {
+ ret = check_state->infos[i].result;
+ goto out;
+ }
+ }
+
+out:
+ kfree(check_state);
+ return ret;
}
void bch_initial_gc_finish(struct cache_set *c)
@@ -2401,7 +2506,7 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op,
while ((k = bch_btree_iter_next_filter(&iter, &b->keys,
bch_ptr_bad))) {
- ret = btree(map_nodes_recurse, k, b,
+ ret = bcache_btree(map_nodes_recurse, k, b,
op, from, fn, flags);
from = NULL;
@@ -2419,10 +2524,10 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op,
int __bch_btree_map_nodes(struct btree_op *op, struct cache_set *c,
struct bkey *from, btree_map_nodes_fn *fn, int flags)
{
- return btree_root(map_nodes_recurse, c, op, from, fn, flags);
+ return bcache_btree_root(map_nodes_recurse, c, op, from, fn, flags);
}
-static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
+int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
struct bkey *from, btree_map_keys_fn *fn,
int flags)
{
@@ -2435,7 +2540,8 @@ static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) {
ret = !b->level
? fn(op, b, k)
- : btree(map_keys_recurse, k, b, op, from, fn, flags);
+ : bcache_btree(map_keys_recurse, k,
+ b, op, from, fn, flags);
from = NULL;
if (ret != MAP_CONTINUE)
@@ -2452,7 +2558,7 @@ static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
int bch_btree_map_keys(struct btree_op *op, struct cache_set *c,
struct bkey *from, btree_map_keys_fn *fn, int flags)
{
- return btree_root(map_keys_recurse, c, op, from, fn, flags);
+ return bcache_btree_root(map_keys_recurse, c, op, from, fn, flags);
}
/* Keybuf code */
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index f4dcca4..2579699 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -145,6 +145,9 @@ struct btree {
struct bio *bio;
};
+
+
+
#define BTREE_FLAG(flag) \
static inline bool btree_node_ ## flag(struct btree *b) \
{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \
@@ -216,6 +219,25 @@ struct btree_op {
unsigned int insert_collision:1;
};
+struct btree_check_state;
+struct btree_check_info {
+ struct btree_check_state *state;
+ struct task_struct *thread;
+ int result;
+};
+
+#define BCH_BTR_CHKTHREAD_MAX 64
+struct btree_check_state {
+ struct cache_set *c;
+ int total_threads;
+ int key_idx;
+ spinlock_t idx_lock;
+ atomic_t started;
+ atomic_t enough;
+ wait_queue_head_t wait;
+ struct btree_check_info infos[BCH_BTR_CHKTHREAD_MAX];
+};
+
static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
{
memset(op, 0, sizeof(struct btree_op));
@@ -284,6 +306,65 @@ static inline void force_wake_up_gc(struct cache_set *c)
wake_up_gc(c);
}
+/*
+ * These macros are for recursing down the btree - they handle the details of
+ * locking and looking up nodes in the cache for you. They're best treated as
+ * mere syntax when reading code that uses them.
+ *
+ * op->lock determines whether we take a read or a write lock at a given depth.
+ * If you've got a read lock and find that you need a write lock (i.e. you're
+ * going to have to split), set op->lock and return -EINTR; btree_root() will
+ * call you again and you'll have the correct lock.
+ */
+
+/**
+ * btree - recurse down the btree on a specified key
+ * @fn: function to call, which will be passed the child node
+ * @key: key to recurse on
+ * @b: parent btree node
+ * @op: pointer to struct btree_op
+ */
+#define bcache_btree(fn, key, b, op, ...) \
+({ \
+ int _r, l = (b)->level - 1; \
+ bool _w = l <= (op)->lock; \
+ struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \
+ _w, b); \
+ if (!IS_ERR(_child)) { \
+ _r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \
+ rw_unlock(_w, _child); \
+ } else \
+ _r = PTR_ERR(_child); \
+ _r; \
+})
+
+/**
+ * btree_root - call a function on the root of the btree
+ * @fn: function to call, which will be passed the child node
+ * @c: cache set
+ * @op: pointer to struct btree_op
+ */
+#define bcache_btree_root(fn, c, op, ...) \
+({ \
+ int _r = -EINTR; \
+ do { \
+ struct btree *_b = (c)->root; \
+ bool _w = insert_lock(op, _b); \
+ rw_lock(_w, _b, _b->level); \
+ if (_b == (c)->root && \
+ _w == insert_lock(op, _b)) { \
+ _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
+ } \
+ rw_unlock(_w, _b); \
+ bch_cannibalize_unlock(c); \
+ if (_r == -EINTR) \
+ schedule(); \
+ } while (_r == -EINTR); \
+ \
+ finish_wait(&(c)->btree_cache_wait, &(op)->wait); \
+ _r; \
+})
+
#define MAP_DONE 0
#define MAP_CONTINUE 1
@@ -314,6 +395,9 @@ typedef int (btree_map_keys_fn)(struct btree_op *op, struct btree *b,
struct bkey *k);
int bch_btree_map_keys(struct btree_op *op, struct cache_set *c,
struct bkey *from, btree_map_keys_fn *fn, int flags);
+int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
+ struct bkey *from, btree_map_keys_fn *fn,
+ int flags);
typedef bool (keybuf_pred_fn)(struct keybuf *buf, struct bkey *k);
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 6730820..0e3ff97 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -417,8 +417,6 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
/* Journalling */
-#define nr_to_fifo_front(p, front_p, mask) (((p) - (front_p)) & (mask))
-
static void btree_flush_write(struct cache_set *c)
{
struct btree *b, *t, *btree_nodes[BTREE_FLUSH_NR];
@@ -510,9 +508,8 @@ static void btree_flush_write(struct cache_set *c)
* journal entry can be reclaimed). These selected nodes
* will be ignored and skipped in the folowing for-loop.
*/
- if (nr_to_fifo_front(btree_current_write(b)->journal,
- fifo_front_p,
- mask) != 0) {
+ if (((btree_current_write(b)->journal - fifo_front_p) &
+ mask) != 0) {
mutex_unlock(&b->write_lock);
continue;
}
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 820d840..71a90fb 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1161,8 +1161,7 @@ static void quit_max_writeback_rate(struct cache_set *c,
/* Cached devices - read & write stuff */
-static blk_qc_t cached_dev_make_request(struct request_queue *q,
- struct bio *bio)
+blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio)
{
struct search *s;
struct bcache_device *d = bio->bi_disk->private_data;
@@ -1266,7 +1265,6 @@ void bch_cached_dev_request_init(struct cached_dev *dc)
{
struct gendisk *g = dc->disk.disk;
- g->queue->make_request_fn = cached_dev_make_request;
g->queue->backing_dev_info->congested_fn = cached_dev_congested;
dc->disk.cache_miss = cached_dev_cache_miss;
dc->disk.ioctl = cached_dev_ioctl;
@@ -1301,8 +1299,7 @@ static void flash_dev_nodata(struct closure *cl)
continue_at(cl, search_free, NULL);
}
-static blk_qc_t flash_dev_make_request(struct request_queue *q,
- struct bio *bio)
+blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio)
{
struct search *s;
struct closure *cl;
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index c64dbd7..bb005c9 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -37,7 +37,10 @@ unsigned int bch_get_congested(const struct cache_set *c);
void bch_data_insert(struct closure *cl);
void bch_cached_dev_request_init(struct cached_dev *dc);
+blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio);
+
void bch_flash_dev_request_init(struct bcache_device *d);
+blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio);
extern struct kmem_cache *bch_search_cache;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 2749daf..d98354f 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -816,7 +816,7 @@ static void bcache_device_free(struct bcache_device *d)
}
static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
- sector_t sectors)
+ sector_t sectors, make_request_fn make_request_fn)
{
struct request_queue *q;
const size_t max_stripes = min_t(size_t, INT_MAX,
@@ -866,11 +866,10 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
d->disk->fops = &bcache_ops;
d->disk->private_data = d;
- q = blk_alloc_queue(GFP_KERNEL);
+ q = blk_alloc_queue(make_request_fn, NUMA_NO_NODE);
if (!q)
return -ENOMEM;
- blk_queue_make_request(q, NULL);
d->disk->queue = q;
q->queuedata = d;
q->backing_dev_info->congested_data = d;
@@ -1339,7 +1338,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
q->limits.raid_partial_stripes_expensive;
ret = bcache_device_init(&dc->disk, block_size,
- dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
+ dc->bdev->bd_part->nr_sects - dc->sb.data_offset,
+ cached_dev_make_request);
if (ret)
return ret;
@@ -1451,7 +1451,8 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
kobject_init(&d->kobj, &bch_flash_dev_ktype);
- if (bcache_device_init(d, block_bytes(c), u->sectors))
+ if (bcache_device_init(d, block_bytes(c), u->sectors,
+ flash_dev_make_request))
goto err;
bcache_device_attach(d, c, u - c->uuids);
@@ -1917,23 +1918,6 @@ static int run_cache_set(struct cache_set *c)
if (bch_btree_check(c))
goto err;
- /*
- * bch_btree_check() may occupy too much system memory which
- * has negative effects to user space application (e.g. data
- * base) performance. Shrink the mca cache memory proactively
- * here to avoid competing memory with user space workloads..
- */
- if (!c->shrinker_disabled) {
- struct shrink_control sc;
-
- sc.gfp_mask = GFP_KERNEL;
- sc.nr_to_scan = c->btree_cache_used * c->btree_pages;
- /* first run to clear b->accessed tag */
- c->shrink.scan_objects(&c->shrink, &sc);
- /* second run to reap non-accessed nodes */
- c->shrink.scan_objects(&c->shrink, &sc);
- }
-
bch_journal_mark(c, &journal);
bch_initial_gc_finish(c);
pr_debug("btree_check() done");
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 3470fae..3232769 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -154,7 +154,7 @@ static ssize_t bch_snprint_string_list(char *buf,
size_t i;
for (i = 0; list[i]; i++)
- out += snprintf(out, buf + size - out,
+ out += scnprintf(out, buf + size - out,
i == selected ? "[%s] " : "%s ", list[i]);
out[-1] = '\n';
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 4a40f9e..3f7641f 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -183,7 +183,7 @@ static void update_writeback_rate(struct work_struct *work)
*/
set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
- smp_mb();
+ smp_mb__after_atomic();
/*
* CACHE_SET_IO_DISABLE might be set via sysfs interface,
@@ -193,7 +193,7 @@ static void update_writeback_rate(struct work_struct *work)
test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
- smp_mb();
+ smp_mb__after_atomic();
return;
}
@@ -229,7 +229,7 @@ static void update_writeback_rate(struct work_struct *work)
*/
clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
- smp_mb();
+ smp_mb__after_atomic();
}
static unsigned int writeback_delay(struct cached_dev *dc,
@@ -785,7 +785,9 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
return MAP_CONTINUE;
}
-void bch_sectors_dirty_init(struct bcache_device *d)
+static int bch_root_node_dirty_init(struct cache_set *c,
+ struct bcache_device *d,
+ struct bkey *k)
{
struct sectors_dirty_init op;
int ret;
@@ -796,8 +798,13 @@ void bch_sectors_dirty_init(struct bcache_device *d)
op.start = KEY(op.inode, 0, 0);
do {
- ret = bch_btree_map_keys(&op.op, d->c, &op.start,
- sectors_dirty_init_fn, 0);
+ ret = bcache_btree(map_keys_recurse,
+ k,
+ c->root,
+ &op.op,
+ &op.start,
+ sectors_dirty_init_fn,
+ 0);
if (ret == -EAGAIN)
schedule_timeout_interruptible(
msecs_to_jiffies(INIT_KEYS_SLEEP_MS));
@@ -806,6 +813,151 @@ void bch_sectors_dirty_init(struct bcache_device *d)
break;
}
} while (ret == -EAGAIN);
+
+ return ret;
+}
+
+static int bch_dirty_init_thread(void *arg)
+{
+ struct dirty_init_thrd_info *info = arg;
+ struct bch_dirty_init_state *state = info->state;
+ struct cache_set *c = state->c;
+ struct btree_iter iter;
+ struct bkey *k, *p;
+ int cur_idx, prev_idx, skip_nr;
+ int i;
+
+ k = p = NULL;
+ i = 0;
+ cur_idx = prev_idx = 0;
+
+ bch_btree_iter_init(&c->root->keys, &iter, NULL);
+ k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
+ BUG_ON(!k);
+
+ p = k;
+
+ while (k) {
+ spin_lock(&state->idx_lock);
+ cur_idx = state->key_idx;
+ state->key_idx++;
+ spin_unlock(&state->idx_lock);
+
+ skip_nr = cur_idx - prev_idx;
+
+ while (skip_nr) {
+ k = bch_btree_iter_next_filter(&iter,
+ &c->root->keys,
+ bch_ptr_bad);
+ if (k)
+ p = k;
+ else {
+ atomic_set(&state->enough, 1);
+ /* Update state->enough earlier */
+ smp_mb__after_atomic();
+ goto out;
+ }
+ skip_nr--;
+ cond_resched();
+ }
+
+ if (p) {
+ if (bch_root_node_dirty_init(c, state->d, p) < 0)
+ goto out;
+ }
+
+ p = NULL;
+ prev_idx = cur_idx;
+ cond_resched();
+ }
+
+out:
+ /* In order to wake up state->wait in time */
+ smp_mb__before_atomic();
+ if (atomic_dec_and_test(&state->started))
+ wake_up(&state->wait);
+
+ return 0;
+}
+
+static int bch_btre_dirty_init_thread_nr(void)
+{
+ int n = num_online_cpus()/2;
+
+ if (n == 0)
+ n = 1;
+ else if (n > BCH_DIRTY_INIT_THRD_MAX)
+ n = BCH_DIRTY_INIT_THRD_MAX;
+
+ return n;
+}
+
+void bch_sectors_dirty_init(struct bcache_device *d)
+{
+ int i;
+ struct bkey *k = NULL;
+ struct btree_iter iter;
+ struct sectors_dirty_init op;
+ struct cache_set *c = d->c;
+ struct bch_dirty_init_state *state;
+ char name[32];
+
+ /* Just count root keys if no leaf node */
+ if (c->root->level == 0) {
+ bch_btree_op_init(&op.op, -1);
+ op.inode = d->id;
+ op.count = 0;
+ op.start = KEY(op.inode, 0, 0);
+
+ for_each_key_filter(&c->root->keys,
+ k, &iter, bch_ptr_invalid)
+ sectors_dirty_init_fn(&op.op, c->root, k);
+ return;
+ }
+
+ state = kzalloc(sizeof(struct bch_dirty_init_state), GFP_KERNEL);
+ if (!state) {
+ pr_warn("sectors dirty init failed: cannot allocate memory");
+ return;
+ }
+
+ state->c = c;
+ state->d = d;
+ state->total_threads = bch_btre_dirty_init_thread_nr();
+ state->key_idx = 0;
+ spin_lock_init(&state->idx_lock);
+ atomic_set(&state->started, 0);
+ atomic_set(&state->enough, 0);
+ init_waitqueue_head(&state->wait);
+
+ for (i = 0; i < state->total_threads; i++) {
+ /* Fetch latest state->enough earlier */
+ smp_mb__before_atomic();
+ if (atomic_read(&state->enough))
+ break;
+
+ state->infos[i].state = state;
+ atomic_inc(&state->started);
+ snprintf(name, sizeof(name), "bch_dirty_init[%d]", i);
+
+ state->infos[i].thread =
+ kthread_run(bch_dirty_init_thread,
+ &state->infos[i],
+ name);
+ if (IS_ERR(state->infos[i].thread)) {
+ pr_err("fails to run thread bch_dirty_init[%d]", i);
+ for (--i; i >= 0; i--)
+ kthread_stop(state->infos[i].thread);
+ goto out;
+ }
+ }
+
+ wait_event_interruptible(state->wait,
+ atomic_read(&state->started) == 0 ||
+ test_bit(CACHE_SET_IO_DISABLE, &c->flags));
+
+out:
+ kfree(state);
}
void bch_cached_dev_writeback_init(struct cached_dev *dc)
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 4e4c681..b029843 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -16,6 +16,7 @@
#define BCH_AUTO_GC_DIRTY_THRESHOLD 50
+#define BCH_DIRTY_INIT_THRD_MAX 64
/*
* 14 (16384ths) is chosen here as something that each backing device
* should be a reasonable fraction of the share, and not to blow up
@@ -23,6 +24,24 @@
*/
#define WRITEBACK_SHARE_SHIFT 14
+struct bch_dirty_init_state;
+struct dirty_init_thrd_info {
+ struct bch_dirty_init_state *state;
+ struct task_struct *thread;
+};
+
+struct bch_dirty_init_state {
+ struct cache_set *c;
+ struct bcache_device *d;
+ int total_threads;
+ int key_idx;
+ spinlock_t idx_lock;
+ atomic_t started;
+ atomic_t enough;
+ wait_queue_head_t wait;
+ struct dirty_init_thrd_info infos[BCH_DIRTY_INIT_THRD_MAX];
+};
+
static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
{
uint64_t i, ret = 0;
diff --git a/drivers/md/dm-bio-record.h b/drivers/md/dm-bio-record.h
index c82578a..2ea0360 100644
--- a/drivers/md/dm-bio-record.h
+++ b/drivers/md/dm-bio-record.h
@@ -20,8 +20,13 @@
struct dm_bio_details {
struct gendisk *bi_disk;
u8 bi_partno;
+ int __bi_remaining;
unsigned long bi_flags;
struct bvec_iter bi_iter;
+ bio_end_io_t *bi_end_io;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+ struct bio_integrity_payload *bi_integrity;
+#endif
};
static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
@@ -30,6 +35,11 @@ static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
bd->bi_partno = bio->bi_partno;
bd->bi_flags = bio->bi_flags;
bd->bi_iter = bio->bi_iter;
+ bd->__bi_remaining = atomic_read(&bio->__bi_remaining);
+ bd->bi_end_io = bio->bi_end_io;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+ bd->bi_integrity = bio_integrity(bio);
+#endif
}
static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
@@ -38,6 +48,11 @@ static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
bio->bi_partno = bd->bi_partno;
bio->bi_flags = bd->bi_flags;
bio->bi_iter = bd->bi_iter;
+ atomic_set(&bio->__bi_remaining, bd->__bi_remaining);
+ bio->bi_end_io = bd->bi_end_io;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+ bio->bi_integrity = bd->bi_integrity;
+#endif
}
#endif
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 2d32821..d3bb355 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2846,8 +2846,8 @@ static void cache_postsuspend(struct dm_target *ti)
prevent_background_work(cache);
BUG_ON(atomic_read(&cache->nr_io_migrations));
- cancel_delayed_work(&cache->waker);
- flush_workqueue(cache->wq);
+ cancel_delayed_work_sync(&cache->waker);
+ drain_workqueue(cache->wq);
WARN_ON(cache->tracker.in_flight);
/*
@@ -3492,7 +3492,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type cache_target = {
.name = "cache",
- .version = {2, 1, 0},
+ .version = {2, 2, 0},
.module = THIS_MODULE,
.ctr = cache_ctr,
.dtr = cache_dtr,
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index b225b3e4..2f03fec 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -6,6 +6,8 @@
* This file is released under the GPL.
*/
+#include "dm-bio-record.h"
+
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/device-mapper.h>
@@ -201,17 +203,19 @@ struct dm_integrity_c {
__u8 log2_blocks_per_bitmap_bit;
unsigned char mode;
- int suspending;
int failed;
struct crypto_shash *internal_hash;
+ struct dm_target *ti;
+
/* these variables are locked with endio_wait.lock */
struct rb_root in_progress;
struct list_head wait_list;
wait_queue_head_t endio_wait;
struct workqueue_struct *wait_wq;
+ struct workqueue_struct *offload_wq;
unsigned char commit_seq;
commit_id_t commit_ids[N_COMMIT_IDS];
@@ -293,11 +297,7 @@ struct dm_integrity_io {
struct completion *completion;
- struct gendisk *orig_bi_disk;
- u8 orig_bi_partno;
- bio_end_io_t *orig_bi_end_io;
- struct bio_integrity_payload *orig_bi_integrity;
- struct bvec_iter orig_bi_iter;
+ struct dm_bio_details bio_details;
};
struct journal_completion {
@@ -1439,7 +1439,7 @@ static void dec_in_flight(struct dm_integrity_io *dio)
dio->range.logical_sector += dio->range.n_sectors;
bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
INIT_WORK(&dio->work, integrity_bio_wait);
- queue_work(ic->wait_wq, &dio->work);
+ queue_work(ic->offload_wq, &dio->work);
return;
}
do_endio_flush(ic, dio);
@@ -1450,14 +1450,9 @@ static void integrity_end_io(struct bio *bio)
{
struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
- bio->bi_iter = dio->orig_bi_iter;
- bio->bi_disk = dio->orig_bi_disk;
- bio->bi_partno = dio->orig_bi_partno;
- if (dio->orig_bi_integrity) {
- bio->bi_integrity = dio->orig_bi_integrity;
+ dm_bio_restore(&dio->bio_details, bio);
+ if (bio->bi_integrity)
bio->bi_opf |= REQ_INTEGRITY;
- }
- bio->bi_end_io = dio->orig_bi_end_io;
if (dio->completion)
complete(dio->completion);
@@ -1542,7 +1537,7 @@ static void integrity_metadata(struct work_struct *w)
}
}
- __bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
+ __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) {
unsigned pos;
char *mem, *checksums_ptr;
@@ -1586,7 +1581,7 @@ static void integrity_metadata(struct work_struct *w)
if (likely(checksums != checksums_onstack))
kfree(checksums);
} else {
- struct bio_integrity_payload *bip = dio->orig_bi_integrity;
+ struct bio_integrity_payload *bip = dio->bio_details.bi_integrity;
if (bip) {
struct bio_vec biv;
@@ -1865,7 +1860,7 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map
if (need_sync_io && from_map) {
INIT_WORK(&dio->work, integrity_bio_wait);
- queue_work(ic->metadata_wq, &dio->work);
+ queue_work(ic->offload_wq, &dio->work);
return;
}
@@ -2005,20 +2000,13 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map
} else
dio->completion = NULL;
- dio->orig_bi_iter = bio->bi_iter;
-
- dio->orig_bi_disk = bio->bi_disk;
- dio->orig_bi_partno = bio->bi_partno;
+ dm_bio_record(&dio->bio_details, bio);
bio_set_dev(bio, ic->dev->bdev);
-
- dio->orig_bi_integrity = bio_integrity(bio);
bio->bi_integrity = NULL;
bio->bi_opf &= ~REQ_INTEGRITY;
-
- dio->orig_bi_end_io = bio->bi_end_io;
bio->bi_end_io = integrity_end_io;
-
bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT;
+
generic_make_request(bio);
if (need_sync_io) {
@@ -2315,7 +2303,7 @@ static void integrity_writer(struct work_struct *w)
unsigned prev_free_sectors;
/* the following test is not needed, but it tests the replay code */
- if (READ_ONCE(ic->suspending) && !ic->meta_dev)
+ if (unlikely(dm_suspended(ic->ti)) && !ic->meta_dev)
return;
spin_lock_irq(&ic->endio_wait.lock);
@@ -2376,7 +2364,7 @@ static void integrity_recalc(struct work_struct *w)
next_chunk:
- if (unlikely(READ_ONCE(ic->suspending)))
+ if (unlikely(dm_suspended(ic->ti)))
goto unlock_ret;
range.logical_sector = le64_to_cpu(ic->sb->recalc_sector);
@@ -2501,7 +2489,7 @@ static void bitmap_block_work(struct work_struct *w)
dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
remove_range(ic, &dio->range);
INIT_WORK(&dio->work, integrity_bio_wait);
- queue_work(ic->wait_wq, &dio->work);
+ queue_work(ic->offload_wq, &dio->work);
} else {
block_bitmap_op(ic, ic->journal, dio->range.logical_sector,
dio->range.n_sectors, BITMAP_OP_SET);
@@ -2524,7 +2512,7 @@ static void bitmap_block_work(struct work_struct *w)
remove_range(ic, &dio->range);
INIT_WORK(&dio->work, integrity_bio_wait);
- queue_work(ic->wait_wq, &dio->work);
+ queue_work(ic->offload_wq, &dio->work);
}
queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval);
@@ -2804,8 +2792,6 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
del_timer_sync(&ic->autocommit_timer);
- WRITE_ONCE(ic->suspending, 1);
-
if (ic->recalc_wq)
drain_workqueue(ic->recalc_wq);
@@ -2834,8 +2820,6 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
#endif
}
- WRITE_ONCE(ic->suspending, 0);
-
BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
ic->journal_uptodate = true;
@@ -2888,17 +2872,24 @@ static void dm_integrity_resume(struct dm_target *ti)
} else {
replay_journal(ic);
if (ic->mode == 'B') {
- int mode;
ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit;
r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
if (unlikely(r))
dm_integrity_io_error(ic, "writing superblock", r);
- mode = ic->recalculate_flag ? BITMAP_OP_SET : BITMAP_OP_CLEAR;
- block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, mode);
- block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, mode);
- block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, mode);
+ block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
+ block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
+ block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
+ le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors) {
+ block_bitmap_op(ic, ic->journal, le64_to_cpu(ic->sb->recalc_sector),
+ ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
+ block_bitmap_op(ic, ic->recalc_bitmap, le64_to_cpu(ic->sb->recalc_sector),
+ ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
+ block_bitmap_op(ic, ic->may_write_bitmap, le64_to_cpu(ic->sb->recalc_sector),
+ ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
+ }
rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0,
ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
}
@@ -2967,7 +2958,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
DMEMIT(" meta_device:%s", ic->meta_dev->name);
if (ic->sectors_per_block != 1)
DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT);
- if (ic->recalculate_flag)
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
DMEMIT(" recalculate");
DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS);
DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors);
@@ -3623,6 +3614,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
}
ti->private = ic;
ti->per_io_data_size = sizeof(struct dm_integrity_io);
+ ic->ti = ti;
ic->in_progress = RB_ROOT;
INIT_LIST_HEAD(&ic->wait_list);
@@ -3836,6 +3828,14 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad;
}
+ ic->offload_wq = alloc_workqueue("dm-integrity-offload", WQ_MEM_RECLAIM,
+ METADATA_WORKQUEUE_MAX_ACTIVE);
+ if (!ic->offload_wq) {
+ ti->error = "Cannot allocate workqueue";
+ r = -ENOMEM;
+ goto bad;
+ }
+
ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1);
if (!ic->commit_wq) {
ti->error = "Cannot allocate workqueue";
@@ -4140,6 +4140,8 @@ static void dm_integrity_dtr(struct dm_target *ti)
destroy_workqueue(ic->metadata_wq);
if (ic->wait_wq)
destroy_workqueue(ic->wait_wq);
+ if (ic->offload_wq)
+ destroy_workqueue(ic->offload_wq);
if (ic->commit_wq)
destroy_workqueue(ic->commit_wq);
if (ic->writer_wq)
@@ -4200,7 +4202,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
static struct target_type integrity_target = {
.name = "integrity",
- .version = {1, 4, 0},
+ .version = {1, 5, 0},
.module = THIS_MODULE,
.features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
.ctr = dm_integrity_ctr,
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 2bc18c9..58fd137 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -2053,7 +2053,7 @@ static int multipath_busy(struct dm_target *ti)
*---------------------------------------------------------------*/
static struct target_type multipath_target = {
.name = "multipath",
- .version = {1, 13, 0},
+ .version = {1, 14, 0},
.features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE |
DM_TARGET_PASSES_INTEGRITY,
.module = THIS_MODULE,
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index fc9947d..76b6b32 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -960,9 +960,9 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd)
DMWARN("%s: __commit_transaction() failed, error = %d",
__func__, r);
}
+ pmd_write_unlock(pmd);
if (!pmd->fail_io)
__destroy_persistent_data_objects(pmd);
- pmd_write_unlock(pmd);
kfree(pmd);
return 0;
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 0d61e9c..eec9f25 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -1221,7 +1221,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
static struct target_type verity_target = {
.name = "verity",
- .version = {1, 5, 0},
+ .version = {1, 6, 0},
.module = THIS_MODULE,
.ctr = verity_ctr,
.dtr = verity_dtr,
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index b9e27e3..a09bdc0 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -625,6 +625,12 @@ static void writecache_add_to_freelist(struct dm_writecache *wc, struct wc_entry
wc->freelist_size++;
}
+static inline void writecache_verify_watermark(struct dm_writecache *wc)
+{
+ if (unlikely(wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark))
+ queue_work(wc->writeback_wq, &wc->writeback_work);
+}
+
static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, sector_t expected_sector)
{
struct wc_entry *e;
@@ -650,8 +656,8 @@ static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, s
list_del(&e->lru);
}
wc->freelist_size--;
- if (unlikely(wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark))
- queue_work(wc->writeback_wq, &wc->writeback_work);
+
+ writecache_verify_watermark(wc);
return e;
}
@@ -842,7 +848,7 @@ static void writecache_suspend(struct dm_target *ti)
}
wc_unlock(wc);
- flush_workqueue(wc->writeback_wq);
+ drain_workqueue(wc->writeback_wq);
wc_lock(wc);
if (flush_on_suspend)
@@ -965,6 +971,8 @@ static void writecache_resume(struct dm_target *ti)
writecache_commit_flushed(wc, false);
}
+ writecache_verify_watermark(wc);
+
wc_unlock(wc);
}
@@ -2312,7 +2320,7 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
static struct target_type writecache_target = {
.name = "writecache",
- .version = {1, 1, 1},
+ .version = {1, 2, 0},
.module = THIS_MODULE,
.ctr = writecache_ctr,
.dtr = writecache_dtr,
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 70a1063..f4f83d3 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -533,8 +533,9 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
/* Get the BIO chunk work. If one is not active yet, create one */
cw = radix_tree_lookup(&dmz->chunk_rxtree, chunk);
- if (!cw) {
-
+ if (cw) {
+ dmz_get_chunk_work(cw);
+ } else {
/* Create a new chunk work */
cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOIO);
if (unlikely(!cw)) {
@@ -543,7 +544,7 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
}
INIT_WORK(&cw->work, dmz_chunk_work);
- refcount_set(&cw->refcount, 0);
+ refcount_set(&cw->refcount, 1);
cw->target = dmz;
cw->chunk = chunk;
bio_list_init(&cw->bio_list);
@@ -556,7 +557,6 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
}
bio_list_add(&cw->bio_list, bio);
- dmz_get_chunk_work(cw);
dmz_reclaim_bio_acc(dmz->reclaim);
if (queue_work(dmz->chunk_wq, &cw->work))
@@ -967,7 +967,7 @@ static int dmz_iterate_devices(struct dm_target *ti,
static struct target_type dmz_type = {
.name = "zoned",
- .version = {1, 0, 0},
+ .version = {1, 1, 0},
.features = DM_TARGET_SINGLETON | DM_TARGET_ZONED_HM,
.module = THIS_MODULE,
.ctr = dmz_ctr,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b89f07e..753302e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -25,6 +25,7 @@
#include <linux/wait.h>
#include <linux/pr.h>
#include <linux/refcount.h>
+#include <linux/part_stat.h>
#define DM_MSG_PREFIX "core"
@@ -1788,7 +1789,8 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
* With request-based DM we only need to check the
* top-level queue for congestion.
*/
- r = md->queue->backing_dev_info->wb.state & bdi_bits;
+ struct backing_dev_info *bdi = md->queue->backing_dev_info;
+ r = bdi->wb.congested->state & bdi_bits;
} else {
map = dm_get_live_table_fast(md);
if (map)
@@ -1854,15 +1856,6 @@ static const struct dax_operations dm_dax_ops;
static void dm_wq_work(struct work_struct *work);
-static void dm_init_normal_md_queue(struct mapped_device *md)
-{
- /*
- * Initialize aspects of queue that aren't relevant for blk-mq
- */
- md->queue->backing_dev_info->congested_data = md;
- md->queue->backing_dev_info->congested_fn = dm_any_congested;
-}
-
static void cleanup_mapped_device(struct mapped_device *md)
{
if (md->wq)
@@ -1946,16 +1939,15 @@ static struct mapped_device *alloc_dev(int minor)
INIT_LIST_HEAD(&md->table_devices);
spin_lock_init(&md->uevent_lock);
- md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id);
- if (!md->queue)
- goto bad;
- md->queue->queuedata = md;
/*
* default to bio-based required ->make_request_fn until DM
* table is loaded and md->type established. If request-based
* table is loaded: blk-mq will override accordingly.
*/
- blk_queue_make_request(md->queue, dm_make_request);
+ md->queue = blk_alloc_queue(dm_make_request, numa_node_id);
+ if (!md->queue)
+ goto bad;
+ md->queue->queuedata = md;
md->disk = alloc_disk_node(1, md->numa_node_id);
if (!md->disk)
@@ -2249,6 +2241,12 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
}
EXPORT_SYMBOL_GPL(dm_get_queue_limits);
+static void dm_init_congested_fn(struct mapped_device *md)
+{
+ md->queue->backing_dev_info->congested_data = md;
+ md->queue->backing_dev_info->congested_fn = dm_any_congested;
+}
+
/*
* Setup the DM device's queue based on md's type
*/
@@ -2265,11 +2263,12 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
DMERR("Cannot initialize queue for request-based dm-mq mapped device");
return r;
}
+ dm_init_congested_fn(md);
break;
case DM_TYPE_BIO_BASED:
case DM_TYPE_DAX_BIO_BASED:
case DM_TYPE_NVME_BIO_BASED:
- dm_init_normal_md_queue(md);
+ dm_init_congested_fn(md);
break;
case DM_TYPE_NONE:
WARN_ON_ONCE(true);
@@ -2368,6 +2367,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
map = dm_get_live_table(md, &srcu_idx);
if (!dm_suspended_md(md)) {
dm_table_presuspend_targets(map);
+ set_bit(DMF_SUSPENDED, &md->flags);
dm_table_postsuspend_targets(map);
}
/* dm_put_live_table must be before msleep, otherwise deadlock is possible */
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 469f551..271e8a5 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -58,8 +58,10 @@
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
+#include <linux/raid/detect.h>
#include <linux/slab.h>
#include <linux/percpu-refcount.h>
+#include <linux/part_stat.h>
#include <trace/events/block.h>
#include "md.h"
@@ -2491,12 +2493,12 @@ static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
{
int err = 0;
struct block_device *bdev;
- char b[BDEVNAME_SIZE];
bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
shared ? (struct md_rdev *)lock_rdev : rdev);
if (IS_ERR(bdev)) {
- pr_warn("md: could not open %s.\n", __bdevname(dev, b));
+ pr_warn("md: could not open device unknown-block(%u,%u).\n",
+ MAJOR(dev), MINOR(dev));
return PTR_ERR(bdev);
}
rdev->bdev = bdev;
@@ -5621,12 +5623,11 @@ static int md_alloc(dev_t dev, char *name)
mddev->hold_active = UNTIL_STOP;
error = -ENOMEM;
- mddev->queue = blk_alloc_queue(GFP_KERNEL);
+ mddev->queue = blk_alloc_queue(md_make_request, NUMA_NO_NODE);
if (!mddev->queue)
goto abort;
mddev->queue->queuedata = mddev;
- blk_queue_make_request(mddev->queue, md_make_request);
blk_set_stacking_limits(&mddev->queue->limits);
disk = alloc_disk(1 << shift);
@@ -6184,7 +6185,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes);
static void mddev_detach(struct mddev *mddev)
{
md_bitmap_wait_behind_writes(mddev);
- if (mddev->pers && mddev->pers->quiesce) {
+ if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) {
mddev->pers->quiesce(mddev, 1);
mddev->pers->quiesce(mddev, 0);
}
diff --git a/drivers/misc/altera-stapl/altera.c b/drivers/misc/altera-stapl/altera.c
index 25e5f24..5bdf574 100644
--- a/drivers/misc/altera-stapl/altera.c
+++ b/drivers/misc/altera-stapl/altera.c
@@ -2112,8 +2112,8 @@ static int altera_execute(struct altera_state *astate,
return status;
}
-static int altera_get_note(u8 *p, s32 program_size,
- s32 *offset, char *key, char *value, int length)
+static int altera_get_note(u8 *p, s32 program_size, s32 *offset,
+ char *key, char *value, int keylen, int vallen)
/*
* Gets key and value of NOTE fields in the JBC file.
* Can be called in two modes: if offset pointer is NULL,
@@ -2170,7 +2170,7 @@ static int altera_get_note(u8 *p, s32 program_size,
&p[note_table + (8 * i) + 4])];
if (value != NULL)
- strlcpy(value, value_ptr, length);
+ strlcpy(value, value_ptr, vallen);
}
}
@@ -2189,13 +2189,13 @@ static int altera_get_note(u8 *p, s32 program_size,
strlcpy(key, &p[note_strings +
get_unaligned_be32(
&p[note_table + (8 * i)])],
- length);
+ keylen);
if (value != NULL)
strlcpy(value, &p[note_strings +
get_unaligned_be32(
&p[note_table + (8 * i) + 4])],
- length);
+ vallen);
*offset = i + 1;
}
@@ -2449,7 +2449,7 @@ int altera_init(struct altera_config *config, const struct firmware *fw)
__func__, (format_version == 2) ? "Jam STAPL" :
"pre-standardized Jam 1.1");
while (altera_get_note((u8 *)fw->data, fw->size,
- &offset, key, value, 256) == 0)
+ &offset, key, value, 32, 256) == 0)
printk(KERN_INFO "%s: NOTE \"%s\" = \"%s\"\n",
__func__, key, value);
}
diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c
index 4feed29..423fecc 100644
--- a/drivers/misc/cardreader/rts5227.c
+++ b/drivers/misc/cardreader/rts5227.c
@@ -394,7 +394,7 @@ static const struct pcr_ops rts522a_pcr_ops = {
void rts522a_init_params(struct rtsx_pcr *pcr)
{
rts5227_init_params(pcr);
-
+ pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 20, 11);
pcr->reg_pm_ctrl3 = RTS522A_PM_CTRL3;
pcr->option.ocp_en = 1;
diff --git a/drivers/misc/cardreader/rts5249.c b/drivers/misc/cardreader/rts5249.c
index db936e4..1a81cda 100644
--- a/drivers/misc/cardreader/rts5249.c
+++ b/drivers/misc/cardreader/rts5249.c
@@ -618,6 +618,7 @@ static const struct pcr_ops rts524a_pcr_ops = {
void rts524a_init_params(struct rtsx_pcr *pcr)
{
rts5249_init_params(pcr);
+ pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11);
pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF;
pcr->option.ltr_l1off_snooze_sspwrgate =
LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF;
@@ -733,6 +734,7 @@ static const struct pcr_ops rts525a_pcr_ops = {
void rts525a_init_params(struct rtsx_pcr *pcr)
{
rts5249_init_params(pcr);
+ pcr->tx_initial_phase = SET_CLOCK_PHASE(25, 29, 11);
pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF;
pcr->option.ltr_l1off_snooze_sspwrgate =
LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF;
diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c
index 4214f02..711054e 100644
--- a/drivers/misc/cardreader/rts5260.c
+++ b/drivers/misc/cardreader/rts5260.c
@@ -662,7 +662,7 @@ void rts5260_init_params(struct rtsx_pcr *pcr)
pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B;
pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B;
pcr->aspm_en = ASPM_L1_EN;
- pcr->tx_initial_phase = SET_CLOCK_PHASE(1, 29, 16);
+ pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11);
pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5);
pcr->ic_version = rts5260_get_ic_version(pcr);
diff --git a/drivers/misc/cardreader/rts5261.c b/drivers/misc/cardreader/rts5261.c
index bc4967a..78c3b1d 100644
--- a/drivers/misc/cardreader/rts5261.c
+++ b/drivers/misc/cardreader/rts5261.c
@@ -764,7 +764,7 @@ void rts5261_init_params(struct rtsx_pcr *pcr)
pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B;
pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B;
pcr->aspm_en = ASPM_L1_EN;
- pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 27, 16);
+ pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 11);
pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5);
pcr->ic_version = rts5261_get_ic_version(pcr);
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 031eb64..282c9ef 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -712,13 +712,14 @@ static int at24_probe(struct i2c_client *client)
* chip is functional.
*/
err = at24_read(at24, 0, &test_byte, 1);
- pm_runtime_idle(dev);
if (err) {
pm_runtime_disable(dev);
regulator_disable(at24->vcc_reg);
return -ENODEV;
}
+ pm_runtime_idle(dev);
+
if (writable)
dev_info(dev, "%u byte %s EEPROM, writable, %u bytes/write\n",
byte_len, client->name, at24->write_max);
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index b155e95..b680b0c 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -598,7 +598,9 @@ int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
goto out;
}
- hdev->asic_funcs->halt_coresight(hdev);
+ if (!hdev->hard_reset_pending)
+ hdev->asic_funcs->halt_coresight(hdev);
+
hdev->in_debug = 0;
goto out;
@@ -1189,6 +1191,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
dev_info(hdev->dev,
"H/W state is dirty, must reset before initializing\n");
+ hdev->asic_funcs->halt_engines(hdev, true);
hdev->asic_funcs->hw_fini(hdev, true);
}
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 7344e8a..b8a8de2 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -895,6 +895,11 @@ void goya_init_dma_qmans(struct hl_device *hdev)
*/
static void goya_disable_external_queues(struct hl_device *hdev)
{
+ struct goya_device *goya = hdev->asic_specific;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_DMA))
+ return;
+
WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
@@ -956,6 +961,11 @@ static int goya_stop_external_queues(struct hl_device *hdev)
{
int rc, retval = 0;
+ struct goya_device *goya = hdev->asic_specific;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_DMA))
+ return retval;
+
rc = goya_stop_queue(hdev,
mmDMA_QM_0_GLBL_CFG1,
mmDMA_QM_0_CP_STS,
@@ -1744,9 +1754,18 @@ void goya_init_tpc_qmans(struct hl_device *hdev)
*/
static void goya_disable_internal_queues(struct hl_device *hdev)
{
+ struct goya_device *goya = hdev->asic_specific;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_MME))
+ goto disable_tpc;
+
WREG32(mmMME_QM_GLBL_CFG0, 0);
WREG32(mmMME_CMDQ_GLBL_CFG0, 0);
+disable_tpc:
+ if (!(goya->hw_cap_initialized & HW_CAP_TPC))
+ return;
+
WREG32(mmTPC0_QM_GLBL_CFG0, 0);
WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);
@@ -1782,8 +1801,12 @@ static void goya_disable_internal_queues(struct hl_device *hdev)
*/
static int goya_stop_internal_queues(struct hl_device *hdev)
{
+ struct goya_device *goya = hdev->asic_specific;
int rc, retval = 0;
+ if (!(goya->hw_cap_initialized & HW_CAP_MME))
+ goto stop_tpc;
+
/*
* Each queue (QMAN) is a separate H/W logic. That means that each
* QMAN can be stopped independently and failure to stop one does NOT
@@ -1810,6 +1833,10 @@ static int goya_stop_internal_queues(struct hl_device *hdev)
retval = -EIO;
}
+stop_tpc:
+ if (!(goya->hw_cap_initialized & HW_CAP_TPC))
+ return retval;
+
rc = goya_stop_queue(hdev,
mmTPC0_QM_GLBL_CFG1,
mmTPC0_QM_CP_STS,
@@ -1975,6 +2002,11 @@ static int goya_stop_internal_queues(struct hl_device *hdev)
static void goya_dma_stall(struct hl_device *hdev)
{
+ struct goya_device *goya = hdev->asic_specific;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_DMA))
+ return;
+
WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
@@ -1984,6 +2016,11 @@ static void goya_dma_stall(struct hl_device *hdev)
static void goya_tpc_stall(struct hl_device *hdev)
{
+ struct goya_device *goya = hdev->asic_specific;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_TPC))
+ return;
+
WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
@@ -1996,6 +2033,11 @@ static void goya_tpc_stall(struct hl_device *hdev)
static void goya_mme_stall(struct hl_device *hdev)
{
+ struct goya_device *goya = hdev->asic_specific;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_MME))
+ return;
+
WREG32(mmMME_STALL, 0xFFFFFFFF);
}
@@ -4648,8 +4690,6 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
rc = goya_send_job_on_qman0(hdev, job);
- hl_cb_put(job->patched_cb);
-
hl_debugfs_remove_job(hdev, job);
kfree(job);
cb->cs_cnt--;
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index aa54d35..a971c4b 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1732,8 +1732,11 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from,
* the erase operation does not exceed the max_busy_timeout, we should
* use R1B response. Or we need to prevent the host from doing hw busy
* detection, which is done by converting to a R1 response instead.
+ * Note, some hosts requires R1B, which also means they are on their own
+ * when it comes to deal with the busy timeout.
*/
- if (card->host->max_busy_timeout &&
+ if (!(card->host->caps & MMC_CAP_NEED_RSP_BUSY) &&
+ card->host->max_busy_timeout &&
busy_timeout > card->host->max_busy_timeout) {
cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_AC;
} else {
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index f6912de..de14b58 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1910,9 +1910,12 @@ static int mmc_sleep(struct mmc_host *host)
* If the max_busy_timeout of the host is specified, validate it against
* the sleep cmd timeout. A failure means we need to prevent the host
* from doing hw busy detection, which is done by converting to a R1
- * response instead of a R1B.
+ * response instead of a R1B. Note, some hosts requires R1B, which also
+ * means they are on their own when it comes to deal with the busy
+ * timeout.
*/
- if (host->max_busy_timeout && (timeout_ms > host->max_busy_timeout)) {
+ if (!(host->caps & MMC_CAP_NEED_RSP_BUSY) && host->max_busy_timeout &&
+ (timeout_ms > host->max_busy_timeout)) {
cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
} else {
cmd.flags = MMC_RSP_R1B | MMC_CMD_AC;
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index da425ee..e025604 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -542,9 +542,11 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
* If the max_busy_timeout of the host is specified, make sure it's
* enough to fit the used timeout_ms. In case it's not, let's instruct
* the host to avoid HW busy detection, by converting to a R1 response
- * instead of a R1B.
+ * instead of a R1B. Note, some hosts requires R1B, which also means
+ * they are on their own when it comes to deal with the busy timeout.
*/
- if (host->max_busy_timeout && (timeout_ms > host->max_busy_timeout))
+ if (!(host->caps & MMC_CAP_NEED_RSP_BUSY) && host->max_busy_timeout &&
+ (timeout_ms > host->max_busy_timeout))
use_r1b_resp = false;
cmd.opcode = MMC_SWITCH;
diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
index bd50935..1108797 100644
--- a/drivers/mmc/host/rtsx_pci_sdmmc.c
+++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
@@ -606,19 +606,22 @@ static int sd_change_phase(struct realtek_pci_sdmmc *host,
u8 sample_point, bool rx)
{
struct rtsx_pcr *pcr = host->pcr;
-
+ u16 SD_VP_CTL = 0;
dev_dbg(sdmmc_dev(host), "%s(%s): sample_point = %d\n",
__func__, rx ? "RX" : "TX", sample_point);
rtsx_pci_write_register(pcr, CLK_CTL, CHANGE_CLK, CHANGE_CLK);
- if (rx)
+ if (rx) {
+ SD_VP_CTL = SD_VPRX_CTL;
rtsx_pci_write_register(pcr, SD_VPRX_CTL,
PHASE_SELECT_MASK, sample_point);
- else
+ } else {
+ SD_VP_CTL = SD_VPTX_CTL;
rtsx_pci_write_register(pcr, SD_VPTX_CTL,
PHASE_SELECT_MASK, sample_point);
- rtsx_pci_write_register(pcr, SD_VPCLK0_CTL, PHASE_NOT_RESET, 0);
- rtsx_pci_write_register(pcr, SD_VPCLK0_CTL, PHASE_NOT_RESET,
+ }
+ rtsx_pci_write_register(pcr, SD_VP_CTL, PHASE_NOT_RESET, 0);
+ rtsx_pci_write_register(pcr, SD_VP_CTL, PHASE_NOT_RESET,
PHASE_NOT_RESET);
rtsx_pci_write_register(pcr, CLK_CTL, CHANGE_CLK, 0);
rtsx_pci_write_register(pcr, SD_CFG1, SD_ASYNC_FIFO_NOT_RST, 0);
diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index 9651dca..2a2173d 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -23,6 +23,7 @@
#include <linux/pm.h>
#include <linux/pm_runtime.h>
#include <linux/delay.h>
+#include <linux/dmi.h>
#include <linux/mmc/host.h>
#include <linux/mmc/pm.h>
@@ -72,9 +73,16 @@ struct sdhci_acpi_host {
const struct sdhci_acpi_slot *slot;
struct platform_device *pdev;
bool use_runtime_pm;
+ bool is_intel;
+ bool reset_signal_volt_on_suspend;
unsigned long private[0] ____cacheline_aligned;
};
+enum {
+ DMI_QUIRK_RESET_SD_SIGNAL_VOLT_ON_SUSP = BIT(0),
+ DMI_QUIRK_SD_NO_WRITE_PROTECT = BIT(1),
+};
+
static inline void *sdhci_acpi_priv(struct sdhci_acpi_host *c)
{
return (void *)c->private;
@@ -391,6 +399,8 @@ static int intel_probe_slot(struct platform_device *pdev, struct acpi_device *ad
host->mmc_host_ops.start_signal_voltage_switch =
intel_start_signal_voltage_switch;
+ c->is_intel = true;
+
return 0;
}
@@ -647,6 +657,36 @@ static const struct acpi_device_id sdhci_acpi_ids[] = {
};
MODULE_DEVICE_TABLE(acpi, sdhci_acpi_ids);
+static const struct dmi_system_id sdhci_acpi_quirks[] = {
+ {
+ /*
+ * The Lenovo Miix 320-10ICR has a bug in the _PS0 method of
+ * the SHC1 ACPI device, this bug causes it to reprogram the
+ * wrong LDO (DLDO3) to 1.8V if 1.8V modes are used and the
+ * card is (runtime) suspended + resumed. DLDO3 is used for
+ * the LCD and setting it to 1.8V causes the LCD to go black.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 320-10ICR"),
+ },
+ .driver_data = (void *)DMI_QUIRK_RESET_SD_SIGNAL_VOLT_ON_SUSP,
+ },
+ {
+ /*
+ * The Acer Aspire Switch 10 (SW5-012) microSD slot always
+ * reports the card being write-protected even though microSD
+ * cards do not have a write-protect switch at all.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire SW5-012"),
+ },
+ .driver_data = (void *)DMI_QUIRK_SD_NO_WRITE_PROTECT,
+ },
+ {} /* Terminating entry */
+};
+
static const struct sdhci_acpi_slot *sdhci_acpi_get_slot(struct acpi_device *adev)
{
const struct sdhci_acpi_uid_slot *u;
@@ -663,17 +703,23 @@ static int sdhci_acpi_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
const struct sdhci_acpi_slot *slot;
struct acpi_device *device, *child;
+ const struct dmi_system_id *id;
struct sdhci_acpi_host *c;
struct sdhci_host *host;
struct resource *iomem;
resource_size_t len;
size_t priv_size;
+ int quirks = 0;
int err;
device = ACPI_COMPANION(dev);
if (!device)
return -ENODEV;
+ id = dmi_first_match(sdhci_acpi_quirks);
+ if (id)
+ quirks = (long)id->driver_data;
+
slot = sdhci_acpi_get_slot(device);
/* Power on the SDHCI controller and its children */
@@ -759,6 +805,12 @@ static int sdhci_acpi_probe(struct platform_device *pdev)
dev_warn(dev, "failed to setup card detect gpio\n");
c->use_runtime_pm = false;
}
+
+ if (quirks & DMI_QUIRK_RESET_SD_SIGNAL_VOLT_ON_SUSP)
+ c->reset_signal_volt_on_suspend = true;
+
+ if (quirks & DMI_QUIRK_SD_NO_WRITE_PROTECT)
+ host->mmc->caps2 |= MMC_CAP2_NO_WRITE_PROTECT;
}
err = sdhci_setup_host(host);
@@ -823,17 +875,39 @@ static int sdhci_acpi_remove(struct platform_device *pdev)
return 0;
}
+static void __maybe_unused sdhci_acpi_reset_signal_voltage_if_needed(
+ struct device *dev)
+{
+ struct sdhci_acpi_host *c = dev_get_drvdata(dev);
+ struct sdhci_host *host = c->host;
+
+ if (c->is_intel && c->reset_signal_volt_on_suspend &&
+ host->mmc->ios.signal_voltage != MMC_SIGNAL_VOLTAGE_330) {
+ struct intel_host *intel_host = sdhci_acpi_priv(c);
+ unsigned int fn = INTEL_DSM_V33_SWITCH;
+ u32 result = 0;
+
+ intel_dsm(intel_host, dev, fn, &result);
+ }
+}
+
#ifdef CONFIG_PM_SLEEP
static int sdhci_acpi_suspend(struct device *dev)
{
struct sdhci_acpi_host *c = dev_get_drvdata(dev);
struct sdhci_host *host = c->host;
+ int ret;
if (host->tuning_mode != SDHCI_TUNING_MODE_3)
mmc_retune_needed(host->mmc);
- return sdhci_suspend_host(host);
+ ret = sdhci_suspend_host(host);
+ if (ret)
+ return ret;
+
+ sdhci_acpi_reset_signal_voltage_if_needed(dev);
+ return 0;
}
static int sdhci_acpi_resume(struct device *dev)
@@ -853,11 +927,17 @@ static int sdhci_acpi_runtime_suspend(struct device *dev)
{
struct sdhci_acpi_host *c = dev_get_drvdata(dev);
struct sdhci_host *host = c->host;
+ int ret;
if (host->tuning_mode != SDHCI_TUNING_MODE_3)
mmc_retune_needed(host->mmc);
- return sdhci_runtime_suspend_host(host);
+ ret = sdhci_runtime_suspend_host(host);
+ if (ret)
+ return ret;
+
+ sdhci_acpi_reset_signal_voltage_if_needed(dev);
+ return 0;
}
static int sdhci_acpi_runtime_resume(struct device *dev)
diff --git a/drivers/mmc/host/sdhci-cadence.c b/drivers/mmc/host/sdhci-cadence.c
index 5827d37..e573495 100644
--- a/drivers/mmc/host/sdhci-cadence.c
+++ b/drivers/mmc/host/sdhci-cadence.c
@@ -11,6 +11,7 @@
#include <linux/mmc/host.h>
#include <linux/mmc/mmc.h>
#include <linux/of.h>
+#include <linux/of_device.h>
#include "sdhci-pltfm.h"
@@ -235,6 +236,11 @@ static const struct sdhci_ops sdhci_cdns_ops = {
.set_uhs_signaling = sdhci_cdns_set_uhs_signaling,
};
+static const struct sdhci_pltfm_data sdhci_cdns_uniphier_pltfm_data = {
+ .ops = &sdhci_cdns_ops,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
+};
+
static const struct sdhci_pltfm_data sdhci_cdns_pltfm_data = {
.ops = &sdhci_cdns_ops,
};
@@ -334,6 +340,7 @@ static void sdhci_cdns_hs400_enhanced_strobe(struct mmc_host *mmc,
static int sdhci_cdns_probe(struct platform_device *pdev)
{
struct sdhci_host *host;
+ const struct sdhci_pltfm_data *data;
struct sdhci_pltfm_host *pltfm_host;
struct sdhci_cdns_priv *priv;
struct clk *clk;
@@ -350,8 +357,12 @@ static int sdhci_cdns_probe(struct platform_device *pdev)
if (ret)
return ret;
+ data = of_device_get_match_data(dev);
+ if (!data)
+ data = &sdhci_cdns_pltfm_data;
+
nr_phy_params = sdhci_cdns_phy_param_count(dev->of_node);
- host = sdhci_pltfm_init(pdev, &sdhci_cdns_pltfm_data,
+ host = sdhci_pltfm_init(pdev, data,
struct_size(priv, phy_params, nr_phy_params));
if (IS_ERR(host)) {
ret = PTR_ERR(host);
@@ -431,7 +442,10 @@ static const struct dev_pm_ops sdhci_cdns_pm_ops = {
};
static const struct of_device_id sdhci_cdns_match[] = {
- { .compatible = "socionext,uniphier-sd4hc" },
+ {
+ .compatible = "socionext,uniphier-sd4hc",
+ .data = &sdhci_cdns_uniphier_pltfm_data,
+ },
{ .compatible = "cdns,sd4hc" },
{ /* sentinel */ }
};
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index c3a160c..3955fa5d 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -1590,7 +1590,7 @@ static u32 sdhci_msm_cqe_irq(struct sdhci_host *host, u32 intmask)
return 0;
}
-void sdhci_msm_cqe_disable(struct mmc_host *mmc, bool recovery)
+static void sdhci_msm_cqe_disable(struct mmc_host *mmc, bool recovery)
{
struct sdhci_host *host = mmc_priv(mmc);
unsigned long flags;
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index ab2bd31..fcef5c0 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -132,7 +132,8 @@ static void sdhci_at91_reset(struct sdhci_host *host, u8 mask)
sdhci_reset(host, mask);
- if (host->mmc->caps & MMC_CAP_NONREMOVABLE)
+ if ((host->mmc->caps & MMC_CAP_NONREMOVABLE)
+ || mmc_gpio_get_cd(host->mmc) >= 0)
sdhci_at91_set_force_card_detect(host);
if (priv->cal_always_on && (mask & SDHCI_RESET_ALL))
@@ -427,8 +428,11 @@ static int sdhci_at91_probe(struct platform_device *pdev)
* detection procedure using the SDMCC_CD signal is bypassed.
* This bit is reset when a software reset for all command is performed
* so we need to implement our own reset function to set back this bit.
+ *
+ * WA: SAMA5D2 doesn't drive CMD if using CD GPIO line.
*/
- if (host->mmc->caps & MMC_CAP_NONREMOVABLE)
+ if ((host->mmc->caps & MMC_CAP_NONREMOVABLE)
+ || mmc_gpio_get_cd(host->mmc) >= 0)
sdhci_at91_set_force_card_detect(host);
pm_runtime_put_autosuspend(&pdev->dev);
diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c
index 8820531..c497817 100644
--- a/drivers/mmc/host/sdhci-omap.c
+++ b/drivers/mmc/host/sdhci-omap.c
@@ -1192,6 +1192,9 @@ static int sdhci_omap_probe(struct platform_device *pdev)
if (of_find_property(dev->of_node, "dmas", NULL))
sdhci_switch_external_dma(host, true);
+ /* R1B responses is required to properly manage HW busy detection. */
+ mmc->caps |= MMC_CAP_NEED_RSP_BUSY;
+
ret = sdhci_setup_host(host);
if (ret)
goto err_put_sync;
diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c
index 5eea8d70..ce15a05 100644
--- a/drivers/mmc/host/sdhci-pci-gli.c
+++ b/drivers/mmc/host/sdhci-pci-gli.c
@@ -262,10 +262,26 @@ static int gl9750_execute_tuning(struct sdhci_host *host, u32 opcode)
return 0;
}
+static void gli_pcie_enable_msi(struct sdhci_pci_slot *slot)
+{
+ int ret;
+
+ ret = pci_alloc_irq_vectors(slot->chip->pdev, 1, 1,
+ PCI_IRQ_MSI | PCI_IRQ_MSIX);
+ if (ret < 0) {
+ pr_warn("%s: enable PCI MSI failed, error=%d\n",
+ mmc_hostname(slot->host->mmc), ret);
+ return;
+ }
+
+ slot->host->irq = pci_irq_vector(slot->chip->pdev, 0);
+}
+
static int gli_probe_slot_gl9750(struct sdhci_pci_slot *slot)
{
struct sdhci_host *host = slot->host;
+ gli_pcie_enable_msi(slot);
slot->host->mmc->caps2 |= MMC_CAP2_NO_SDIO;
sdhci_enable_v4_mode(host);
@@ -276,6 +292,7 @@ static int gli_probe_slot_gl9755(struct sdhci_pci_slot *slot)
{
struct sdhci_host *host = slot->host;
+ gli_pcie_enable_msi(slot);
slot->host->mmc->caps2 |= MMC_CAP2_NO_SDIO;
sdhci_enable_v4_mode(host);
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index 403ac44..a25c3a4 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -1552,6 +1552,9 @@ static int sdhci_tegra_probe(struct platform_device *pdev)
if (tegra_host->soc_data->nvquirks & NVQUIRK_ENABLE_DDR50)
host->mmc->caps |= MMC_CAP_1_8V_DDR;
+ /* R1B responses is required to properly manage HW busy detection. */
+ host->mmc->caps |= MMC_CAP_NEED_RSP_BUSY;
+
tegra_sdhci_parse_dt(host);
tegra_host->power_gpio = devm_gpiod_get_optional(&pdev->dev, "power",
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 25a8f93..db8884a 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -149,6 +149,7 @@
config IFB
tristate "Intermediate Functional Block support"
depends on NET_CLS_ACT
+ select NET_REDIRECT
---help---
This is an intermediate driver that allows sharing of
resources.
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 1cc2cd8..c816985 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -50,11 +50,6 @@ struct arp_pkt {
};
#pragma pack()
-static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
-{
- return (struct arp_pkt *)skb_network_header(skb);
-}
-
/* Forward declaration */
static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[],
bool strict_match);
@@ -553,10 +548,11 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
spin_unlock(&bond->mode_lock);
}
-static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond)
+static struct slave *rlb_choose_channel(struct sk_buff *skb,
+ struct bonding *bond,
+ const struct arp_pkt *arp)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- struct arp_pkt *arp = arp_pkt(skb);
struct slave *assigned_slave, *curr_active_slave;
struct rlb_client_info *client_info;
u32 hash_index = 0;
@@ -653,8 +649,12 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
*/
static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
{
- struct arp_pkt *arp = arp_pkt(skb);
struct slave *tx_slave = NULL;
+ struct arp_pkt *arp;
+
+ if (!pskb_network_may_pull(skb, sizeof(*arp)))
+ return NULL;
+ arp = (struct arp_pkt *)skb_network_header(skb);
/* Don't modify or load balance ARPs that do not originate locally
* (e.g.,arrive via a bridge).
@@ -664,7 +664,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
if (arp->op_code == htons(ARPOP_REPLY)) {
/* the arp must be sent on the selected rx channel */
- tx_slave = rlb_choose_channel(skb, bond);
+ tx_slave = rlb_choose_channel(skb, bond, arp);
if (tx_slave)
bond_hw_addr_copy(arp->mac_src, tx_slave->dev->dev_addr,
tx_slave->dev->addr_len);
@@ -676,7 +676,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
* When the arp reply is received the entry will be updated
* with the correct unicast address of the client.
*/
- tx_slave = rlb_choose_channel(skb, bond);
+ tx_slave = rlb_choose_channel(skb, bond, arp);
/* The ARP reply packets must be delayed so that
* they can cancel out the influence of the ARP request.
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 48d5ec7..d10805e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3526,6 +3526,47 @@ static void bond_fold_stats(struct rtnl_link_stats64 *_res,
}
}
+#ifdef CONFIG_LOCKDEP
+static int bond_get_lowest_level_rcu(struct net_device *dev)
+{
+ struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+ struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+ int cur = 0, max = 0;
+
+ now = dev;
+ iter = &dev->adj_list.lower;
+
+ while (1) {
+ next = NULL;
+ while (1) {
+ ldev = netdev_next_lower_dev_rcu(now, &iter);
+ if (!ldev)
+ break;
+
+ next = ldev;
+ niter = &ldev->adj_list.lower;
+ dev_stack[cur] = now;
+ iter_stack[cur++] = iter;
+ if (max <= cur)
+ max = cur;
+ break;
+ }
+
+ if (!next) {
+ if (!cur)
+ return max;
+ next = dev_stack[--cur];
+ niter = iter_stack[cur];
+ }
+
+ now = next;
+ iter = niter;
+ }
+
+ return max;
+}
+#endif
+
static void bond_get_stats(struct net_device *bond_dev,
struct rtnl_link_stats64 *stats)
{
@@ -3533,11 +3574,17 @@ static void bond_get_stats(struct net_device *bond_dev,
struct rtnl_link_stats64 temp;
struct list_head *iter;
struct slave *slave;
+ int nest_level = 0;
- spin_lock(&bond->stats_lock);
- memcpy(stats, &bond->bond_stats, sizeof(*stats));
rcu_read_lock();
+#ifdef CONFIG_LOCKDEP
+ nest_level = bond_get_lowest_level_rcu(bond_dev);
+#endif
+
+ spin_lock_nested(&bond->stats_lock, nest_level);
+ memcpy(stats, &bond->bond_stats, sizeof(*stats));
+
bond_for_each_slave_rcu(bond, slave, iter) {
const struct rtnl_link_stats64 *new =
dev_get_stats(slave->dev, &temp);
@@ -3547,10 +3594,10 @@ static void bond_get_stats(struct net_device *bond_dev,
/* save off the slave stats for the next run */
memcpy(&slave->slave_stats, new, sizeof(*new));
}
- rcu_read_unlock();
memcpy(&bond->bond_stats, stats, sizeof(*stats));
spin_unlock(&bond->stats_lock);
+ rcu_read_unlock();
}
static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)
@@ -3640,6 +3687,8 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
case BOND_RELEASE_OLD:
case SIOCBONDRELEASE:
res = bond_release(bond_dev, slave_dev);
+ if (!res)
+ netdev_update_lockdep_key(slave_dev);
break;
case BOND_SETHWADDR_OLD:
case SIOCBONDSETHWADDR:
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index ddb3916..215c109 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -1398,6 +1398,8 @@ static int bond_option_slaves_set(struct bonding *bond,
case '-':
slave_dbg(bond->dev, dev, "Releasing interface\n");
ret = bond_release(bond->dev, dev);
+ if (!ret)
+ netdev_update_lockdep_key(dev);
break;
default:
diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c
index 8e81bdf..63f2548 100644
--- a/drivers/net/caif/caif_spi.c
+++ b/drivers/net/caif/caif_spi.c
@@ -141,29 +141,29 @@ static ssize_t dbgfs_state(struct file *file, char __user *user_buf,
return 0;
/* Print out debug information. */
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "CAIF SPI debug information:\n");
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "CAIF SPI debug information:\n");
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), FLAVOR);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), FLAVOR);
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "STATE: %d\n", cfspi->dbg_state);
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Previous CMD: 0x%x\n", cfspi->pcmd);
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Current CMD: 0x%x\n", cfspi->cmd);
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Previous TX len: %d\n", cfspi->tx_ppck_len);
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Previous RX len: %d\n", cfspi->rx_ppck_len);
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Current TX len: %d\n", cfspi->tx_cpck_len);
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Current RX len: %d\n", cfspi->rx_cpck_len);
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Next TX len: %d\n", cfspi->tx_npck_len);
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Next RX len: %d\n", cfspi->rx_npck_len);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "STATE: %d\n", cfspi->dbg_state);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Previous CMD: 0x%x\n", cfspi->pcmd);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Current CMD: 0x%x\n", cfspi->cmd);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Previous TX len: %d\n", cfspi->tx_ppck_len);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Previous RX len: %d\n", cfspi->rx_ppck_len);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Current TX len: %d\n", cfspi->tx_cpck_len);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Current RX len: %d\n", cfspi->rx_cpck_len);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Next TX len: %d\n", cfspi->tx_npck_len);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Next RX len: %d\n", cfspi->rx_npck_len);
if (len > DEBUGFS_BUF_SIZE)
len = DEBUGFS_BUF_SIZE;
@@ -180,23 +180,23 @@ static ssize_t print_frame(char *buf, size_t size, char *frm,
int len = 0;
int i;
for (i = 0; i < count; i++) {
- len += snprintf((buf + len), (size - len),
+ len += scnprintf((buf + len), (size - len),
"[0x" BYTE_HEX_FMT "]",
frm[i]);
if ((i == cut) && (count > (cut * 2))) {
/* Fast forward. */
i = count - cut;
- len += snprintf((buf + len), (size - len),
- "--- %zu bytes skipped ---\n",
- count - (cut * 2));
+ len += scnprintf((buf + len), (size - len),
+ "--- %zu bytes skipped ---\n",
+ count - (cut * 2));
}
if ((!(i % 10)) && i) {
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "\n");
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "\n");
}
}
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), "\n");
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), "\n");
return len;
}
@@ -214,18 +214,18 @@ static ssize_t dbgfs_frame(struct file *file, char __user *user_buf,
return 0;
/* Print out debug information. */
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Current frame:\n");
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Current frame:\n");
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Tx data (Len: %d):\n", cfspi->tx_cpck_len);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Tx data (Len: %d):\n", cfspi->tx_cpck_len);
len += print_frame((buf + len), (DEBUGFS_BUF_SIZE - len),
cfspi->xfer.va_tx[0],
(cfspi->tx_cpck_len + SPI_CMD_SZ), 100);
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Rx data (Len: %d):\n", cfspi->rx_cpck_len);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Rx data (Len: %d):\n", cfspi->rx_cpck_len);
len += print_frame((buf + len), (DEBUGFS_BUF_SIZE - len),
cfspi->xfer.va_rx,
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 6ee06a4..68834a2 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -883,6 +883,7 @@ static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = {
= { .len = sizeof(struct can_bittiming) },
[IFLA_CAN_DATA_BITTIMING_CONST]
= { .len = sizeof(struct can_bittiming_const) },
+ [IFLA_CAN_TERMINATION] = { .type = NLA_U16 },
};
static int can_validate(struct nlattr *tb[], struct nlattr *data[],
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index 2f5c287..a366428 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -625,7 +625,10 @@ static int slcan_open(struct tty_struct *tty)
tty->disc_data = NULL;
clear_bit(SLF_INUSE, &sl->flags);
slc_free_netdev(sl->dev);
+ /* do not call free_netdev before rtnl_unlock */
+ rtnl_unlock();
free_netdev(sl->dev);
+ return err;
err_exit:
rtnl_unlock();
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 449a221..1a69286 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1366,6 +1366,9 @@ void b53_vlan_add(struct dsa_switch *ds, int port,
b53_get_vlan_entry(dev, vid, vl);
+ if (vid == 0 && vid == b53_default_pvid(dev))
+ untagged = true;
+
vl->members |= BIT(port);
if (untagged && !dsa_is_cpu_port(ds, port))
vl->untag |= BIT(port);
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index d195554..b0f5280 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -69,8 +69,7 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
/* Force link status for IMP port */
reg = core_readl(priv, offset);
reg |= (MII_SW_OR | LINK_STS);
- if (priv->type == BCM7278_DEVICE_ID)
- reg |= GMII_SPEED_UP_2G;
+ reg &= ~GMII_SPEED_UP_2G;
core_writel(priv, reg, offset);
/* Enable Broadcast, Multicast, Unicast forwarding to IMP port */
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 022466c..7cbd1bd 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -566,7 +566,7 @@ mt7530_mib_reset(struct dsa_switch *ds)
static void
mt7530_port_set_status(struct mt7530_priv *priv, int port, int enable)
{
- u32 mask = PMCR_TX_EN | PMCR_RX_EN;
+ u32 mask = PMCR_TX_EN | PMCR_RX_EN | PMCR_FORCE_LNK;
if (enable)
mt7530_set(priv, MT7530_PMCR_P(port), mask);
@@ -1444,7 +1444,7 @@ static void mt7530_phylink_mac_config(struct dsa_switch *ds, int port,
mcr_new &= ~(PMCR_FORCE_SPEED_1000 | PMCR_FORCE_SPEED_100 |
PMCR_FORCE_FDX | PMCR_TX_FC_EN | PMCR_RX_FC_EN);
mcr_new |= PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | PMCR_BACKOFF_EN |
- PMCR_BACKPR_EN | PMCR_FORCE_MODE | PMCR_FORCE_LNK;
+ PMCR_BACKPR_EN | PMCR_FORCE_MODE;
/* Are we connected to external phy */
if (port == 5 && dsa_is_user_port(ds, 5))
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 8c92895..2f993e6 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2769,6 +2769,8 @@ static u64 mv88e6xxx_devlink_atu_bin_get(struct mv88e6xxx_chip *chip,
goto unlock;
}
+ occupancy &= MV88E6XXX_G2_ATU_STATS_MASK;
+
unlock:
mv88e6xxx_reg_unlock(chip);
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index f332cb4..79cad5e 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -236,7 +236,7 @@ struct mv88e6xxx_port {
bool mirror_ingress;
bool mirror_egress;
unsigned int serdes_irq;
- char serdes_irq_name[32];
+ char serdes_irq_name[64];
};
struct mv88e6xxx_chip {
@@ -293,16 +293,16 @@ struct mv88e6xxx_chip {
struct mv88e6xxx_irq g1_irq;
struct mv88e6xxx_irq g2_irq;
int irq;
- char irq_name[32];
+ char irq_name[64];
int device_irq;
- char device_irq_name[32];
+ char device_irq_name[64];
int watchdog_irq;
- char watchdog_irq_name[32];
+ char watchdog_irq_name[64];
int atu_prob_irq;
- char atu_prob_irq_name[32];
+ char atu_prob_irq_name[64];
int vtu_prob_irq;
- char vtu_prob_irq_name[32];
+ char vtu_prob_irq_name[64];
struct kthread_worker *kworker;
struct kthread_delayed_work irq_poll_work;
diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c
index b016cc2..ca3a7a7 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.c
+++ b/drivers/net/dsa/mv88e6xxx/global1.c
@@ -278,13 +278,13 @@ int mv88e6095_g1_set_egress_port(struct mv88e6xxx_chip *chip,
switch (direction) {
case MV88E6XXX_EGRESS_DIR_INGRESS:
dest_port_chip = &chip->ingress_dest_port;
- reg &= MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK;
+ reg &= ~MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK;
reg |= port <<
__bf_shf(MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK);
break;
case MV88E6XXX_EGRESS_DIR_EGRESS:
dest_port_chip = &chip->egress_dest_port;
- reg &= MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK;
+ reg &= ~MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK;
reg |= port <<
__bf_shf(MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK);
break;
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index 0150301..8fd4830 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -1099,6 +1099,13 @@ int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip)
{
int err, irq, virq;
+ chip->g2_irq.masked = ~0;
+ mv88e6xxx_reg_lock(chip);
+ err = mv88e6xxx_g2_int_mask(chip, ~chip->g2_irq.masked);
+ mv88e6xxx_reg_unlock(chip);
+ if (err)
+ return err;
+
chip->g2_irq.domain = irq_domain_add_simple(
chip->dev->of_node, 16, 0, &mv88e6xxx_g2_irq_domain_ops, chip);
if (!chip->g2_irq.domain)
@@ -1108,7 +1115,6 @@ int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip)
irq_create_mapping(chip->g2_irq.domain, irq);
chip->g2_irq.chip = mv88e6xxx_g2_irq_chip;
- chip->g2_irq.masked = ~0;
chip->device_irq = irq_find_mapping(chip->g1_irq.domain,
MV88E6XXX_G1_STS_IRQ_DEVICE);
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 03ba6d2..7edea57 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1741,7 +1741,8 @@ static void sja1105_teardown(struct dsa_switch *ds)
if (!dsa_is_user_port(ds, port))
continue;
- kthread_destroy_worker(sp->xmit_worker);
+ if (sp->xmit_worker)
+ kthread_destroy_worker(sp->xmit_worker);
}
sja1105_tas_teardown(ds);
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index ea62604..1fb58f9 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -200,6 +200,11 @@ static void comp_ctxt_release(struct ena_com_admin_queue *queue,
static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *queue,
u16 command_id, bool capture)
{
+ if (unlikely(!queue->comp_ctx)) {
+ pr_err("Completion context is NULL\n");
+ return NULL;
+ }
+
if (unlikely(command_id >= queue->q_depth)) {
pr_err("command id is larger than the queue size. cmd_id: %u queue size %d\n",
command_id, queue->q_depth);
@@ -1041,9 +1046,41 @@ static int ena_com_get_feature(struct ena_com_dev *ena_dev,
feature_ver);
}
+int ena_com_get_current_hash_function(struct ena_com_dev *ena_dev)
+{
+ return ena_dev->rss.hash_func;
+}
+
+static void ena_com_hash_key_fill_default_key(struct ena_com_dev *ena_dev)
+{
+ struct ena_admin_feature_rss_flow_hash_control *hash_key =
+ (ena_dev->rss).hash_key;
+
+ netdev_rss_key_fill(&hash_key->key, sizeof(hash_key->key));
+ /* The key is stored in the device in u32 array
+ * as well as the API requires the key to be passed in this
+ * format. Thus the size of our array should be divided by 4
+ */
+ hash_key->keys_num = sizeof(hash_key->key) / sizeof(u32);
+}
+
static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev)
{
struct ena_rss *rss = &ena_dev->rss;
+ struct ena_admin_feature_rss_flow_hash_control *hash_key;
+ struct ena_admin_get_feat_resp get_resp;
+ int rc;
+
+ hash_key = (ena_dev->rss).hash_key;
+
+ rc = ena_com_get_feature_ex(ena_dev, &get_resp,
+ ENA_ADMIN_RSS_HASH_FUNCTION,
+ ena_dev->rss.hash_key_dma_addr,
+ sizeof(ena_dev->rss.hash_key), 0);
+ if (unlikely(rc)) {
+ hash_key = NULL;
+ return -EOPNOTSUPP;
+ }
rss->hash_key =
dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key),
@@ -1254,30 +1291,6 @@ static int ena_com_ind_tbl_convert_to_device(struct ena_com_dev *ena_dev)
return 0;
}
-static int ena_com_ind_tbl_convert_from_device(struct ena_com_dev *ena_dev)
-{
- u16 dev_idx_to_host_tbl[ENA_TOTAL_NUM_QUEUES] = { (u16)-1 };
- struct ena_rss *rss = &ena_dev->rss;
- u8 idx;
- u16 i;
-
- for (i = 0; i < ENA_TOTAL_NUM_QUEUES; i++)
- dev_idx_to_host_tbl[ena_dev->io_sq_queues[i].idx] = i;
-
- for (i = 0; i < 1 << rss->tbl_log_size; i++) {
- if (rss->rss_ind_tbl[i].cq_idx > ENA_TOTAL_NUM_QUEUES)
- return -EINVAL;
- idx = (u8)rss->rss_ind_tbl[i].cq_idx;
-
- if (dev_idx_to_host_tbl[idx] > ENA_TOTAL_NUM_QUEUES)
- return -EINVAL;
-
- rss->host_rss_ind_tbl[i] = dev_idx_to_host_tbl[idx];
- }
-
- return 0;
-}
-
static void ena_com_update_intr_delay_resolution(struct ena_com_dev *ena_dev,
u16 intr_delay_resolution)
{
@@ -2297,15 +2310,16 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
switch (func) {
case ENA_ADMIN_TOEPLITZ:
- if (key_len > sizeof(hash_key->key)) {
- pr_err("key len (%hu) is bigger than the max supported (%zu)\n",
- key_len, sizeof(hash_key->key));
- return -EINVAL;
+ if (key) {
+ if (key_len != sizeof(hash_key->key)) {
+ pr_err("key len (%hu) doesn't equal the supported size (%zu)\n",
+ key_len, sizeof(hash_key->key));
+ return -EINVAL;
+ }
+ memcpy(hash_key->key, key, key_len);
+ rss->hash_init_val = init_val;
+ hash_key->keys_num = key_len >> 2;
}
-
- memcpy(hash_key->key, key, key_len);
- rss->hash_init_val = init_val;
- hash_key->keys_num = key_len >> 2;
break;
case ENA_ADMIN_CRC32:
rss->hash_init_val = init_val;
@@ -2342,7 +2356,11 @@ int ena_com_get_hash_function(struct ena_com_dev *ena_dev,
if (unlikely(rc))
return rc;
- rss->hash_func = get_resp.u.flow_hash_func.selected_func;
+ /* ffs() returns 1 in case the lsb is set */
+ rss->hash_func = ffs(get_resp.u.flow_hash_func.selected_func);
+ if (rss->hash_func)
+ rss->hash_func--;
+
if (func)
*func = rss->hash_func;
@@ -2606,10 +2624,6 @@ int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl)
if (!ind_tbl)
return 0;
- rc = ena_com_ind_tbl_convert_from_device(ena_dev);
- if (unlikely(rc))
- return rc;
-
for (i = 0; i < (1 << rss->tbl_log_size); i++)
ind_tbl[i] = rss->host_rss_ind_tbl[i];
@@ -2626,9 +2640,15 @@ int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 indr_tbl_log_size)
if (unlikely(rc))
goto err_indr_tbl;
+ /* The following function might return unsupported in case the
+ * device doesn't support setting the key / hash function. We can safely
+ * ignore this error and have indirection table support only.
+ */
rc = ena_com_hash_key_allocate(ena_dev);
- if (unlikely(rc))
+ if (unlikely(rc) && rc != -EOPNOTSUPP)
goto err_hash_key;
+ else if (rc != -EOPNOTSUPP)
+ ena_com_hash_key_fill_default_key(ena_dev);
rc = ena_com_hash_ctrl_init(ena_dev);
if (unlikely(rc))
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
index 0ce37d5..469f298 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -44,6 +44,7 @@
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>
+#include <linux/netdevice.h>
#include "ena_common_defs.h"
#include "ena_admin_defs.h"
@@ -655,6 +656,14 @@ int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 log_size);
*/
void ena_com_rss_destroy(struct ena_com_dev *ena_dev);
+/* ena_com_get_current_hash_function - Get RSS hash function
+ * @ena_dev: ENA communication layer struct
+ *
+ * Return the current hash function.
+ * @return: 0 or one of the ena_admin_hash_functions values.
+ */
+int ena_com_get_current_hash_function(struct ena_com_dev *ena_dev);
+
/* ena_com_fill_hash_function - Fill RSS hash function
* @ena_dev: ENA communication layer struct
* @func: The hash function (Toeplitz or crc)
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index b4e891d..ced1d57 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -636,6 +636,28 @@ static u32 ena_get_rxfh_key_size(struct net_device *netdev)
return ENA_HASH_KEY_SIZE;
}
+static int ena_indirection_table_get(struct ena_adapter *adapter, u32 *indir)
+{
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+ int i, rc;
+
+ if (!indir)
+ return 0;
+
+ rc = ena_com_indirect_table_get(ena_dev, indir);
+ if (rc)
+ return rc;
+
+ /* Our internal representation of the indices is: even indices
+ * for Tx and uneven indices for Rx. We need to convert the Rx
+ * indices to be consecutive
+ */
+ for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++)
+ indir[i] = ENA_IO_RXQ_IDX_TO_COMBINED_IDX(indir[i]);
+
+ return rc;
+}
+
static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
u8 *hfunc)
{
@@ -644,11 +666,25 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
u8 func;
int rc;
- rc = ena_com_indirect_table_get(adapter->ena_dev, indir);
+ rc = ena_indirection_table_get(adapter, indir);
if (rc)
return rc;
+ /* We call this function in order to check if the device
+ * supports getting/setting the hash function.
+ */
rc = ena_com_get_hash_function(adapter->ena_dev, &ena_func, key);
+
+ if (rc) {
+ if (rc == -EOPNOTSUPP) {
+ key = NULL;
+ hfunc = NULL;
+ rc = 0;
+ }
+
+ return rc;
+ }
+
if (rc)
return rc;
@@ -657,7 +693,7 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
func = ETH_RSS_HASH_TOP;
break;
case ENA_ADMIN_CRC32:
- func = ETH_RSS_HASH_XOR;
+ func = ETH_RSS_HASH_CRC32;
break;
default:
netif_err(adapter, drv, netdev,
@@ -700,10 +736,13 @@ static int ena_set_rxfh(struct net_device *netdev, const u32 *indir,
}
switch (hfunc) {
+ case ETH_RSS_HASH_NO_CHANGE:
+ func = ena_com_get_current_hash_function(ena_dev);
+ break;
case ETH_RSS_HASH_TOP:
func = ENA_ADMIN_TOEPLITZ;
break;
- case ETH_RSS_HASH_XOR:
+ case ETH_RSS_HASH_CRC32:
func = ENA_ADMIN_CRC32;
break;
default:
@@ -814,6 +853,7 @@ static const struct ethtool_ops ena_ethtool_ops = {
.set_channels = ena_set_channels,
.get_tunable = ena_get_tunable,
.set_tunable = ena_set_tunable,
+ .get_ts_info = ethtool_op_get_ts_info,
};
void ena_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 894e8c1..cada6e7 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -1018,13 +1018,9 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
struct ena_rx_buffer *rx_info;
req_id = rx_ring->free_ids[next_to_use];
- rc = validate_rx_req_id(rx_ring, req_id);
- if (unlikely(rc < 0))
- break;
rx_info = &rx_ring->rx_buffer_info[req_id];
-
rc = ena_alloc_rx_page(rx_ring, rx_info,
GFP_ATOMIC | __GFP_COMP);
if (unlikely(rc < 0)) {
@@ -1379,9 +1375,15 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
struct ena_rx_buffer *rx_info;
u16 len, req_id, buf = 0;
void *va;
+ int rc;
len = ena_bufs[buf].len;
req_id = ena_bufs[buf].req_id;
+
+ rc = validate_rx_req_id(rx_ring, req_id);
+ if (unlikely(rc < 0))
+ return NULL;
+
rx_info = &rx_ring->rx_buffer_info[req_id];
if (unlikely(!rx_info->page)) {
@@ -1454,6 +1456,11 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
buf++;
len = ena_bufs[buf].len;
req_id = ena_bufs[buf].req_id;
+
+ rc = validate_rx_req_id(rx_ring, req_id);
+ if (unlikely(rc < 0))
+ return NULL;
+
rx_info = &rx_ring->rx_buffer_info[req_id];
} while (1);
@@ -1968,7 +1975,7 @@ static int ena_enable_msix(struct ena_adapter *adapter)
}
/* Reserved the max msix vectors we might need */
- msix_vecs = ENA_MAX_MSIX_VEC(adapter->num_io_queues);
+ msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
netif_dbg(adapter, probe, adapter->netdev,
"trying to enable MSI-X, vectors %d\n", msix_vecs);
@@ -2068,6 +2075,7 @@ static int ena_request_mgmnt_irq(struct ena_adapter *adapter)
static int ena_request_io_irq(struct ena_adapter *adapter)
{
+ u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
unsigned long flags = 0;
struct ena_irq *irq;
int rc = 0, i, k;
@@ -2078,7 +2086,7 @@ static int ena_request_io_irq(struct ena_adapter *adapter)
return -EINVAL;
}
- for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
+ for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
irq = &adapter->irq_tbl[i];
rc = request_irq(irq->vector, irq->handler, flags, irq->name,
irq->data);
@@ -2119,6 +2127,7 @@ static void ena_free_mgmnt_irq(struct ena_adapter *adapter)
static void ena_free_io_irq(struct ena_adapter *adapter)
{
+ u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
struct ena_irq *irq;
int i;
@@ -2129,7 +2138,7 @@ static void ena_free_io_irq(struct ena_adapter *adapter)
}
#endif /* CONFIG_RFS_ACCEL */
- for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
+ for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
irq = &adapter->irq_tbl[i];
irq_set_affinity_hint(irq->vector, NULL);
free_irq(irq->vector, irq->data);
@@ -2144,12 +2153,13 @@ static void ena_disable_msix(struct ena_adapter *adapter)
static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
{
+ u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
int i;
if (!netif_running(adapter->netdev))
return;
- for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++)
+ for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++)
synchronize_irq(adapter->irq_tbl[i].vector);
}
@@ -3476,6 +3486,7 @@ static int ena_restore_device(struct ena_adapter *adapter)
netif_carrier_on(adapter->netdev);
mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
+ adapter->last_keep_alive_jiffies = jiffies;
dev_err(&pdev->dev,
"Device reset completed successfully, Driver info: %s\n",
version);
@@ -3706,8 +3717,8 @@ static void check_for_missing_keep_alive(struct ena_adapter *adapter)
if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
return;
- keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies +
- adapter->keep_alive_timeout);
+ keep_alive_expired = adapter->last_keep_alive_jiffies +
+ adapter->keep_alive_timeout;
if (unlikely(time_is_before_jiffies(keep_alive_expired))) {
netif_err(adapter, drv, adapter->netdev,
"Keep alive watchdog timeout.\n");
@@ -3809,7 +3820,7 @@ static void ena_timer_service(struct timer_list *t)
}
/* Reset the timer */
- mod_timer(&adapter->timer_service, jiffies + HZ);
+ mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
}
static int ena_calc_max_io_queue_num(struct pci_dev *pdev,
@@ -4325,13 +4336,15 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/*****************************************************************************/
-/* ena_remove - Device Removal Routine
+/* __ena_shutoff - Helper used in both PCI remove/shutdown routines
* @pdev: PCI device information struct
+ * @shutdown: Is it a shutdown operation? If false, means it is a removal
*
- * ena_remove is called by the PCI subsystem to alert the driver
- * that it should release a PCI device.
+ * __ena_shutoff is a helper routine that does the real work on shutdown and
+ * removal paths; the difference between those paths is with regards to whether
+ * dettach or unregister the netdevice.
*/
-static void ena_remove(struct pci_dev *pdev)
+static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
{
struct ena_adapter *adapter = pci_get_drvdata(pdev);
struct ena_com_dev *ena_dev;
@@ -4350,13 +4363,17 @@ static void ena_remove(struct pci_dev *pdev)
cancel_work_sync(&adapter->reset_task);
- rtnl_lock();
+ rtnl_lock(); /* lock released inside the below if-else block */
ena_destroy_device(adapter, true);
- rtnl_unlock();
-
- unregister_netdev(netdev);
-
- free_netdev(netdev);
+ if (shutdown) {
+ netif_device_detach(netdev);
+ dev_close(netdev);
+ rtnl_unlock();
+ } else {
+ rtnl_unlock();
+ unregister_netdev(netdev);
+ free_netdev(netdev);
+ }
ena_com_rss_destroy(ena_dev);
@@ -4371,6 +4388,30 @@ static void ena_remove(struct pci_dev *pdev)
vfree(ena_dev);
}
+/* ena_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * ena_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.
+ */
+
+static void ena_remove(struct pci_dev *pdev)
+{
+ __ena_shutoff(pdev, false);
+}
+
+/* ena_shutdown - Device Shutdown Routine
+ * @pdev: PCI device information struct
+ *
+ * ena_shutdown is called by the PCI subsystem to alert the driver that
+ * a shutdown/reboot (or kexec) is happening and device must be disabled.
+ */
+
+static void ena_shutdown(struct pci_dev *pdev)
+{
+ __ena_shutoff(pdev, true);
+}
+
#ifdef CONFIG_PM
/* ena_suspend - PM suspend callback
* @pdev: PCI device information struct
@@ -4420,6 +4461,7 @@ static struct pci_driver ena_pci_driver = {
.id_table = ena_pci_tbl,
.probe = ena_probe,
.remove = ena_remove,
+ .shutdown = ena_shutdown,
#ifdef CONFIG_PM
.suspend = ena_suspend,
.resume = ena_resume,
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 094324f..8795e0b 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -130,6 +130,8 @@
#define ENA_IO_TXQ_IDX(q) (2 * (q))
#define ENA_IO_RXQ_IDX(q) (2 * (q) + 1)
+#define ENA_IO_TXQ_IDX_TO_COMBINED_IDX(q) ((q) / 2)
+#define ENA_IO_RXQ_IDX_TO_COMBINED_IDX(q) (((q) - 1) / 2)
#define ENA_MGMNT_IRQ_IDX 0
#define ENA_IO_IRQ_FIRST_IDX 1
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index a1f99be..7b55633 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -722,6 +722,11 @@ static int aq_ethtool_set_priv_flags(struct net_device *ndev, u32 flags)
if (flags & ~AQ_PRIV_FLAGS_MASK)
return -EOPNOTSUPP;
+ if (hweight32((flags | priv_flags) & AQ_HW_LOOPBACK_MASK) > 1) {
+ netdev_info(ndev, "Can't enable more than one loopback simultaneously\n");
+ return -EINVAL;
+ }
+
cfg->priv_flags = flags;
if ((priv_flags ^ flags) & BIT(AQ_HW_LOOPBACK_DMA_NET)) {
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
index 6102251..03ff92b 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
@@ -163,7 +163,7 @@ aq_check_approve_fvlan(struct aq_nic_s *aq_nic,
}
if ((aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) &&
- (!test_bit(be16_to_cpu(fsp->h_ext.vlan_tci),
+ (!test_bit(be16_to_cpu(fsp->h_ext.vlan_tci) & VLAN_VID_MASK,
aq_nic->active_vlans))) {
netdev_err(aq_nic->ndev,
"ethtool: unknown vlan-id specified");
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index cc70c60..251767c 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -337,6 +337,8 @@ struct aq_fw_ops {
void (*enable_ptp)(struct aq_hw_s *self, int enable);
+ void (*adjust_ptp)(struct aq_hw_s *self, uint64_t adj);
+
int (*set_eee_rate)(struct aq_hw_s *self, u32 speed);
int (*get_eee_rate)(struct aq_hw_s *self, u32 *rate,
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index c85e3e2..e95f6a6 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -533,8 +533,10 @@ unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb,
dx_buff->len,
DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa)))
+ if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) {
+ ret = 0;
goto exit;
+ }
first = dx_buff;
dx_buff->len_pkt = skb->len;
@@ -655,10 +657,6 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb)
if (likely(frags)) {
err = self->aq_hw_ops->hw_ring_tx_xmit(self->aq_hw,
ring, frags);
- if (err >= 0) {
- ++ring->stats.tx.packets;
- ring->stats.tx.bytes += skb->len;
- }
} else {
err = NETDEV_TX_BUSY;
}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index 6b27af0..78b6f32 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -359,7 +359,8 @@ static int aq_suspend_common(struct device *dev, bool deep)
netif_device_detach(nic->ndev);
netif_tx_stop_all_queues(nic->ndev);
- aq_nic_stop(nic);
+ if (netif_running(nic->ndev))
+ aq_nic_stop(nic);
if (deep) {
aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol);
@@ -375,7 +376,7 @@ static int atl_resume_common(struct device *dev, bool deep)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct aq_nic_s *nic;
- int ret;
+ int ret = 0;
nic = pci_get_drvdata(pdev);
@@ -390,9 +391,11 @@ static int atl_resume_common(struct device *dev, bool deep)
goto err_exit;
}
- ret = aq_nic_start(nic);
- if (ret)
- goto err_exit;
+ if (netif_running(nic->ndev)) {
+ ret = aq_nic_start(nic);
+ if (ret)
+ goto err_exit;
+ }
netif_device_attach(nic->ndev);
netif_tx_start_all_queues(nic->ndev);
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 951d86f..bae95a6 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -272,9 +272,12 @@ bool aq_ring_tx_clean(struct aq_ring_s *self)
}
}
- if (unlikely(buff->is_eop))
- dev_kfree_skb_any(buff->skb);
+ if (unlikely(buff->is_eop)) {
+ ++self->stats.rx.packets;
+ self->stats.tx.bytes += buff->skb->len;
+ dev_kfree_skb_any(buff->skb);
+ }
buff->pa = 0U;
buff->eop_index = 0xffffU;
self->sw_head = aq_ring_next_dx(self, self->sw_head);
@@ -351,7 +354,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
err = 0;
goto err_exit;
}
- if (buff->is_error || buff->is_cso_err) {
+ if (buff->is_error ||
+ (buff->is_lro && buff->is_cso_err)) {
buff_ = buff;
do {
next_ = buff_->next,
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
index 991e4d3..2c96f20 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
@@ -78,7 +78,8 @@ struct __packed aq_ring_buff_s {
u32 is_cleaned:1;
u32 is_error:1;
u32 is_vlan:1;
- u32 rsvd3:4;
+ u32 is_lro:1;
+ u32 rsvd3:3;
u16 eop_index;
u16 rsvd4;
};
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index ec041f7..d20d91c 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -823,6 +823,8 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self,
}
}
+ buff->is_lro = !!(HW_ATL_B0_RXD_WB_STAT2_RSCCNT &
+ rxd_wb->status);
if (HW_ATL_B0_RXD_WB_STAT2_EOP & rxd_wb->status) {
buff->len = rxd_wb->pkt_len %
AQ_CFG_RX_FRAME_MAX;
@@ -835,8 +837,7 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self,
rxd_wb->pkt_len > AQ_CFG_RX_FRAME_MAX ?
AQ_CFG_RX_FRAME_MAX : rxd_wb->pkt_len;
- if (HW_ATL_B0_RXD_WB_STAT2_RSCCNT &
- rxd_wb->status) {
+ if (buff->is_lro) {
/* LRO */
buff->next = rxd_wb->next_desc_ptr;
++ring->stats.rx.lro_packets;
@@ -884,13 +885,16 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self,
{
struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
unsigned int i = 0U;
+ u32 vlan_promisc;
+ u32 l2_promisc;
- hw_atl_rpfl2promiscuous_mode_en_set(self,
- IS_FILTER_ENABLED(IFF_PROMISC));
+ l2_promisc = IS_FILTER_ENABLED(IFF_PROMISC) ||
+ !!(cfg->priv_flags & BIT(AQ_HW_LOOPBACK_DMA_NET));
+ vlan_promisc = l2_promisc || cfg->is_vlan_force_promisc;
- hw_atl_rpf_vlan_prom_mode_en_set(self,
- IS_FILTER_ENABLED(IFF_PROMISC) ||
- cfg->is_vlan_force_promisc);
+ hw_atl_rpfl2promiscuous_mode_en_set(self, l2_promisc);
+
+ hw_atl_rpf_vlan_prom_mode_en_set(self, vlan_promisc);
hw_atl_rpfl2multicast_flr_en_set(self,
IS_FILTER_ENABLED(IFF_ALLMULTI) &&
@@ -1161,6 +1165,8 @@ static int hw_atl_b0_adj_sys_clock(struct aq_hw_s *self, s64 delta)
{
self->ptp_clk_offset += delta;
+ self->aq_fw_ops->adjust_ptp(self, self->ptp_clk_offset);
+
return 0;
}
@@ -1211,7 +1217,7 @@ static int hw_atl_b0_gpio_pulse(struct aq_hw_s *self, u32 index,
fwreq.ptp_gpio_ctrl.index = index;
fwreq.ptp_gpio_ctrl.period = period;
/* Apply time offset */
- fwreq.ptp_gpio_ctrl.start = start - self->ptp_clk_offset;
+ fwreq.ptp_gpio_ctrl.start = start;
size = sizeof(fwreq.msg_id) + sizeof(fwreq.ptp_gpio_ctrl);
return self->aq_fw_ops->send_fw_request(self, &fwreq, size);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index f547baa..354705f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -22,6 +22,7 @@
#define HW_ATL_MIF_ADDR 0x0208U
#define HW_ATL_MIF_VAL 0x020CU
+#define HW_ATL_MPI_RPC_ADDR 0x0334U
#define HW_ATL_RPC_CONTROL_ADR 0x0338U
#define HW_ATL_RPC_STATE_ADR 0x033CU
@@ -53,15 +54,14 @@ enum mcp_area {
};
static int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual);
-
static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self,
enum hal_atl_utils_fw_state_e state);
-
static u32 hw_atl_utils_get_mpi_mbox_tid(struct aq_hw_s *self);
static u32 hw_atl_utils_mpi_get_state(struct aq_hw_s *self);
static u32 hw_atl_utils_mif_cmd_get(struct aq_hw_s *self);
static u32 hw_atl_utils_mif_addr_get(struct aq_hw_s *self);
static u32 hw_atl_utils_rpc_state_get(struct aq_hw_s *self);
+static u32 aq_fw1x_rpc_get(struct aq_hw_s *self);
int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops)
{
@@ -476,6 +476,10 @@ static int hw_atl_utils_init_ucp(struct aq_hw_s *self,
self, self->mbox_addr,
self->mbox_addr != 0U,
1000U, 10000U);
+ err = readx_poll_timeout_atomic(aq_fw1x_rpc_get, self,
+ self->rpc_addr,
+ self->rpc_addr != 0U,
+ 1000U, 100000U);
return err;
}
@@ -531,6 +535,12 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self,
self, fw.val,
sw.tid == fw.tid,
1000U, 100000U);
+ if (err < 0)
+ goto err_exit;
+
+ err = aq_hw_err_from_flags(self);
+ if (err < 0)
+ goto err_exit;
if (fw.len == 0xFFFFU) {
err = hw_atl_utils_fw_rpc_call(self, sw.len);
@@ -1025,6 +1035,11 @@ static u32 hw_atl_utils_rpc_state_get(struct aq_hw_s *self)
return aq_hw_read_reg(self, HW_ATL_RPC_STATE_ADR);
}
+static u32 aq_fw1x_rpc_get(struct aq_hw_s *self)
+{
+ return aq_hw_read_reg(self, HW_ATL_MPI_RPC_ADDR);
+}
+
const struct aq_fw_ops aq_fw_1x_ops = {
.init = hw_atl_utils_mpi_create,
.deinit = hw_atl_fw1x_deinit,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
index 97ebf84..77a4ed6 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
@@ -30,6 +30,9 @@
#define HW_ATL_FW3X_EXT_CONTROL_ADDR 0x378
#define HW_ATL_FW3X_EXT_STATE_ADDR 0x37c
+#define HW_ATL_FW3X_PTP_ADJ_LSW_ADDR 0x50a0
+#define HW_ATL_FW3X_PTP_ADJ_MSW_ADDR 0x50a4
+
#define HW_ATL_FW2X_CAP_PAUSE BIT(CAPS_HI_PAUSE)
#define HW_ATL_FW2X_CAP_ASYM_PAUSE BIT(CAPS_HI_ASYMMETRIC_PAUSE)
#define HW_ATL_FW2X_CAP_SLEEP_PROXY BIT(CAPS_HI_SLEEP_PROXY)
@@ -475,6 +478,14 @@ static void aq_fw3x_enable_ptp(struct aq_hw_s *self, int enable)
aq_hw_write_reg(self, HW_ATL_FW3X_EXT_CONTROL_ADDR, ptp_opts);
}
+static void aq_fw3x_adjust_ptp(struct aq_hw_s *self, uint64_t adj)
+{
+ aq_hw_write_reg(self, HW_ATL_FW3X_PTP_ADJ_LSW_ADDR,
+ (adj >> 0) & 0xffffffff);
+ aq_hw_write_reg(self, HW_ATL_FW3X_PTP_ADJ_MSW_ADDR,
+ (adj >> 32) & 0xffffffff);
+}
+
static int aq_fw2x_led_control(struct aq_hw_s *self, u32 mode)
{
if (self->fw_ver_actual < HW_ATL_FW_VER_LED)
@@ -633,4 +644,5 @@ const struct aq_fw_ops aq_fw_2x_ops = {
.enable_ptp = aq_fw3x_enable_ptp,
.led_control = aq_fw2x_led_control,
.set_phyloopback = aq_fw2x_set_phyloopback,
+ .adjust_ptp = aq_fw3x_adjust_ptp,
};
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index e0611cb..15b31cd 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -2135,7 +2135,7 @@ static int bcm_sysport_rule_set(struct bcm_sysport_priv *priv,
return -ENOSPC;
index = find_first_zero_bit(priv->filters, RXCHK_BRCM_TAG_MAX);
- if (index > RXCHK_BRCM_TAG_MAX)
+ if (index >= RXCHK_BRCM_TAG_MAX)
return -ENOSPC;
/* Location is the classification ID, and index is the position
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 597e6fd..d28b406 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6880,12 +6880,12 @@ static int bnxt_alloc_ctx_mem(struct bnxt *bp)
}
ena |= FUNC_BACKING_STORE_CFG_REQ_DFLT_ENABLES;
rc = bnxt_hwrm_func_backing_store_cfg(bp, ena);
- if (rc)
+ if (rc) {
netdev_err(bp->dev, "Failed configuring context mem, rc = %d.\n",
rc);
- else
- ctx->flags |= BNXT_CTX_FLAG_INITED;
-
+ return rc;
+ }
+ ctx->flags |= BNXT_CTX_FLAG_INITED;
return 0;
}
@@ -7406,14 +7406,22 @@ static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp)
pri2cos = &resp2->pri0_cos_queue_id;
for (i = 0; i < 8; i++) {
u8 queue_id = pri2cos[i];
+ u8 queue_idx;
+ /* Per port queue IDs start from 0, 10, 20, etc */
+ queue_idx = queue_id % 10;
+ if (queue_idx > BNXT_MAX_QUEUE) {
+ bp->pri2cos_valid = false;
+ goto qstats_done;
+ }
for (j = 0; j < bp->max_q; j++) {
if (bp->q_ids[j] == queue_id)
- bp->pri2cos[i] = j;
+ bp->pri2cos_idx[i] = queue_idx;
}
}
bp->pri2cos_valid = 1;
}
+qstats_done:
mutex_unlock(&bp->hwrm_cmd_lock);
return rc;
}
@@ -10982,13 +10990,13 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu)
struct bnxt *bp = netdev_priv(dev);
if (netif_running(dev))
- bnxt_close_nic(bp, false, false);
+ bnxt_close_nic(bp, true, false);
dev->mtu = new_mtu;
bnxt_set_ring_params(bp);
if (netif_running(dev))
- return bnxt_open_nic(bp, false, false);
+ return bnxt_open_nic(bp, true, false);
return 0;
}
@@ -11252,7 +11260,7 @@ static void bnxt_cfg_ntp_filters(struct bnxt *bp)
}
}
if (test_and_clear_bit(BNXT_HWRM_PF_UNLOAD_SP_EVENT, &bp->sp_event))
- netdev_info(bp->dev, "Receive PF driver unload event!");
+ netdev_info(bp->dev, "Receive PF driver unload event!\n");
}
#else
@@ -11669,6 +11677,10 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh)
bp->rx_nr_rings++;
bp->cp_nr_rings++;
}
+ if (rc) {
+ bp->tx_nr_rings = 0;
+ bp->rx_nr_rings = 0;
+ }
return rc;
}
@@ -11759,7 +11771,7 @@ static int bnxt_pcie_dsn_get(struct bnxt *bp, u8 dsn[])
u32 dw;
if (!pos) {
- netdev_info(bp->dev, "Unable do read adapter's DSN");
+ netdev_info(bp->dev, "Unable do read adapter's DSN\n");
return -EOPNOTSUPP;
}
@@ -11786,6 +11798,14 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (version_printed++ == 0)
pr_info("%s", version);
+ /* Clear any pending DMA transactions from crash kernel
+ * while loading driver in capture kernel.
+ */
+ if (is_kdump_kernel()) {
+ pci_clear_master(pdev);
+ pcie_flr(pdev);
+ }
+
max_irqs = bnxt_get_max_irq(pdev);
dev = alloc_etherdev_mq(sizeof(*bp), max_irqs);
if (!dev)
@@ -11954,12 +11974,12 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
bnxt_hwrm_func_drv_unrgtr(bp);
bnxt_free_hwrm_short_cmd_req(bp);
bnxt_free_hwrm_resources(bp);
- bnxt_free_ctx_mem(bp);
- kfree(bp->ctx);
- bp->ctx = NULL;
kfree(bp->fw_health);
bp->fw_health = NULL;
bnxt_cleanup_pci(bp);
+ bnxt_free_ctx_mem(bp);
+ kfree(bp->ctx);
+ bp->ctx = NULL;
init_err_free:
free_netdev(dev);
@@ -11983,10 +12003,10 @@ static void bnxt_shutdown(struct pci_dev *pdev)
dev_close(dev);
bnxt_ulp_shutdown(bp);
+ bnxt_clear_int_mode(bp);
+ pci_disable_device(pdev);
if (system_state == SYSTEM_POWER_OFF) {
- bnxt_clear_int_mode(bp);
- pci_disable_device(pdev);
pci_wake_from_d3(pdev, bp->wol);
pci_set_power_state(pdev, PCI_D3hot);
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index cabef0b..63b1706 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1716,7 +1716,7 @@ struct bnxt {
u16 fw_rx_stats_ext_size;
u16 fw_tx_stats_ext_size;
u16 hw_ring_stats_size;
- u8 pri2cos[8];
+ u8 pri2cos_idx[8];
u8 pri2cos_valid;
u16 hwrm_max_req_len;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index fb6f30d..b1511bc 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -479,24 +479,26 @@ static int bnxt_dcbnl_ieee_getets(struct net_device *dev, struct ieee_ets *ets)
{
struct bnxt *bp = netdev_priv(dev);
struct ieee_ets *my_ets = bp->ieee_ets;
+ int rc;
ets->ets_cap = bp->max_tc;
if (!my_ets) {
- int rc;
-
if (bp->dcbx_cap & DCB_CAP_DCBX_HOST)
return 0;
my_ets = kzalloc(sizeof(*my_ets), GFP_KERNEL);
if (!my_ets)
- return 0;
+ return -ENOMEM;
rc = bnxt_hwrm_queue_cos2bw_qcfg(bp, my_ets);
if (rc)
- return 0;
+ goto error;
rc = bnxt_hwrm_queue_pri2cos_qcfg(bp, my_ets);
if (rc)
- return 0;
+ goto error;
+
+ /* cache result */
+ bp->ieee_ets = my_ets;
}
ets->cbs = my_ets->cbs;
@@ -505,6 +507,9 @@ static int bnxt_dcbnl_ieee_getets(struct net_device *dev, struct ieee_ets *ets)
memcpy(ets->tc_tsa, my_ets->tc_tsa, sizeof(ets->tc_tsa));
memcpy(ets->prio_tc, my_ets->prio_tc, sizeof(ets->prio_tc));
return 0;
+error:
+ kfree(my_ets);
+ return rc;
}
static int bnxt_dcbnl_ieee_setets(struct net_device *dev, struct ieee_ets *ets)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index eec0168..d3c93cc 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -641,14 +641,14 @@ static int bnxt_dl_params_register(struct bnxt *bp)
rc = devlink_params_register(bp->dl, bnxt_dl_params,
ARRAY_SIZE(bnxt_dl_params));
if (rc) {
- netdev_warn(bp->dev, "devlink_params_register failed. rc=%d",
+ netdev_warn(bp->dev, "devlink_params_register failed. rc=%d\n",
rc);
return rc;
}
rc = devlink_port_params_register(&bp->dl_port, bnxt_dl_port_params,
ARRAY_SIZE(bnxt_dl_port_params));
if (rc) {
- netdev_err(bp->dev, "devlink_port_params_register failed");
+ netdev_err(bp->dev, "devlink_port_params_register failed\n");
devlink_params_unregister(bp->dl, bnxt_dl_params,
ARRAY_SIZE(bnxt_dl_params));
return rc;
@@ -679,7 +679,7 @@ int bnxt_dl_register(struct bnxt *bp)
else
dl = devlink_alloc(&bnxt_vf_dl_ops, sizeof(struct bnxt_dl));
if (!dl) {
- netdev_warn(bp->dev, "devlink_alloc failed");
+ netdev_warn(bp->dev, "devlink_alloc failed\n");
return -ENOMEM;
}
@@ -692,7 +692,7 @@ int bnxt_dl_register(struct bnxt *bp)
rc = devlink_register(dl, &bp->pdev->dev);
if (rc) {
- netdev_warn(bp->dev, "devlink_register failed. rc=%d", rc);
+ netdev_warn(bp->dev, "devlink_register failed. rc=%d\n", rc);
goto err_dl_free;
}
@@ -704,7 +704,7 @@ int bnxt_dl_register(struct bnxt *bp)
sizeof(bp->dsn));
rc = devlink_port_register(dl, &bp->dl_port, bp->pf.port_id);
if (rc) {
- netdev_err(bp->dev, "devlink_port_register failed");
+ netdev_err(bp->dev, "devlink_port_register failed\n");
goto err_dl_unreg;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 6171fa8..3f8a1de 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -589,25 +589,25 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
if (bp->pri2cos_valid) {
for (i = 0; i < 8; i++, j++) {
long n = bnxt_rx_bytes_pri_arr[i].base_off +
- bp->pri2cos[i];
+ bp->pri2cos_idx[i];
buf[j] = le64_to_cpu(*(rx_port_stats_ext + n));
}
for (i = 0; i < 8; i++, j++) {
long n = bnxt_rx_pkts_pri_arr[i].base_off +
- bp->pri2cos[i];
+ bp->pri2cos_idx[i];
buf[j] = le64_to_cpu(*(rx_port_stats_ext + n));
}
for (i = 0; i < 8; i++, j++) {
long n = bnxt_tx_bytes_pri_arr[i].base_off +
- bp->pri2cos[i];
+ bp->pri2cos_idx[i];
buf[j] = le64_to_cpu(*(tx_port_stats_ext + n));
}
for (i = 0; i < 8; i++, j++) {
long n = bnxt_tx_pkts_pri_arr[i].base_off +
- bp->pri2cos[i];
+ bp->pri2cos_idx[i];
buf[j] = le64_to_cpu(*(tx_port_stats_ext + n));
}
@@ -2007,8 +2007,8 @@ int bnxt_flash_package_from_file(struct net_device *dev, const char *filename,
struct hwrm_nvm_install_update_output *resp = bp->hwrm_cmd_resp_addr;
struct hwrm_nvm_install_update_input install = {0};
const struct firmware *fw;
- int rc, hwrm_err = 0;
u32 item_len;
+ int rc = 0;
u16 index;
bnxt_hwrm_fw_set_time(bp);
@@ -2028,7 +2028,7 @@ int bnxt_flash_package_from_file(struct net_device *dev, const char *filename,
}
if (fw->size > item_len) {
- netdev_err(dev, "PKG insufficient update area in nvram: %lu",
+ netdev_err(dev, "PKG insufficient update area in nvram: %lu\n",
(unsigned long)fw->size);
rc = -EFBIG;
} else {
@@ -2052,15 +2052,14 @@ int bnxt_flash_package_from_file(struct net_device *dev, const char *filename,
memcpy(kmem, fw->data, fw->size);
modify.host_src_addr = cpu_to_le64(dma_handle);
- hwrm_err = hwrm_send_message(bp, &modify,
- sizeof(modify),
- FLASH_PACKAGE_TIMEOUT);
+ rc = hwrm_send_message(bp, &modify, sizeof(modify),
+ FLASH_PACKAGE_TIMEOUT);
dma_free_coherent(&bp->pdev->dev, fw->size, kmem,
dma_handle);
}
}
release_firmware(fw);
- if (rc || hwrm_err)
+ if (rc)
goto err_exit;
if ((install_type & 0xffff) == 0)
@@ -2069,20 +2068,19 @@ int bnxt_flash_package_from_file(struct net_device *dev, const char *filename,
install.install_type = cpu_to_le32(install_type);
mutex_lock(&bp->hwrm_cmd_lock);
- hwrm_err = _hwrm_send_message(bp, &install, sizeof(install),
- INSTALL_PACKAGE_TIMEOUT);
- if (hwrm_err) {
+ rc = _hwrm_send_message(bp, &install, sizeof(install),
+ INSTALL_PACKAGE_TIMEOUT);
+ if (rc) {
u8 error_code = ((struct hwrm_err_output *)resp)->cmd_err;
if (resp->error_code && error_code ==
NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) {
install.flags |= cpu_to_le16(
NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG);
- hwrm_err = _hwrm_send_message(bp, &install,
- sizeof(install),
- INSTALL_PACKAGE_TIMEOUT);
+ rc = _hwrm_send_message(bp, &install, sizeof(install),
+ INSTALL_PACKAGE_TIMEOUT);
}
- if (hwrm_err)
+ if (rc)
goto flash_pkg_exit;
}
@@ -2094,7 +2092,7 @@ int bnxt_flash_package_from_file(struct net_device *dev, const char *filename,
flash_pkg_exit:
mutex_unlock(&bp->hwrm_cmd_lock);
err_exit:
- if (hwrm_err == -EACCES)
+ if (rc == -EACCES)
bnxt_print_admin_err(bp);
return rc;
}
@@ -3338,7 +3336,7 @@ static int bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len)
kfree(coredump.data);
*dump_len += sizeof(struct bnxt_coredump_record);
if (rc == -ENOBUFS)
- netdev_err(bp->dev, "Firmware returned large coredump buffer");
+ netdev_err(bp->dev, "Firmware returned large coredump buffer\n");
return rc;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 0cc6ec5..9bec256 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -50,7 +50,7 @@ static u16 bnxt_flow_get_dst_fid(struct bnxt *pf_bp, struct net_device *dev)
/* check if dev belongs to the same switch */
if (!netdev_port_same_parent_id(pf_bp->dev, dev)) {
- netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch",
+ netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch\n",
dev->ifindex);
return BNXT_FID_INVALID;
}
@@ -70,7 +70,7 @@ static int bnxt_tc_parse_redir(struct bnxt *bp,
struct net_device *dev = act->dev;
if (!dev) {
- netdev_info(bp->dev, "no dev in mirred action");
+ netdev_info(bp->dev, "no dev in mirred action\n");
return -EINVAL;
}
@@ -106,7 +106,7 @@ static int bnxt_tc_parse_tunnel_set(struct bnxt *bp,
const struct ip_tunnel_key *tun_key = &tun_info->key;
if (ip_tunnel_info_af(tun_info) != AF_INET) {
- netdev_info(bp->dev, "only IPv4 tunnel-encap is supported");
+ netdev_info(bp->dev, "only IPv4 tunnel-encap is supported\n");
return -EOPNOTSUPP;
}
@@ -295,7 +295,7 @@ static int bnxt_tc_parse_actions(struct bnxt *bp,
int i, rc;
if (!flow_action_has_entries(flow_action)) {
- netdev_info(bp->dev, "no actions");
+ netdev_info(bp->dev, "no actions\n");
return -EINVAL;
}
@@ -370,7 +370,7 @@ static int bnxt_tc_parse_flow(struct bnxt *bp,
/* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */
if ((dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) == 0 ||
(dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC)) == 0) {
- netdev_info(bp->dev, "cannot form TC key: used_keys = 0x%x",
+ netdev_info(bp->dev, "cannot form TC key: used_keys = 0x%x\n",
dissector->used_keys);
return -EOPNOTSUPP;
}
@@ -508,7 +508,7 @@ static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp,
rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc)
- netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+ netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
return rc;
}
@@ -841,7 +841,7 @@ static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
resp = bnxt_get_hwrm_resp_addr(bp, &req);
*decap_filter_handle = resp->decap_filter_id;
} else {
- netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+ netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
}
mutex_unlock(&bp->hwrm_cmd_lock);
@@ -859,7 +859,7 @@ static int hwrm_cfa_decap_filter_free(struct bnxt *bp,
rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc)
- netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+ netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
return rc;
}
@@ -906,7 +906,7 @@ static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
resp = bnxt_get_hwrm_resp_addr(bp, &req);
*encap_record_handle = resp->encap_record_id;
} else {
- netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+ netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
}
mutex_unlock(&bp->hwrm_cmd_lock);
@@ -924,7 +924,7 @@ static int hwrm_cfa_encap_record_free(struct bnxt *bp,
rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc)
- netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+ netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
return rc;
}
@@ -943,7 +943,7 @@ static int bnxt_tc_put_l2_node(struct bnxt *bp,
tc_info->l2_ht_params);
if (rc)
netdev_err(bp->dev,
- "Error: %s: rhashtable_remove_fast: %d",
+ "Error: %s: rhashtable_remove_fast: %d\n",
__func__, rc);
kfree_rcu(l2_node, rcu);
}
@@ -972,7 +972,7 @@ bnxt_tc_get_l2_node(struct bnxt *bp, struct rhashtable *l2_table,
if (rc) {
kfree_rcu(l2_node, rcu);
netdev_err(bp->dev,
- "Error: %s: rhashtable_insert_fast: %d",
+ "Error: %s: rhashtable_insert_fast: %d\n",
__func__, rc);
return NULL;
}
@@ -1031,7 +1031,7 @@ static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow)
if ((flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) &&
(flow->l4_key.ip_proto != IPPROTO_TCP &&
flow->l4_key.ip_proto != IPPROTO_UDP)) {
- netdev_info(bp->dev, "Cannot offload non-TCP/UDP (%d) ports",
+ netdev_info(bp->dev, "Cannot offload non-TCP/UDP (%d) ports\n",
flow->l4_key.ip_proto);
return false;
}
@@ -1088,7 +1088,7 @@ static int bnxt_tc_put_tunnel_node(struct bnxt *bp,
rc = rhashtable_remove_fast(tunnel_table, &tunnel_node->node,
*ht_params);
if (rc) {
- netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc);
+ netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc);
rc = -1;
}
kfree_rcu(tunnel_node, rcu);
@@ -1129,7 +1129,7 @@ bnxt_tc_get_tunnel_node(struct bnxt *bp, struct rhashtable *tunnel_table,
tunnel_node->refcount++;
return tunnel_node;
err:
- netdev_info(bp->dev, "error rc=%d", rc);
+ netdev_info(bp->dev, "error rc=%d\n", rc);
return NULL;
}
@@ -1187,7 +1187,7 @@ static void bnxt_tc_put_decap_l2_node(struct bnxt *bp,
&decap_l2_node->node,
tc_info->decap_l2_ht_params);
if (rc)
- netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc);
+ netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc);
kfree_rcu(decap_l2_node, rcu);
}
}
@@ -1227,7 +1227,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp,
rt = ip_route_output_key(dev_net(real_dst_dev), &flow);
if (IS_ERR(rt)) {
- netdev_info(bp->dev, "no route to %pI4b", &flow.daddr);
+ netdev_info(bp->dev, "no route to %pI4b\n", &flow.daddr);
return -EOPNOTSUPP;
}
@@ -1241,7 +1241,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp,
if (vlan->real_dev != real_dst_dev) {
netdev_info(bp->dev,
- "dst_dev(%s) doesn't use PF-if(%s)",
+ "dst_dev(%s) doesn't use PF-if(%s)\n",
netdev_name(dst_dev),
netdev_name(real_dst_dev));
rc = -EOPNOTSUPP;
@@ -1253,7 +1253,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp,
#endif
} else if (dst_dev != real_dst_dev) {
netdev_info(bp->dev,
- "dst_dev(%s) for %pI4b is not PF-if(%s)",
+ "dst_dev(%s) for %pI4b is not PF-if(%s)\n",
netdev_name(dst_dev), &flow.daddr,
netdev_name(real_dst_dev));
rc = -EOPNOTSUPP;
@@ -1262,7 +1262,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp,
nbr = dst_neigh_lookup(&rt->dst, &flow.daddr);
if (!nbr) {
- netdev_info(bp->dev, "can't lookup neighbor for %pI4b",
+ netdev_info(bp->dev, "can't lookup neighbor for %pI4b\n",
&flow.daddr);
rc = -EOPNOTSUPP;
goto put_rt;
@@ -1472,7 +1472,7 @@ static int __bnxt_tc_del_flow(struct bnxt *bp,
rc = rhashtable_remove_fast(&tc_info->flow_table, &flow_node->node,
tc_info->flow_ht_params);
if (rc)
- netdev_err(bp->dev, "Error: %s: rhashtable_remove_fast rc=%d",
+ netdev_err(bp->dev, "Error: %s: rhashtable_remove_fast rc=%d\n",
__func__, rc);
kfree_rcu(flow_node, rcu);
@@ -1587,7 +1587,7 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
free_node:
kfree_rcu(new_node, rcu);
done:
- netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d",
+ netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d\n",
__func__, tc_flow_cmd->cookie, rc);
return rc;
}
@@ -1700,7 +1700,7 @@ bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
le64_to_cpu(resp_bytes[i]);
}
} else {
- netdev_info(bp->dev, "error rc=%d", rc);
+ netdev_info(bp->dev, "error rc=%d\n", rc);
}
mutex_unlock(&bp->hwrm_cmd_lock);
@@ -1970,7 +1970,7 @@ static int bnxt_tc_indr_block_event(struct notifier_block *nb,
bp);
if (rc)
netdev_info(bp->dev,
- "Failed to register indirect blk: dev: %s",
+ "Failed to register indirect blk: dev: %s\n",
netdev->name);
break;
case NETDEV_UNREGISTER:
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
index b010b34..6f2faf8 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
@@ -43,7 +43,7 @@ static int hwrm_cfa_vfr_alloc(struct bnxt *bp, u16 vf_idx,
netdev_dbg(bp->dev, "tx_cfa_action=0x%x, rx_cfa_code=0x%x",
*tx_cfa_action, *rx_cfa_code);
} else {
- netdev_info(bp->dev, "%s error rc=%d", __func__, rc);
+ netdev_info(bp->dev, "%s error rc=%d\n", __func__, rc);
}
mutex_unlock(&bp->hwrm_cmd_lock);
@@ -60,7 +60,7 @@ static int hwrm_cfa_vfr_free(struct bnxt *bp, u16 vf_idx)
rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc)
- netdev_info(bp->dev, "%s error rc=%d", __func__, rc);
+ netdev_info(bp->dev, "%s error rc=%d\n", __func__, rc);
return rc;
}
@@ -465,7 +465,7 @@ static int bnxt_vf_reps_create(struct bnxt *bp)
return 0;
err:
- netdev_info(bp->dev, "%s error=%d", __func__, rc);
+ netdev_info(bp->dev, "%s error=%d\n", __func__, rc);
kfree(cfa_code_map);
__bnxt_vf_reps_destroy(bp);
return rc;
@@ -488,7 +488,7 @@ int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode,
mutex_lock(&bp->sriov_lock);
if (bp->eswitch_mode == mode) {
- netdev_info(bp->dev, "already in %s eswitch mode",
+ netdev_info(bp->dev, "already in %s eswitch mode\n",
mode == DEVLINK_ESWITCH_MODE_LEGACY ?
"legacy" : "switchdev");
rc = -EINVAL;
@@ -508,7 +508,7 @@ int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode,
}
if (pci_num_vf(bp->pdev) == 0) {
- netdev_info(bp->dev, "Enable VFs before setting switchdev mode");
+ netdev_info(bp->dev, "Enable VFs before setting switchdev mode\n");
rc = -EPERM;
goto done;
}
diff --git a/drivers/net/ethernet/broadcom/cnic_defs.h b/drivers/net/ethernet/broadcom/cnic_defs.h
index b384997..99e2c6d 100644
--- a/drivers/net/ethernet/broadcom/cnic_defs.h
+++ b/drivers/net/ethernet/broadcom/cnic_defs.h
@@ -543,13 +543,13 @@ struct l4_kwq_update_pg {
#define L4_KWQ_UPDATE_PG_RESERVERD2_SHIFT 2
#endif
#if defined(__BIG_ENDIAN)
- u16 reserverd3;
+ u16 reserved3;
u8 da0;
u8 da1;
#elif defined(__LITTLE_ENDIAN)
u8 da1;
u8 da0;
- u16 reserverd3;
+ u16 reserved3;
#endif
#if defined(__BIG_ENDIAN)
u8 da2;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index e50a153..1d678be 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -94,12 +94,6 @@ static inline void dmadesc_set_length_status(struct bcmgenet_priv *priv,
bcmgenet_writel(value, d + DMA_DESC_LENGTH_STATUS);
}
-static inline u32 dmadesc_get_length_status(struct bcmgenet_priv *priv,
- void __iomem *d)
-{
- return bcmgenet_readl(d + DMA_DESC_LENGTH_STATUS);
-}
-
static inline void dmadesc_set_addr(struct bcmgenet_priv *priv,
void __iomem *d,
dma_addr_t addr)
@@ -508,61 +502,6 @@ static int bcmgenet_set_link_ksettings(struct net_device *dev,
return phy_ethtool_ksettings_set(dev->phydev, cmd);
}
-static void bcmgenet_set_rx_csum(struct net_device *dev,
- netdev_features_t wanted)
-{
- struct bcmgenet_priv *priv = netdev_priv(dev);
- u32 rbuf_chk_ctrl;
- bool rx_csum_en;
-
- rx_csum_en = !!(wanted & NETIF_F_RXCSUM);
-
- rbuf_chk_ctrl = bcmgenet_rbuf_readl(priv, RBUF_CHK_CTRL);
-
- /* enable rx checksumming */
- if (rx_csum_en)
- rbuf_chk_ctrl |= RBUF_RXCHK_EN | RBUF_L3_PARSE_DIS;
- else
- rbuf_chk_ctrl &= ~RBUF_RXCHK_EN;
- priv->desc_rxchk_en = rx_csum_en;
-
- /* If UniMAC forwards CRC, we need to skip over it to get
- * a valid CHK bit to be set in the per-packet status word
- */
- if (rx_csum_en && priv->crc_fwd_en)
- rbuf_chk_ctrl |= RBUF_SKIP_FCS;
- else
- rbuf_chk_ctrl &= ~RBUF_SKIP_FCS;
-
- bcmgenet_rbuf_writel(priv, rbuf_chk_ctrl, RBUF_CHK_CTRL);
-}
-
-static void bcmgenet_set_tx_csum(struct net_device *dev,
- netdev_features_t wanted)
-{
- struct bcmgenet_priv *priv = netdev_priv(dev);
- bool desc_64b_en;
- u32 tbuf_ctrl, rbuf_ctrl;
-
- tbuf_ctrl = bcmgenet_tbuf_ctrl_get(priv);
- rbuf_ctrl = bcmgenet_rbuf_readl(priv, RBUF_CTRL);
-
- desc_64b_en = !!(wanted & NETIF_F_HW_CSUM);
-
- /* enable 64 bytes descriptor in both directions (RBUF and TBUF) */
- if (desc_64b_en) {
- tbuf_ctrl |= RBUF_64B_EN;
- rbuf_ctrl |= RBUF_64B_EN;
- } else {
- tbuf_ctrl &= ~RBUF_64B_EN;
- rbuf_ctrl &= ~RBUF_64B_EN;
- }
- priv->desc_64b_en = desc_64b_en;
-
- bcmgenet_tbuf_ctrl_set(priv, tbuf_ctrl);
- bcmgenet_rbuf_writel(priv, rbuf_ctrl, RBUF_CTRL);
-}
-
static int bcmgenet_set_features(struct net_device *dev,
netdev_features_t features)
{
@@ -578,9 +517,6 @@ static int bcmgenet_set_features(struct net_device *dev,
reg = bcmgenet_umac_readl(priv, UMAC_CMD);
priv->crc_fwd_en = !!(reg & CMD_CRC_FWD);
- bcmgenet_set_tx_csum(dev, features);
- bcmgenet_set_rx_csum(dev, features);
-
clk_disable_unprepare(priv->clk);
return ret;
@@ -1475,8 +1411,8 @@ static void bcmgenet_tx_reclaim_all(struct net_device *dev)
/* Reallocate the SKB to put enough headroom in front of it and insert
* the transmit checksum offsets in the descriptors
*/
-static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev,
- struct sk_buff *skb)
+static struct sk_buff *bcmgenet_add_tsb(struct net_device *dev,
+ struct sk_buff *skb)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
struct status_64 *status = NULL;
@@ -1590,13 +1526,11 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
*/
GENET_CB(skb)->bytes_sent = skb->len;
- /* set the SKB transmit checksum */
- if (priv->desc_64b_en) {
- skb = bcmgenet_put_tx_csum(dev, skb);
- if (!skb) {
- ret = NETDEV_TX_OK;
- goto out;
- }
+ /* add the Transmit Status Block */
+ skb = bcmgenet_add_tsb(dev, skb);
+ if (!skb) {
+ ret = NETDEV_TX_OK;
+ goto out;
}
for (i = 0; i <= nr_frags; i++) {
@@ -1775,6 +1709,9 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
while ((rxpktprocessed < rxpkttoprocess) &&
(rxpktprocessed < budget)) {
+ struct status_64 *status;
+ __be16 rx_csum;
+
cb = &priv->rx_cbs[ring->read_ptr];
skb = bcmgenet_rx_refill(priv, cb);
@@ -1783,20 +1720,12 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
goto next;
}
- if (!priv->desc_64b_en) {
- dma_length_status =
- dmadesc_get_length_status(priv, cb->bd_addr);
- } else {
- struct status_64 *status;
- __be16 rx_csum;
-
- status = (struct status_64 *)skb->data;
- dma_length_status = status->length_status;
+ status = (struct status_64 *)skb->data;
+ dma_length_status = status->length_status;
+ if (dev->features & NETIF_F_RXCSUM) {
rx_csum = (__force __be16)(status->rx_csum & 0xffff);
- if (priv->desc_rxchk_en) {
- skb->csum = (__force __wsum)ntohs(rx_csum);
- skb->ip_summed = CHECKSUM_COMPLETE;
- }
+ skb->csum = (__force __wsum)ntohs(rx_csum);
+ skb->ip_summed = CHECKSUM_COMPLETE;
}
/* DMA flags and length are still valid no matter how
@@ -1840,14 +1769,10 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
} /* error packet */
skb_put(skb, len);
- if (priv->desc_64b_en) {
- skb_pull(skb, 64);
- len -= 64;
- }
- /* remove hardware 2bytes added for IP alignment */
- skb_pull(skb, 2);
- len -= 2;
+ /* remove RSB and hardware 2bytes added for IP alignment */
+ skb_pull(skb, 66);
+ len -= 66;
if (priv->crc_fwd_en) {
skb_trim(skb, len - ETH_FCS_LEN);
@@ -1965,6 +1890,8 @@ static void umac_enable_set(struct bcmgenet_priv *priv, u32 mask, bool enable)
u32 reg;
reg = bcmgenet_umac_readl(priv, UMAC_CMD);
+ if (reg & CMD_SW_RESET)
+ return;
if (enable)
reg |= mask;
else
@@ -1984,11 +1911,9 @@ static void reset_umac(struct bcmgenet_priv *priv)
bcmgenet_rbuf_ctrl_set(priv, 0);
udelay(10);
- /* disable MAC while updating its registers */
- bcmgenet_umac_writel(priv, 0, UMAC_CMD);
-
- /* issue soft reset with (rg)mii loopback to ensure a stable rxclk */
- bcmgenet_umac_writel(priv, CMD_SW_RESET | CMD_LCL_LOOP_EN, UMAC_CMD);
+ /* issue soft reset and disable MAC while updating its registers */
+ bcmgenet_umac_writel(priv, CMD_SW_RESET, UMAC_CMD);
+ udelay(2);
}
static void bcmgenet_intr_disable(struct bcmgenet_priv *priv)
@@ -2038,11 +1963,28 @@ static void init_umac(struct bcmgenet_priv *priv)
bcmgenet_umac_writel(priv, ENET_MAX_MTU_SIZE, UMAC_MAX_FRAME_LEN);
- /* init rx registers, enable ip header optimization */
+ /* init tx registers, enable TSB */
+ reg = bcmgenet_tbuf_ctrl_get(priv);
+ reg |= TBUF_64B_EN;
+ bcmgenet_tbuf_ctrl_set(priv, reg);
+
+ /* init rx registers, enable ip header optimization and RSB */
reg = bcmgenet_rbuf_readl(priv, RBUF_CTRL);
- reg |= RBUF_ALIGN_2B;
+ reg |= RBUF_ALIGN_2B | RBUF_64B_EN;
bcmgenet_rbuf_writel(priv, reg, RBUF_CTRL);
+ /* enable rx checksumming */
+ reg = bcmgenet_rbuf_readl(priv, RBUF_CHK_CTRL);
+ reg |= RBUF_RXCHK_EN | RBUF_L3_PARSE_DIS;
+ /* If UniMAC forwards CRC, we need to skip over it to get
+ * a valid CHK bit to be set in the per-packet status word
+ */
+ if (priv->crc_fwd_en)
+ reg |= RBUF_SKIP_FCS;
+ else
+ reg &= ~RBUF_SKIP_FCS;
+ bcmgenet_rbuf_writel(priv, reg, RBUF_CHK_CTRL);
+
if (!GENET_IS_V1(priv) && !GENET_IS_V2(priv))
bcmgenet_rbuf_writel(priv, 1, RBUF_TBUF_SIZE_CTRL);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 61a6fe9..daf8fb2 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -273,6 +273,7 @@ struct bcmgenet_mib_counters {
#define RBUF_FLTR_LEN_SHIFT 8
#define TBUF_CTRL 0x00
+#define TBUF_64B_EN (1 << 0)
#define TBUF_BP_MC 0x0C
#define TBUF_ENERGY_CTRL 0x14
#define TBUF_EEE_EN (1 << 0)
@@ -662,8 +663,6 @@ struct bcmgenet_priv {
unsigned int irq0_stat;
/* HW descriptors/checksum variables */
- bool desc_64b_en;
- bool desc_rxchk_en;
bool crc_fwd_en;
u32 dma_max_burst_length;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
index ea20d94..c9a4369 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
@@ -132,8 +132,12 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv,
return -EINVAL;
}
- /* disable RX */
+ /* Can't suspend with WoL if MAC is still in reset */
reg = bcmgenet_umac_readl(priv, UMAC_CMD);
+ if (reg & CMD_SW_RESET)
+ reg &= ~CMD_SW_RESET;
+
+ /* disable RX */
reg &= ~CMD_RX_EN;
bcmgenet_umac_writel(priv, reg, UMAC_CMD);
mdelay(10);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 6392a25..b5930f8 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -95,6 +95,12 @@ void bcmgenet_mii_setup(struct net_device *dev)
CMD_HD_EN |
CMD_RX_PAUSE_IGNORE | CMD_TX_PAUSE_IGNORE);
reg |= cmd_bits;
+ if (reg & CMD_SW_RESET) {
+ reg &= ~CMD_SW_RESET;
+ bcmgenet_umac_writel(priv, reg, UMAC_CMD);
+ udelay(2);
+ reg |= CMD_TX_EN | CMD_RX_EN;
+ }
bcmgenet_umac_writel(priv, reg, UMAC_CMD);
} else {
/* done if nothing has changed */
@@ -181,38 +187,8 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
const char *phy_name = NULL;
u32 id_mode_dis = 0;
u32 port_ctrl;
- int bmcr = -1;
- int ret;
u32 reg;
- /* MAC clocking workaround during reset of umac state machines */
- reg = bcmgenet_umac_readl(priv, UMAC_CMD);
- if (reg & CMD_SW_RESET) {
- /* An MII PHY must be isolated to prevent TXC contention */
- if (priv->phy_interface == PHY_INTERFACE_MODE_MII) {
- ret = phy_read(phydev, MII_BMCR);
- if (ret >= 0) {
- bmcr = ret;
- ret = phy_write(phydev, MII_BMCR,
- bmcr | BMCR_ISOLATE);
- }
- if (ret) {
- netdev_err(dev, "failed to isolate PHY\n");
- return ret;
- }
- }
- /* Switch MAC clocking to RGMII generated clock */
- bcmgenet_sys_writel(priv, PORT_MODE_EXT_GPHY, SYS_PORT_CTRL);
- /* Ensure 5 clks with Rx disabled
- * followed by 5 clks with Reset asserted
- */
- udelay(4);
- reg &= ~(CMD_SW_RESET | CMD_LCL_LOOP_EN);
- bcmgenet_umac_writel(priv, reg, UMAC_CMD);
- /* Ensure 5 more clocks before Rx is enabled */
- udelay(2);
- }
-
switch (priv->phy_interface) {
case PHY_INTERFACE_MODE_INTERNAL:
phy_name = "internal PHY";
@@ -282,10 +258,6 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
bcmgenet_sys_writel(priv, port_ctrl, SYS_PORT_CTRL);
- /* Restore the MII PHY after isolation */
- if (bmcr >= 0)
- phy_write(phydev, MII_BMCR, bmcr);
-
priv->ext_phy = !priv->internal_phy &&
(priv->phy_interface != PHY_INTERFACE_MODE_MOCA);
@@ -294,6 +266,7 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
*/
if (priv->ext_phy) {
reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL);
+ reg &= ~ID_MODE_DIS;
reg |= id_mode_dis;
if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv))
reg |= RGMII_MODE_EN_V123;
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index dbf7070..a3f0f27 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -652,6 +652,7 @@
#define MACB_CAPS_GEM_HAS_PTP 0x00000040
#define MACB_CAPS_BD_RD_PREFETCH 0x00000080
#define MACB_CAPS_NEEDS_RSTONUBR 0x00000100
+#define MACB_CAPS_MACB_IS_EMAC 0x08000000
#define MACB_CAPS_FIFO_MODE 0x10000000
#define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000
#define MACB_CAPS_SG_DISABLED 0x40000000
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 4508f0d..2c28da1 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -572,8 +572,21 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode,
old_ctrl = ctrl = macb_or_gem_readl(bp, NCFGR);
/* Clear all the bits we might set later */
- ctrl &= ~(GEM_BIT(GBE) | MACB_BIT(SPD) | MACB_BIT(FD) | MACB_BIT(PAE) |
- GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL));
+ ctrl &= ~(MACB_BIT(SPD) | MACB_BIT(FD) | MACB_BIT(PAE));
+
+ if (bp->caps & MACB_CAPS_MACB_IS_EMAC) {
+ if (state->interface == PHY_INTERFACE_MODE_RMII)
+ ctrl |= MACB_BIT(RM9200_RMII);
+ } else {
+ ctrl &= ~(GEM_BIT(GBE) | GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL));
+
+ /* We do not support MLO_PAUSE_RX yet */
+ if (state->pause & MLO_PAUSE_TX)
+ ctrl |= MACB_BIT(PAE);
+
+ if (state->interface == PHY_INTERFACE_MODE_SGMII)
+ ctrl |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL);
+ }
if (state->speed == SPEED_1000)
ctrl |= GEM_BIT(GBE);
@@ -583,13 +596,6 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode,
if (state->duplex)
ctrl |= MACB_BIT(FD);
- /* We do not support MLO_PAUSE_RX yet */
- if (state->pause & MLO_PAUSE_TX)
- ctrl |= MACB_BIT(PAE);
-
- if (state->interface == PHY_INTERFACE_MODE_SGMII)
- ctrl |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL);
-
/* Apply the new configuration, if any */
if (old_ctrl ^ ctrl)
macb_or_gem_writel(bp, NCFGR, ctrl);
@@ -608,9 +614,10 @@ static void macb_mac_link_down(struct phylink_config *config, unsigned int mode,
unsigned int q;
u32 ctrl;
- for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
- queue_writel(queue, IDR,
- bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP));
+ if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC))
+ for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
+ queue_writel(queue, IDR,
+ bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP));
/* Disable Rx and Tx */
ctrl = macb_readl(bp, NCR) & ~(MACB_BIT(RE) | MACB_BIT(TE));
@@ -627,17 +634,19 @@ static void macb_mac_link_up(struct phylink_config *config, unsigned int mode,
struct macb_queue *queue;
unsigned int q;
- macb_set_tx_clk(bp->tx_clk, bp->speed, ndev);
+ if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) {
+ macb_set_tx_clk(bp->tx_clk, bp->speed, ndev);
- /* Initialize rings & buffers as clearing MACB_BIT(TE) in link down
- * cleared the pipeline and control registers.
- */
- bp->macbgem_ops.mog_init_rings(bp);
- macb_init_buffers(bp);
+ /* Initialize rings & buffers as clearing MACB_BIT(TE) in link down
+ * cleared the pipeline and control registers.
+ */
+ bp->macbgem_ops.mog_init_rings(bp);
+ macb_init_buffers(bp);
- for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
- queue_writel(queue, IER,
- bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP));
+ for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
+ queue_writel(queue, IER,
+ bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP));
+ }
/* Enable Rx and Tx */
macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(RE) | MACB_BIT(TE));
@@ -3790,6 +3799,10 @@ static int at91ether_open(struct net_device *dev)
u32 ctl;
int ret;
+ ret = pm_runtime_get_sync(&lp->pdev->dev);
+ if (ret < 0)
+ return ret;
+
/* Clear internal statistics */
ctl = macb_readl(lp, NCR);
macb_writel(lp, NCR, ctl | MACB_BIT(CLRSTAT));
@@ -3854,7 +3867,7 @@ static int at91ether_close(struct net_device *dev)
q->rx_buffers, q->rx_buffers_dma);
q->rx_buffers = NULL;
- return 0;
+ return pm_runtime_put(&lp->pdev->dev);
}
/* Transmit packet */
@@ -4037,7 +4050,6 @@ static int at91ether_init(struct platform_device *pdev)
struct net_device *dev = platform_get_drvdata(pdev);
struct macb *bp = netdev_priv(dev);
int err;
- u32 reg;
bp->queues[0].bp = bp;
@@ -4051,11 +4063,7 @@ static int at91ether_init(struct platform_device *pdev)
macb_writel(bp, NCR, 0);
- reg = MACB_BF(CLK, MACB_CLK_DIV32) | MACB_BIT(BIG);
- if (bp->phy_interface == PHY_INTERFACE_MODE_RMII)
- reg |= MACB_BIT(RM9200_RMII);
-
- macb_writel(bp, NCFGR, reg);
+ macb_writel(bp, NCFGR, MACB_BF(CLK, MACB_CLK_DIV32) | MACB_BIT(BIG));
return 0;
}
@@ -4214,7 +4222,7 @@ static const struct macb_config sama5d4_config = {
};
static const struct macb_config emac_config = {
- .caps = MACB_CAPS_NEEDS_RSTONUBR,
+ .caps = MACB_CAPS_NEEDS_RSTONUBR | MACB_CAPS_MACB_IS_EMAC,
.clk_init = at91ether_clk_init,
.init = at91ether_init,
};
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 17a4110..8ff28ed 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -410,10 +410,19 @@ void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
lmac = &bgx->lmac[lmacid];
cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
- if (enable)
+ if (enable) {
cfg |= CMR_PKT_RX_EN | CMR_PKT_TX_EN;
- else
+
+ /* enable TX FIFO Underflow interrupt */
+ bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1S,
+ GMI_TXX_INT_UNDFLW);
+ } else {
cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN);
+
+ /* Disable TX FIFO Underflow interrupt */
+ bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1C,
+ GMI_TXX_INT_UNDFLW);
+ }
bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
if (bgx->is_rgx)
@@ -1535,6 +1544,48 @@ static int bgx_init_phy(struct bgx *bgx)
return bgx_init_of_phy(bgx);
}
+static irqreturn_t bgx_intr_handler(int irq, void *data)
+{
+ struct bgx *bgx = (struct bgx *)data;
+ u64 status, val;
+ int lmac;
+
+ for (lmac = 0; lmac < bgx->lmac_count; lmac++) {
+ status = bgx_reg_read(bgx, lmac, BGX_GMP_GMI_TXX_INT);
+ if (status & GMI_TXX_INT_UNDFLW) {
+ pci_err(bgx->pdev, "BGX%d lmac%d UNDFLW\n",
+ bgx->bgx_id, lmac);
+ val = bgx_reg_read(bgx, lmac, BGX_CMRX_CFG);
+ val &= ~CMR_EN;
+ bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val);
+ val |= CMR_EN;
+ bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val);
+ }
+ /* clear interrupts */
+ bgx_reg_write(bgx, lmac, BGX_GMP_GMI_TXX_INT, status);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void bgx_register_intr(struct pci_dev *pdev)
+{
+ struct bgx *bgx = pci_get_drvdata(pdev);
+ int ret;
+
+ ret = pci_alloc_irq_vectors(pdev, BGX_LMAC_VEC_OFFSET,
+ BGX_LMAC_VEC_OFFSET, PCI_IRQ_ALL_TYPES);
+ if (ret < 0) {
+ pci_err(pdev, "Req for #%d msix vectors failed\n",
+ BGX_LMAC_VEC_OFFSET);
+ return;
+ }
+ ret = pci_request_irq(pdev, GMPX_GMI_TX_INT, bgx_intr_handler, NULL,
+ bgx, "BGX%d", bgx->bgx_id);
+ if (ret)
+ pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx);
+}
+
static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int err;
@@ -1550,7 +1601,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pci_set_drvdata(pdev, bgx);
- err = pci_enable_device(pdev);
+ err = pcim_enable_device(pdev);
if (err) {
dev_err(dev, "Failed to enable PCI device\n");
pci_set_drvdata(pdev, NULL);
@@ -1604,6 +1655,8 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
bgx_init_hw(bgx);
+ bgx_register_intr(pdev);
+
/* Enable all LMACs */
for (lmac = 0; lmac < bgx->lmac_count; lmac++) {
err = bgx_lmac_enable(bgx, lmac);
@@ -1620,6 +1673,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
err_enable:
bgx_vnic[bgx->bgx_id] = NULL;
+ pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx);
err_release_regions:
pci_release_regions(pdev);
err_disable_device:
@@ -1637,6 +1691,8 @@ static void bgx_remove(struct pci_dev *pdev)
for (lmac = 0; lmac < bgx->lmac_count; lmac++)
bgx_lmac_disable(bgx, lmac);
+ pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx);
+
bgx_vnic[bgx->bgx_id] = NULL;
pci_release_regions(pdev);
pci_disable_device(pdev);
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 2588870..cdea493 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -180,6 +180,15 @@
#define BGX_GMP_GMI_TXX_BURST 0x38228
#define BGX_GMP_GMI_TXX_MIN_PKT 0x38240
#define BGX_GMP_GMI_TXX_SGMII_CTL 0x38300
+#define BGX_GMP_GMI_TXX_INT 0x38500
+#define BGX_GMP_GMI_TXX_INT_W1S 0x38508
+#define BGX_GMP_GMI_TXX_INT_ENA_W1C 0x38510
+#define BGX_GMP_GMI_TXX_INT_ENA_W1S 0x38518
+#define GMI_TXX_INT_PTP_LOST BIT_ULL(4)
+#define GMI_TXX_INT_LATE_COL BIT_ULL(3)
+#define GMI_TXX_INT_XSDEF BIT_ULL(2)
+#define GMI_TXX_INT_XSCOL BIT_ULL(1)
+#define GMI_TXX_INT_UNDFLW BIT_ULL(0)
#define BGX_MSIX_VEC_0_29_ADDR 0x400000 /* +(0..29) << 4 */
#define BGX_MSIX_VEC_0_29_CTL 0x400008
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 2a2938b..fc052489 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -902,7 +902,7 @@ void clear_all_filters(struct adapter *adapter)
adapter->tids.tid_tab[i];
if (f && (f->valid || f->pending))
- cxgb4_del_filter(dev, i, &f->fs);
+ cxgb4_del_filter(dev, f->tid, &f->fs);
}
sb = t4_read_reg(adapter, LE_DB_SRVR_START_INDEX_A);
@@ -910,7 +910,7 @@ void clear_all_filters(struct adapter *adapter)
f = (struct filter_entry *)adapter->tids.tid_tab[i];
if (f && (f->valid || f->pending))
- cxgb4_del_filter(dev, i, &f->fs);
+ cxgb4_del_filter(dev, f->tid, &f->fs);
}
}
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 649842a..97f90ed 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -5381,12 +5381,11 @@ static inline bool is_x_10g_port(const struct link_config *lc)
static int cfg_queues(struct adapter *adap)
{
u32 avail_qsets, avail_eth_qsets, avail_uld_qsets;
+ u32 i, n10g = 0, qidx = 0, n1g = 0;
+ u32 ncpus = num_online_cpus();
u32 niqflint, neq, num_ulds;
struct sge *s = &adap->sge;
- u32 i, n10g = 0, qidx = 0;
-#ifndef CONFIG_CHELSIO_T4_DCB
- int q10g = 0;
-#endif
+ u32 q10g = 0, q1g;
/* Reduce memory usage in kdump environment, disable all offload. */
if (is_kdump_kernel() || (is_uld(adap) && t4_uld_mem_alloc(adap))) {
@@ -5424,44 +5423,50 @@ static int cfg_queues(struct adapter *adap)
n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg);
avail_eth_qsets = min_t(u32, avail_qsets, MAX_ETH_QSETS);
+
+ /* We default to 1 queue per non-10G port and up to # of cores queues
+ * per 10G port.
+ */
+ if (n10g)
+ q10g = (avail_eth_qsets - (adap->params.nports - n10g)) / n10g;
+
+ n1g = adap->params.nports - n10g;
#ifdef CONFIG_CHELSIO_T4_DCB
/* For Data Center Bridging support we need to be able to support up
* to 8 Traffic Priorities; each of which will be assigned to its
* own TX Queue in order to prevent Head-Of-Line Blocking.
*/
+ q1g = 8;
if (adap->params.nports * 8 > avail_eth_qsets) {
dev_err(adap->pdev_dev, "DCB avail_eth_qsets=%d < %d!\n",
avail_eth_qsets, adap->params.nports * 8);
return -ENOMEM;
}
- for_each_port(adap, i) {
- struct port_info *pi = adap2pinfo(adap, i);
+ if (adap->params.nports * ncpus < avail_eth_qsets)
+ q10g = max(8U, ncpus);
+ else
+ q10g = max(8U, q10g);
- pi->first_qset = qidx;
- pi->nqsets = is_kdump_kernel() ? 1 : 8;
- qidx += pi->nqsets;
- }
+ while ((q10g * n10g) > (avail_eth_qsets - n1g * q1g))
+ q10g--;
+
#else /* !CONFIG_CHELSIO_T4_DCB */
- /* We default to 1 queue per non-10G port and up to # of cores queues
- * per 10G port.
- */
- if (n10g)
- q10g = (avail_eth_qsets - (adap->params.nports - n10g)) / n10g;
- if (q10g > netif_get_num_default_rss_queues())
- q10g = netif_get_num_default_rss_queues();
-
- if (is_kdump_kernel())
+ q1g = 1;
+ q10g = min(q10g, ncpus);
+#endif /* !CONFIG_CHELSIO_T4_DCB */
+ if (is_kdump_kernel()) {
q10g = 1;
+ q1g = 1;
+ }
for_each_port(adap, i) {
struct port_info *pi = adap2pinfo(adap, i);
pi->first_qset = qidx;
- pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1;
+ pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : q1g;
qidx += pi->nqsets;
}
-#endif /* !CONFIG_CHELSIO_T4_DCB */
s->ethqsets = qidx;
s->max_ethqsets = qidx; /* MSI-X may lower it later */
@@ -5473,7 +5478,7 @@ static int cfg_queues(struct adapter *adap)
* capped by the number of available cores.
*/
num_ulds = adap->num_uld + adap->num_ofld_uld;
- i = min_t(u32, MAX_OFLD_QSETS, num_online_cpus());
+ i = min_t(u32, MAX_OFLD_QSETS, ncpus);
avail_uld_qsets = roundup(i, adap->params.nports);
if (avail_qsets < num_ulds * adap->params.nports) {
adap->params.offload = 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c
index 58a039c..af1f40c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c
@@ -246,6 +246,9 @@ static int cxgb4_ptp_fineadjtime(struct adapter *adapter, s64 delta)
FW_PTP_CMD_PORTID_V(0));
c.retval_len16 = cpu_to_be32(FW_CMD_LEN16_V(sizeof(c) / 16));
c.u.ts.sc = FW_PTP_SC_ADJ_FTIME;
+ c.u.ts.sign = (delta < 0) ? 1 : 0;
+ if (delta < 0)
+ delta = -delta;
c.u.ts.tm = cpu_to_be64(delta);
err = t4_wr_mbox(adapter, adapter->mbox, &c, sizeof(c), NULL);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 97cda50..cab3d17 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -1307,8 +1307,9 @@ static inline void *write_tso_wr(struct adapter *adap, struct sk_buff *skb,
int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *eq,
int maxreclaim)
{
+ unsigned int reclaimed, hw_cidx;
struct sge_txq *q = &eq->q;
- unsigned int reclaimed;
+ int hw_in_use;
if (!q->in_use || !__netif_tx_trylock(eq->txq))
return 0;
@@ -1316,12 +1317,17 @@ int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *eq,
/* Reclaim pending completed TX Descriptors. */
reclaimed = reclaim_completed_tx(adap, &eq->q, maxreclaim, true);
+ hw_cidx = ntohs(READ_ONCE(q->stat->cidx));
+ hw_in_use = q->pidx - hw_cidx;
+ if (hw_in_use < 0)
+ hw_in_use += q->size;
+
/* If the TX Queue is currently stopped and there's now more than half
* the queue available, restart it. Otherwise bail out since the rest
* of what we want do here is with the possibility of shipping any
* currently buffered Coalesced TX Work Request.
*/
- if (netif_tx_queue_stopped(eq->txq) && txq_avail(q) > (q->size / 2)) {
+ if (netif_tx_queue_stopped(eq->txq) && hw_in_use < (q->size / 2)) {
netif_tx_wake_queue(eq->txq);
eq->q.restarts++;
}
@@ -1486,16 +1492,7 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
* has opened up.
*/
eth_txq_stop(q);
-
- /* If we're using the SGE Doorbell Queue Timer facility, we
- * don't need to ask the Firmware to send us Egress Queue CIDX
- * Updates: the Hardware will do this automatically. And
- * since we send the Ingress Queue CIDX Updates to the
- * corresponding Ethernet Response Queue, we'll get them very
- * quickly.
- */
- if (!q->dbqt)
- wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+ wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
}
wr = (void *)&q->q.desc[q->q.pidx];
@@ -1805,16 +1802,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
* has opened up.
*/
eth_txq_stop(txq);
-
- /* If we're using the SGE Doorbell Queue Timer facility, we
- * don't need to ask the Firmware to send us Egress Queue CIDX
- * Updates: the Hardware will do this automatically. And
- * since we send the Ingress Queue CIDX Updates to the
- * corresponding Ethernet Response Queue, we'll get them very
- * quickly.
- */
- if (!txq->dbqt)
- wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+ wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
}
/* Start filling in our Work Request. Note that we do _not_ handle
@@ -3370,26 +3358,6 @@ static void t4_tx_completion_handler(struct sge_rspq *rspq,
}
txq = &s->ethtxq[pi->first_qset + rspq->idx];
-
- /* We've got the Hardware Consumer Index Update in the Egress Update
- * message. If we're using the SGE Doorbell Queue Timer mechanism,
- * these Egress Update messages will be our sole CIDX Updates we get
- * since we don't want to chew up PCIe bandwidth for both Ingress
- * Messages and Status Page writes. However, The code which manages
- * reclaiming successfully DMA'ed TX Work Requests uses the CIDX value
- * stored in the Status Page at the end of the TX Queue. It's easiest
- * to simply copy the CIDX Update value from the Egress Update message
- * to the Status Page. Also note that no Endian issues need to be
- * considered here since both are Big Endian and we're just copying
- * bytes consistently ...
- */
- if (txq->dbqt) {
- struct cpl_sge_egr_update *egr;
-
- egr = (struct cpl_sge_egr_update *)rsp;
- WRITE_ONCE(txq->q.stat->cidx, egr->cidx);
- }
-
t4_sge_eth_txq_egress_update(adapter, txq, -1);
}
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index bbd7b31..ddf60dc 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -2013,10 +2013,10 @@ static int enic_stop(struct net_device *netdev)
napi_disable(&enic->napi[i]);
netif_carrier_off(netdev);
- netif_tx_disable(netdev);
if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX)
for (i = 0; i < enic->wq_count; i++)
napi_disable(&enic->napi[enic_cq_wq(enic, i)]);
+ netif_tx_disable(netdev);
if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic))
enic_dev_del_station_addr(enic);
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 1ea3372..e94ae9b 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1405,6 +1405,8 @@ static struct dm9000_plat_data *dm9000_parse_dt(struct device *dev)
mac_addr = of_get_mac_address(np);
if (!IS_ERR(mac_addr))
ether_addr_copy(pdata->dev_addr, mac_addr);
+ else if (PTR_ERR(mac_addr) == -EPROBE_DEFER)
+ return ERR_CAST(mac_addr);
return pdata;
}
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index fd93d54..ca74a68 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -1,4 +1,5 @@
/* Copyright 2008 - 2016 Freescale Semiconductor Inc.
+ * Copyright 2020 NXP
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -123,7 +124,22 @@ MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms");
#define FSL_QMAN_MAX_OAL 127
/* Default alignment for start of data in an Rx FD */
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+/* aligning data start to 64 avoids DMA transaction splits, unless the buffer
+ * is crossing a 4k page boundary
+ */
+#define DPAA_FD_DATA_ALIGNMENT (fman_has_errata_a050385() ? 64 : 16)
+/* aligning to 256 avoids DMA transaction splits caused by 4k page boundary
+ * crossings; also, all SG fragments except the last must have a size multiple
+ * of 256 to avoid DMA transaction splits
+ */
+#define DPAA_A050385_ALIGN 256
+#define DPAA_FD_RX_DATA_ALIGNMENT (fman_has_errata_a050385() ? \
+ DPAA_A050385_ALIGN : 16)
+#else
#define DPAA_FD_DATA_ALIGNMENT 16
+#define DPAA_FD_RX_DATA_ALIGNMENT DPAA_FD_DATA_ALIGNMENT
+#endif
/* The DPAA requires 256 bytes reserved and mapped for the SGT */
#define DPAA_SGT_SIZE 256
@@ -158,8 +174,13 @@ MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms");
#define DPAA_PARSE_RESULTS_SIZE sizeof(struct fman_prs_result)
#define DPAA_TIME_STAMP_SIZE 8
#define DPAA_HASH_RESULTS_SIZE 8
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+#define DPAA_RX_PRIV_DATA_SIZE (DPAA_A050385_ALIGN - (DPAA_PARSE_RESULTS_SIZE\
+ + DPAA_TIME_STAMP_SIZE + DPAA_HASH_RESULTS_SIZE))
+#else
#define DPAA_RX_PRIV_DATA_SIZE (u16)(DPAA_TX_PRIV_DATA_SIZE + \
dpaa_rx_extra_headroom)
+#endif
#define DPAA_ETH_PCD_RXQ_NUM 128
@@ -180,7 +201,12 @@ static struct dpaa_bp *dpaa_bp_array[BM_MAX_NUM_OF_POOLS];
#define DPAA_BP_RAW_SIZE 4096
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+#define dpaa_bp_size(raw_size) (SKB_WITH_OVERHEAD(raw_size) & \
+ ~(DPAA_A050385_ALIGN - 1))
+#else
#define dpaa_bp_size(raw_size) SKB_WITH_OVERHEAD(raw_size)
+#endif
static int dpaa_max_frm;
@@ -1192,7 +1218,7 @@ static int dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp *bp,
buf_prefix_content.pass_prs_result = true;
buf_prefix_content.pass_hash_result = true;
buf_prefix_content.pass_time_stamp = true;
- buf_prefix_content.data_align = DPAA_FD_DATA_ALIGNMENT;
+ buf_prefix_content.data_align = DPAA_FD_RX_DATA_ALIGNMENT;
rx_p = ¶ms.specific_params.rx_params;
rx_p->err_fqid = errq->fqid;
@@ -1662,6 +1688,8 @@ static u8 rx_csum_offload(const struct dpaa_priv *priv, const struct qm_fd *fd)
return CHECKSUM_NONE;
}
+#define PTR_IS_ALIGNED(x, a) (IS_ALIGNED((unsigned long)(x), (a)))
+
/* Build a linear skb around the received buffer.
* We are guaranteed there is enough room at the end of the data buffer to
* accommodate the shared info area of the skb.
@@ -1733,8 +1761,7 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
sg_addr = qm_sg_addr(&sgt[i]);
sg_vaddr = phys_to_virt(sg_addr);
- WARN_ON(!IS_ALIGNED((unsigned long)sg_vaddr,
- SMP_CACHE_BYTES));
+ WARN_ON(!PTR_IS_ALIGNED(sg_vaddr, SMP_CACHE_BYTES));
dma_unmap_page(priv->rx_dma_dev, sg_addr,
DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
@@ -2022,6 +2049,75 @@ static inline int dpaa_xmit(struct dpaa_priv *priv,
return 0;
}
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+int dpaa_a050385_wa(struct net_device *net_dev, struct sk_buff **s)
+{
+ struct dpaa_priv *priv = netdev_priv(net_dev);
+ struct sk_buff *new_skb, *skb = *s;
+ unsigned char *start, i;
+
+ /* check linear buffer alignment */
+ if (!PTR_IS_ALIGNED(skb->data, DPAA_A050385_ALIGN))
+ goto workaround;
+
+ /* linear buffers just need to have an aligned start */
+ if (!skb_is_nonlinear(skb))
+ return 0;
+
+ /* linear data size for nonlinear skbs needs to be aligned */
+ if (!IS_ALIGNED(skb_headlen(skb), DPAA_A050385_ALIGN))
+ goto workaround;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ /* all fragments need to have aligned start addresses */
+ if (!IS_ALIGNED(skb_frag_off(frag), DPAA_A050385_ALIGN))
+ goto workaround;
+
+ /* all but last fragment need to have aligned sizes */
+ if (!IS_ALIGNED(skb_frag_size(frag), DPAA_A050385_ALIGN) &&
+ (i < skb_shinfo(skb)->nr_frags - 1))
+ goto workaround;
+ }
+
+ return 0;
+
+workaround:
+ /* copy all the skb content into a new linear buffer */
+ new_skb = netdev_alloc_skb(net_dev, skb->len + DPAA_A050385_ALIGN - 1 +
+ priv->tx_headroom);
+ if (!new_skb)
+ return -ENOMEM;
+
+ /* NET_SKB_PAD bytes already reserved, adding up to tx_headroom */
+ skb_reserve(new_skb, priv->tx_headroom - NET_SKB_PAD);
+
+ /* Workaround for DPAA_A050385 requires data start to be aligned */
+ start = PTR_ALIGN(new_skb->data, DPAA_A050385_ALIGN);
+ if (start - new_skb->data != 0)
+ skb_reserve(new_skb, start - new_skb->data);
+
+ skb_put(new_skb, skb->len);
+ skb_copy_bits(skb, 0, new_skb->data, skb->len);
+ skb_copy_header(new_skb, skb);
+ new_skb->dev = skb->dev;
+
+ /* We move the headroom when we align it so we have to reset the
+ * network and transport header offsets relative to the new data
+ * pointer. The checksum offload relies on these offsets.
+ */
+ skb_set_network_header(new_skb, skb_network_offset(skb));
+ skb_set_transport_header(new_skb, skb_transport_offset(skb));
+
+ /* TODO: does timestamping need the result in the old skb? */
+ dev_kfree_skb(skb);
+ *s = new_skb;
+
+ return 0;
+}
+#endif
+
static netdev_tx_t
dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
{
@@ -2068,6 +2164,14 @@ dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
nonlinear = skb_is_nonlinear(skb);
}
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+ if (unlikely(fman_has_errata_a050385())) {
+ if (dpaa_a050385_wa(net_dev, &skb))
+ goto enomem;
+ nonlinear = skb_is_nonlinear(skb);
+ }
+#endif
+
if (nonlinear) {
/* Just create a S/G fd based on the skb */
err = skb_to_sg_fd(priv, skb, &fd);
@@ -2741,9 +2845,7 @@ static inline u16 dpaa_get_headroom(struct dpaa_buffer_layout *bl)
headroom = (u16)(bl->priv_data_size + DPAA_PARSE_RESULTS_SIZE +
DPAA_TIME_STAMP_SIZE + DPAA_HASH_RESULTS_SIZE);
- return DPAA_FD_DATA_ALIGNMENT ? ALIGN(headroom,
- DPAA_FD_DATA_ALIGNMENT) :
- headroom;
+ return ALIGN(headroom, DPAA_FD_DATA_ALIGNMENT);
}
static int dpaa_eth_probe(struct platform_device *pdev)
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 4432a59..23c5fef 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -2529,15 +2529,15 @@ fec_enet_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec)
return -EINVAL;
}
- cycle = fec_enet_us_to_itr_clock(ndev, fep->rx_time_itr);
+ cycle = fec_enet_us_to_itr_clock(ndev, ec->rx_coalesce_usecs);
if (cycle > 0xFFFF) {
dev_err(dev, "Rx coalesced usec exceed hardware limitation\n");
return -EINVAL;
}
- cycle = fec_enet_us_to_itr_clock(ndev, fep->tx_time_itr);
+ cycle = fec_enet_us_to_itr_clock(ndev, ec->tx_coalesce_usecs);
if (cycle > 0xFFFF) {
- dev_err(dev, "Rx coalesced usec exceed hardware limitation\n");
+ dev_err(dev, "Tx coalesced usec exceed hardware limitation\n");
return -EINVAL;
}
diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig
index 0139cb9..3415018 100644
--- a/drivers/net/ethernet/freescale/fman/Kconfig
+++ b/drivers/net/ethernet/freescale/fman/Kconfig
@@ -8,3 +8,31 @@
help
Freescale Data-Path Acceleration Architecture Frame Manager
(FMan) support
+
+config DPAA_ERRATUM_A050385
+ bool
+ depends on ARM64 && FSL_DPAA
+ default y
+ help
+ DPAA FMan erratum A050385 software workaround implementation:
+ align buffers, data start, SG fragment length to avoid FMan DMA
+ splits.
+ FMAN DMA read or writes under heavy traffic load may cause FMAN
+ internal resource leak thus stopping further packet processing.
+ The FMAN internal queue can overflow when FMAN splits single
+ read or write transactions into multiple smaller transactions
+ such that more than 17 AXI transactions are in flight from FMAN
+ to interconnect. When the FMAN internal queue overflows, it can
+ stall further packet processing. The issue can occur with any
+ one of the following three conditions:
+ 1. FMAN AXI transaction crosses 4K address boundary (Errata
+ A010022)
+ 2. FMAN DMA address for an AXI transaction is not 16 byte
+ aligned, i.e. the last 4 bits of an address are non-zero
+ 3. Scatter Gather (SG) frames have more than one SG buffer in
+ the SG list and any one of the buffers, except the last
+ buffer in the SG list has data size that is not a multiple
+ of 16 bytes, i.e., other than 16, 32, 48, 64, etc.
+ With any one of the above three conditions present, there is
+ likelihood of stalled FMAN packet processing, especially under
+ stress with multiple ports injecting line-rate traffic.
diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c
index 934111d..f151d6e 100644
--- a/drivers/net/ethernet/freescale/fman/fman.c
+++ b/drivers/net/ethernet/freescale/fman/fman.c
@@ -1,5 +1,6 @@
/*
* Copyright 2008-2015 Freescale Semiconductor Inc.
+ * Copyright 2020 NXP
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -566,6 +567,10 @@ struct fman_cfg {
u32 qmi_def_tnums_thresh;
};
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+static bool fman_has_err_a050385;
+#endif
+
static irqreturn_t fman_exceptions(struct fman *fman,
enum fman_exceptions exception)
{
@@ -2518,6 +2523,14 @@ struct fman *fman_bind(struct device *fm_dev)
}
EXPORT_SYMBOL(fman_bind);
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+bool fman_has_errata_a050385(void)
+{
+ return fman_has_err_a050385;
+}
+EXPORT_SYMBOL(fman_has_errata_a050385);
+#endif
+
static irqreturn_t fman_err_irq(int irq, void *handle)
{
struct fman *fman = (struct fman *)handle;
@@ -2845,6 +2858,11 @@ static struct fman *read_dts_node(struct platform_device *of_dev)
goto fman_free;
}
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+ fman_has_err_a050385 =
+ of_property_read_bool(fm_node, "fsl,erratum-a050385");
+#endif
+
return fman;
fman_node_put:
diff --git a/drivers/net/ethernet/freescale/fman/fman.h b/drivers/net/ethernet/freescale/fman/fman.h
index 935c317..f2ede13 100644
--- a/drivers/net/ethernet/freescale/fman/fman.h
+++ b/drivers/net/ethernet/freescale/fman/fman.h
@@ -1,5 +1,6 @@
/*
* Copyright 2008-2015 Freescale Semiconductor Inc.
+ * Copyright 2020 NXP
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -398,6 +399,10 @@ u16 fman_get_max_frm(void);
int fman_get_rx_extra_headroom(void);
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+bool fman_has_errata_a050385(void);
+#endif
+
struct fman *fman_bind(struct device *dev);
#endif /* __FM_H */
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c
index e190187..0d2b4ab 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.c
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.c
@@ -782,7 +782,7 @@ int memac_adjust_link(struct fman_mac *memac, u16 speed)
/* Set full duplex */
tmp &= ~IF_MODE_HD;
- if (memac->phy_if == PHY_INTERFACE_MODE_RGMII) {
+ if (phy_interface_mode_is_rgmii(memac->phy_if)) {
/* Configure RGMII in manual mode */
tmp &= ~IF_MODE_RGMII_AUTO;
tmp &= ~IF_MODE_RGMII_SP_MASK;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index 1b03139..d87158a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -46,6 +46,7 @@ enum HCLGE_MBX_OPCODE {
HCLGE_MBX_PUSH_VLAN_INFO, /* (PF -> VF) push port base vlan */
HCLGE_MBX_GET_MEDIA_TYPE, /* (VF -> PF) get media type */
HCLGE_MBX_PUSH_PROMISC_INFO, /* (PF -> VF) push vf promisc info */
+ HCLGE_MBX_VF_UNINIT, /* (VF -> PF) vf is unintializing */
HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf flr status */
HCLGE_MBX_PUSH_LINK_STATUS, /* (M7 -> PF) get port link status */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index acb796c..a7f40aa 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1711,7 +1711,7 @@ static int hns3_setup_tc(struct net_device *netdev, void *type_data)
netif_dbg(h, drv, netdev, "setup tc: num_tc=%u\n", tc);
return (kinfo->dcb_ops && kinfo->dcb_ops->setup_tc) ?
- kinfo->dcb_ops->setup_tc(h, tc, prio_tc) : -EOPNOTSUPP;
+ kinfo->dcb_ops->setup_tc(h, tc ? tc : 1, prio_tc) : -EOPNOTSUPP;
}
static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index ec5f6ee..d3b0cd7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2446,10 +2446,12 @@ static int hclge_cfg_mac_speed_dup_hw(struct hclge_dev *hdev, int speed,
int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex)
{
+ struct hclge_mac *mac = &hdev->hw.mac;
int ret;
duplex = hclge_check_speed_dup(duplex, speed);
- if (hdev->hw.mac.speed == speed && hdev->hw.mac.duplex == duplex)
+ if (!mac->support_autoneg && mac->speed == speed &&
+ mac->duplex == duplex)
return 0;
ret = hclge_cfg_mac_speed_dup_hw(hdev, speed, duplex);
@@ -6113,6 +6115,9 @@ static int hclge_get_all_rules(struct hnae3_handle *handle,
static void hclge_fd_get_flow_tuples(const struct flow_keys *fkeys,
struct hclge_fd_rule_tuples *tuples)
{
+#define flow_ip6_src fkeys->addrs.v6addrs.src.in6_u.u6_addr32
+#define flow_ip6_dst fkeys->addrs.v6addrs.dst.in6_u.u6_addr32
+
tuples->ether_proto = be16_to_cpu(fkeys->basic.n_proto);
tuples->ip_proto = fkeys->basic.ip_proto;
tuples->dst_port = be16_to_cpu(fkeys->ports.dst);
@@ -6121,12 +6126,12 @@ static void hclge_fd_get_flow_tuples(const struct flow_keys *fkeys,
tuples->src_ip[3] = be32_to_cpu(fkeys->addrs.v4addrs.src);
tuples->dst_ip[3] = be32_to_cpu(fkeys->addrs.v4addrs.dst);
} else {
- memcpy(tuples->src_ip,
- fkeys->addrs.v6addrs.src.in6_u.u6_addr32,
- sizeof(tuples->src_ip));
- memcpy(tuples->dst_ip,
- fkeys->addrs.v6addrs.dst.in6_u.u6_addr32,
- sizeof(tuples->dst_ip));
+ int i;
+
+ for (i = 0; i < IPV6_SIZE; i++) {
+ tuples->src_ip[i] = be32_to_cpu(flow_ip6_src[i]);
+ tuples->dst_ip[i] = be32_to_cpu(flow_ip6_dst[i]);
+ }
}
}
@@ -7740,16 +7745,27 @@ static int hclge_set_vlan_filter_ctrl(struct hclge_dev *hdev, u8 vlan_type,
struct hclge_desc desc;
int ret;
- hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_CTRL, false);
-
+ /* read current vlan filter parameter */
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_CTRL, true);
req = (struct hclge_vlan_filter_ctrl_cmd *)desc.data;
req->vlan_type = vlan_type;
- req->vlan_fe = filter_en ? fe_type : 0;
req->vf_id = vf_id;
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "failed to get vlan filter config, ret = %d.\n", ret);
+ return ret;
+ }
+
+ /* modify and write new config parameter */
+ hclge_cmd_reuse_desc(&desc, false);
+ req->vlan_fe = filter_en ?
+ (req->vlan_fe | fe_type) : (req->vlan_fe & ~fe_type);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
if (ret)
- dev_err(&hdev->pdev->dev, "set vlan filter fail, ret =%d.\n",
+ dev_err(&hdev->pdev->dev, "failed to set vlan filter, ret = %d.\n",
ret);
return ret;
@@ -8267,6 +8283,7 @@ void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list)
kfree(vlan);
}
}
+ clear_bit(vport->vport_id, hdev->vf_vlan_full);
}
void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev)
@@ -8483,6 +8500,28 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
}
}
+static void hclge_clear_vf_vlan(struct hclge_dev *hdev)
+{
+ struct hclge_vlan_info *vlan_info;
+ struct hclge_vport *vport;
+ int ret;
+ int vf;
+
+ /* clear port base vlan for all vf */
+ for (vf = HCLGE_VF_VPORT_START_NUM; vf < hdev->num_alloc_vport; vf++) {
+ vport = &hdev->vport[vf];
+ vlan_info = &vport->port_base_vlan_cfg.vlan_info;
+
+ ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q),
+ vport->vport_id,
+ vlan_info->vlan_tag, true);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "failed to clear vf vlan for vf%d, ret = %d\n",
+ vf - HCLGE_VF_VPORT_START_NUM, ret);
+ }
+}
+
int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
u16 vlan_id, bool is_kill)
{
@@ -9834,6 +9873,13 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
return ret;
}
+ ret = init_mgr_tbl(hdev);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "failed to reinit manager table, ret = %d\n", ret);
+ return ret;
+ }
+
ret = hclge_init_fd_config(hdev);
if (ret) {
dev_err(&pdev->dev, "fd table init fail, ret=%d\n", ret);
@@ -9885,6 +9931,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
struct hclge_mac *mac = &hdev->hw.mac;
hclge_reset_vf_rate(hdev);
+ hclge_clear_vf_vlan(hdev);
hclge_misc_affinity_teardown(hdev);
hclge_state_uninit(hdev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index a3c0822..3d850f6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -799,6 +799,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
hclge_get_link_mode(vport, req);
break;
case HCLGE_MBX_GET_VF_FLR_STATUS:
+ case HCLGE_MBX_VF_UNINIT:
hclge_rm_vport_all_mac_table(vport, true,
HCLGE_MAC_ADDR_UC);
hclge_rm_vport_all_mac_table(vport, true,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 180224e..28db132 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -566,7 +566,7 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
*/
kinfo->num_tc = vport->vport_id ? 1 :
min_t(u16, vport->alloc_tqps, hdev->tm_info.num_tc);
- vport->qs_offset = (vport->vport_id ? hdev->tm_info.num_tc : 0) +
+ vport->qs_offset = (vport->vport_id ? HNAE3_MAX_TC : 0) +
(vport->vport_id ? (vport->vport_id - 1) : 0);
max_rss_size = min_t(u16, hdev->rss_size_max,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index d659720..0510d85 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -2803,6 +2803,9 @@ static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev)
{
hclgevf_state_uninit(hdev);
+ hclgevf_send_mbx_msg(hdev, HCLGE_MBX_VF_UNINIT, 0, NULL, 0,
+ false, NULL, 0);
+
if (test_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state)) {
hclgevf_misc_irq_uninit(hdev);
hclgevf_uninit_msi(hdev);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
index eb53c15..5f2d57d 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
@@ -389,7 +389,8 @@ static int cmdq_sync_cmd_direct_resp(struct hinic_cmdq *cmdq,
spin_unlock_bh(&cmdq->cmdq_lock);
- if (!wait_for_completion_timeout(&done, CMDQ_TIMEOUT)) {
+ if (!wait_for_completion_timeout(&done,
+ msecs_to_jiffies(CMDQ_TIMEOUT))) {
spin_lock_bh(&cmdq->cmdq_lock);
if (cmdq->errcode[curr_prod_idx] == &errcode)
@@ -623,6 +624,8 @@ static int cmdq_cmd_ceq_handler(struct hinic_cmdq *cmdq, u16 ci,
if (!CMDQ_WQE_COMPLETED(be32_to_cpu(ctrl->ctrl_info)))
return -EBUSY;
+ dma_rmb();
+
errcode = CMDQ_WQE_ERRCODE_GET(be32_to_cpu(status->status_info), VAL);
cmdq_sync_cmd_handler(cmdq, ci, errcode);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index 6f2cf56..c7c75b7 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -297,6 +297,7 @@ static int set_hw_ioctxt(struct hinic_hwdev *hwdev, unsigned int rq_depth,
}
hw_ioctxt.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
+ hw_ioctxt.ppf_idx = HINIC_HWIF_PPF_IDX(hwif);
hw_ioctxt.set_cmdq_depth = HW_IOCTXT_SET_CMDQ_DEPTH_DEFAULT;
hw_ioctxt.cmdq_depth = 0;
@@ -359,50 +360,6 @@ static int wait_for_db_state(struct hinic_hwdev *hwdev)
return -EFAULT;
}
-static int wait_for_io_stopped(struct hinic_hwdev *hwdev)
-{
- struct hinic_cmd_io_status cmd_io_status;
- struct hinic_hwif *hwif = hwdev->hwif;
- struct pci_dev *pdev = hwif->pdev;
- struct hinic_pfhwdev *pfhwdev;
- unsigned long end;
- u16 out_size;
- int err;
-
- if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
- dev_err(&pdev->dev, "Unsupported PCI Function type\n");
- return -EINVAL;
- }
-
- pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
-
- cmd_io_status.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
-
- end = jiffies + msecs_to_jiffies(IO_STATUS_TIMEOUT);
- do {
- err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_COMM,
- HINIC_COMM_CMD_IO_STATUS_GET,
- &cmd_io_status, sizeof(cmd_io_status),
- &cmd_io_status, &out_size,
- HINIC_MGMT_MSG_SYNC);
- if ((err) || (out_size != sizeof(cmd_io_status))) {
- dev_err(&pdev->dev, "Failed to get IO status, ret = %d\n",
- err);
- return err;
- }
-
- if (cmd_io_status.status == IO_STOPPED) {
- dev_info(&pdev->dev, "IO stopped\n");
- return 0;
- }
-
- msleep(20);
- } while (time_before(jiffies, end));
-
- dev_err(&pdev->dev, "Wait for IO stopped - Timeout\n");
- return -ETIMEDOUT;
-}
-
/**
* clear_io_resource - set the IO resources as not active in the NIC
* @hwdev: the NIC HW device
@@ -422,11 +379,8 @@ static int clear_io_resources(struct hinic_hwdev *hwdev)
return -EINVAL;
}
- err = wait_for_io_stopped(hwdev);
- if (err) {
- dev_err(&pdev->dev, "IO has not stopped yet\n");
- return err;
- }
+ /* sleep 100ms to wait for firmware stopping I/O */
+ msleep(100);
cmd_clear_io_res.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
index b069045..66fd234 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
@@ -151,8 +151,8 @@ struct hinic_cmd_hw_ioctxt {
u8 lro_en;
u8 rsvd3;
+ u8 ppf_idx;
u8 rsvd4;
- u8 rsvd5;
u16 rq_depth;
u16 rx_buf_sz_idx;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
index 79243b6..c0b6bcb 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
@@ -188,7 +188,7 @@ static u8 eq_cons_idx_checksum_set(u32 val)
* eq_update_ci - update the HW cons idx of event queue
* @eq: the event queue to update the cons idx for
**/
-static void eq_update_ci(struct hinic_eq *eq)
+static void eq_update_ci(struct hinic_eq *eq, u32 arm_state)
{
u32 val, addr = EQ_CONS_IDX_REG_ADDR(eq);
@@ -202,7 +202,7 @@ static void eq_update_ci(struct hinic_eq *eq)
val |= HINIC_EQ_CI_SET(eq->cons_idx, IDX) |
HINIC_EQ_CI_SET(eq->wrapped, WRAPPED) |
- HINIC_EQ_CI_SET(EQ_ARMED, INT_ARMED);
+ HINIC_EQ_CI_SET(arm_state, INT_ARMED);
val |= HINIC_EQ_CI_SET(eq_cons_idx_checksum_set(val), XOR_CHKSUM);
@@ -235,6 +235,8 @@ static void aeq_irq_handler(struct hinic_eq *eq)
if (HINIC_EQ_ELEM_DESC_GET(aeqe_desc, WRAPPED) == eq->wrapped)
break;
+ dma_rmb();
+
event = HINIC_EQ_ELEM_DESC_GET(aeqe_desc, TYPE);
if (event >= HINIC_MAX_AEQ_EVENTS) {
dev_err(&pdev->dev, "Unknown AEQ Event %d\n", event);
@@ -347,7 +349,7 @@ static void eq_irq_handler(void *data)
else if (eq->type == HINIC_CEQ)
ceq_irq_handler(eq);
- eq_update_ci(eq);
+ eq_update_ci(eq, EQ_ARMED);
}
/**
@@ -702,7 +704,7 @@ static int init_eq(struct hinic_eq *eq, struct hinic_hwif *hwif,
}
set_eq_ctrls(eq);
- eq_update_ci(eq);
+ eq_update_ci(eq, EQ_ARMED);
err = alloc_eq_pages(eq);
if (err) {
@@ -752,18 +754,28 @@ static int init_eq(struct hinic_eq *eq, struct hinic_hwif *hwif,
**/
static void remove_eq(struct hinic_eq *eq)
{
- struct msix_entry *entry = &eq->msix_entry;
-
- free_irq(entry->vector, eq);
+ hinic_set_msix_state(eq->hwif, eq->msix_entry.entry,
+ HINIC_MSIX_DISABLE);
+ free_irq(eq->msix_entry.vector, eq);
if (eq->type == HINIC_AEQ) {
struct hinic_eq_work *aeq_work = &eq->aeq_work;
cancel_work_sync(&aeq_work->work);
+ /* clear aeq_len to avoid hw access host memory */
+ hinic_hwif_write_reg(eq->hwif,
+ HINIC_CSR_AEQ_CTRL_1_ADDR(eq->q_id), 0);
} else if (eq->type == HINIC_CEQ) {
tasklet_kill(&eq->ceq_tasklet);
+ /* clear ceq_len to avoid hw access host memory */
+ hinic_hwif_write_reg(eq->hwif,
+ HINIC_CSR_CEQ_CTRL_1_ADDR(eq->q_id), 0);
}
+ /* update cons_idx to avoid invalid interrupt */
+ eq->cons_idx = hinic_hwif_read_reg(eq->hwif, EQ_PROD_IDX_REG_ADDR(eq));
+ eq_update_ci(eq, EQ_NOT_ARMED);
+
free_eq_pages(eq);
}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
index 5177945..c7bb9ce 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
@@ -137,6 +137,7 @@
#define HINIC_HWIF_FUNC_IDX(hwif) ((hwif)->attr.func_idx)
#define HINIC_HWIF_PCI_INTF(hwif) ((hwif)->attr.pci_intf_idx)
#define HINIC_HWIF_PF_IDX(hwif) ((hwif)->attr.pf_idx)
+#define HINIC_HWIF_PPF_IDX(hwif) ((hwif)->attr.ppf_idx)
#define HINIC_FUNC_TYPE(hwif) ((hwif)->attr.func_type)
#define HINIC_IS_PF(hwif) (HINIC_FUNC_TYPE(hwif) == HINIC_PF)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
index c1a6be6..8995e32 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
@@ -43,7 +43,7 @@
#define MSG_NOT_RESP 0xFFFF
-#define MGMT_MSG_TIMEOUT 1000
+#define MGMT_MSG_TIMEOUT 5000
#define mgmt_to_pfhwdev(pf_mgmt) \
container_of(pf_mgmt, struct hinic_pfhwdev, pf_to_mgmt)
@@ -267,7 +267,8 @@ static int msg_to_mgmt_sync(struct hinic_pf_to_mgmt *pf_to_mgmt,
goto unlock_sync_msg;
}
- if (!wait_for_completion_timeout(recv_done, MGMT_MSG_TIMEOUT)) {
+ if (!wait_for_completion_timeout(recv_done,
+ msecs_to_jiffies(MGMT_MSG_TIMEOUT))) {
dev_err(&pdev->dev, "MGMT timeout, MSG id = %d\n", msg_id);
err = -ETIMEDOUT;
goto unlock_sync_msg;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
index f4a339b..79091e1 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
@@ -94,6 +94,7 @@ struct hinic_rq {
struct hinic_wq *wq;
+ struct cpumask affinity_mask;
u32 irq;
u16 msix_entry;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index 02a14f5..1356097 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -356,7 +356,8 @@ static void hinic_enable_rss(struct hinic_dev *nic_dev)
if (!num_cpus)
num_cpus = num_online_cpus();
- nic_dev->num_qps = min_t(u16, nic_dev->max_qps, num_cpus);
+ nic_dev->num_qps = hinic_hwdev_num_qps(hwdev);
+ nic_dev->num_qps = min_t(u16, nic_dev->num_qps, num_cpus);
nic_dev->rss_limit = nic_dev->num_qps;
nic_dev->num_rss = nic_dev->num_qps;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
index 56ea6d6..815649e 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
@@ -350,6 +350,9 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget)
if (!rq_wqe)
break;
+ /* make sure we read rx_done before packet length */
+ dma_rmb();
+
cqe = rq->cqe[ci];
status = be32_to_cpu(cqe->status);
hinic_rq_get_sge(rxq->rq, rq_wqe, ci, &sge);
@@ -475,7 +478,6 @@ static int rx_request_irq(struct hinic_rxq *rxq)
struct hinic_hwdev *hwdev = nic_dev->hwdev;
struct hinic_rq *rq = rxq->rq;
struct hinic_qp *qp;
- struct cpumask mask;
int err;
rx_add_napi(rxq);
@@ -492,8 +494,8 @@ static int rx_request_irq(struct hinic_rxq *rxq)
}
qp = container_of(rq, struct hinic_qp, rq);
- cpumask_set_cpu(qp->q_id % num_online_cpus(), &mask);
- return irq_set_affinity_hint(rq->irq, &mask);
+ cpumask_set_cpu(qp->q_id % num_online_cpus(), &rq->affinity_mask);
+ return irq_set_affinity_hint(rq->irq, &rq->affinity_mask);
}
static void rx_free_irq(struct hinic_rxq *rxq)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
index 0e13d1c..3650164 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
@@ -45,7 +45,7 @@
#define HW_CONS_IDX(sq) be16_to_cpu(*(u16 *)((sq)->hw_ci_addr))
-#define MIN_SKB_LEN 17
+#define MIN_SKB_LEN 32
#define MAX_PAYLOAD_OFFSET 221
#define TRANSPORT_OFFSET(l4_hdr, skb) ((u32)((l4_hdr) - (skb)->data))
@@ -622,6 +622,8 @@ static int free_tx_poll(struct napi_struct *napi, int budget)
do {
hw_ci = HW_CONS_IDX(sq) & wq->mask;
+ dma_rmb();
+
/* Reading a WQEBB to get real WQE size and consumer index. */
sq_wqe = hinic_sq_read_wqebb(sq, &skb, &wqe_size, &sw_ci);
if ((!sq_wqe) ||
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index c75239d..4bd3324 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2142,6 +2142,8 @@ static void __ibmvnic_reset(struct work_struct *work)
{
struct ibmvnic_rwi *rwi;
struct ibmvnic_adapter *adapter;
+ bool saved_state = false;
+ unsigned long flags;
u32 reset_state;
int rc = 0;
@@ -2153,17 +2155,25 @@ static void __ibmvnic_reset(struct work_struct *work)
return;
}
- reset_state = adapter->state;
-
rwi = get_next_rwi(adapter);
while (rwi) {
+ spin_lock_irqsave(&adapter->state_lock, flags);
+
if (adapter->state == VNIC_REMOVING ||
adapter->state == VNIC_REMOVED) {
+ spin_unlock_irqrestore(&adapter->state_lock, flags);
kfree(rwi);
rc = EBUSY;
break;
}
+ if (!saved_state) {
+ reset_state = adapter->state;
+ adapter->state = VNIC_RESETTING;
+ saved_state = true;
+ }
+ spin_unlock_irqrestore(&adapter->state_lock, flags);
+
if (rwi->reset_reason == VNIC_RESET_CHANGE_PARAM) {
/* CHANGE_PARAM requestor holds rtnl_lock */
rc = do_change_param_reset(adapter, rwi, reset_state);
@@ -5091,6 +5101,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
__ibmvnic_delayed_reset);
INIT_LIST_HEAD(&adapter->rwi_list);
spin_lock_init(&adapter->rwi_lock);
+ spin_lock_init(&adapter->state_lock);
mutex_init(&adapter->fw_lock);
init_completion(&adapter->init_done);
init_completion(&adapter->fw_done);
@@ -5163,8 +5174,17 @@ static int ibmvnic_remove(struct vio_dev *dev)
{
struct net_device *netdev = dev_get_drvdata(&dev->dev);
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+ unsigned long flags;
+
+ spin_lock_irqsave(&adapter->state_lock, flags);
+ if (adapter->state == VNIC_RESETTING) {
+ spin_unlock_irqrestore(&adapter->state_lock, flags);
+ return -EBUSY;
+ }
adapter->state = VNIC_REMOVING;
+ spin_unlock_irqrestore(&adapter->state_lock, flags);
+
rtnl_lock();
unregister_netdevice(netdev);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 60eccaf..f8416e1 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -941,7 +941,8 @@ enum vnic_state {VNIC_PROBING = 1,
VNIC_CLOSING,
VNIC_CLOSED,
VNIC_REMOVING,
- VNIC_REMOVED};
+ VNIC_REMOVED,
+ VNIC_RESETTING};
enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1,
VNIC_RESET_MOBILITY,
@@ -1090,4 +1091,7 @@ struct ibmvnic_adapter {
struct ibmvnic_tunables desired;
struct ibmvnic_tunables fallback;
+
+ /* Used for serializatin of state field */
+ spinlock_t state_lock;
};
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 69523ac..56b9e44 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2362,7 +2362,7 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg)
goto error_param;
}
- if (i40e_vc_validate_vqs_bitmaps(vqs)) {
+ if (!i40e_vc_validate_vqs_bitmaps(vqs)) {
aq_ret = I40E_ERR_PARAM;
goto error_param;
}
@@ -2424,7 +2424,7 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg)
goto error_param;
}
- if (i40e_vc_validate_vqs_bitmaps(vqs)) {
+ if (!i40e_vc_validate_vqs_bitmaps(vqs)) {
aq_ret = I40E_ERR_PARAM;
goto error_param;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 4459bc5..6873998 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -1660,6 +1660,7 @@ struct ice_aqc_get_pkg_info_resp {
__le32 count;
struct ice_aqc_get_pkg_info pkg_info[1];
};
+
/**
* struct ice_aq_desc - Admin Queue (AQ) descriptor
* @flags: ICE_AQ_FLAG_* flags
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index d8e975c..81885ef 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -324,7 +324,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
if (err)
return err;
- dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
+ dev_info(ice_pf_to_dev(vsi->back), "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
ring->q_index);
} else {
ring->zca.free = NULL;
@@ -405,8 +405,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
/* Absolute queue number out of 2K needs to be passed */
err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
if (err) {
- dev_err(&vsi->back->pdev->dev,
- "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
pf_q, err);
return -EIO;
}
@@ -428,8 +427,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
ice_alloc_rx_bufs_slow_zc(ring, ICE_DESC_UNUSED(ring)) :
ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
if (err)
- dev_info(&vsi->back->pdev->dev,
- "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n",
+ dev_info(ice_pf_to_dev(vsi->back), "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n",
ring->xsk_umem ? "UMEM enabled " : "",
ring->q_index, pf_q);
@@ -490,8 +488,7 @@ int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
/* wait for the change to finish */
ret = ice_pf_rxq_wait(pf, pf_q, ena);
if (ret)
- dev_err(ice_pf_to_dev(pf),
- "VSI idx %d Rx ring %d %sable timeout\n",
+ dev_err(ice_pf_to_dev(pf), "VSI idx %d Rx ring %d %sable timeout\n",
vsi->idx, pf_q, (ena ? "en" : "dis"));
return ret;
@@ -506,20 +503,15 @@ int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
*/
int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
{
- struct ice_pf *pf = vsi->back;
- int v_idx = 0, num_q_vectors;
- struct device *dev;
- int err;
+ struct device *dev = ice_pf_to_dev(vsi->back);
+ int v_idx, err;
- dev = ice_pf_to_dev(pf);
if (vsi->q_vectors[0]) {
dev_dbg(dev, "VSI %d has existing q_vectors\n", vsi->vsi_num);
return -EEXIST;
}
- num_q_vectors = vsi->num_q_vectors;
-
- for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
+ for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++) {
err = ice_vsi_alloc_q_vector(vsi, v_idx);
if (err)
goto err_out;
@@ -648,8 +640,7 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring,
status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle,
1, qg_buf, buf_len, NULL);
if (status) {
- dev_err(ice_pf_to_dev(pf),
- "Failed to set LAN Tx queue context, error: %d\n",
+ dev_err(ice_pf_to_dev(pf), "Failed to set LAN Tx queue context, error: %d\n",
status);
return -ENODEV;
}
@@ -815,14 +806,12 @@ ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
* queues at the hardware level anyway.
*/
if (status == ICE_ERR_RESET_ONGOING) {
- dev_dbg(&vsi->back->pdev->dev,
- "Reset in progress. LAN Tx queues already disabled\n");
+ dev_dbg(ice_pf_to_dev(vsi->back), "Reset in progress. LAN Tx queues already disabled\n");
} else if (status == ICE_ERR_DOES_NOT_EXIST) {
- dev_dbg(&vsi->back->pdev->dev,
- "LAN Tx queues do not exist, nothing to disable\n");
+ dev_dbg(ice_pf_to_dev(vsi->back), "LAN Tx queues do not exist, nothing to disable\n");
} else if (status) {
- dev_err(&vsi->back->pdev->dev,
- "Failed to disable LAN Tx queues, error: %d\n", status);
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to disable LAN Tx queues, error: %d\n",
+ status);
return -ENODEV;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 0207e28..04d5db0 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -25,20 +25,6 @@ static enum ice_status ice_set_mac_type(struct ice_hw *hw)
}
/**
- * ice_dev_onetime_setup - Temporary HW/FW workarounds
- * @hw: pointer to the HW structure
- *
- * This function provides temporary workarounds for certain issues
- * that are expected to be fixed in the HW/FW.
- */
-void ice_dev_onetime_setup(struct ice_hw *hw)
-{
-#define MBX_PF_VT_PFALLOC 0x00231E80
- /* set VFs per PF */
- wr32(hw, MBX_PF_VT_PFALLOC, rd32(hw, PF_VT_PFALLOC_HIF));
-}
-
-/**
* ice_clear_pf_cfg - Clear PF configuration
* @hw: pointer to the hardware structure
*
@@ -602,10 +588,10 @@ void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf)
}
/**
- * ice_get_itr_intrl_gran - determine int/intrl granularity
+ * ice_get_itr_intrl_gran
* @hw: pointer to the HW struct
*
- * Determines the ITR/intrl granularities based on the maximum aggregate
+ * Determines the ITR/INTRL granularities based on the maximum aggregate
* bandwidth according to the device's configuration during power-on.
*/
static void ice_get_itr_intrl_gran(struct ice_hw *hw)
@@ -763,8 +749,6 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
if (status)
goto err_unroll_sched;
- ice_dev_onetime_setup(hw);
-
/* Get MAC information */
/* A single port can report up to two (LAN and WoL) addresses */
mac_buf = devm_kcalloc(ice_hw_to_dev(hw), 2,
@@ -834,7 +818,7 @@ void ice_deinit_hw(struct ice_hw *hw)
*/
enum ice_status ice_check_reset(struct ice_hw *hw)
{
- u32 cnt, reg = 0, grst_delay;
+ u32 cnt, reg = 0, grst_delay, uld_mask;
/* Poll for Device Active state in case a recent CORER, GLOBR,
* or EMPR has occurred. The grst delay value is in 100ms units.
@@ -856,13 +840,20 @@ enum ice_status ice_check_reset(struct ice_hw *hw)
return ICE_ERR_RESET_FAILED;
}
-#define ICE_RESET_DONE_MASK (GLNVM_ULD_CORER_DONE_M | \
- GLNVM_ULD_GLOBR_DONE_M)
+#define ICE_RESET_DONE_MASK (GLNVM_ULD_PCIER_DONE_M |\
+ GLNVM_ULD_PCIER_DONE_1_M |\
+ GLNVM_ULD_CORER_DONE_M |\
+ GLNVM_ULD_GLOBR_DONE_M |\
+ GLNVM_ULD_POR_DONE_M |\
+ GLNVM_ULD_POR_DONE_1_M |\
+ GLNVM_ULD_PCIER_DONE_2_M)
+
+ uld_mask = ICE_RESET_DONE_MASK;
/* Device is Active; check Global Reset processes are done */
for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) {
- reg = rd32(hw, GLNVM_ULD) & ICE_RESET_DONE_MASK;
- if (reg == ICE_RESET_DONE_MASK) {
+ reg = rd32(hw, GLNVM_ULD) & uld_mask;
+ if (reg == uld_mask) {
ice_debug(hw, ICE_DBG_INIT,
"Global reset processes done. %d\n", cnt);
break;
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index b5c013f..f9fc005 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -54,8 +54,6 @@ enum ice_status ice_get_caps(struct ice_hw *hw);
void ice_set_safe_mode_caps(struct ice_hw *hw);
-void ice_dev_onetime_setup(struct ice_hw *hw);
-
enum ice_status
ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
u32 rxq_index);
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c
index 713e8a8..adb8dab 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.c
@@ -1323,13 +1323,13 @@ enum ice_status ice_set_dcb_cfg(struct ice_port_info *pi)
}
/**
- * ice_aq_query_port_ets - query port ets configuration
+ * ice_aq_query_port_ets - query port ETS configuration
* @pi: port information structure
* @buf: pointer to buffer
* @buf_size: buffer size in bytes
* @cd: pointer to command details structure or NULL
*
- * query current port ets configuration
+ * query current port ETS configuration
*/
static enum ice_status
ice_aq_query_port_ets(struct ice_port_info *pi,
@@ -1416,13 +1416,13 @@ ice_update_port_tc_tree_cfg(struct ice_port_info *pi,
}
/**
- * ice_query_port_ets - query port ets configuration
+ * ice_query_port_ets - query port ETS configuration
* @pi: port information structure
* @buf: pointer to buffer
* @buf_size: buffer size in bytes
* @cd: pointer to command details structure or NULL
*
- * query current port ets configuration and update the
+ * query current port ETS configuration and update the
* SW DB with the TC changes
*/
enum ice_status
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index 0664e5b..7108fb4 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -315,9 +315,9 @@ ice_dcb_need_recfg(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
*/
void ice_dcb_rebuild(struct ice_pf *pf)
{
- struct ice_dcbx_cfg *local_dcbx_cfg, *desired_dcbx_cfg, *prev_cfg;
struct ice_aqc_port_ets_elem buf = { 0 };
struct device *dev = ice_pf_to_dev(pf);
+ struct ice_dcbx_cfg *err_cfg;
enum ice_status ret;
ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
@@ -330,53 +330,25 @@ void ice_dcb_rebuild(struct ice_pf *pf)
if (!test_bit(ICE_FLAG_DCB_ENA, pf->flags))
return;
- local_dcbx_cfg = &pf->hw.port_info->local_dcbx_cfg;
- desired_dcbx_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+ mutex_lock(&pf->tc_mutex);
- /* Save current willing state and force FW to unwilling */
- local_dcbx_cfg->etscfg.willing = 0x0;
- local_dcbx_cfg->pfc.willing = 0x0;
- local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING;
+ if (!pf->hw.port_info->is_sw_lldp)
+ ice_cfg_etsrec_defaults(pf->hw.port_info);
- ice_cfg_etsrec_defaults(pf->hw.port_info);
ret = ice_set_dcb_cfg(pf->hw.port_info);
if (ret) {
- dev_err(dev, "Failed to set DCB to unwilling\n");
+ dev_err(dev, "Failed to set DCB config in rebuild\n");
goto dcb_error;
}
- /* Retrieve DCB config and ensure same as current in SW */
- prev_cfg = kmemdup(local_dcbx_cfg, sizeof(*prev_cfg), GFP_KERNEL);
- if (!prev_cfg)
- goto dcb_error;
-
- ice_init_dcb(&pf->hw, true);
- if (pf->hw.port_info->dcbx_status == ICE_DCBX_STATUS_DIS)
- pf->hw.port_info->is_sw_lldp = true;
- else
- pf->hw.port_info->is_sw_lldp = false;
-
- if (ice_dcb_need_recfg(pf, prev_cfg, local_dcbx_cfg)) {
- /* difference in cfg detected - disable DCB till next MIB */
- dev_err(dev, "Set local MIB not accurate\n");
- kfree(prev_cfg);
- goto dcb_error;
+ if (!pf->hw.port_info->is_sw_lldp) {
+ ret = ice_cfg_lldp_mib_change(&pf->hw, true);
+ if (ret && !pf->hw.port_info->is_sw_lldp) {
+ dev_err(dev, "Failed to register for MIB changes\n");
+ goto dcb_error;
+ }
}
- /* fetched config congruent to previous configuration */
- kfree(prev_cfg);
-
- /* Set the local desired config */
- if (local_dcbx_cfg->dcbx_mode == ICE_DCBX_MODE_CEE)
- memcpy(local_dcbx_cfg, desired_dcbx_cfg,
- sizeof(*local_dcbx_cfg));
-
- ice_cfg_etsrec_defaults(pf->hw.port_info);
- ret = ice_set_dcb_cfg(pf->hw.port_info);
- if (ret) {
- dev_err(dev, "Failed to set desired config\n");
- goto dcb_error;
- }
dev_info(dev, "DCB restored after reset\n");
ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
if (ret) {
@@ -384,26 +356,32 @@ void ice_dcb_rebuild(struct ice_pf *pf)
goto dcb_error;
}
+ mutex_unlock(&pf->tc_mutex);
+
return;
dcb_error:
dev_err(dev, "Disabling DCB until new settings occur\n");
- prev_cfg = kzalloc(sizeof(*prev_cfg), GFP_KERNEL);
- if (!prev_cfg)
+ err_cfg = kzalloc(sizeof(*err_cfg), GFP_KERNEL);
+ if (!err_cfg) {
+ mutex_unlock(&pf->tc_mutex);
return;
+ }
- prev_cfg->etscfg.willing = true;
- prev_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW;
- prev_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
- memcpy(&prev_cfg->etsrec, &prev_cfg->etscfg, sizeof(prev_cfg->etsrec));
+ err_cfg->etscfg.willing = true;
+ err_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW;
+ err_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
+ memcpy(&err_cfg->etsrec, &err_cfg->etscfg, sizeof(err_cfg->etsrec));
/* Coverity warns the return code of ice_pf_dcb_cfg() is not checked
* here as is done for other calls to that function. That check is
* not necessary since this is in this function's error cleanup path.
* Suppress the Coverity warning with the following comment...
*/
/* coverity[check_return] */
- ice_pf_dcb_cfg(pf, prev_cfg, false);
- kfree(prev_cfg);
+ ice_pf_dcb_cfg(pf, err_cfg, false);
+ kfree(err_cfg);
+
+ mutex_unlock(&pf->tc_mutex);
}
/**
@@ -434,9 +412,9 @@ static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked)
}
/**
- * ice_dcb_sw_default_config - Apply a default DCB config
+ * ice_dcb_sw_dflt_cfg - Apply a default DCB config
* @pf: PF to apply config to
- * @ets_willing: configure ets willing
+ * @ets_willing: configure ETS willing
* @locked: was this function called with RTNL held
*/
static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked)
@@ -599,8 +577,7 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
goto dcb_init_err;
}
- dev_info(dev,
- "DCB is enabled in the hardware, max number of TCs supported on this port are %d\n",
+ dev_info(dev, "DCB is enabled in the hardware, max number of TCs supported on this port are %d\n",
pf->hw.func_caps.common_cap.maxtc);
if (err) {
struct ice_vsi *pf_vsi;
@@ -610,8 +587,8 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
clear_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags);
err = ice_dcb_sw_dflt_cfg(pf, true, locked);
if (err) {
- dev_err(dev,
- "Failed to set local DCB config %d\n", err);
+ dev_err(dev, "Failed to set local DCB config %d\n",
+ err);
err = -EIO;
goto dcb_init_err;
}
@@ -777,6 +754,8 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
}
}
+ mutex_lock(&pf->tc_mutex);
+
/* store the old configuration */
tmp_dcbx_cfg = pf->hw.port_info->local_dcbx_cfg;
@@ -787,20 +766,20 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
ret = ice_get_dcb_cfg(pf->hw.port_info);
if (ret) {
dev_err(dev, "Failed to get DCB config\n");
- return;
+ goto out;
}
/* No change detected in DCBX configs */
if (!memcmp(&tmp_dcbx_cfg, &pi->local_dcbx_cfg, sizeof(tmp_dcbx_cfg))) {
dev_dbg(dev, "No change detected in DCBX configuration.\n");
- return;
+ goto out;
}
need_reconfig = ice_dcb_need_recfg(pf, &tmp_dcbx_cfg,
&pi->local_dcbx_cfg);
ice_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &pi->local_dcbx_cfg);
if (!need_reconfig)
- return;
+ goto out;
/* Enable DCB tagging only when more than one TC */
if (ice_dcb_get_num_tc(&pi->local_dcbx_cfg) > 1) {
@@ -814,7 +793,7 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
pf_vsi = ice_get_main_vsi(pf);
if (!pf_vsi) {
dev_dbg(dev, "PF VSI doesn't exist\n");
- return;
+ goto out;
}
rtnl_lock();
@@ -823,13 +802,15 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
if (ret) {
dev_err(dev, "Query Port ETS failed\n");
- rtnl_unlock();
- return;
+ goto unlock_rtnl;
}
/* changes in configuration update VSI */
ice_pf_dcb_recfg(pf);
ice_ena_vsi(pf_vsi, true);
+unlock_rtnl:
rtnl_unlock();
+out:
+ mutex_unlock(&pf->tc_mutex);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
index d870c1a..b61aba4 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
@@ -297,8 +297,7 @@ ice_dcbnl_get_pfc_cfg(struct net_device *netdev, int prio, u8 *setting)
return;
*setting = (pi->local_dcbx_cfg.pfc.pfcena >> prio) & 0x1;
- dev_dbg(ice_pf_to_dev(pf),
- "Get PFC Config up=%d, setting=%d, pfcenable=0x%x\n",
+ dev_dbg(ice_pf_to_dev(pf), "Get PFC Config up=%d, setting=%d, pfcenable=0x%x\n",
prio, *setting, pi->local_dcbx_cfg.pfc.pfcena);
}
@@ -418,8 +417,8 @@ ice_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int prio,
return;
*pgid = pi->local_dcbx_cfg.etscfg.prio_table[prio];
- dev_dbg(ice_pf_to_dev(pf),
- "Get PG config prio=%d tc=%d\n", prio, *pgid);
+ dev_dbg(ice_pf_to_dev(pf), "Get PG config prio=%d tc=%d\n", prio,
+ *pgid);
}
/**
@@ -713,13 +712,13 @@ static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app)
return -EINVAL;
mutex_lock(&pf->tc_mutex);
- ret = dcb_ieee_delapp(netdev, app);
- if (ret)
- goto delapp_out;
-
old_cfg = &pf->hw.port_info->local_dcbx_cfg;
- if (old_cfg->numapps == 1)
+ if (old_cfg->numapps <= 1)
+ goto delapp_out;
+
+ ret = dcb_ieee_delapp(netdev, app);
+ if (ret)
goto delapp_out;
new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
@@ -882,8 +881,7 @@ ice_dcbnl_vsi_del_app(struct ice_vsi *vsi,
sapp.protocol = app->prot_id;
sapp.priority = app->priority;
err = ice_dcbnl_delapp(vsi->netdev, &sapp);
- dev_dbg(&vsi->back->pdev->dev,
- "Deleting app for VSI idx=%d err=%d sel=%d proto=0x%x, prio=%d\n",
+ dev_dbg(ice_pf_to_dev(vsi->back), "Deleting app for VSI idx=%d err=%d sel=%d proto=0x%x, prio=%d\n",
vsi->idx, err, app->selector, app->prot_id, app->priority);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 90c6a3c..77c412a 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -166,13 +166,24 @@ static void
ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
+ u8 oem_ver, oem_patch, nvm_ver_hi, nvm_ver_lo;
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u16 oem_build;
strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
strlcpy(drvinfo->version, ice_drv_ver, sizeof(drvinfo->version));
- strlcpy(drvinfo->fw_version, ice_nvm_version_str(&pf->hw),
- sizeof(drvinfo->fw_version));
+
+ /* Display NVM version (from which the firmware version can be
+ * determined) which contains more pertinent information.
+ */
+ ice_get_nvm_version(hw, &oem_ver, &oem_build, &oem_patch,
+ &nvm_ver_hi, &nvm_ver_lo);
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%x.%02x 0x%x %d.%d.%d", nvm_ver_hi, nvm_ver_lo,
+ hw->nvm.eetrack, oem_ver, oem_build, oem_patch);
+
strlcpy(drvinfo->bus_info, pci_name(pf->pdev),
sizeof(drvinfo->bus_info));
drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE;
@@ -363,8 +374,7 @@ static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask)
val = rd32(hw, reg);
if (val == pattern)
continue;
- dev_err(dev,
- "%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n"
+ dev_err(dev, "%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n"
, __func__, reg, pattern, val);
return 1;
}
@@ -372,8 +382,7 @@ static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask)
wr32(hw, reg, orig_val);
val = rd32(hw, reg);
if (val != orig_val) {
- dev_err(dev,
- "%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n"
+ dev_err(dev, "%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n"
, __func__, reg, orig_val, val);
return 1;
}
@@ -791,8 +800,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test,
set_bit(__ICE_TESTING, pf->state);
if (ice_active_vfs(pf)) {
- dev_warn(dev,
- "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
+ dev_warn(dev, "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
data[ICE_ETH_TEST_REG] = 1;
data[ICE_ETH_TEST_EEPROM] = 1;
data[ICE_ETH_TEST_INTR] = 1;
@@ -1047,7 +1055,7 @@ ice_set_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
fec = ICE_FEC_NONE;
break;
default:
- dev_warn(&vsi->back->pdev->dev, "Unsupported FEC mode: %d\n",
+ dev_warn(ice_pf_to_dev(vsi->back), "Unsupported FEC mode: %d\n",
fecparam->fec);
return -EINVAL;
}
@@ -1200,8 +1208,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
* events to respond to.
*/
if (status)
- dev_info(dev,
- "Failed to unreg for LLDP events\n");
+ dev_info(dev, "Failed to unreg for LLDP events\n");
/* The AQ call to stop the FW LLDP agent will generate
* an error if the agent is already stopped.
@@ -1256,8 +1263,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
/* Register for MIB change events */
status = ice_cfg_lldp_mib_change(&pf->hw, true);
if (status)
- dev_dbg(dev,
- "Fail to enable MIB change events\n");
+ dev_dbg(dev, "Fail to enable MIB change events\n");
}
}
if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) {
@@ -1710,291 +1716,13 @@ ice_get_settings_link_up(struct ethtool_link_ksettings *ks,
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_port_info *pi = np->vsi->port_info;
- struct ethtool_link_ksettings cap_ksettings;
struct ice_link_status *link_info;
struct ice_vsi *vsi = np->vsi;
- bool unrecog_phy_high = false;
- bool unrecog_phy_low = false;
link_info = &vsi->port_info->phy.link_info;
- /* Initialize supported and advertised settings based on PHY settings */
- switch (link_info->phy_type_low) {
- case ICE_PHY_TYPE_LOW_100BASE_TX:
- ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 100baseT_Full);
- ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 100baseT_Full);
- break;
- case ICE_PHY_TYPE_LOW_100M_SGMII:
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 100baseT_Full);
- break;
- case ICE_PHY_TYPE_LOW_1000BASE_T:
- ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 1000baseT_Full);
- ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 1000baseT_Full);
- break;
- case ICE_PHY_TYPE_LOW_1G_SGMII:
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 1000baseT_Full);
- break;
- case ICE_PHY_TYPE_LOW_1000BASE_SX:
- case ICE_PHY_TYPE_LOW_1000BASE_LX:
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 1000baseX_Full);
- break;
- case ICE_PHY_TYPE_LOW_1000BASE_KX:
- ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 1000baseKX_Full);
- ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 1000baseKX_Full);
- break;
- case ICE_PHY_TYPE_LOW_2500BASE_T:
- ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 2500baseT_Full);
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 2500baseT_Full);
- break;
- case ICE_PHY_TYPE_LOW_2500BASE_X:
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 2500baseX_Full);
- break;
- case ICE_PHY_TYPE_LOW_2500BASE_KX:
- ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 2500baseX_Full);
- ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 2500baseX_Full);
- break;
- case ICE_PHY_TYPE_LOW_5GBASE_T:
- case ICE_PHY_TYPE_LOW_5GBASE_KR:
- ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 5000baseT_Full);
- ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 5000baseT_Full);
- break;
- case ICE_PHY_TYPE_LOW_10GBASE_T:
- ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 10000baseT_Full);
- ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 10000baseT_Full);
- break;
- case ICE_PHY_TYPE_LOW_10G_SFI_DA:
- case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC:
- case ICE_PHY_TYPE_LOW_10G_SFI_C2C:
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 10000baseT_Full);
- break;
- case ICE_PHY_TYPE_LOW_10GBASE_SR:
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 10000baseSR_Full);
- break;
- case ICE_PHY_TYPE_LOW_10GBASE_LR:
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 10000baseLR_Full);
- break;
- case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1:
- ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 10000baseKR_Full);
- ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 10000baseKR_Full);
- break;
- case ICE_PHY_TYPE_LOW_25GBASE_T:
- case ICE_PHY_TYPE_LOW_25GBASE_CR:
- case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
- case ICE_PHY_TYPE_LOW_25GBASE_CR1:
- ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 25000baseCR_Full);
- ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 25000baseCR_Full);
- break;
- case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC:
- case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 25000baseCR_Full);
- break;
- case ICE_PHY_TYPE_LOW_25GBASE_SR:
- case ICE_PHY_TYPE_LOW_25GBASE_LR:
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 25000baseSR_Full);
- break;
- case ICE_PHY_TYPE_LOW_25GBASE_KR:
- case ICE_PHY_TYPE_LOW_25GBASE_KR1:
- case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
- ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 25000baseKR_Full);
- ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 25000baseKR_Full);
- break;
- case ICE_PHY_TYPE_LOW_40GBASE_CR4:
- ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 40000baseCR4_Full);
- ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 40000baseCR4_Full);
- break;
- case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC:
- case ICE_PHY_TYPE_LOW_40G_XLAUI:
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 40000baseCR4_Full);
- break;
- case ICE_PHY_TYPE_LOW_40GBASE_SR4:
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 40000baseSR4_Full);
- break;
- case ICE_PHY_TYPE_LOW_40GBASE_LR4:
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 40000baseLR4_Full);
- break;
- case ICE_PHY_TYPE_LOW_40GBASE_KR4:
- ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 40000baseKR4_Full);
- ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 40000baseKR4_Full);
- break;
- case ICE_PHY_TYPE_LOW_50GBASE_CR2:
- case ICE_PHY_TYPE_LOW_50GBASE_CP:
- ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 50000baseCR2_Full);
- ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 50000baseCR2_Full);
- break;
- case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC:
- case ICE_PHY_TYPE_LOW_50G_LAUI2:
- case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC:
- case ICE_PHY_TYPE_LOW_50G_AUI2:
- case ICE_PHY_TYPE_LOW_50GBASE_SR:
- case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC:
- case ICE_PHY_TYPE_LOW_50G_AUI1:
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 50000baseCR2_Full);
- break;
- case ICE_PHY_TYPE_LOW_50GBASE_KR2:
- case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
- ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 50000baseKR2_Full);
- ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 50000baseKR2_Full);
- break;
- case ICE_PHY_TYPE_LOW_50GBASE_SR2:
- case ICE_PHY_TYPE_LOW_50GBASE_LR2:
- case ICE_PHY_TYPE_LOW_50GBASE_FR:
- case ICE_PHY_TYPE_LOW_50GBASE_LR:
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 50000baseSR2_Full);
- break;
- case ICE_PHY_TYPE_LOW_100GBASE_CR4:
- ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 100000baseCR4_Full);
- ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 100000baseCR4_Full);
- break;
- case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC:
- case ICE_PHY_TYPE_LOW_100G_CAUI4:
- case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC:
- case ICE_PHY_TYPE_LOW_100G_AUI4:
- case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 100000baseCR4_Full);
- break;
- case ICE_PHY_TYPE_LOW_100GBASE_CP2:
- ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 100000baseCR4_Full);
- ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 100000baseCR4_Full);
- break;
- case ICE_PHY_TYPE_LOW_100GBASE_SR4:
- case ICE_PHY_TYPE_LOW_100GBASE_SR2:
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 100000baseSR4_Full);
- break;
- case ICE_PHY_TYPE_LOW_100GBASE_LR4:
- case ICE_PHY_TYPE_LOW_100GBASE_DR:
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 100000baseLR4_ER4_Full);
- break;
- case ICE_PHY_TYPE_LOW_100GBASE_KR4:
- case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
- ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 100000baseKR4_Full);
- ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 100000baseKR4_Full);
- break;
- default:
- unrecog_phy_low = true;
- }
-
- switch (link_info->phy_type_high) {
- case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
- ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 100000baseKR4_Full);
- ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 100000baseKR4_Full);
- break;
- case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC:
- case ICE_PHY_TYPE_HIGH_100G_CAUI2:
- case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC:
- case ICE_PHY_TYPE_HIGH_100G_AUI2:
- ethtool_link_ksettings_add_link_mode(ks, supported,
- 100000baseCR4_Full);
- break;
- default:
- unrecog_phy_high = true;
- }
-
- if (unrecog_phy_low && unrecog_phy_high) {
- /* if we got here and link is up something bad is afoot */
- netdev_info(netdev,
- "WARNING: Unrecognized PHY_Low (0x%llx).\n",
- (u64)link_info->phy_type_low);
- netdev_info(netdev,
- "WARNING: Unrecognized PHY_High (0x%llx).\n",
- (u64)link_info->phy_type_high);
- }
-
- /* Now that we've worked out everything that could be supported by the
- * current PHY type, get what is supported by the NVM and intersect
- * them to get what is truly supported
- */
- memset(&cap_ksettings, 0, sizeof(cap_ksettings));
- ice_phy_type_to_ethtool(netdev, &cap_ksettings);
- ethtool_intersect_link_masks(ks, &cap_ksettings);
+ /* Get supported and advertised settings from PHY ability with media */
+ ice_phy_type_to_ethtool(netdev, ks);
switch (link_info->link_speed) {
case ICE_AQ_LINK_SPEED_100GB:
@@ -2028,8 +1756,7 @@ ice_get_settings_link_up(struct ethtool_link_ksettings *ks,
ks->base.speed = SPEED_100;
break;
default:
- netdev_info(netdev,
- "WARNING: Unrecognized link_speed (0x%x).\n",
+ netdev_info(netdev, "WARNING: Unrecognized link_speed (0x%x).\n",
link_info->link_speed);
break;
}
@@ -2845,13 +2572,11 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
new_tx_cnt = ALIGN(ring->tx_pending, ICE_REQ_DESC_MULTIPLE);
if (new_tx_cnt != ring->tx_pending)
- netdev_info(netdev,
- "Requested Tx descriptor count rounded up to %d\n",
+ netdev_info(netdev, "Requested Tx descriptor count rounded up to %d\n",
new_tx_cnt);
new_rx_cnt = ALIGN(ring->rx_pending, ICE_REQ_DESC_MULTIPLE);
if (new_rx_cnt != ring->rx_pending)
- netdev_info(netdev,
- "Requested Rx descriptor count rounded up to %d\n",
+ netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n",
new_rx_cnt);
/* if nothing to do return success */
@@ -3211,13 +2936,6 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
else
return -EINVAL;
- /* Tell the OS link is going down, the link will go back up when fw
- * says it is ready asynchronously
- */
- ice_print_link_msg(vsi, false);
- netif_carrier_off(netdev);
- netif_tx_stop_all_queues(netdev);
-
/* Set the FC mode and only restart AN if link is up */
status = ice_set_fc(pi, &aq_failures, link_up);
@@ -3718,8 +3436,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL ||
(ec->rx_coalesce_usecs_high &&
ec->rx_coalesce_usecs_high < pf->hw.intrl_gran)) {
- netdev_info(vsi->netdev,
- "Invalid value, %s-usecs-high valid values are 0 (disabled), %d-%d\n",
+ netdev_info(vsi->netdev, "Invalid value, %s-usecs-high valid values are 0 (disabled), %d-%d\n",
c_type_str, pf->hw.intrl_gran,
ICE_MAX_INTRL);
return -EINVAL;
@@ -3737,8 +3454,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
break;
case ICE_TX_CONTAINER:
if (ec->tx_coalesce_usecs_high) {
- netdev_info(vsi->netdev,
- "setting %s-usecs-high is not supported\n",
+ netdev_info(vsi->netdev, "setting %s-usecs-high is not supported\n",
c_type_str);
return -EINVAL;
}
@@ -3755,35 +3471,24 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
itr_setting = rc->itr_setting & ~ICE_ITR_DYNAMIC;
if (coalesce_usecs != itr_setting && use_adaptive_coalesce) {
- netdev_info(vsi->netdev,
- "%s interrupt throttling cannot be changed if adaptive-%s is enabled\n",
+ netdev_info(vsi->netdev, "%s interrupt throttling cannot be changed if adaptive-%s is enabled\n",
c_type_str, c_type_str);
return -EINVAL;
}
if (coalesce_usecs > ICE_ITR_MAX) {
- netdev_info(vsi->netdev,
- "Invalid value, %s-usecs range is 0-%d\n",
+ netdev_info(vsi->netdev, "Invalid value, %s-usecs range is 0-%d\n",
c_type_str, ICE_ITR_MAX);
return -EINVAL;
}
- /* hardware only supports an ITR granularity of 2us */
- if (coalesce_usecs % 2 != 0) {
- netdev_info(vsi->netdev,
- "Invalid value, %s-usecs must be even\n",
- c_type_str);
- return -EINVAL;
- }
-
if (use_adaptive_coalesce) {
rc->itr_setting |= ICE_ITR_DYNAMIC;
} else {
- /* store user facing value how it was set */
+ /* save the user set usecs */
rc->itr_setting = coalesce_usecs;
- /* set to static and convert to value HW understands */
- rc->target_itr =
- ITR_TO_REG(ITR_REG_ALIGN(rc->itr_setting));
+ /* device ITR granularity is in 2 usec increments */
+ rc->target_itr = ITR_REG_ALIGN(rc->itr_setting);
}
return 0;
@@ -3877,6 +3582,30 @@ ice_is_coalesce_param_invalid(struct net_device *netdev,
}
/**
+ * ice_print_if_odd_usecs - print message if user tries to set odd [tx|rx]-usecs
+ * @netdev: netdev used for print
+ * @itr_setting: previous user setting
+ * @use_adaptive_coalesce: if adaptive coalesce is enabled or being enabled
+ * @coalesce_usecs: requested value of [tx|rx]-usecs
+ * @c_type_str: either "rx" or "tx" to match user set field of [tx|rx]-usecs
+ */
+static void
+ice_print_if_odd_usecs(struct net_device *netdev, u16 itr_setting,
+ u32 use_adaptive_coalesce, u32 coalesce_usecs,
+ const char *c_type_str)
+{
+ if (use_adaptive_coalesce)
+ return;
+
+ itr_setting = ITR_TO_REG(itr_setting);
+
+ if (itr_setting != coalesce_usecs && (coalesce_usecs % 2))
+ netdev_info(netdev, "User set %s-usecs to %d, device only supports even values. Rounding down and attempting to set %s-usecs to %d\n",
+ c_type_str, coalesce_usecs, c_type_str,
+ ITR_REG_ALIGN(coalesce_usecs));
+}
+
+/**
* __ice_set_coalesce - set ITR/INTRL values for the device
* @netdev: pointer to the netdev associated with this query
* @ec: ethtool structure to fill with driver's coalesce settings
@@ -3896,8 +3625,19 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
return -EINVAL;
if (q_num < 0) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[0];
int v_idx;
+ if (q_vector) {
+ ice_print_if_odd_usecs(netdev, q_vector->rx.itr_setting,
+ ec->use_adaptive_rx_coalesce,
+ ec->rx_coalesce_usecs, "rx");
+
+ ice_print_if_odd_usecs(netdev, q_vector->tx.itr_setting,
+ ec->use_adaptive_tx_coalesce,
+ ec->tx_coalesce_usecs, "tx");
+ }
+
ice_for_each_q_vector(vsi, v_idx) {
/* In some cases if DCB is configured the num_[rx|tx]q
* can be less than vsi->num_q_vectors. This check
@@ -4012,8 +3752,7 @@ ice_get_module_info(struct net_device *netdev,
}
break;
default:
- netdev_warn(netdev,
- "SFF Module Type not recognized.\n");
+ netdev_warn(netdev, "SFF Module Type not recognized.\n");
return -EINVAL;
}
return 0;
@@ -4081,11 +3820,11 @@ ice_get_module_eeprom(struct net_device *netdev,
static const struct ethtool_ops ice_ethtool_ops = {
.get_link_ksettings = ice_get_link_ksettings,
.set_link_ksettings = ice_set_link_ksettings,
- .get_drvinfo = ice_get_drvinfo,
- .get_regs_len = ice_get_regs_len,
- .get_regs = ice_get_regs,
- .get_msglevel = ice_get_msglevel,
- .set_msglevel = ice_set_msglevel,
+ .get_drvinfo = ice_get_drvinfo,
+ .get_regs_len = ice_get_regs_len,
+ .get_regs = ice_get_regs,
+ .get_msglevel = ice_get_msglevel,
+ .set_msglevel = ice_set_msglevel,
.self_test = ice_self_test,
.get_link = ethtool_op_get_link,
.get_eeprom_len = ice_get_eeprom_len,
@@ -4112,8 +3851,8 @@ static const struct ethtool_ops ice_ethtool_ops = {
.get_channels = ice_get_channels,
.set_channels = ice_set_channels,
.get_ts_info = ethtool_op_get_ts_info,
- .get_per_queue_coalesce = ice_get_per_q_coalesce,
- .set_per_queue_coalesce = ice_set_per_q_coalesce,
+ .get_per_queue_coalesce = ice_get_per_q_coalesce,
+ .set_per_queue_coalesce = ice_set_per_q_coalesce,
.get_fecparam = ice_get_fecparam,
.set_fecparam = ice_set_fecparam,
.get_module_info = ice_get_module_info,
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index f2cabab..6db3d04 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -267,8 +267,14 @@
#define GLNVM_GENS_SR_SIZE_S 5
#define GLNVM_GENS_SR_SIZE_M ICE_M(0x7, 5)
#define GLNVM_ULD 0x000B6008
+#define GLNVM_ULD_PCIER_DONE_M BIT(0)
+#define GLNVM_ULD_PCIER_DONE_1_M BIT(1)
#define GLNVM_ULD_CORER_DONE_M BIT(3)
#define GLNVM_ULD_GLOBR_DONE_M BIT(4)
+#define GLNVM_ULD_POR_DONE_M BIT(5)
+#define GLNVM_ULD_POR_DONE_1_M BIT(8)
+#define GLNVM_ULD_PCIER_DONE_2_M BIT(9)
+#define GLNVM_ULD_PE_DONE_M BIT(10)
#define GLPCI_CNF2 0x000BE004
#define GLPCI_CNF2_CACHELINE_SIZE_M BIT(1)
#define PF_FUNC_RID 0x0009E880
@@ -331,7 +337,6 @@
#define GLV_TEPC(_VSI) (0x00312000 + ((_VSI) * 4))
#define GLV_UPRCL(_i) (0x003B2000 + ((_i) * 8))
#define GLV_UPTCL(_i) (0x0030A000 + ((_i) * 8))
-#define PF_VT_PFALLOC_HIF 0x0009DD80
#define VSIQF_HKEY_MAX_INDEX 12
#define VSIQF_HLUT_MAX_INDEX 15
#define VFINT_DYN_CTLN(_i) (0x00003800 + ((_i) * 4))
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 1874c9f..d974e2f 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -117,8 +117,7 @@ static void ice_vsi_set_num_desc(struct ice_vsi *vsi)
vsi->num_tx_desc = ICE_DFLT_NUM_TX_DESC;
break;
default:
- dev_dbg(&vsi->back->pdev->dev,
- "Not setting number of Tx/Rx descriptors for VSI type %d\n",
+ dev_dbg(ice_pf_to_dev(vsi->back), "Not setting number of Tx/Rx descriptors for VSI type %d\n",
vsi->type);
break;
}
@@ -724,7 +723,7 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
vsi->num_txq = tx_count;
if (vsi->type == ICE_VSI_VF && vsi->num_txq != vsi->num_rxq) {
- dev_dbg(&vsi->back->pdev->dev, "VF VSI should have same number of Tx and Rx queues. Hence making them equal\n");
+ dev_dbg(ice_pf_to_dev(vsi->back), "VF VSI should have same number of Tx and Rx queues. Hence making them equal\n");
/* since there is a chance that num_rxq could have been changed
* in the above for loop, make num_txq equal to num_rxq.
*/
@@ -929,8 +928,7 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
vsi->base_vector = ice_get_res(pf, pf->irq_tracker, num_q_vectors,
vsi->idx);
if (vsi->base_vector < 0) {
- dev_err(dev,
- "Failed to get tracking for %d vectors for VSI %d, err=%d\n",
+ dev_err(dev, "Failed to get tracking for %d vectors for VSI %d, err=%d\n",
num_q_vectors, vsi->vsi_num, vsi->base_vector);
return -ENOENT;
}
@@ -1232,8 +1230,9 @@ static void ice_vsi_set_rss_flow_fld(struct ice_vsi *vsi)
*
* Returns 0 on success or ENOMEM on failure.
*/
-int ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
- const u8 *macaddr)
+int
+ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
+ const u8 *macaddr)
{
struct ice_fltr_list_entry *tmp;
struct ice_pf *pf = vsi->back;
@@ -1392,12 +1391,10 @@ int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid)
status = ice_remove_vlan(&pf->hw, &tmp_add_list);
if (status == ICE_ERR_DOES_NOT_EXIST) {
- dev_dbg(dev,
- "Failed to remove VLAN %d on VSI %i, it does not exist, status: %d\n",
+ dev_dbg(dev, "Failed to remove VLAN %d on VSI %i, it does not exist, status: %d\n",
vid, vsi->vsi_num, status);
} else if (status) {
- dev_err(dev,
- "Error removing VLAN %d on vsi %i error: %d\n",
+ dev_err(dev, "Error removing VLAN %d on vsi %i error: %d\n",
vid, vsi->vsi_num, status);
err = -EIO;
}
@@ -1453,8 +1450,7 @@ int ice_vsi_cfg_rxqs(struct ice_vsi *vsi)
err = ice_setup_rx_ctx(vsi->rx_rings[i]);
if (err) {
- dev_err(&vsi->back->pdev->dev,
- "ice_setup_rx_ctx failed for RxQ %d, err %d\n",
+ dev_err(ice_pf_to_dev(vsi->back), "ice_setup_rx_ctx failed for RxQ %d, err %d\n",
i, err);
return err;
}
@@ -1623,7 +1619,7 @@ int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi)
status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
if (status) {
- dev_err(&vsi->back->pdev->dev, "update VSI for VLAN insert failed, err %d aq_err %d\n",
+ dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN insert failed, err %d aq_err %d\n",
status, hw->adminq.sq_last_status);
ret = -EIO;
goto out;
@@ -1669,7 +1665,7 @@ int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
if (status) {
- dev_err(&vsi->back->pdev->dev, "update VSI for VLAN strip failed, ena = %d err %d aq_err %d\n",
+ dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN strip failed, ena = %d err %d aq_err %d\n",
ena, status, hw->adminq.sq_last_status);
ret = -EIO;
goto out;
@@ -1834,8 +1830,7 @@ ice_vsi_set_q_vectors_reg_idx(struct ice_vsi *vsi)
struct ice_q_vector *q_vector = vsi->q_vectors[i];
if (!q_vector) {
- dev_err(&vsi->back->pdev->dev,
- "Failed to set reg_idx on q_vector %d VSI %d\n",
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to set reg_idx on q_vector %d VSI %d\n",
i, vsi->vsi_num);
goto clear_reg_idx;
}
@@ -1898,8 +1893,7 @@ ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule)
status = ice_remove_eth_mac(&pf->hw, &tmp_add_list);
if (status)
- dev_err(dev,
- "Failure Adding or Removing Ethertype on VSI %i error: %d\n",
+ dev_err(dev, "Failure Adding or Removing Ethertype on VSI %i error: %d\n",
vsi->vsi_num, status);
ice_free_fltr_list(dev, &tmp_add_list);
@@ -2384,8 +2378,7 @@ ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id)
return -EINVAL;
if (!needed || needed > res->num_entries || id >= ICE_RES_VALID_BIT) {
- dev_err(ice_pf_to_dev(pf),
- "param err: needed=%d, num_entries = %d id=0x%04x\n",
+ dev_err(ice_pf_to_dev(pf), "param err: needed=%d, num_entries = %d id=0x%04x\n",
needed, res->num_entries, id);
return -EINVAL;
}
@@ -2686,7 +2679,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
ice_vsi_put_qs(vsi);
ice_vsi_clear_rings(vsi);
ice_vsi_free_arrays(vsi);
- ice_dev_onetime_setup(&pf->hw);
if (vsi->type == ICE_VSI_VF)
ice_vsi_set_num_qs(vsi, vf->vf_id);
else
@@ -2765,8 +2757,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
max_txqs);
if (status) {
- dev_err(ice_pf_to_dev(pf),
- "VSI %d failed lan queue config, error %d\n",
+ dev_err(ice_pf_to_dev(pf), "VSI %d failed lan queue config, error %d\n",
vsi->vsi_num, status);
if (init_vsi) {
ret = -EIO;
@@ -2834,8 +2825,8 @@ static void ice_vsi_update_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx)
int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
{
u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
- struct ice_vsi_ctx *ctx;
struct ice_pf *pf = vsi->back;
+ struct ice_vsi_ctx *ctx;
enum ice_status status;
struct device *dev;
int i, ret = 0;
@@ -2892,25 +2883,6 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
#endif /* CONFIG_DCB */
/**
- * ice_nvm_version_str - format the NVM version strings
- * @hw: ptr to the hardware info
- */
-char *ice_nvm_version_str(struct ice_hw *hw)
-{
- u8 oem_ver, oem_patch, ver_hi, ver_lo;
- static char buf[ICE_NVM_VER_LEN];
- u16 oem_build;
-
- ice_get_nvm_version(hw, &oem_ver, &oem_build, &oem_patch, &ver_hi,
- &ver_lo);
-
- snprintf(buf, sizeof(buf), "%x.%02x 0x%x %d.%d.%d", ver_hi, ver_lo,
- hw->nvm.eetrack, oem_ver, oem_build, oem_patch);
-
- return buf;
-}
-
-/**
* ice_update_ring_stats - Update ring statistics
* @ring: ring to update
* @cont: used to increment per-vector counters
@@ -2981,7 +2953,7 @@ ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set)
status = ice_remove_mac(&vsi->back->hw, &tmp_add_list);
cfg_mac_fltr_exit:
- ice_free_fltr_list(&vsi->back->pdev->dev, &tmp_add_list);
+ ice_free_fltr_list(ice_pf_to_dev(vsi->back), &tmp_add_list);
return status;
}
@@ -3043,16 +3015,14 @@ int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi)
/* another VSI is already the default VSI for this switch */
if (ice_is_dflt_vsi_in_use(sw)) {
- dev_err(dev,
- "Default forwarding VSI %d already in use, disable it and try again\n",
+ dev_err(dev, "Default forwarding VSI %d already in use, disable it and try again\n",
sw->dflt_vsi->vsi_num);
return -EEXIST;
}
status = ice_cfg_dflt_vsi(&vsi->back->hw, vsi->idx, true, ICE_FLTR_RX);
if (status) {
- dev_err(dev,
- "Failed to set VSI %d as the default forwarding VSI, error %d\n",
+ dev_err(dev, "Failed to set VSI %d as the default forwarding VSI, error %d\n",
vsi->vsi_num, status);
return -EIO;
}
@@ -3091,8 +3061,7 @@ int ice_clear_dflt_vsi(struct ice_sw *sw)
status = ice_cfg_dflt_vsi(&dflt_vsi->back->hw, dflt_vsi->idx, false,
ICE_FLTR_RX);
if (status) {
- dev_err(dev,
- "Failed to clear the default forwarding VSI %d, error %d\n",
+ dev_err(dev, "Failed to clear the default forwarding VSI %d, error %d\n",
dflt_vsi->vsi_num, status);
return -EIO;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 68fd0d4..e2c0dad 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -97,8 +97,6 @@ void ice_vsi_cfg_frame_size(struct ice_vsi *vsi);
u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran);
-char *ice_nvm_version_str(struct ice_hw *hw);
-
enum ice_status
ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 5ae67160..5ef2805 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -162,8 +162,7 @@ static int ice_init_mac_fltr(struct ice_pf *pf)
* had an error
*/
if (status && vsi->netdev->reg_state == NETREG_REGISTERED) {
- dev_err(ice_pf_to_dev(pf),
- "Could not add MAC filters error %d. Unregistering device\n",
+ dev_err(ice_pf_to_dev(pf), "Could not add MAC filters error %d. Unregistering device\n",
status);
unregister_netdev(vsi->netdev);
free_netdev(vsi->netdev);
@@ -269,7 +268,7 @@ static int ice_cfg_promisc(struct ice_vsi *vsi, u8 promisc_m, bool set_promisc)
*/
static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
{
- struct device *dev = &vsi->back->pdev->dev;
+ struct device *dev = ice_pf_to_dev(vsi->back);
struct net_device *netdev = vsi->netdev;
bool promisc_forced_on = false;
struct ice_pf *pf = vsi->back;
@@ -335,8 +334,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
!test_and_set_bit(__ICE_FLTR_OVERFLOW_PROMISC,
vsi->state)) {
promisc_forced_on = true;
- netdev_warn(netdev,
- "Reached MAC filter limit, forcing promisc mode on VSI %d\n",
+ netdev_warn(netdev, "Reached MAC filter limit, forcing promisc mode on VSI %d\n",
vsi->vsi_num);
} else {
err = -EIO;
@@ -382,8 +380,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
if (!ice_is_dflt_vsi_in_use(pf->first_sw)) {
err = ice_set_dflt_vsi(pf->first_sw, vsi);
if (err && err != -EEXIST) {
- netdev_err(netdev,
- "Error %d setting default VSI %i Rx rule\n",
+ netdev_err(netdev, "Error %d setting default VSI %i Rx rule\n",
err, vsi->vsi_num);
vsi->current_netdev_flags &=
~IFF_PROMISC;
@@ -395,8 +392,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
if (ice_is_vsi_dflt_vsi(pf->first_sw, vsi)) {
err = ice_clear_dflt_vsi(pf->first_sw);
if (err) {
- netdev_err(netdev,
- "Error %d clearing default VSI %i Rx rule\n",
+ netdev_err(netdev, "Error %d clearing default VSI %i Rx rule\n",
err, vsi->vsi_num);
vsi->current_netdev_flags |=
IFF_PROMISC;
@@ -752,7 +748,7 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
kfree(caps);
done:
- netdev_info(vsi->netdev, "NIC Link is up %sbps, Requested FEC: %s, FEC: %s, Autoneg: %s, Flow Control: %s\n",
+ netdev_info(vsi->netdev, "NIC Link is up %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
speed, fec_req, fec, an, fc);
ice_print_topo_conflict(vsi);
}
@@ -815,8 +811,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
*/
result = ice_update_link_info(pi);
if (result)
- dev_dbg(dev,
- "Failed to update link status and re-enable link events for port %d\n",
+ dev_dbg(dev, "Failed to update link status and re-enable link events for port %d\n",
pi->lport);
/* if the old link up/down and speed is the same as the new */
@@ -834,13 +829,13 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
result = ice_aq_set_link_restart_an(pi, false, NULL);
if (result) {
- dev_dbg(dev,
- "Failed to set link down, VSI %d error %d\n",
+ dev_dbg(dev, "Failed to set link down, VSI %d error %d\n",
vsi->vsi_num, result);
return result;
}
}
+ ice_dcb_rebuild(pf);
ice_vsi_link_event(vsi, link_up);
ice_print_link_msg(vsi, link_up);
@@ -892,15 +887,13 @@ static int ice_init_link_events(struct ice_port_info *pi)
ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL));
if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) {
- dev_dbg(ice_hw_to_dev(pi->hw),
- "Failed to set link event mask for port %d\n",
+ dev_dbg(ice_hw_to_dev(pi->hw), "Failed to set link event mask for port %d\n",
pi->lport);
return -EIO;
}
if (ice_aq_get_link_info(pi, true, NULL, NULL)) {
- dev_dbg(ice_hw_to_dev(pi->hw),
- "Failed to enable link events for port %d\n",
+ dev_dbg(ice_hw_to_dev(pi->hw), "Failed to enable link events for port %d\n",
pi->lport);
return -EIO;
}
@@ -929,8 +922,8 @@ ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event)
!!(link_data->link_info & ICE_AQ_LINK_UP),
le16_to_cpu(link_data->link_speed));
if (status)
- dev_dbg(ice_pf_to_dev(pf),
- "Could not process link event, error %d\n", status);
+ dev_dbg(ice_pf_to_dev(pf), "Could not process link event, error %d\n",
+ status);
return status;
}
@@ -979,13 +972,11 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
dev_dbg(dev, "%s Receive Queue VF Error detected\n",
qtype);
if (val & PF_FW_ARQLEN_ARQOVFL_M) {
- dev_dbg(dev,
- "%s Receive Queue Overflow Error detected\n",
+ dev_dbg(dev, "%s Receive Queue Overflow Error detected\n",
qtype);
}
if (val & PF_FW_ARQLEN_ARQCRIT_M)
- dev_dbg(dev,
- "%s Receive Queue Critical Error detected\n",
+ dev_dbg(dev, "%s Receive Queue Critical Error detected\n",
qtype);
val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
PF_FW_ARQLEN_ARQCRIT_M);
@@ -998,8 +989,8 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
PF_FW_ATQLEN_ATQCRIT_M)) {
oldval = val;
if (val & PF_FW_ATQLEN_ATQVFE_M)
- dev_dbg(dev,
- "%s Send Queue VF Error detected\n", qtype);
+ dev_dbg(dev, "%s Send Queue VF Error detected\n",
+ qtype);
if (val & PF_FW_ATQLEN_ATQOVFL_M) {
dev_dbg(dev, "%s Send Queue Overflow Error detected\n",
qtype);
@@ -1048,8 +1039,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
ice_dcb_process_lldp_set_mib_change(pf, &event);
break;
default:
- dev_dbg(dev,
- "%s Receive Queue unknown event 0x%04x ignored\n",
+ dev_dbg(dev, "%s Receive Queue unknown event 0x%04x ignored\n",
qtype, opcode);
break;
}
@@ -1238,7 +1228,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
u16 queue = ((reg & GL_MDET_TX_TCLAN_QNUM_M) >>
GL_MDET_TX_TCLAN_QNUM_S);
- if (netif_msg_rx_err(pf))
+ if (netif_msg_tx_err(pf))
dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
event, queue, pf_num, vf_num);
wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff);
@@ -1335,8 +1325,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
vf->num_mdd_events++;
if (vf->num_mdd_events &&
vf->num_mdd_events <= ICE_MDD_EVENTS_THRESHOLD)
- dev_info(dev,
- "VF %d has had %llu MDD events since last boot, Admin might need to reload AVF driver with this number of events\n",
+ dev_info(dev, "VF %d has had %llu MDD events since last boot, Admin might need to reload AVF driver with this number of events\n",
i, vf->num_mdd_events);
}
}
@@ -1367,7 +1356,7 @@ static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
if (vsi->type != ICE_VSI_PF)
return 0;
- dev = &vsi->back->pdev->dev;
+ dev = ice_pf_to_dev(vsi->back);
pi = vsi->port_info;
@@ -1378,8 +1367,7 @@ static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
NULL);
if (retcode) {
- dev_err(dev,
- "Failed to get phy capabilities, VSI %d error %d\n",
+ dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n",
vsi->vsi_num, retcode);
retcode = -EIO;
goto out;
@@ -1649,8 +1637,8 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
err = devm_request_irq(dev, irq_num, vsi->irq_handler, 0,
q_vector->name, q_vector);
if (err) {
- netdev_err(vsi->netdev,
- "MSIX request_irq failed, error: %d\n", err);
+ netdev_err(vsi->netdev, "MSIX request_irq failed, error: %d\n",
+ err);
goto free_q_irqs;
}
@@ -1685,7 +1673,7 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
*/
static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
{
- struct device *dev = &vsi->back->pdev->dev;
+ struct device *dev = ice_pf_to_dev(vsi->back);
int i;
for (i = 0; i < vsi->num_xdp_txq; i++) {
@@ -2664,14 +2652,12 @@ static void ice_set_pf_caps(struct ice_pf *pf)
clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
if (func_caps->common_cap.dcb)
set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
-#ifdef CONFIG_PCI_IOV
clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
if (func_caps->common_cap.sr_iov_1_1) {
set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
pf->num_vfs_supported = min_t(int, func_caps->num_allocd_vfs,
ICE_MAX_VF_COUNT);
}
-#endif /* CONFIG_PCI_IOV */
clear_bit(ICE_FLAG_RSS_ENA, pf->flags);
if (func_caps->common_cap.rss_table_size)
set_bit(ICE_FLAG_RSS_ENA, pf->flags);
@@ -2764,8 +2750,7 @@ static int ice_ena_msix_range(struct ice_pf *pf)
}
if (v_actual < v_budget) {
- dev_warn(dev,
- "not enough OS MSI-X vectors. requested = %d, obtained = %d\n",
+ dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n",
v_budget, v_actual);
/* 2 vectors for LAN (traffic + OICR) */
#define ICE_MIN_LAN_VECS 2
@@ -2787,8 +2772,7 @@ static int ice_ena_msix_range(struct ice_pf *pf)
goto exit_err;
no_hw_vecs_left_err:
- dev_err(dev,
- "not enough device MSI-X vectors. requested = %d, available = %d\n",
+ dev_err(dev, "not enough device MSI-X vectors. requested = %d, available = %d\n",
needed, v_left);
err = -ERANGE;
exit_err:
@@ -2921,16 +2905,14 @@ ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status)
!memcmp(hw->pkg_name, hw->active_pkg_name,
sizeof(hw->pkg_name))) {
if (hw->pkg_dwnld_status == ICE_AQ_RC_EEXIST)
- dev_info(dev,
- "DDP package already present on device: %s version %d.%d.%d.%d\n",
+ dev_info(dev, "DDP package already present on device: %s version %d.%d.%d.%d\n",
hw->active_pkg_name,
hw->active_pkg_ver.major,
hw->active_pkg_ver.minor,
hw->active_pkg_ver.update,
hw->active_pkg_ver.draft);
else
- dev_info(dev,
- "The DDP package was successfully loaded: %s version %d.%d.%d.%d\n",
+ dev_info(dev, "The DDP package was successfully loaded: %s version %d.%d.%d.%d\n",
hw->active_pkg_name,
hw->active_pkg_ver.major,
hw->active_pkg_ver.minor,
@@ -2938,8 +2920,7 @@ ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status)
hw->active_pkg_ver.draft);
} else if (hw->active_pkg_ver.major != ICE_PKG_SUPP_VER_MAJ ||
hw->active_pkg_ver.minor != ICE_PKG_SUPP_VER_MNR) {
- dev_err(dev,
- "The device has a DDP package that is not supported by the driver. The device has package '%s' version %d.%d.x.x. The driver requires version %d.%d.x.x. Entering Safe Mode.\n",
+ dev_err(dev, "The device has a DDP package that is not supported by the driver. The device has package '%s' version %d.%d.x.x. The driver requires version %d.%d.x.x. Entering Safe Mode.\n",
hw->active_pkg_name,
hw->active_pkg_ver.major,
hw->active_pkg_ver.minor,
@@ -2947,8 +2928,7 @@ ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status)
*status = ICE_ERR_NOT_SUPPORTED;
} else if (hw->active_pkg_ver.major == ICE_PKG_SUPP_VER_MAJ &&
hw->active_pkg_ver.minor == ICE_PKG_SUPP_VER_MNR) {
- dev_info(dev,
- "The driver could not load the DDP package file because a compatible DDP package is already present on the device. The device has package '%s' version %d.%d.%d.%d. The package file found by the driver: '%s' version %d.%d.%d.%d.\n",
+ dev_info(dev, "The driver could not load the DDP package file because a compatible DDP package is already present on the device. The device has package '%s' version %d.%d.%d.%d. The package file found by the driver: '%s' version %d.%d.%d.%d.\n",
hw->active_pkg_name,
hw->active_pkg_ver.major,
hw->active_pkg_ver.minor,
@@ -2960,54 +2940,46 @@ ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status)
hw->pkg_ver.update,
hw->pkg_ver.draft);
} else {
- dev_err(dev,
- "An unknown error occurred when loading the DDP package, please reboot the system. If the problem persists, update the NVM. Entering Safe Mode.\n");
+ dev_err(dev, "An unknown error occurred when loading the DDP package, please reboot the system. If the problem persists, update the NVM. Entering Safe Mode.\n");
*status = ICE_ERR_NOT_SUPPORTED;
}
break;
case ICE_ERR_BUF_TOO_SHORT:
/* fall-through */
case ICE_ERR_CFG:
- dev_err(dev,
- "The DDP package file is invalid. Entering Safe Mode.\n");
+ dev_err(dev, "The DDP package file is invalid. Entering Safe Mode.\n");
break;
case ICE_ERR_NOT_SUPPORTED:
/* Package File version not supported */
if (hw->pkg_ver.major > ICE_PKG_SUPP_VER_MAJ ||
(hw->pkg_ver.major == ICE_PKG_SUPP_VER_MAJ &&
hw->pkg_ver.minor > ICE_PKG_SUPP_VER_MNR))
- dev_err(dev,
- "The DDP package file version is higher than the driver supports. Please use an updated driver. Entering Safe Mode.\n");
+ dev_err(dev, "The DDP package file version is higher than the driver supports. Please use an updated driver. Entering Safe Mode.\n");
else if (hw->pkg_ver.major < ICE_PKG_SUPP_VER_MAJ ||
(hw->pkg_ver.major == ICE_PKG_SUPP_VER_MAJ &&
hw->pkg_ver.minor < ICE_PKG_SUPP_VER_MNR))
- dev_err(dev,
- "The DDP package file version is lower than the driver supports. The driver requires version %d.%d.x.x. Please use an updated DDP Package file. Entering Safe Mode.\n",
+ dev_err(dev, "The DDP package file version is lower than the driver supports. The driver requires version %d.%d.x.x. Please use an updated DDP Package file. Entering Safe Mode.\n",
ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
break;
case ICE_ERR_AQ_ERROR:
switch (hw->pkg_dwnld_status) {
case ICE_AQ_RC_ENOSEC:
case ICE_AQ_RC_EBADSIG:
- dev_err(dev,
- "The DDP package could not be loaded because its signature is not valid. Please use a valid DDP Package. Entering Safe Mode.\n");
+ dev_err(dev, "The DDP package could not be loaded because its signature is not valid. Please use a valid DDP Package. Entering Safe Mode.\n");
return;
case ICE_AQ_RC_ESVN:
- dev_err(dev,
- "The DDP Package could not be loaded because its security revision is too low. Please use an updated DDP Package. Entering Safe Mode.\n");
+ dev_err(dev, "The DDP Package could not be loaded because its security revision is too low. Please use an updated DDP Package. Entering Safe Mode.\n");
return;
case ICE_AQ_RC_EBADMAN:
case ICE_AQ_RC_EBADBUF:
- dev_err(dev,
- "An error occurred on the device while loading the DDP package. The device will be reset.\n");
+ dev_err(dev, "An error occurred on the device while loading the DDP package. The device will be reset.\n");
return;
default:
break;
}
/* fall-through */
default:
- dev_err(dev,
- "An unknown error (%d) occurred when loading the DDP package. Entering Safe Mode.\n",
+ dev_err(dev, "An unknown error (%d) occurred when loading the DDP package. Entering Safe Mode.\n",
*status);
break;
}
@@ -3038,8 +3010,7 @@ ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf)
status = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
ice_log_pkg_init(hw, &status);
} else {
- dev_err(dev,
- "The DDP package file failed to load. Entering Safe Mode.\n");
+ dev_err(dev, "The DDP package file failed to load. Entering Safe Mode.\n");
}
if (status) {
@@ -3065,8 +3036,7 @@ ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf)
static void ice_verify_cacheline_size(struct ice_pf *pf)
{
if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M)
- dev_warn(ice_pf_to_dev(pf),
- "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n",
+ dev_warn(ice_pf_to_dev(pf), "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n",
ICE_CACHE_LINE_BYTES);
}
@@ -3159,8 +3129,7 @@ static void ice_request_fw(struct ice_pf *pf)
dflt_pkg_load:
err = request_firmware(&firmware, ICE_DDP_PKG_FILE, dev);
if (err) {
- dev_err(dev,
- "The DDP package file was not found or could not be read. Entering Safe Mode\n");
+ dev_err(dev, "The DDP package file was not found or could not be read. Entering Safe Mode\n");
return;
}
@@ -3184,7 +3153,9 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
struct ice_hw *hw;
int err;
- /* this driver uses devres, see Documentation/driver-api/driver-model/devres.rst */
+ /* this driver uses devres, see
+ * Documentation/driver-api/driver-model/devres.rst
+ */
err = pcim_enable_device(pdev);
if (err)
return err;
@@ -3245,11 +3216,6 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
goto err_exit_unroll;
}
- dev_info(dev, "firmware %d.%d.%d api %d.%d.%d nvm %s build 0x%08x\n",
- hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch,
- hw->api_maj_ver, hw->api_min_ver, hw->api_patch,
- ice_nvm_version_str(hw), hw->fw_build);
-
ice_request_fw(pf);
/* if ice_request_fw fails, ICE_FLAG_ADV_FEATURES bit won't be
@@ -3257,8 +3223,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
* true
*/
if (ice_is_safe_mode(pf)) {
- dev_err(dev,
- "Package download failed. Advanced features disabled - Device now in Safe Mode\n");
+ dev_err(dev, "Package download failed. Advanced features disabled - Device now in Safe Mode\n");
/* we already got function/device capabilities but these don't
* reflect what the driver needs to do in safe mode. Instead of
* adding conditional logic everywhere to ignore these
@@ -3335,8 +3300,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
/* tell the firmware we are up */
err = ice_send_version(pf);
if (err) {
- dev_err(dev,
- "probe failed sending driver version %s. error: %d\n",
+ dev_err(dev, "probe failed sending driver version %s. error: %d\n",
ice_drv_ver, err);
goto err_alloc_sw_unroll;
}
@@ -3477,8 +3441,7 @@ static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev)
err = pci_enable_device_mem(pdev);
if (err) {
- dev_err(&pdev->dev,
- "Cannot re-enable PCI device after reset, error %d\n",
+ dev_err(&pdev->dev, "Cannot re-enable PCI device after reset, error %d\n",
err);
result = PCI_ERS_RESULT_DISCONNECT;
} else {
@@ -3497,8 +3460,7 @@ static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev)
err = pci_cleanup_aer_uncorrect_error_status(pdev);
if (err)
- dev_dbg(&pdev->dev,
- "pci_cleanup_aer_uncorrect_error_status failed, error %d\n",
+ dev_dbg(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status failed, error %d\n",
err);
/* non-fatal, continue */
@@ -3517,8 +3479,8 @@ static void ice_pci_err_resume(struct pci_dev *pdev)
struct ice_pf *pf = pci_get_drvdata(pdev);
if (!pf) {
- dev_err(&pdev->dev,
- "%s failed, device is unrecoverable\n", __func__);
+ dev_err(&pdev->dev, "%s failed, device is unrecoverable\n",
+ __func__);
return;
}
@@ -3766,8 +3728,7 @@ ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate)
/* Validate maxrate requested is within permitted range */
if (maxrate && (maxrate > (ICE_SCHED_MAX_BW / 1000))) {
- netdev_err(netdev,
- "Invalid max rate %d specified for the queue %d\n",
+ netdev_err(netdev, "Invalid max rate %d specified for the queue %d\n",
maxrate, queue_index);
return -EINVAL;
}
@@ -3783,8 +3744,8 @@ ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate)
status = ice_cfg_q_bw_lmt(vsi->port_info, vsi->idx, tc,
q_handle, ICE_MAX_BW, maxrate * 1000);
if (status) {
- netdev_err(netdev,
- "Unable to set Tx max rate, error %d\n", status);
+ netdev_err(netdev, "Unable to set Tx max rate, error %d\n",
+ status);
return -EIO;
}
@@ -3876,15 +3837,13 @@ ice_set_features(struct net_device *netdev, netdev_features_t features)
/* Don't set any netdev advanced features with device in Safe Mode */
if (ice_is_safe_mode(vsi->back)) {
- dev_err(&vsi->back->pdev->dev,
- "Device is in Safe Mode - not enabling advanced netdev features\n");
+ dev_err(ice_pf_to_dev(vsi->back), "Device is in Safe Mode - not enabling advanced netdev features\n");
return ret;
}
/* Do not change setting during reset */
if (ice_is_reset_in_progress(pf->state)) {
- dev_err(&vsi->back->pdev->dev,
- "Device is resetting, changing advanced netdev features temporarily unavailable.\n");
+ dev_err(ice_pf_to_dev(vsi->back), "Device is resetting, changing advanced netdev features temporarily unavailable.\n");
return -EBUSY;
}
@@ -4372,21 +4331,18 @@ int ice_down(struct ice_vsi *vsi)
tx_err = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
if (tx_err)
- netdev_err(vsi->netdev,
- "Failed stop Tx rings, VSI %d error %d\n",
+ netdev_err(vsi->netdev, "Failed stop Tx rings, VSI %d error %d\n",
vsi->vsi_num, tx_err);
if (!tx_err && ice_is_xdp_ena_vsi(vsi)) {
tx_err = ice_vsi_stop_xdp_tx_rings(vsi);
if (tx_err)
- netdev_err(vsi->netdev,
- "Failed stop XDP rings, VSI %d error %d\n",
+ netdev_err(vsi->netdev, "Failed stop XDP rings, VSI %d error %d\n",
vsi->vsi_num, tx_err);
}
rx_err = ice_vsi_stop_rx_rings(vsi);
if (rx_err)
- netdev_err(vsi->netdev,
- "Failed stop Rx rings, VSI %d error %d\n",
+ netdev_err(vsi->netdev, "Failed stop Rx rings, VSI %d error %d\n",
vsi->vsi_num, rx_err);
ice_napi_disable_all(vsi);
@@ -4394,8 +4350,7 @@ int ice_down(struct ice_vsi *vsi)
if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) {
link_err = ice_force_phys_link_state(vsi, false);
if (link_err)
- netdev_err(vsi->netdev,
- "Failed to set physical link down, VSI %d error %d\n",
+ netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n",
vsi->vsi_num, link_err);
}
@@ -4406,8 +4361,7 @@ int ice_down(struct ice_vsi *vsi)
ice_clean_rx_ring(vsi->rx_rings[i]);
if (tx_err || rx_err || link_err) {
- netdev_err(vsi->netdev,
- "Failed to close VSI 0x%04X on switch 0x%04X\n",
+ netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n",
vsi->vsi_num, vsi->vsw->sw_id);
return -EIO;
}
@@ -4426,7 +4380,7 @@ int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
int i, err = 0;
if (!vsi->num_txq) {
- dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Tx queues\n",
+ dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Tx queues\n",
vsi->vsi_num);
return -EINVAL;
}
@@ -4457,7 +4411,7 @@ int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
int i, err = 0;
if (!vsi->num_rxq) {
- dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Rx queues\n",
+ dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Rx queues\n",
vsi->vsi_num);
return -EINVAL;
}
@@ -4554,8 +4508,7 @@ static void ice_vsi_release_all(struct ice_pf *pf)
err = ice_vsi_release(pf->vsi[i]);
if (err)
- dev_dbg(ice_pf_to_dev(pf),
- "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n",
+ dev_dbg(ice_pf_to_dev(pf), "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n",
i, err, pf->vsi[i]->vsi_num);
}
}
@@ -4582,8 +4535,7 @@ static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
/* rebuild the VSI */
err = ice_vsi_rebuild(vsi, true);
if (err) {
- dev_err(dev,
- "rebuild VSI failed, err %d, VSI index %d, type %s\n",
+ dev_err(dev, "rebuild VSI failed, err %d, VSI index %d, type %s\n",
err, vsi->idx, ice_vsi_type_str(type));
return err;
}
@@ -4591,8 +4543,7 @@ static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
/* replay filters for the VSI */
status = ice_replay_vsi(&pf->hw, vsi->idx);
if (status) {
- dev_err(dev,
- "replay VSI failed, status %d, VSI index %d, type %s\n",
+ dev_err(dev, "replay VSI failed, status %d, VSI index %d, type %s\n",
status, vsi->idx, ice_vsi_type_str(type));
return -EIO;
}
@@ -4605,8 +4556,7 @@ static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
/* enable the VSI */
err = ice_ena_vsi(vsi, false);
if (err) {
- dev_err(dev,
- "enable VSI failed, err %d, VSI index %d, type %s\n",
+ dev_err(dev, "enable VSI failed, err %d, VSI index %d, type %s\n",
err, vsi->idx, ice_vsi_type_str(type));
return err;
}
@@ -4684,8 +4634,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
}
if (pf->first_sw->dflt_vsi_ena)
- dev_info(dev,
- "Clearing default VSI, re-enable after reset completes\n");
+ dev_info(dev, "Clearing default VSI, re-enable after reset completes\n");
/* clear the default VSI configuration if it exists */
pf->first_sw->dflt_vsi = NULL;
pf->first_sw->dflt_vsi_ena = false;
@@ -4736,8 +4685,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
/* tell the firmware we are up */
ret = ice_send_version(pf);
if (ret) {
- dev_err(dev,
- "Rebuild failed due to error sending driver version: %d\n",
+ dev_err(dev, "Rebuild failed due to error sending driver version: %d\n",
ret);
goto err_vsi_rebuild;
}
@@ -4993,7 +4941,7 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
if (status) {
- dev_err(&vsi->back->pdev->dev, "update VSI for bridge mode failed, bmode = %d err %d aq_err %d\n",
+ dev_err(ice_pf_to_dev(vsi->back), "update VSI for bridge mode failed, bmode = %d err %d aq_err %d\n",
bmode, status, hw->adminq.sq_last_status);
ret = -EIO;
goto out;
@@ -5185,8 +5133,7 @@ int ice_open(struct net_device *netdev)
if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
err = ice_force_phys_link_state(vsi, true);
if (err) {
- netdev_err(netdev,
- "Failed to set physical link up, error %d\n",
+ netdev_err(netdev, "Failed to set physical link up, error %d\n",
err);
return err;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index fd17ace..4de61db 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -644,7 +644,7 @@ static bool ice_page_is_reserved(struct page *page)
* Update the offset within page so that Rx buf will be ready to be reused.
* For systems with PAGE_SIZE < 8192 this function will flip the page offset
* so the second half of page assigned to Rx buffer will be used, otherwise
- * the offset is moved by the @size bytes
+ * the offset is moved by "size" bytes
*/
static void
ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
@@ -1078,8 +1078,6 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
skb = ice_build_skb(rx_ring, rx_buf, &xdp);
else
skb = ice_construct_skb(rx_ring, rx_buf, &xdp);
- } else {
- skb = ice_construct_skb(rx_ring, rx_buf, &xdp);
}
/* exit if we failed to retrieve a buffer */
if (!skb) {
@@ -1621,11 +1619,11 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
{
u64 td_offset, td_tag, td_cmd;
u16 i = tx_ring->next_to_use;
- skb_frag_t *frag;
unsigned int data_len, size;
struct ice_tx_desc *tx_desc;
struct ice_tx_buf *tx_buf;
struct sk_buff *skb;
+ skb_frag_t *frag;
dma_addr_t dma;
td_tag = off->td_l2tag1;
@@ -1738,9 +1736,8 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
ice_maybe_stop_tx(tx_ring, DESC_NEEDED);
/* notify HW of packet */
- if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
+ if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more())
writel(i, tx_ring->tail);
- }
return;
@@ -2078,7 +2075,7 @@ static bool __ice_chk_linearize(struct sk_buff *skb)
frag = &skb_shinfo(skb)->frags[0];
/* Initialize size to the negative value of gso_size minus 1. We
- * use this as the worst case scenerio in which the frag ahead
+ * use this as the worst case scenario in which the frag ahead
* of us only provides one byte which is why we are limited to 6
* descriptors for a single transmit as the header and previous
* fragment are already consuming 2 descriptors.
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index a862706..7ee00a1 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -33,8 +33,8 @@
* frame.
*
* Note: For cache line sizes 256 or larger this value is going to end
- * up negative. In these cases we should fall back to the legacy
- * receive path.
+ * up negative. In these cases we should fall back to the legacy
+ * receive path.
*/
#if (PAGE_SIZE < 8192)
#define ICE_2K_TOO_SMALL_WITH_PADDING \
@@ -222,7 +222,7 @@ enum ice_rx_dtype {
#define ICE_ITR_GRAN_S 1 /* ITR granularity is always 2us */
#define ICE_ITR_GRAN_US BIT(ICE_ITR_GRAN_S)
#define ICE_ITR_MASK 0x1FFE /* ITR register value alignment mask */
-#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~ICE_ITR_MASK)
+#define ITR_REG_ALIGN(setting) ((setting) & ICE_ITR_MASK)
#define ICE_ITR_ADAPTIVE_MIN_INC 0x0002
#define ICE_ITR_ADAPTIVE_MIN_USECS 0x0002
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index 35bbc4f..6da048a 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -10,7 +10,7 @@
*/
void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
{
- u16 prev_ntu = rx_ring->next_to_use;
+ u16 prev_ntu = rx_ring->next_to_use & ~0x7;
rx_ring->next_to_use = val;
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index b361ffa..db0ef6b 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -517,7 +517,7 @@ struct ice_hw {
struct ice_fw_log_cfg fw_log;
/* Device max aggregate bandwidths corresponding to the GL_PWR_MODE_CTL
- * register. Used for determining the ITR/intrl granularity during
+ * register. Used for determining the ITR/INTRL granularity during
* initialization.
*/
#define ICE_MAX_AGG_BW_200G 0x0
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 82b1e7a..75c70d4 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -199,8 +199,7 @@ static void ice_dis_vf_mappings(struct ice_vf *vf)
if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG)
wr32(hw, VPLAN_RX_QBASE(vf->vf_id), 0);
else
- dev_err(dev,
- "Scattered mode for VF Rx queues is not yet implemented\n");
+ dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n");
}
/**
@@ -402,8 +401,7 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
if ((reg & VF_TRANS_PENDING_M) == 0)
break;
- dev_err(dev,
- "VF %d PCI transactions stuck\n", vf->vf_id);
+ dev_err(dev, "VF %d PCI transactions stuck\n", vf->vf_id);
udelay(ICE_PCI_CIAD_WAIT_DELAY_US);
}
}
@@ -462,7 +460,7 @@ static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 vid, bool enable)
status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
if (status) {
- dev_info(&vsi->back->pdev->dev, "update VSI for port VLAN failed, err %d aq_err %d\n",
+ dev_info(ice_pf_to_dev(vsi->back), "update VSI for port VLAN failed, err %d aq_err %d\n",
status, hw->adminq.sq_last_status);
ret = -EIO;
goto out;
@@ -1095,7 +1093,6 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
* finished resetting.
*/
for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) {
-
/* Check each VF in sequence */
while (v < pf->num_alloc_vfs) {
u32 reg;
@@ -1553,8 +1550,7 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
dev_info(dev, "VF %d failed opcode %d, retval: %d\n", vf->vf_id,
v_opcode, v_retval);
if (vf->num_inval_msgs > ICE_DFLT_NUM_INVAL_MSGS_ALLOWED) {
- dev_err(dev,
- "Number of invalid messages exceeded for VF %d\n",
+ dev_err(dev, "Number of invalid messages exceeded for VF %d\n",
vf->vf_id);
dev_err(dev, "Use PF Control I/F to enable the VF\n");
set_bit(ICE_VF_STATE_DIS, vf->vf_states);
@@ -1569,8 +1565,7 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
aq_ret = ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, v_opcode, v_retval,
msg, msglen, NULL);
if (aq_ret && pf->hw.mailboxq.sq_last_status != ICE_AQ_RC_ENOSYS) {
- dev_info(dev,
- "Unable to send the message to VF %d ret %d aq_err %d\n",
+ dev_info(dev, "Unable to send the message to VF %d ret %d aq_err %d\n",
vf->vf_id, aq_ret, pf->hw.mailboxq.sq_last_status);
return -EIO;
}
@@ -1879,6 +1874,48 @@ static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg)
}
/**
+ * ice_wait_on_vf_reset - poll to make sure a given VF is ready after reset
+ * @vf: The VF being resseting
+ *
+ * The max poll time is about ~800ms, which is about the maximum time it takes
+ * for a VF to be reset and/or a VF driver to be removed.
+ */
+static void ice_wait_on_vf_reset(struct ice_vf *vf)
+{
+ int i;
+
+ for (i = 0; i < ICE_MAX_VF_RESET_TRIES; i++) {
+ if (test_bit(ICE_VF_STATE_INIT, vf->vf_states))
+ break;
+ msleep(ICE_MAX_VF_RESET_SLEEP_MS);
+ }
+}
+
+/**
+ * ice_check_vf_ready_for_cfg - check if VF is ready to be configured/queried
+ * @vf: VF to check if it's ready to be configured/queried
+ *
+ * The purpose of this function is to make sure the VF is not in reset, not
+ * disabled, and initialized so it can be configured and/or queried by a host
+ * administrator.
+ */
+static int ice_check_vf_ready_for_cfg(struct ice_vf *vf)
+{
+ struct ice_pf *pf;
+
+ ice_wait_on_vf_reset(vf);
+
+ if (ice_is_vf_disabled(vf))
+ return -EINVAL;
+
+ pf = vf->pf;
+ if (ice_check_vf_init(pf, vf))
+ return -EBUSY;
+
+ return 0;
+}
+
+/**
* ice_set_vf_spoofchk
* @netdev: network interface device structure
* @vf_id: VF identifier
@@ -1895,16 +1932,16 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
enum ice_status status;
struct device *dev;
struct ice_vf *vf;
- int ret = 0;
+ int ret;
dev = ice_pf_to_dev(pf);
if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
vf = &pf->vf[vf_id];
-
- if (ice_check_vf_init(pf, vf))
- return -EBUSY;
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ return ret;
vf_vsi = pf->vsi[vf->lan_vsi_idx];
if (!vf_vsi) {
@@ -1914,8 +1951,7 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
}
if (vf_vsi->type != ICE_VSI_VF) {
- netdev_err(netdev,
- "Type %d of VSI %d for VF %d is no ICE_VSI_VF\n",
+ netdev_err(netdev, "Type %d of VSI %d for VF %d is no ICE_VSI_VF\n",
vf_vsi->type, vf_vsi->vsi_num, vf->vf_id);
return -ENODEV;
}
@@ -1945,8 +1981,7 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
status = ice_update_vsi(&pf->hw, vf_vsi->idx, ctx, NULL);
if (status) {
- dev_err(dev,
- "Failed to %sable spoofchk on VF %d VSI %d\n error %d",
+ dev_err(dev, "Failed to %sable spoofchk on VF %d VSI %d\n error %d",
ena ? "en" : "dis", vf->vf_id, vf_vsi->vsi_num, status);
ret = -EIO;
goto out;
@@ -2063,8 +2098,7 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg)
continue;
if (ice_vsi_ctrl_rx_ring(vsi, true, vf_q_id)) {
- dev_err(&vsi->back->pdev->dev,
- "Failed to enable Rx ring %d on VSI %d\n",
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to enable Rx ring %d on VSI %d\n",
vf_q_id, vsi->vsi_num);
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
@@ -2166,8 +2200,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
if (ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id,
ring, &txq_meta)) {
- dev_err(&vsi->back->pdev->dev,
- "Failed to stop Tx ring %d on VSI %d\n",
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n",
vf_q_id, vsi->vsi_num);
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
@@ -2193,8 +2226,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
continue;
if (ice_vsi_ctrl_rx_ring(vsi, false, vf_q_id)) {
- dev_err(&vsi->back->pdev->dev,
- "Failed to stop Rx ring %d on VSI %d\n",
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Rx ring %d on VSI %d\n",
vf_q_id, vsi->vsi_num);
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
@@ -2357,8 +2389,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
if (qci->num_queue_pairs > ICE_MAX_BASE_QS_PER_VF ||
qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
- dev_err(ice_pf_to_dev(pf),
- "VF-%d requesting more than supported number of queues: %d\n",
+ dev_err(ice_pf_to_dev(pf), "VF-%d requesting more than supported number of queues: %d\n",
vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
@@ -2570,8 +2601,7 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
*/
if (set && !ice_is_vf_trusted(vf) &&
(vf->num_mac + al->num_elements) > ICE_MAX_MACADDR_PER_VF) {
- dev_err(ice_pf_to_dev(pf),
- "Can't add more MAC addresses, because VF-%d is not trusted, switch the VF to trusted mode in order to add more functionalities\n",
+ dev_err(ice_pf_to_dev(pf), "Can't add more MAC addresses, because VF-%d is not trusted, switch the VF to trusted mode in order to add more functionalities\n",
vf->vf_id);
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto handle_mac_exit;
@@ -2648,8 +2678,8 @@ static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg)
struct ice_pf *pf = vf->pf;
u16 max_allowed_vf_queues;
u16 tx_rx_queue_left;
- u16 cur_queues;
struct device *dev;
+ u16 cur_queues;
dev = ice_pf_to_dev(pf);
if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
@@ -2670,8 +2700,7 @@ static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg)
vfres->num_queue_pairs = ICE_MAX_BASE_QS_PER_VF;
} else if (req_queues > cur_queues &&
req_queues - cur_queues > tx_rx_queue_left) {
- dev_warn(dev,
- "VF %d requested %u more queues, but only %u left.\n",
+ dev_warn(dev, "VF %d requested %u more queues, but only %u left.\n",
vf->vf_id, req_queues - cur_queues, tx_rx_queue_left);
vfres->num_queue_pairs = min_t(u16, max_allowed_vf_queues,
ICE_MAX_BASE_QS_PER_VF);
@@ -2709,7 +2738,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
struct ice_vsi *vsi;
struct device *dev;
struct ice_vf *vf;
- int ret = 0;
+ int ret;
dev = ice_pf_to_dev(pf);
if (ice_validate_vf_id(pf, vf_id))
@@ -2727,13 +2756,15 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
vf = &pf->vf[vf_id];
vsi = pf->vsi[vf->lan_vsi_idx];
- if (ice_check_vf_init(pf, vf))
- return -EBUSY;
+
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ return ret;
if (le16_to_cpu(vsi->info.pvid) == vlanprio) {
/* duplicate request, so just return success */
dev_dbg(dev, "Duplicate pvid %d request\n", vlanprio);
- return ret;
+ return 0;
}
/* If PVID, then remove all filters on the old VLAN */
@@ -2744,7 +2775,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
if (vlan_id || qos) {
ret = ice_vsi_manage_pvid(vsi, vlanprio, true);
if (ret)
- goto error_set_pvid;
+ return ret;
} else {
ice_vsi_manage_pvid(vsi, 0, false);
vsi->info.pvid = 0;
@@ -2757,7 +2788,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
/* add new VLAN filter for each MAC */
ret = ice_vsi_add_vlan(vsi, vlan_id);
if (ret)
- goto error_set_pvid;
+ return ret;
}
/* The Port VLAN needs to be saved across resets the same as the
@@ -2765,8 +2796,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
*/
vf->port_vlan_id = le16_to_cpu(vsi->info.pvid);
-error_set_pvid:
- return ret;
+ return 0;
}
/**
@@ -2821,8 +2851,8 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
for (i = 0; i < vfl->num_elements; i++) {
if (vfl->vlan_id[i] > ICE_MAX_VLANID) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- dev_err(dev,
- "invalid VF VLAN id %d\n", vfl->vlan_id[i]);
+ dev_err(dev, "invalid VF VLAN id %d\n",
+ vfl->vlan_id[i]);
goto error_param;
}
}
@@ -2836,8 +2866,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
if (add_v && !ice_is_vf_trusted(vf) &&
vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) {
- dev_info(dev,
- "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
+ dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
vf->vf_id);
/* There is no need to let VF know about being not trusted,
* so we can just return success message here
@@ -2860,8 +2889,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
if (!ice_is_vf_trusted(vf) &&
vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) {
- dev_info(dev,
- "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
+ dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
vf->vf_id);
/* There is no need to let VF know about being
* not trusted, so we can just return success
@@ -2889,8 +2917,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
status = ice_cfg_vlan_pruning(vsi, true, false);
if (status) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- dev_err(dev,
- "Enable VLAN pruning on VLAN ID: %d failed error-%d\n",
+ dev_err(dev, "Enable VLAN pruning on VLAN ID: %d failed error-%d\n",
vid, status);
goto error_param;
}
@@ -2903,8 +2930,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
promisc_m, vid);
if (status) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- dev_err(dev,
- "Enable Unicast/multicast promiscuous mode on VLAN ID:%d failed error-%d\n",
+ dev_err(dev, "Enable Unicast/multicast promiscuous mode on VLAN ID:%d failed error-%d\n",
vid, status);
}
}
@@ -3140,8 +3166,7 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
case VIRTCHNL_OP_GET_VF_RESOURCES:
err = ice_vc_get_vf_res_msg(vf, msg);
if (ice_vf_init_vlan_stripping(vf))
- dev_err(dev,
- "Failed to initialize VLAN stripping for VF %d\n",
+ dev_err(dev, "Failed to initialize VLAN stripping for VF %d\n",
vf->vf_id);
ice_vc_notify_vf_link_state(vf);
break;
@@ -3255,23 +3280,6 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi)
}
/**
- * ice_wait_on_vf_reset
- * @vf: The VF being resseting
- *
- * Poll to make sure a given VF is ready after reset
- */
-static void ice_wait_on_vf_reset(struct ice_vf *vf)
-{
- int i;
-
- for (i = 0; i < ICE_MAX_VF_RESET_WAIT; i++) {
- if (test_bit(ICE_VF_STATE_INIT, vf->vf_states))
- break;
- msleep(20);
- }
-}
-
-/**
* ice_set_vf_mac
* @netdev: network interface device structure
* @vf_id: VF identifier
@@ -3283,29 +3291,21 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
{
struct ice_pf *pf = ice_netdev_to_pf(netdev);
struct ice_vf *vf;
- int ret = 0;
+ int ret;
if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
- vf = &pf->vf[vf_id];
- /* Don't set MAC on disabled VF */
- if (ice_is_vf_disabled(vf))
- return -EINVAL;
-
- /* In case VF is in reset mode, wait until it is completed. Depending
- * on factors like queue disabling routine, this could take ~250ms
- */
- ice_wait_on_vf_reset(vf);
-
- if (ice_check_vf_init(pf, vf))
- return -EBUSY;
-
if (is_zero_ether_addr(mac) || is_multicast_ether_addr(mac)) {
netdev_err(netdev, "%pM not a valid unicast address\n", mac);
return -EINVAL;
}
+ vf = &pf->vf[vf_id];
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ return ret;
+
/* copy MAC into dflt_lan_addr and trigger a VF reset. The reset
* flow will use the updated dflt_lan_addr and add a MAC filter
* using ice_add_mac. Also set pf_set_mac to indicate that the PF has
@@ -3313,12 +3313,11 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
*/
ether_addr_copy(vf->dflt_lan_addr.addr, mac);
vf->pf_set_mac = true;
- netdev_info(netdev,
- "MAC on VF %d set to %pM. VF driver will be reinitialized\n",
+ netdev_info(netdev, "MAC on VF %d set to %pM. VF driver will be reinitialized\n",
vf_id, mac);
ice_vc_reset_vf(vf);
- return ret;
+ return 0;
}
/**
@@ -3332,25 +3331,16 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
{
struct ice_pf *pf = ice_netdev_to_pf(netdev);
- struct device *dev;
struct ice_vf *vf;
+ int ret;
- dev = ice_pf_to_dev(pf);
if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
vf = &pf->vf[vf_id];
- /* Don't set Trusted Mode on disabled VF */
- if (ice_is_vf_disabled(vf))
- return -EINVAL;
-
- /* In case VF is in reset mode, wait until it is completed. Depending
- * on factors like queue disabling routine, this could take ~250ms
- */
- ice_wait_on_vf_reset(vf);
-
- if (ice_check_vf_init(pf, vf))
- return -EBUSY;
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ return ret;
/* Check if already trusted */
if (trusted == vf->trusted)
@@ -3358,7 +3348,7 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
vf->trusted = trusted;
ice_vc_reset_vf(vf);
- dev_info(dev, "VF %u is now %strusted\n",
+ dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n",
vf_id, trusted ? "" : "un");
return 0;
@@ -3376,13 +3366,15 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state)
{
struct ice_pf *pf = ice_netdev_to_pf(netdev);
struct ice_vf *vf;
+ int ret;
if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
vf = &pf->vf[vf_id];
- if (ice_check_vf_init(pf, vf))
- return -EBUSY;
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ return ret;
switch (link_state) {
case IFLA_VF_LINK_STATE_AUTO:
@@ -3418,14 +3410,15 @@ int ice_get_vf_stats(struct net_device *netdev, int vf_id,
struct ice_eth_stats *stats;
struct ice_vsi *vsi;
struct ice_vf *vf;
+ int ret;
if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
vf = &pf->vf[vf_id];
-
- if (ice_check_vf_init(pf, vf))
- return -EBUSY;
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (ret)
+ return ret;
vsi = pf->vsi[vf->lan_vsi_idx];
if (!vsi)
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 4647d63..ac67982 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -38,7 +38,8 @@
#define ICE_MAX_POLICY_INTR_PER_VF 33
#define ICE_MIN_INTR_PER_VF (ICE_MIN_QS_PER_VF + 1)
#define ICE_DFLT_INTR_PER_VF (ICE_DFLT_QS_PER_VF + 1)
-#define ICE_MAX_VF_RESET_WAIT 15
+#define ICE_MAX_VF_RESET_TRIES 40
+#define ICE_MAX_VF_RESET_SLEEP_MS 20
#define ice_for_each_vf(pf, i) \
for ((i) = 0; (i) < (pf)->num_alloc_vfs; (i)++)
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 149dca0..4d3407b 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -338,8 +338,8 @@ static int ice_xsk_umem_dma_map(struct ice_vsi *vsi, struct xdp_umem *umem)
DMA_BIDIRECTIONAL,
ICE_RX_DMA_ATTR);
if (dma_mapping_error(dev, dma)) {
- dev_dbg(dev,
- "XSK UMEM DMA mapping error on page num %d", i);
+ dev_dbg(dev, "XSK UMEM DMA mapping error on page num %d\n",
+ i);
goto out_unmap;
}
diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c
index 0b9e851..d14762d 100644
--- a/drivers/net/ethernet/marvell/mvmdio.c
+++ b/drivers/net/ethernet/marvell/mvmdio.c
@@ -347,7 +347,7 @@ static int orion_mdio_probe(struct platform_device *pdev)
}
- dev->err_interrupt = platform_get_irq(pdev, 0);
+ dev->err_interrupt = platform_get_irq_optional(pdev, 0);
if (dev->err_interrupt > 0 &&
resource_size(r) < MVMDIO_ERR_INT_MASK + 4) {
dev_err(&pdev->dev,
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 98017e7..11babc7 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -3036,11 +3036,10 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
/* For the case where the last mvneta_poll did not process all
* RX packets
*/
- rx_queue = fls(((cause_rx_tx >> 8) & 0xff));
-
cause_rx_tx |= pp->neta_armada3700 ? pp->cause_rx_tx :
port->cause_rx_tx;
+ rx_queue = fls(((cause_rx_tx >> 8) & 0xff));
if (rx_queue) {
rx_queue = rx_queue - 1;
if (pp->bm_priv)
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index 9c48182..9486cae 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -906,59 +906,59 @@ static void mlx4_err_rule(struct mlx4_dev *dev, char *str,
int len = 0;
mlx4_err(dev, "%s", str);
- len += snprintf(buf + len, BUF_SIZE - len,
- "port = %d prio = 0x%x qp = 0x%x ",
- rule->port, rule->priority, rule->qpn);
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "port = %d prio = 0x%x qp = 0x%x ",
+ rule->port, rule->priority, rule->qpn);
list_for_each_entry(cur, &rule->list, list) {
switch (cur->id) {
case MLX4_NET_TRANS_RULE_ID_ETH:
- len += snprintf(buf + len, BUF_SIZE - len,
- "dmac = %pM ", &cur->eth.dst_mac);
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "dmac = %pM ", &cur->eth.dst_mac);
if (cur->eth.ether_type)
- len += snprintf(buf + len, BUF_SIZE - len,
- "ethertype = 0x%x ",
- be16_to_cpu(cur->eth.ether_type));
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "ethertype = 0x%x ",
+ be16_to_cpu(cur->eth.ether_type));
if (cur->eth.vlan_id)
- len += snprintf(buf + len, BUF_SIZE - len,
- "vlan-id = %d ",
- be16_to_cpu(cur->eth.vlan_id));
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "vlan-id = %d ",
+ be16_to_cpu(cur->eth.vlan_id));
break;
case MLX4_NET_TRANS_RULE_ID_IPV4:
if (cur->ipv4.src_ip)
- len += snprintf(buf + len, BUF_SIZE - len,
- "src-ip = %pI4 ",
- &cur->ipv4.src_ip);
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "src-ip = %pI4 ",
+ &cur->ipv4.src_ip);
if (cur->ipv4.dst_ip)
- len += snprintf(buf + len, BUF_SIZE - len,
- "dst-ip = %pI4 ",
- &cur->ipv4.dst_ip);
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "dst-ip = %pI4 ",
+ &cur->ipv4.dst_ip);
break;
case MLX4_NET_TRANS_RULE_ID_TCP:
case MLX4_NET_TRANS_RULE_ID_UDP:
if (cur->tcp_udp.src_port)
- len += snprintf(buf + len, BUF_SIZE - len,
- "src-port = %d ",
- be16_to_cpu(cur->tcp_udp.src_port));
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "src-port = %d ",
+ be16_to_cpu(cur->tcp_udp.src_port));
if (cur->tcp_udp.dst_port)
- len += snprintf(buf + len, BUF_SIZE - len,
- "dst-port = %d ",
- be16_to_cpu(cur->tcp_udp.dst_port));
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "dst-port = %d ",
+ be16_to_cpu(cur->tcp_udp.dst_port));
break;
case MLX4_NET_TRANS_RULE_ID_IB:
- len += snprintf(buf + len, BUF_SIZE - len,
- "dst-gid = %pI6\n", cur->ib.dst_gid);
- len += snprintf(buf + len, BUF_SIZE - len,
- "dst-gid-mask = %pI6\n",
- cur->ib.dst_gid_msk);
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "dst-gid = %pI6\n", cur->ib.dst_gid);
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "dst-gid-mask = %pI6\n",
+ cur->ib.dst_gid_msk);
break;
case MLX4_NET_TRANS_RULE_ID_VXLAN:
- len += snprintf(buf + len, BUF_SIZE - len,
- "VNID = %d ", be32_to_cpu(cur->vxlan.vni));
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "VNID = %d ", be32_to_cpu(cur->vxlan.vni));
break;
case MLX4_NET_TRANS_RULE_ID_IPV6:
break;
@@ -967,7 +967,7 @@ static void mlx4_err_rule(struct mlx4_dev *dev, char *str,
break;
}
}
- len += snprintf(buf + len, BUF_SIZE - len, "\n");
+ len += scnprintf(buf + len, BUF_SIZE - len, "\n");
mlx4_err(dev, "%s", buf);
if (len >= BUF_SIZE)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 220ef9f..c9606b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -371,6 +371,7 @@ enum {
struct mlx5e_sq_wqe_info {
u8 opcode;
+ u8 num_wqebbs;
/* Auxiliary data for different opcodes. */
union {
@@ -1059,6 +1060,7 @@ int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state);
void mlx5e_activate_rq(struct mlx5e_rq *rq);
void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
+void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq);
void mlx5e_activate_icosq(struct mlx5e_icosq *icosq);
void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
index 3a97564..20b907dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
@@ -200,7 +200,7 @@ int mlx5e_health_report(struct mlx5e_priv *priv,
netdev_err(priv->netdev, err_str);
if (!reporter)
- return err_ctx->recover(&err_ctx->ctx);
+ return err_ctx->recover(err_ctx->ctx);
return devlink_health_report(reporter, err_str, err_ctx);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
index d3693fa..e54f70d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -10,8 +10,7 @@
static inline bool cqe_syndrome_needs_recover(u8 syndrome)
{
- return syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR ||
- syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
+ return syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR ||
syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index 6c72b59..a01e2de 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -90,7 +90,7 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
goto out;
mlx5e_reset_icosq_cc_pc(icosq);
- mlx5e_free_rx_descs(rq);
+ mlx5e_free_rx_in_progress_descs(rq);
clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
mlx5e_activate_icosq(icosq);
mlx5e_activate_rq(rq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 7c8796d..f07b139 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -179,6 +179,16 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
}
}
+static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq)
+{
+ if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+ mlx5_wq_ll_reset(&rq->mpwqe.wq);
+ rq->mpwqe.actual_wq_head = 0;
+ } else {
+ mlx5_wq_cyc_reset(&rq->wqe.wq);
+ }
+}
+
/* SW parser related functions */
struct mlx5e_swp_spec {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
index a3efa29..63116be 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -38,8 +38,8 @@ enum {
enum {
MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START = 0,
- MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING = 1,
- MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING = 2,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING = 1,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING = 2,
};
struct mlx5e_ktls_offload_context_tx {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index f260dd9..52a5662 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -218,7 +218,7 @@ tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx,
* this packet was already acknowledged and its record info
* was released.
*/
- ends_before = before(tcp_seq + datalen, tls_record_start_seq(record));
+ ends_before = before(tcp_seq + datalen - 1, tls_record_start_seq(record));
if (unlikely(tls_record_is_start_marker(record))) {
ret = ends_before ? MLX5E_KTLS_SYNC_SKIP_NO_DATA : MLX5E_KTLS_SYNC_FAIL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 454d345..4ef3dc7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -712,6 +712,9 @@ int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
if (!in)
return -ENOMEM;
+ if (curr_state == MLX5_RQC_STATE_RST && next_state == MLX5_RQC_STATE_RDY)
+ mlx5e_rqwq_reset(rq);
+
rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
MLX5_SET(modify_rq_in, in, rq_state, curr_state);
@@ -810,6 +813,29 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
return -ETIMEDOUT;
}
+void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq)
+{
+ struct mlx5_wq_ll *wq;
+ u16 head;
+ int i;
+
+ if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+ return;
+
+ wq = &rq->mpwqe.wq;
+ head = wq->head;
+
+ /* Outstanding UMR WQEs (in progress) start at wq->head */
+ for (i = 0; i < rq->mpwqe.umr_in_progress; i++) {
+ rq->dealloc_wqe(rq, head);
+ head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
+ }
+
+ rq->mpwqe.actual_wq_head = wq->head;
+ rq->mpwqe.umr_in_progress = 0;
+ rq->mpwqe.umr_completed = 0;
+}
+
void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
{
__be16 wqe_ix_be;
@@ -817,14 +843,8 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
- u16 head = wq->head;
- int i;
- /* Outstanding UMR WQEs (in progress) start at wq->head */
- for (i = 0; i < rq->mpwqe.umr_in_progress; i++) {
- rq->dealloc_wqe(rq, head);
- head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
- }
+ mlx5e_free_rx_in_progress_descs(rq);
while (!mlx5_wq_ll_is_empty(wq)) {
struct mlx5e_rx_wqe_ll *wqe;
@@ -5144,7 +5164,6 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
static void mlx5e_nic_disable(struct mlx5e_priv *priv)
{
- struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
#ifdef CONFIG_MLX5_CORE_EN_DCB
@@ -5165,7 +5184,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
mlx5e_monitor_counter_cleanup(priv);
mlx5e_disable_async_events(priv);
- mlx5_lag_remove(mdev, netdev);
+ mlx5_lag_remove(mdev);
}
int mlx5e_update_nic_rx(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 7b48cca..6ed307d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1861,7 +1861,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
{
- struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -1870,7 +1869,7 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
#endif
mlx5_notifier_unregister(mdev, &priv->events_nb);
cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
- mlx5_lag_remove(mdev, netdev);
+ mlx5_lag_remove(mdev);
}
static MLX5E_DEFINE_STATS_GRP(sw_rep, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 1c3ab69..312d469 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -477,6 +477,7 @@ static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
/* fill sq frag edge with nops to avoid wqe wrapping two pages */
for (; wi < edge_wi; wi++) {
wi->opcode = MLX5_OPCODE_NOP;
+ wi->num_wqebbs = 1;
mlx5e_post_nop(wq, sq->sqn, &sq->pc);
}
}
@@ -525,6 +526,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset);
sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
+ sq->db.ico_wqe[pi].num_wqebbs = MLX5E_UMR_WQEBBS;
sq->db.ico_wqe[pi].umr.rq = rq;
sq->pc += MLX5E_UMR_WQEBBS;
@@ -621,6 +623,7 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.ico_wqe[ci];
+ sqcc += wi->num_wqebbs;
if (last_wqe && unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
netdev_WARN_ONCE(cq->channel->netdev,
@@ -631,16 +634,12 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
break;
}
- if (likely(wi->opcode == MLX5_OPCODE_UMR)) {
- sqcc += MLX5E_UMR_WQEBBS;
+ if (likely(wi->opcode == MLX5_OPCODE_UMR))
wi->umr.rq->mpwqe.umr_completed++;
- } else if (likely(wi->opcode == MLX5_OPCODE_NOP)) {
- sqcc++;
- } else {
+ else if (unlikely(wi->opcode != MLX5_OPCODE_NOP))
netdev_WARN_ONCE(cq->channel->netdev,
"Bad OPCODE in ICOSQ WQE info: 0x%x\n",
wi->opcode);
- }
} while (!last_wqe);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 74091f7..ec5fc52 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2476,10 +2476,11 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
continue;
if (f->field_bsize == 32) {
- mask_be32 = *(__be32 *)&mask;
+ mask_be32 = (__be32)mask;
mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
} else if (f->field_bsize == 16) {
- mask_be16 = *(__be16 *)&mask;
+ mask_be32 = (__be32)mask;
+ mask_be16 = *(__be16 *)&mask_be32;
mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 257a7c9..800d34e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -78,6 +78,7 @@ void mlx5e_trigger_irq(struct mlx5e_icosq *sq)
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
+ sq->db.ico_wqe[pi].num_wqebbs = 1;
nopwqe = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 5acf60b..e49acd0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -459,12 +459,16 @@ static void esw_destroy_legacy_table(struct mlx5_eswitch *esw)
static int esw_legacy_enable(struct mlx5_eswitch *esw)
{
- int ret;
+ struct mlx5_vport *vport;
+ int ret, i;
ret = esw_create_legacy_table(esw);
if (ret)
return ret;
+ mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
+ vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
+
ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS);
if (ret)
esw_destroy_legacy_table(esw);
@@ -2452,25 +2456,17 @@ int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting)
int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting)
{
- int err = 0;
-
if (!esw)
return -EOPNOTSUPP;
if (!ESW_ALLOWED(esw))
return -EPERM;
- mutex_lock(&esw->state_lock);
- if (esw->mode != MLX5_ESWITCH_LEGACY) {
- err = -EOPNOTSUPP;
- goto out;
- }
+ if (esw->mode != MLX5_ESWITCH_LEGACY)
+ return -EOPNOTSUPP;
*setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0;
-
-out:
- mutex_unlock(&esw->state_lock);
- return err;
+ return 0;
}
int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 979f13b..1a57b2b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1172,7 +1172,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw,
return -EINVAL;
}
- mlx5_eswitch_disable(esw, true);
+ mlx5_eswitch_disable(esw, false);
mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs);
err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS);
if (err) {
@@ -2065,7 +2065,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
{
int err, err1;
- mlx5_eswitch_disable(esw, true);
+ mlx5_eswitch_disable(esw, false);
err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c
index c5a446e..4276194 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c
@@ -35,7 +35,7 @@
static const unsigned int ESW_POOLS[] = { 4 * 1024 * 1024,
1 * 1024 * 1024,
64 * 1024,
- 4 * 1024, };
+ 128 };
struct mlx5_esw_chains_priv {
struct rhashtable chains_ht;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index b91eabc..93052b0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -464,9 +464,6 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
struct mlx5_lag *ldev;
int changed = 0;
- if (!net_eq(dev_net(ndev), &init_net))
- return NOTIFY_DONE;
-
if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
return NOTIFY_DONE;
@@ -586,8 +583,7 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
if (!ldev->nb.notifier_call) {
ldev->nb.notifier_call = mlx5_lag_netdev_event;
- if (register_netdevice_notifier_dev_net(netdev, &ldev->nb,
- &ldev->nn)) {
+ if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
ldev->nb.notifier_call = NULL;
mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
}
@@ -600,7 +596,7 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
}
/* Must be called with intf_mutex held */
-void mlx5_lag_remove(struct mlx5_core_dev *dev, struct net_device *netdev)
+void mlx5_lag_remove(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
int i;
@@ -619,9 +615,10 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev, struct net_device *netdev)
break;
if (i == MLX5_MAX_PORTS) {
- if (ldev->nb.notifier_call)
- unregister_netdevice_notifier_dev_net(netdev, &ldev->nb,
- &ldev->nn);
+ if (ldev->nb.notifier_call) {
+ unregister_netdevice_notifier_net(&init_net, &ldev->nb);
+ ldev->nb.notifier_call = NULL;
+ }
mlx5_lag_mp_cleanup(ldev);
cancel_delayed_work_sync(&ldev->bond_work);
mlx5_lag_dev_free(ldev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
index 316ab09..f1068aa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
@@ -44,7 +44,6 @@ struct mlx5_lag {
struct workqueue_struct *wq;
struct delayed_work bond_work;
struct notifier_block nb;
- struct netdev_net_notifier nn;
struct lag_mp lag_mp;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index fcce9e0f..da67b28 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -157,7 +157,7 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
u8 feature_group, u8 access_reg_group);
void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
-void mlx5_lag_remove(struct mlx5_core_dev *dev, struct net_device *netdev);
+void mlx5_lag_remove(struct mlx5_core_dev *dev);
int mlx5_irq_table_init(struct mlx5_core_dev *dev);
void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index 6dec2a55..2d93228 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -933,7 +933,6 @@ static int dr_actions_l2_rewrite(struct mlx5dr_domain *dmn,
action->rewrite.data = (void *)ops;
action->rewrite.num_of_actions = i;
- action->rewrite.chunk->byte_size = i * sizeof(*ops);
ret = mlx5dr_send_postsend_action(dmn, action);
if (ret) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index c7f10d4..095ec7b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -558,7 +558,8 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
int ret;
send_info.write.addr = (uintptr_t)action->rewrite.data;
- send_info.write.length = action->rewrite.chunk->byte_size;
+ send_info.write.length = action->rewrite.num_of_actions *
+ DR_MODIFY_ACTION_SIZE;
send_info.write.lkey = 0;
send_info.remote_addr = action->rewrite.chunk->mr_addr;
send_info.rkey = action->rewrite.chunk->rkey;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index c6c7d1d..aade62a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -2307,7 +2307,9 @@ static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
struct mlx5dr_cmd_vport_cap *vport_cap;
struct mlx5dr_domain *dmn = sb->dmn;
struct mlx5dr_cmd_caps *caps;
+ u8 *bit_mask = sb->bit_mask;
u8 *tag = hw_ste->tag;
+ bool source_gvmi_set;
DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn);
@@ -2328,7 +2330,8 @@ static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
if (!vport_cap)
return -EINVAL;
- if (vport_cap->vport_gvmi)
+ source_gvmi_set = MLX5_GET(ste_src_gvmi_qp, bit_mask, source_gvmi);
+ if (vport_cap->vport_gvmi && source_gvmi_set)
MLX5_SET(ste_src_gvmi_qp, tag, source_gvmi, vport_cap->vport_gvmi);
misc->source_eswitch_owner_vhca_id = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index 3abfc81..c202719 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -66,15 +66,20 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *next_ft)
{
struct mlx5dr_table *tbl;
+ u32 flags;
int err;
if (mlx5_dr_is_fw_table(ft->flags))
return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft,
log_size,
next_ft);
+ flags = ft->flags;
+ /* turn off encap/decap if not supported for sw-str by fw */
+ if (!MLX5_CAP_FLOWTABLE(ns->dev, sw_owner_reformat_supported))
+ flags = ft->flags & ~(MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
+ MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
- tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain,
- ft->level, ft->flags);
+ tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, ft->level, flags);
if (!tbl) {
mlx5_core_err(ns->dev, "Failed creating dr flow_table\n");
return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 1faac31f..23f879d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -1071,6 +1071,9 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
MLX5_SET64(hca_vport_context, ctx, port_guid, req->port_guid);
if (req->field_select & MLX5_HCA_VPORT_SEL_NODE_GUID)
MLX5_SET64(hca_vport_context, ctx, node_guid, req->node_guid);
+ MLX5_SET(hca_vport_context, ctx, cap_mask1, req->cap_mask1);
+ MLX5_SET(hca_vport_context, ctx, cap_mask1_field_select,
+ req->cap_mask1_perm);
err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
ex:
kfree(in);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 02f7e4a..01f075f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -94,6 +94,13 @@ void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides)
print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, wqe, len, false);
}
+void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq)
+{
+ wq->wqe_ctr = 0;
+ wq->cur_sz = 0;
+ mlx5_wq_cyc_update_db_record(wq);
+}
+
int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *qpc, struct mlx5_wq_qp *wq,
struct mlx5_wq_ctrl *wq_ctrl)
@@ -192,6 +199,19 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
return err;
}
+static void mlx5_wq_ll_init_list(struct mlx5_wq_ll *wq)
+{
+ struct mlx5_wqe_srq_next_seg *next_seg;
+ int i;
+
+ for (i = 0; i < wq->fbc.sz_m1; i++) {
+ next_seg = mlx5_wq_ll_get_wqe(wq, i);
+ next_seg->next_wqe_index = cpu_to_be16(i + 1);
+ }
+ next_seg = mlx5_wq_ll_get_wqe(wq, i);
+ wq->tail_next = &next_seg->next_wqe_index;
+}
+
int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *wqc, struct mlx5_wq_ll *wq,
struct mlx5_wq_ctrl *wq_ctrl)
@@ -199,9 +219,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride);
u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz);
struct mlx5_frag_buf_ctrl *fbc = &wq->fbc;
- struct mlx5_wqe_srq_next_seg *next_seg;
int err;
- int i;
err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
if (err) {
@@ -220,13 +238,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc);
- for (i = 0; i < fbc->sz_m1; i++) {
- next_seg = mlx5_wq_ll_get_wqe(wq, i);
- next_seg->next_wqe_index = cpu_to_be16(i + 1);
- }
- next_seg = mlx5_wq_ll_get_wqe(wq, i);
- wq->tail_next = &next_seg->next_wqe_index;
-
+ mlx5_wq_ll_init_list(wq);
wq_ctrl->mdev = mdev;
return 0;
@@ -237,6 +249,15 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
return err;
}
+void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq)
+{
+ wq->head = 0;
+ wq->wqe_ctr = 0;
+ wq->cur_sz = 0;
+ mlx5_wq_ll_init_list(wq);
+ mlx5_wq_ll_update_db_record(wq);
+}
+
void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl)
{
mlx5_frag_buf_free(wq_ctrl->mdev, &wq_ctrl->buf);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index d9a94bc..4cadc33 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -80,6 +80,7 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *wqc, struct mlx5_wq_cyc *wq,
struct mlx5_wq_ctrl *wq_ctrl);
void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides);
+void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq);
int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *qpc, struct mlx5_wq_qp *wq,
@@ -92,6 +93,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *wqc, struct mlx5_wq_ll *wq,
struct mlx5_wq_ctrl *wq_ctrl);
+void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq);
void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 914c33e..e9ded1a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1322,36 +1322,64 @@ static void mlxsw_pci_mbox_free(struct mlxsw_pci *mlxsw_pci,
mbox->mapaddr);
}
-static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
- const struct pci_device_id *id)
+static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci,
+ const struct pci_device_id *id,
+ u32 *p_sys_status)
{
unsigned long end;
- char mrsr_pl[MLXSW_REG_MRSR_LEN];
- int err;
+ u32 val;
- mlxsw_reg_mrsr_pack(mrsr_pl);
- err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
- if (err)
- return err;
if (id->device == PCI_DEVICE_ID_MELLANOX_SWITCHX2) {
msleep(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS);
return 0;
}
- /* We must wait for the HW to become responsive once again. */
+ /* We must wait for the HW to become responsive. */
msleep(MLXSW_PCI_SW_RESET_WAIT_MSECS);
end = jiffies + msecs_to_jiffies(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS);
do {
- u32 val = mlxsw_pci_read32(mlxsw_pci, FW_READY);
-
+ val = mlxsw_pci_read32(mlxsw_pci, FW_READY);
if ((val & MLXSW_PCI_FW_READY_MASK) == MLXSW_PCI_FW_READY_MAGIC)
return 0;
cond_resched();
} while (time_before(jiffies, end));
+
+ *p_sys_status = val & MLXSW_PCI_FW_READY_MASK;
+
return -EBUSY;
}
+static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
+ const struct pci_device_id *id)
+{
+ struct pci_dev *pdev = mlxsw_pci->pdev;
+ char mrsr_pl[MLXSW_REG_MRSR_LEN];
+ u32 sys_status;
+ int err;
+
+ err = mlxsw_pci_sys_ready_wait(mlxsw_pci, id, &sys_status);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to reach system ready status before reset. Status is 0x%x\n",
+ sys_status);
+ return err;
+ }
+
+ mlxsw_reg_mrsr_pack(mrsr_pl);
+ err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
+ if (err)
+ return err;
+
+ err = mlxsw_pci_sys_ready_wait(mlxsw_pci, id, &sys_status);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to reach system ready status after reset. Status is 0x%x\n",
+ sys_status);
+ return err;
+ }
+
+ return 0;
+}
+
static int mlxsw_pci_alloc_irq_vectors(struct mlxsw_pci *mlxsw_pci)
{
int err;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
index e0d7d2d..43fa8c8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -28,7 +28,7 @@
#define MLXSW_PCI_SW_RESET 0xF0010
#define MLXSW_PCI_SW_RESET_RST_BIT BIT(0)
#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 900000
-#define MLXSW_PCI_SW_RESET_WAIT_MSECS 100
+#define MLXSW_PCI_SW_RESET_WAIT_MSECS 200
#define MLXSW_PCI_FW_READY 0xA1844
#define MLXSW_PCI_FW_READY_MASK 0xFFFF
#define MLXSW_PCI_FW_READY_MAGIC 0x5E
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index dd66851..e05d1d1b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3572,7 +3572,7 @@ MLXSW_ITEM32(reg, qeec, mase, 0x10, 31, 1);
* When in bytes mode, value is specified in units of 1000bps.
* Access: RW
*/
-MLXSW_ITEM32(reg, qeec, max_shaper_rate, 0x10, 0, 28);
+MLXSW_ITEM32(reg, qeec, max_shaper_rate, 0x10, 0, 31);
/* reg_qeec_de
* DWRR configuration enable. Enables configuration of the dwrr and
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
index 5427562..336e5ec 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
@@ -637,12 +637,12 @@ static int mlxsw_sp_mr_vif_resolve(struct mlxsw_sp_mr_table *mr_table,
return 0;
err_erif_unresolve:
- list_for_each_entry_from_reverse(erve, &mr_vif->route_evif_list,
- vif_node)
+ list_for_each_entry_continue_reverse(erve, &mr_vif->route_evif_list,
+ vif_node)
mlxsw_sp_mr_route_evif_unresolve(mr_table, erve);
err_irif_unresolve:
- list_for_each_entry_from_reverse(irve, &mr_vif->route_ivif_list,
- vif_node)
+ list_for_each_entry_continue_reverse(irve, &mr_vif->route_ivif_list,
+ vif_node)
mlxsw_sp_mr_route_ivif_unresolve(mr_table, irve);
mr_vif->rif = NULL;
return err;
diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c
index a41a90c..45cc840 100644
--- a/drivers/net/ethernet/micrel/ks8851_mll.c
+++ b/drivers/net/ethernet/micrel/ks8851_mll.c
@@ -157,21 +157,47 @@ static int msg_enable;
*/
/**
- * ks_rdreg8 - read 8 bit register from device
+ * ks_check_endian - Check whether endianness of the bus is correct
* @ks : The chip information
- * @offset: The register address
*
- * Read a 8bit register from the chip, returning the result
+ * The KS8851-16MLL EESK pin allows selecting the endianness of the 16bit
+ * bus. To maintain optimum performance, the bus endianness should be set
+ * such that it matches the endianness of the CPU.
*/
-static u8 ks_rdreg8(struct ks_net *ks, int offset)
+
+static int ks_check_endian(struct ks_net *ks)
{
- u16 data;
- u8 shift_bit = offset & 0x03;
- u8 shift_data = (offset & 1) << 3;
- ks->cmd_reg_cache = (u16) offset | (u16)(BE0 << shift_bit);
- iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd);
- data = ioread16(ks->hw_addr);
- return (u8)(data >> shift_data);
+ u16 cider;
+
+ /*
+ * Read CIDER register first, however read it the "wrong" way around.
+ * If the endian strap on the KS8851-16MLL in incorrect and the chip
+ * is operating in different endianness than the CPU, then the meaning
+ * of BE[3:0] byte-enable bits is also swapped such that:
+ * BE[3,2,1,0] becomes BE[1,0,3,2]
+ *
+ * Luckily for us, the byte-enable bits are the top four MSbits of
+ * the address register and the CIDER register is at offset 0xc0.
+ * Hence, by reading address 0xc0c0, which is not impacted by endian
+ * swapping, we assert either BE[3:2] or BE[1:0] while reading the
+ * CIDER register.
+ *
+ * If the bus configuration is correct, reading 0xc0c0 asserts
+ * BE[3:2] and this read returns 0x0000, because to read register
+ * with bottom two LSbits of address set to 0, BE[1:0] must be
+ * asserted.
+ *
+ * If the bus configuration is NOT correct, reading 0xc0c0 asserts
+ * BE[1:0] and this read returns non-zero 0x8872 value.
+ */
+ iowrite16(BE3 | BE2 | KS_CIDER, ks->hw_addr_cmd);
+ cider = ioread16(ks->hw_addr);
+ if (!cider)
+ return 0;
+
+ netdev_err(ks->netdev, "incorrect EESK endian strap setting\n");
+
+ return -EINVAL;
}
/**
@@ -190,22 +216,6 @@ static u16 ks_rdreg16(struct ks_net *ks, int offset)
}
/**
- * ks_wrreg8 - write 8bit register value to chip
- * @ks: The chip information
- * @offset: The register address
- * @value: The value to write
- *
- */
-static void ks_wrreg8(struct ks_net *ks, int offset, u8 value)
-{
- u8 shift_bit = (offset & 0x03);
- u16 value_write = (u16)(value << ((offset & 1) << 3));
- ks->cmd_reg_cache = (u16)offset | (BE0 << shift_bit);
- iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd);
- iowrite16(value_write, ks->hw_addr);
-}
-
-/**
* ks_wrreg16 - write 16bit register value to chip
* @ks: The chip information
* @offset: The register address
@@ -324,8 +334,7 @@ static void ks_read_config(struct ks_net *ks)
u16 reg_data = 0;
/* Regardless of bus width, 8 bit read should always work.*/
- reg_data = ks_rdreg8(ks, KS_CCR) & 0x00FF;
- reg_data |= ks_rdreg8(ks, KS_CCR+1) << 8;
+ reg_data = ks_rdreg16(ks, KS_CCR);
/* addr/data bus are multiplexed */
ks->sharedbus = (reg_data & CCR_SHARED) == CCR_SHARED;
@@ -429,7 +438,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len)
/* 1. set sudo DMA mode */
ks_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI);
- ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff);
+ ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA);
/* 2. read prepend data */
/**
@@ -446,7 +455,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len)
ks_inblk(ks, buf, ALIGN(len, 4));
/* 4. reset sudo DMA Mode */
- ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr);
+ ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr);
}
/**
@@ -548,14 +557,17 @@ static irqreturn_t ks_irq(int irq, void *pw)
{
struct net_device *netdev = pw;
struct ks_net *ks = netdev_priv(netdev);
+ unsigned long flags;
u16 status;
+ spin_lock_irqsave(&ks->statelock, flags);
/*this should be the first in IRQ handler */
ks_save_cmd_reg(ks);
status = ks_rdreg16(ks, KS_ISR);
if (unlikely(!status)) {
ks_restore_cmd_reg(ks);
+ spin_unlock_irqrestore(&ks->statelock, flags);
return IRQ_NONE;
}
@@ -581,6 +593,7 @@ static irqreturn_t ks_irq(int irq, void *pw)
ks->netdev->stats.rx_over_errors++;
/* this should be the last in IRQ handler*/
ks_restore_cmd_reg(ks);
+ spin_unlock_irqrestore(&ks->statelock, flags);
return IRQ_HANDLED;
}
@@ -650,6 +663,7 @@ static int ks_net_stop(struct net_device *netdev)
/* shutdown RX/TX QMU */
ks_disable_qmu(ks);
+ ks_disable_int(ks);
/* set powermode to soft power down to save power */
ks_set_powermode(ks, PMECR_PM_SOFTDOWN);
@@ -679,13 +693,13 @@ static void ks_write_qmu(struct ks_net *ks, u8 *pdata, u16 len)
ks->txh.txw[1] = cpu_to_le16(len);
/* 1. set sudo-DMA mode */
- ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff);
+ ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA);
/* 2. write status/lenth info */
ks_outblk(ks, ks->txh.txw, 4);
/* 3. write pkt data */
ks_outblk(ks, (u16 *)pdata, ALIGN(len, 4));
/* 4. reset sudo-DMA mode */
- ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr);
+ ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr);
/* 5. Enqueue Tx(move the pkt from TX buffer into TXQ) */
ks_wrreg16(ks, KS_TXQCR, TXQCR_METFE);
/* 6. wait until TXQCR_METFE is auto-cleared */
@@ -706,10 +720,9 @@ static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev)
{
netdev_tx_t retv = NETDEV_TX_OK;
struct ks_net *ks = netdev_priv(netdev);
+ unsigned long flags;
- disable_irq(netdev->irq);
- ks_disable_int(ks);
- spin_lock(&ks->statelock);
+ spin_lock_irqsave(&ks->statelock, flags);
/* Extra space are required:
* 4 byte for alignment, 4 for status/length, 4 for CRC
@@ -723,9 +736,7 @@ static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev)
dev_kfree_skb(skb);
} else
retv = NETDEV_TX_BUSY;
- spin_unlock(&ks->statelock);
- ks_enable_int(ks);
- enable_irq(netdev->irq);
+ spin_unlock_irqrestore(&ks->statelock, flags);
return retv;
}
@@ -1251,6 +1262,10 @@ static int ks8851_probe(struct platform_device *pdev)
goto err_free;
}
+ err = ks_check_endian(ks);
+ if (err)
+ goto err_free;
+
netdev->irq = platform_get_irq(pdev, 0);
if ((int)netdev->irq < 0) {
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 86d543a..d3b7373 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -2176,24 +2176,29 @@ static int ocelot_init_timestamp(struct ocelot *ocelot)
return 0;
}
-static void ocelot_port_set_mtu(struct ocelot *ocelot, int port, size_t mtu)
+/* Configure the maximum SDU (L2 payload) on RX to the value specified in @sdu.
+ * The length of VLAN tags is accounted for automatically via DEV_MAC_TAGS_CFG.
+ */
+static void ocelot_port_set_maxlen(struct ocelot *ocelot, int port, size_t sdu)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
+ int maxlen = sdu + ETH_HLEN + ETH_FCS_LEN;
int atop_wm;
- ocelot_port_writel(ocelot_port, mtu, DEV_MAC_MAXLEN_CFG);
+ ocelot_port_writel(ocelot_port, maxlen, DEV_MAC_MAXLEN_CFG);
/* Set Pause WM hysteresis
- * 152 = 6 * mtu / OCELOT_BUFFER_CELL_SZ
- * 101 = 4 * mtu / OCELOT_BUFFER_CELL_SZ
+ * 152 = 6 * maxlen / OCELOT_BUFFER_CELL_SZ
+ * 101 = 4 * maxlen / OCELOT_BUFFER_CELL_SZ
*/
ocelot_write_rix(ocelot, SYS_PAUSE_CFG_PAUSE_ENA |
SYS_PAUSE_CFG_PAUSE_STOP(101) |
SYS_PAUSE_CFG_PAUSE_START(152), SYS_PAUSE_CFG, port);
/* Tail dropping watermark */
- atop_wm = (ocelot->shared_queue_sz - 9 * mtu) / OCELOT_BUFFER_CELL_SZ;
- ocelot_write_rix(ocelot, ocelot_wm_enc(9 * mtu),
+ atop_wm = (ocelot->shared_queue_sz - 9 * maxlen) /
+ OCELOT_BUFFER_CELL_SZ;
+ ocelot_write_rix(ocelot, ocelot_wm_enc(9 * maxlen),
SYS_ATOP, port);
ocelot_write(ocelot, ocelot_wm_enc(atop_wm), SYS_ATOP_TOT_CFG);
}
@@ -2222,9 +2227,10 @@ void ocelot_init_port(struct ocelot *ocelot, int port)
DEV_MAC_HDX_CFG);
/* Set Max Length and maximum tags allowed */
- ocelot_port_set_mtu(ocelot, port, VLAN_ETH_FRAME_LEN);
+ ocelot_port_set_maxlen(ocelot, port, ETH_DATA_LEN);
ocelot_port_writel(ocelot_port, DEV_MAC_TAGS_CFG_TAG_ID(ETH_P_8021AD) |
DEV_MAC_TAGS_CFG_VLAN_AWR_ENA |
+ DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA |
DEV_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA,
DEV_MAC_TAGS_CFG);
@@ -2310,18 +2316,18 @@ void ocelot_set_cpu_port(struct ocelot *ocelot, int cpu,
* Only one port can be an NPI at the same time.
*/
if (cpu < ocelot->num_phys_ports) {
- int mtu = VLAN_ETH_FRAME_LEN + OCELOT_TAG_LEN;
+ int sdu = ETH_DATA_LEN + OCELOT_TAG_LEN;
ocelot_write(ocelot, QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK_M |
QSYS_EXT_CPU_CFG_EXT_CPU_PORT(cpu),
QSYS_EXT_CPU_CFG);
if (injection == OCELOT_TAG_PREFIX_SHORT)
- mtu += OCELOT_SHORT_PREFIX_LEN;
+ sdu += OCELOT_SHORT_PREFIX_LEN;
else if (injection == OCELOT_TAG_PREFIX_LONG)
- mtu += OCELOT_LONG_PREFIX_LEN;
+ sdu += OCELOT_LONG_PREFIX_LEN;
- ocelot_port_set_mtu(ocelot, cpu, mtu);
+ ocelot_port_set_maxlen(ocelot, cpu, sdu);
}
/* CPU port Injection/Extraction configuration */
diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c
index b388208..1135a18 100644
--- a/drivers/net/ethernet/mscc/ocelot_board.c
+++ b/drivers/net/ethernet/mscc/ocelot_board.c
@@ -114,6 +114,14 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg)
if (err != 4)
break;
+ /* At this point the IFH was read correctly, so it is safe to
+ * presume that there is no error. The err needs to be reset
+ * otherwise a frame could come in CPU queue between the while
+ * condition and the check for error later on. And in that case
+ * the new frame is just removed and not processed.
+ */
+ err = 0;
+
ocelot_parse_ifh(ifh, &info);
ocelot_port = ocelot->ports[info.port];
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.h b/drivers/net/ethernet/neterion/vxge/vxge-config.h
index e678ba3..628fa9b 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-config.h
+++ b/drivers/net/ethernet/neterion/vxge/vxge-config.h
@@ -2045,7 +2045,7 @@ vxge_hw_vpath_strip_fcs_check(struct __vxge_hw_device *hldev, u64 vpath_mask);
if ((level >= VXGE_ERR && VXGE_COMPONENT_LL & VXGE_DEBUG_ERR_MASK) || \
(level >= VXGE_TRACE && VXGE_COMPONENT_LL & VXGE_DEBUG_TRACE_MASK))\
if ((mask & VXGE_DEBUG_MASK) == mask) \
- printk(fmt "\n", __VA_ARGS__); \
+ printk(fmt "\n", ##__VA_ARGS__); \
} while (0)
#else
#define vxge_debug_ll(level, mask, fmt, ...)
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.h b/drivers/net/ethernet/neterion/vxge/vxge-main.h
index 59a57ff..9c86f4f 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.h
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.h
@@ -452,49 +452,49 @@ int vxge_fw_upgrade(struct vxgedev *vdev, char *fw_name, int override);
#if (VXGE_DEBUG_LL_CONFIG & VXGE_DEBUG_MASK)
#define vxge_debug_ll_config(level, fmt, ...) \
- vxge_debug_ll(level, VXGE_DEBUG_LL_CONFIG, fmt, __VA_ARGS__)
+ vxge_debug_ll(level, VXGE_DEBUG_LL_CONFIG, fmt, ##__VA_ARGS__)
#else
#define vxge_debug_ll_config(level, fmt, ...)
#endif
#if (VXGE_DEBUG_INIT & VXGE_DEBUG_MASK)
#define vxge_debug_init(level, fmt, ...) \
- vxge_debug_ll(level, VXGE_DEBUG_INIT, fmt, __VA_ARGS__)
+ vxge_debug_ll(level, VXGE_DEBUG_INIT, fmt, ##__VA_ARGS__)
#else
#define vxge_debug_init(level, fmt, ...)
#endif
#if (VXGE_DEBUG_TX & VXGE_DEBUG_MASK)
#define vxge_debug_tx(level, fmt, ...) \
- vxge_debug_ll(level, VXGE_DEBUG_TX, fmt, __VA_ARGS__)
+ vxge_debug_ll(level, VXGE_DEBUG_TX, fmt, ##__VA_ARGS__)
#else
#define vxge_debug_tx(level, fmt, ...)
#endif
#if (VXGE_DEBUG_RX & VXGE_DEBUG_MASK)
#define vxge_debug_rx(level, fmt, ...) \
- vxge_debug_ll(level, VXGE_DEBUG_RX, fmt, __VA_ARGS__)
+ vxge_debug_ll(level, VXGE_DEBUG_RX, fmt, ##__VA_ARGS__)
#else
#define vxge_debug_rx(level, fmt, ...)
#endif
#if (VXGE_DEBUG_MEM & VXGE_DEBUG_MASK)
#define vxge_debug_mem(level, fmt, ...) \
- vxge_debug_ll(level, VXGE_DEBUG_MEM, fmt, __VA_ARGS__)
+ vxge_debug_ll(level, VXGE_DEBUG_MEM, fmt, ##__VA_ARGS__)
#else
#define vxge_debug_mem(level, fmt, ...)
#endif
#if (VXGE_DEBUG_ENTRYEXIT & VXGE_DEBUG_MASK)
#define vxge_debug_entryexit(level, fmt, ...) \
- vxge_debug_ll(level, VXGE_DEBUG_ENTRYEXIT, fmt, __VA_ARGS__)
+ vxge_debug_ll(level, VXGE_DEBUG_ENTRYEXIT, fmt, ##__VA_ARGS__)
#else
#define vxge_debug_entryexit(level, fmt, ...)
#endif
#if (VXGE_DEBUG_INTR & VXGE_DEBUG_MASK)
#define vxge_debug_intr(level, fmt, ...) \
- vxge_debug_ll(level, VXGE_DEBUG_INTR, fmt, __VA_ARGS__)
+ vxge_debug_ll(level, VXGE_DEBUG_INTR, fmt, ##__VA_ARGS__)
#else
#define vxge_debug_intr(level, fmt, ...)
#endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
index b454db2..684e4e0 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
@@ -616,7 +616,7 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface)
if (bar->iomem) {
int pf;
- msg += snprintf(msg, end - msg, "0.0: General/MSI-X SRAM, ");
+ msg += scnprintf(msg, end - msg, "0.0: General/MSI-X SRAM, ");
atomic_inc(&bar->refcnt);
bars_free--;
@@ -661,7 +661,7 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface)
/* Configure, and lock, BAR0.1 for PCIe XPB (MSI-X PBA) */
bar = &nfp->bar[1];
- msg += snprintf(msg, end - msg, "0.1: PCIe XPB/MSI-X PBA, ");
+ msg += scnprintf(msg, end - msg, "0.1: PCIe XPB/MSI-X PBA, ");
atomic_inc(&bar->refcnt);
bars_free--;
@@ -680,8 +680,8 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface)
bar->iomem = ioremap(nfp_bar_resource_start(bar),
nfp_bar_resource_len(bar));
if (bar->iomem) {
- msg += snprintf(msg, end - msg,
- "0.%d: Explicit%d, ", 4 + i, i);
+ msg += scnprintf(msg, end - msg,
+ "0.%d: Explicit%d, ", 4 + i, i);
atomic_inc(&bar->refcnt);
bars_free--;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
index 87f82f3..46107de 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -103,7 +103,7 @@ int ionic_heartbeat_check(struct ionic *ionic)
{
struct ionic_dev *idev = &ionic->idev;
unsigned long hb_time;
- u32 fw_status;
+ u8 fw_status;
u32 hb;
/* wait a little more than one second before testing again */
@@ -111,9 +111,12 @@ int ionic_heartbeat_check(struct ionic *ionic)
if (time_before(hb_time, (idev->last_hb_time + ionic->watchdog_period)))
return 0;
- /* firmware is useful only if fw_status is non-zero */
- fw_status = ioread32(&idev->dev_info_regs->fw_status);
- if (!fw_status)
+ /* firmware is useful only if the running bit is set and
+ * fw_status != 0xff (bad PCI read)
+ */
+ fw_status = ioread8(&idev->dev_info_regs->fw_status);
+ if (fw_status == 0xff ||
+ !(fw_status & IONIC_FW_STS_F_RUNNING))
return -ENXIO;
/* early FW has no heartbeat, else FW will return non-zero */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
index ce07c29..51adf50 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_if.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB OR BSD-2-Clause */
+/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) OR BSD-2-Clause */
/* Copyright (c) 2017-2019 Pensando Systems, Inc. All rights reserved. */
#ifndef _IONIC_IF_H_
@@ -2445,6 +2445,7 @@ union ionic_dev_info_regs {
u8 version;
u8 asic_type;
u8 asic_rev;
+#define IONIC_FW_STS_F_RUNNING 0x1
u8 fw_status;
u32 fw_heartbeat;
char fw_version[IONIC_DEVINFO_FWVERS_BUFLEN];
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 191271f..938e19e 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -948,18 +948,18 @@ static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode)
int i;
#define REMAIN(__x) (sizeof(buf) - (__x))
- i = snprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:",
- lif->rx_mode, rx_mode);
+ i = scnprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:",
+ lif->rx_mode, rx_mode);
if (rx_mode & IONIC_RX_MODE_F_UNICAST)
- i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_UNICAST");
+ i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_UNICAST");
if (rx_mode & IONIC_RX_MODE_F_MULTICAST)
- i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_MULTICAST");
+ i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_MULTICAST");
if (rx_mode & IONIC_RX_MODE_F_BROADCAST)
- i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_BROADCAST");
+ i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_BROADCAST");
if (rx_mode & IONIC_RX_MODE_F_PROMISC)
- i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC");
+ i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC");
if (rx_mode & IONIC_RX_MODE_F_ALLMULTI)
- i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI");
+ i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI");
netdev_dbg(lif->netdev, "lif%d %s\n", lif->index, buf);
err = ionic_adminq_post_wait(lif, &ctx);
@@ -1688,7 +1688,7 @@ static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
if (!(is_zero_ether_addr(mac) || is_valid_ether_addr(mac)))
return -EINVAL;
- down_read(&ionic->vf_op_lock);
+ down_write(&ionic->vf_op_lock);
if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
ret = -EINVAL;
@@ -1698,7 +1698,7 @@ static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
ether_addr_copy(ionic->vfs[vf].macaddr, mac);
}
- up_read(&ionic->vf_op_lock);
+ up_write(&ionic->vf_op_lock);
return ret;
}
@@ -1719,7 +1719,7 @@ static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
if (proto != htons(ETH_P_8021Q))
return -EPROTONOSUPPORT;
- down_read(&ionic->vf_op_lock);
+ down_write(&ionic->vf_op_lock);
if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
ret = -EINVAL;
@@ -1730,7 +1730,7 @@ static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
ionic->vfs[vf].vlanid = vlan;
}
- up_read(&ionic->vf_op_lock);
+ up_write(&ionic->vf_op_lock);
return ret;
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_regs.h b/drivers/net/ethernet/pensando/ionic/ionic_regs.h
index 03ee5a3..2e174f45 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_regs.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_regs.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB OR BSD-2-Clause */
+/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) OR BSD-2-Clause */
/* Copyright (c) 2018-2019 Pensando Systems, Inc. All rights reserved. */
#ifndef IONIC_REGS_H
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index e8a1b27..234c6f3 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -163,6 +163,8 @@ struct qede_rdma_dev {
struct list_head entry;
struct list_head rdma_event_list;
struct workqueue_struct *rdma_wq;
+ struct kref refcnt;
+ struct completion event_comp;
bool exp_recovery;
};
diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
index ffabc2d..2d873ae 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
@@ -59,6 +59,9 @@ static void _qede_rdma_dev_add(struct qede_dev *edev)
static int qede_rdma_create_wq(struct qede_dev *edev)
{
INIT_LIST_HEAD(&edev->rdma_info.rdma_event_list);
+ kref_init(&edev->rdma_info.refcnt);
+ init_completion(&edev->rdma_info.event_comp);
+
edev->rdma_info.rdma_wq = create_singlethread_workqueue("rdma_wq");
if (!edev->rdma_info.rdma_wq) {
DP_NOTICE(edev, "qedr: Could not create workqueue\n");
@@ -83,8 +86,23 @@ static void qede_rdma_cleanup_event(struct qede_dev *edev)
}
}
+static void qede_rdma_complete_event(struct kref *ref)
+{
+ struct qede_rdma_dev *rdma_dev =
+ container_of(ref, struct qede_rdma_dev, refcnt);
+
+ /* no more events will be added after this */
+ complete(&rdma_dev->event_comp);
+}
+
static void qede_rdma_destroy_wq(struct qede_dev *edev)
{
+ /* Avoid race with add_event flow, make sure it finishes before
+ * we start accessing the list and cleaning up the work
+ */
+ kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event);
+ wait_for_completion(&edev->rdma_info.event_comp);
+
qede_rdma_cleanup_event(edev);
destroy_workqueue(edev->rdma_info.rdma_wq);
}
@@ -310,15 +328,24 @@ static void qede_rdma_add_event(struct qede_dev *edev,
if (!edev->rdma_info.qedr_dev)
return;
+ /* We don't want the cleanup flow to start while we're allocating and
+ * scheduling the work
+ */
+ if (!kref_get_unless_zero(&edev->rdma_info.refcnt))
+ return; /* already being destroyed */
+
event_node = qede_rdma_get_free_event_node(edev);
if (!event_node)
- return;
+ goto out;
event_node->event = event;
event_node->ptr = edev;
INIT_WORK(&event_node->work, qede_rdma_handle_event);
queue_work(edev->rdma_info.rdma_wq, &event_node->work);
+
+out:
+ kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event);
}
void qede_rdma_dev_event_open(struct qede_dev *edev)
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
index 07f9067..cda5b0a 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
@@ -1720,7 +1720,7 @@ static int qlcnic_83xx_get_reset_instruction_template(struct qlcnic_adapter *p_d
ahw->reset.seq_error = 0;
ahw->reset.buff = kzalloc(QLC_83XX_RESTART_TEMPLATE_SIZE, GFP_KERNEL);
- if (p_dev->ahw->reset.buff == NULL)
+ if (ahw->reset.buff == NULL)
return -ENOMEM;
p_buff = p_dev->ahw->reset.buff;
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index 06de595..fbf4cbc 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -13,25 +13,6 @@
#include "rmnet_vnd.h"
#include "rmnet_private.h"
-/* Locking scheme -
- * The shared resource which needs to be protected is realdev->rx_handler_data.
- * For the writer path, this is using rtnl_lock(). The writer paths are
- * rmnet_newlink(), rmnet_dellink() and rmnet_force_unassociate_device(). These
- * paths are already called with rtnl_lock() acquired in. There is also an
- * ASSERT_RTNL() to ensure that we are calling with rtnl acquired. For
- * dereference here, we will need to use rtnl_dereference(). Dev list writing
- * needs to happen with rtnl_lock() acquired for netdev_master_upper_dev_link().
- * For the reader path, the real_dev->rx_handler_data is called in the TX / RX
- * path. We only need rcu_read_lock() for these scenarios. In these cases,
- * the rcu_read_lock() is held in __dev_queue_xmit() and
- * netif_receive_skb_internal(), so readers need to use rcu_dereference_rtnl()
- * to get the relevant information. For dev list reading, we again acquire
- * rcu_read_lock() in rmnet_dellink() for netdev_master_upper_dev_get_rcu().
- * We also use unregister_netdevice_many() to free all rmnet devices in
- * rmnet_force_unassociate_device() so we dont lose the rtnl_lock() and free in
- * same context.
- */
-
/* Local Definitions and Declarations */
static const struct nla_policy rmnet_policy[IFLA_RMNET_MAX + 1] = {
@@ -51,9 +32,10 @@ rmnet_get_port_rtnl(const struct net_device *real_dev)
return rtnl_dereference(real_dev->rx_handler_data);
}
-static int rmnet_unregister_real_device(struct net_device *real_dev,
- struct rmnet_port *port)
+static int rmnet_unregister_real_device(struct net_device *real_dev)
{
+ struct rmnet_port *port = rmnet_get_port_rtnl(real_dev);
+
if (port->nr_rmnet_devs)
return -EINVAL;
@@ -61,9 +43,6 @@ static int rmnet_unregister_real_device(struct net_device *real_dev,
kfree(port);
- /* release reference on real_dev */
- dev_put(real_dev);
-
netdev_dbg(real_dev, "Removed from rmnet\n");
return 0;
}
@@ -89,9 +68,6 @@ static int rmnet_register_real_device(struct net_device *real_dev)
return -EBUSY;
}
- /* hold on to real dev for MAP data */
- dev_hold(real_dev);
-
for (entry = 0; entry < RMNET_MAX_LOGICAL_EP; entry++)
INIT_HLIST_HEAD(&port->muxed_ep[entry]);
@@ -99,28 +75,33 @@ static int rmnet_register_real_device(struct net_device *real_dev)
return 0;
}
-static void rmnet_unregister_bridge(struct net_device *dev,
- struct rmnet_port *port)
+static void rmnet_unregister_bridge(struct rmnet_port *port)
{
- struct rmnet_port *bridge_port;
- struct net_device *bridge_dev;
+ struct net_device *bridge_dev, *real_dev, *rmnet_dev;
+ struct rmnet_port *real_port;
if (port->rmnet_mode != RMNET_EPMODE_BRIDGE)
return;
- /* bridge slave handling */
+ rmnet_dev = port->rmnet_dev;
if (!port->nr_rmnet_devs) {
- bridge_dev = port->bridge_ep;
+ /* bridge device */
+ real_dev = port->bridge_ep;
+ bridge_dev = port->dev;
- bridge_port = rmnet_get_port_rtnl(bridge_dev);
- bridge_port->bridge_ep = NULL;
- bridge_port->rmnet_mode = RMNET_EPMODE_VND;
+ real_port = rmnet_get_port_rtnl(real_dev);
+ real_port->bridge_ep = NULL;
+ real_port->rmnet_mode = RMNET_EPMODE_VND;
} else {
+ /* real device */
bridge_dev = port->bridge_ep;
- bridge_port = rmnet_get_port_rtnl(bridge_dev);
- rmnet_unregister_real_device(bridge_dev, bridge_port);
+ port->bridge_ep = NULL;
+ port->rmnet_mode = RMNET_EPMODE_VND;
}
+
+ netdev_upper_dev_unlink(bridge_dev, rmnet_dev);
+ rmnet_unregister_real_device(bridge_dev);
}
static int rmnet_newlink(struct net *src_net, struct net_device *dev,
@@ -135,6 +116,11 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
int err = 0;
u16 mux_id;
+ if (!tb[IFLA_LINK]) {
+ NL_SET_ERR_MSG_MOD(extack, "link not specified");
+ return -EINVAL;
+ }
+
real_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
if (!real_dev || !dev)
return -ENODEV;
@@ -157,7 +143,12 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
if (err)
goto err1;
+ err = netdev_upper_dev_link(real_dev, dev, extack);
+ if (err < 0)
+ goto err2;
+
port->rmnet_mode = mode;
+ port->rmnet_dev = dev;
hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]);
@@ -173,8 +164,11 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
return 0;
+err2:
+ unregister_netdevice(dev);
+ rmnet_vnd_dellink(mux_id, port, ep);
err1:
- rmnet_unregister_real_device(real_dev, port);
+ rmnet_unregister_real_device(real_dev);
err0:
kfree(ep);
return err;
@@ -183,77 +177,74 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
static void rmnet_dellink(struct net_device *dev, struct list_head *head)
{
struct rmnet_priv *priv = netdev_priv(dev);
- struct net_device *real_dev;
+ struct net_device *real_dev, *bridge_dev;
+ struct rmnet_port *real_port, *bridge_port;
struct rmnet_endpoint *ep;
- struct rmnet_port *port;
- u8 mux_id;
+ u8 mux_id = priv->mux_id;
real_dev = priv->real_dev;
- if (!real_dev || !rmnet_is_real_dev_registered(real_dev))
+ if (!rmnet_is_real_dev_registered(real_dev))
return;
- port = rmnet_get_port_rtnl(real_dev);
+ real_port = rmnet_get_port_rtnl(real_dev);
+ bridge_dev = real_port->bridge_ep;
+ if (bridge_dev) {
+ bridge_port = rmnet_get_port_rtnl(bridge_dev);
+ rmnet_unregister_bridge(bridge_port);
+ }
- mux_id = rmnet_vnd_get_mux(dev);
-
- ep = rmnet_get_endpoint(port, mux_id);
+ ep = rmnet_get_endpoint(real_port, mux_id);
if (ep) {
hlist_del_init_rcu(&ep->hlnode);
- rmnet_unregister_bridge(dev, port);
- rmnet_vnd_dellink(mux_id, port, ep);
+ rmnet_vnd_dellink(mux_id, real_port, ep);
kfree(ep);
}
- rmnet_unregister_real_device(real_dev, port);
+ netdev_upper_dev_unlink(real_dev, dev);
+ rmnet_unregister_real_device(real_dev);
unregister_netdevice_queue(dev, head);
}
-static void rmnet_force_unassociate_device(struct net_device *dev)
+static void rmnet_force_unassociate_device(struct net_device *real_dev)
{
- struct net_device *real_dev = dev;
struct hlist_node *tmp_ep;
struct rmnet_endpoint *ep;
struct rmnet_port *port;
unsigned long bkt_ep;
LIST_HEAD(list);
- if (!rmnet_is_real_dev_registered(real_dev))
- return;
+ port = rmnet_get_port_rtnl(real_dev);
- ASSERT_RTNL();
-
- port = rmnet_get_port_rtnl(dev);
-
- rcu_read_lock();
- rmnet_unregister_bridge(dev, port);
-
- hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) {
- unregister_netdevice_queue(ep->egress_dev, &list);
- rmnet_vnd_dellink(ep->mux_id, port, ep);
-
- hlist_del_init_rcu(&ep->hlnode);
- kfree(ep);
+ if (port->nr_rmnet_devs) {
+ /* real device */
+ rmnet_unregister_bridge(port);
+ hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) {
+ unregister_netdevice_queue(ep->egress_dev, &list);
+ netdev_upper_dev_unlink(real_dev, ep->egress_dev);
+ rmnet_vnd_dellink(ep->mux_id, port, ep);
+ hlist_del_init_rcu(&ep->hlnode);
+ kfree(ep);
+ }
+ rmnet_unregister_real_device(real_dev);
+ unregister_netdevice_many(&list);
+ } else {
+ rmnet_unregister_bridge(port);
}
-
- rcu_read_unlock();
- unregister_netdevice_many(&list);
-
- rmnet_unregister_real_device(real_dev, port);
}
static int rmnet_config_notify_cb(struct notifier_block *nb,
unsigned long event, void *data)
{
- struct net_device *dev = netdev_notifier_info_to_dev(data);
+ struct net_device *real_dev = netdev_notifier_info_to_dev(data);
- if (!dev)
+ if (!rmnet_is_real_dev_registered(real_dev))
return NOTIFY_DONE;
switch (event) {
case NETDEV_UNREGISTER:
- netdev_dbg(dev, "Kernel unregister\n");
- rmnet_force_unassociate_device(dev);
+ netdev_dbg(real_dev, "Kernel unregister\n");
+ rmnet_force_unassociate_device(real_dev);
break;
default:
@@ -295,16 +286,18 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[],
if (!dev)
return -ENODEV;
- real_dev = __dev_get_by_index(dev_net(dev),
- nla_get_u32(tb[IFLA_LINK]));
-
- if (!real_dev || !rmnet_is_real_dev_registered(real_dev))
+ real_dev = priv->real_dev;
+ if (!rmnet_is_real_dev_registered(real_dev))
return -ENODEV;
port = rmnet_get_port_rtnl(real_dev);
if (data[IFLA_RMNET_MUX_ID]) {
mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]);
+ if (rmnet_get_endpoint(port, mux_id)) {
+ NL_SET_ERR_MSG_MOD(extack, "MUX ID already exists");
+ return -EINVAL;
+ }
ep = rmnet_get_endpoint(port, priv->mux_id);
if (!ep)
return -ENODEV;
@@ -379,11 +372,10 @@ struct rtnl_link_ops rmnet_link_ops __read_mostly = {
.fill_info = rmnet_fill_info,
};
-/* Needs either rcu_read_lock() or rtnl lock */
-struct rmnet_port *rmnet_get_port(struct net_device *real_dev)
+struct rmnet_port *rmnet_get_port_rcu(struct net_device *real_dev)
{
if (rmnet_is_real_dev_registered(real_dev))
- return rcu_dereference_rtnl(real_dev->rx_handler_data);
+ return rcu_dereference_bh(real_dev->rx_handler_data);
else
return NULL;
}
@@ -409,7 +401,7 @@ int rmnet_add_bridge(struct net_device *rmnet_dev,
struct rmnet_port *port, *slave_port;
int err;
- port = rmnet_get_port(real_dev);
+ port = rmnet_get_port_rtnl(real_dev);
/* If there is more than one rmnet dev attached, its probably being
* used for muxing. Skip the briding in that case
@@ -417,6 +409,9 @@ int rmnet_add_bridge(struct net_device *rmnet_dev,
if (port->nr_rmnet_devs > 1)
return -EINVAL;
+ if (port->rmnet_mode != RMNET_EPMODE_VND)
+ return -EINVAL;
+
if (rmnet_is_real_dev_registered(slave_dev))
return -EBUSY;
@@ -424,9 +419,17 @@ int rmnet_add_bridge(struct net_device *rmnet_dev,
if (err)
return -EBUSY;
- slave_port = rmnet_get_port(slave_dev);
+ err = netdev_master_upper_dev_link(slave_dev, rmnet_dev, NULL, NULL,
+ extack);
+ if (err) {
+ rmnet_unregister_real_device(slave_dev);
+ return err;
+ }
+
+ slave_port = rmnet_get_port_rtnl(slave_dev);
slave_port->rmnet_mode = RMNET_EPMODE_BRIDGE;
slave_port->bridge_ep = real_dev;
+ slave_port->rmnet_dev = rmnet_dev;
port->rmnet_mode = RMNET_EPMODE_BRIDGE;
port->bridge_ep = slave_dev;
@@ -438,16 +441,9 @@ int rmnet_add_bridge(struct net_device *rmnet_dev,
int rmnet_del_bridge(struct net_device *rmnet_dev,
struct net_device *slave_dev)
{
- struct rmnet_priv *priv = netdev_priv(rmnet_dev);
- struct net_device *real_dev = priv->real_dev;
- struct rmnet_port *port, *slave_port;
+ struct rmnet_port *port = rmnet_get_port_rtnl(slave_dev);
- port = rmnet_get_port(real_dev);
- port->rmnet_mode = RMNET_EPMODE_VND;
- port->bridge_ep = NULL;
-
- slave_port = rmnet_get_port(slave_dev);
- rmnet_unregister_real_device(slave_dev, slave_port);
+ rmnet_unregister_bridge(port);
netdev_dbg(slave_dev, "removed from rmnet as slave\n");
return 0;
@@ -473,8 +469,8 @@ static int __init rmnet_init(void)
static void __exit rmnet_exit(void)
{
- unregister_netdevice_notifier(&rmnet_dev_notifier);
rtnl_link_unregister(&rmnet_link_ops);
+ unregister_netdevice_notifier(&rmnet_dev_notifier);
}
module_init(rmnet_init)
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
index cd0a6bc..be51598 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
@@ -28,6 +28,7 @@ struct rmnet_port {
u8 rmnet_mode;
struct hlist_head muxed_ep[RMNET_MAX_LOGICAL_EP];
struct net_device *bridge_ep;
+ struct net_device *rmnet_dev;
};
extern struct rtnl_link_ops rmnet_link_ops;
@@ -65,7 +66,7 @@ struct rmnet_priv {
struct rmnet_priv_stats stats;
};
-struct rmnet_port *rmnet_get_port(struct net_device *real_dev);
+struct rmnet_port *rmnet_get_port_rcu(struct net_device *real_dev);
struct rmnet_endpoint *rmnet_get_endpoint(struct rmnet_port *port, u8 mux_id);
int rmnet_add_bridge(struct net_device *rmnet_dev,
struct net_device *slave_dev,
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 1b74bc1..29a7bfa 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -159,6 +159,9 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
static void
rmnet_bridge_handler(struct sk_buff *skb, struct net_device *bridge_dev)
{
+ if (skb_mac_header_was_set(skb))
+ skb_push(skb, skb->mac_len);
+
if (bridge_dev) {
skb->dev = bridge_dev;
dev_queue_xmit(skb);
@@ -184,7 +187,7 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb)
return RX_HANDLER_PASS;
dev = skb->dev;
- port = rmnet_get_port(dev);
+ port = rmnet_get_port_rcu(dev);
switch (port->rmnet_mode) {
case RMNET_EPMODE_VND:
@@ -217,7 +220,7 @@ void rmnet_egress_handler(struct sk_buff *skb)
skb->dev = priv->real_dev;
mux_id = priv->mux_id;
- port = rmnet_get_port(skb->dev);
+ port = rmnet_get_port_rcu(skb->dev);
if (!port)
goto drop;
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index 509dfc8..26ad40f 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -266,14 +266,6 @@ int rmnet_vnd_dellink(u8 id, struct rmnet_port *port,
return 0;
}
-u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev)
-{
- struct rmnet_priv *priv;
-
- priv = netdev_priv(rmnet_dev);
- return priv->mux_id;
-}
-
int rmnet_vnd_do_flow_control(struct net_device *rmnet_dev, int enable)
{
netdev_dbg(rmnet_dev, "Setting VND TX queue state to %d\n", enable);
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
index 54cbaf3..14d77c7 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
@@ -16,6 +16,5 @@ int rmnet_vnd_dellink(u8 id, struct rmnet_port *port,
struct rmnet_endpoint *ep);
void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev);
void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev);
-u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev);
void rmnet_vnd_setup(struct net_device *dev);
#endif /* _RMNET_VND_H_ */
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index a2168a1..791d99b 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -5194,7 +5194,7 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable);
rtl_lock_config_regs(tp);
/* fall through */
- case RTL_GIGA_MAC_VER_07 ... RTL_GIGA_MAC_VER_24:
+ case RTL_GIGA_MAC_VER_07 ... RTL_GIGA_MAC_VER_17:
flags = PCI_IRQ_LEGACY;
break;
default:
@@ -5285,6 +5285,13 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
if (!tp->phydev) {
mdiobus_unregister(new_bus);
return -ENODEV;
+ } else if (!tp->phydev->drv) {
+ /* Most chip versions fail with the genphy driver.
+ * Therefore ensure that the dedicated PHY driver is loaded.
+ */
+ dev_err(&pdev->dev, "realtek.ko not loaded, maybe it needs to be added to initramfs?\n");
+ mdiobus_unregister(new_bus);
+ return -EUNATCH;
}
/* PHY will be woken up in rtl_open() */
@@ -5446,15 +5453,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
int chipset, region;
int jumbo_max, rc;
- /* Some tools for creating an initramfs don't consider softdeps, then
- * r8169.ko may be in initramfs, but realtek.ko not. Then the generic
- * PHY driver is used that doesn't work with most chip versions.
- */
- if (!driver_find("RTL8201CP Ethernet", &mdio_bus_type)) {
- dev_err(&pdev->dev, "realtek.ko not loaded, maybe it needs to be added to initramfs?\n");
- return -ENOENT;
- }
-
dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp));
if (!dev)
return -ENOMEM;
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index c705743..2cc8184 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -2277,7 +2277,7 @@ static int __init sxgbe_cmdline_opt(char *str)
if (!str || !*str)
return -EINVAL;
while ((opt = strsep(&str, ",")) != NULL) {
- if (!strncmp(opt, "eee_timer:", 6)) {
+ if (!strncmp(opt, "eee_timer:", 10)) {
if (kstrtoint(opt + 10, 0, &eee_timer))
goto err;
}
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 52113b7..3f16bd8 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -2853,11 +2853,24 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
}
/* Transmit timestamps are only available for 8XXX series. They result
- * in three events per packet. These occur in order, and are:
- * - the normal completion event
+ * in up to three events per packet. These occur in order, and are:
+ * - the normal completion event (may be omitted)
* - the low part of the timestamp
* - the high part of the timestamp
*
+ * It's possible for multiple completion events to appear before the
+ * corresponding timestamps. So we can for example get:
+ * COMP N
+ * COMP N+1
+ * TS_LO N
+ * TS_HI N
+ * TS_LO N+1
+ * TS_HI N+1
+ *
+ * In addition it's also possible for the adjacent completions to be
+ * merged, so we may not see COMP N above. As such, the completion
+ * events are not very useful here.
+ *
* Each part of the timestamp is itself split across two 16 bit
* fields in the event.
*/
@@ -2865,17 +2878,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
switch (tx_ev_type) {
case TX_TIMESTAMP_EVENT_TX_EV_COMPLETION:
- /* In case of Queue flush or FLR, we might have received
- * the previous TX completion event but not the Timestamp
- * events.
- */
- if (tx_queue->completed_desc_ptr != tx_queue->ptr_mask)
- efx_xmit_done(tx_queue, tx_queue->completed_desc_ptr);
-
- tx_ev_desc_ptr = EFX_QWORD_FIELD(*event,
- ESF_DZ_TX_DESCR_INDX);
- tx_queue->completed_desc_ptr =
- tx_ev_desc_ptr & tx_queue->ptr_mask;
+ /* Ignore this event - see above. */
break;
case TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_LO:
@@ -2887,8 +2890,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
ts_part = efx_ef10_extract_event_ts(event);
tx_queue->completed_timestamp_major = ts_part;
- efx_xmit_done(tx_queue, tx_queue->completed_desc_ptr);
- tx_queue->completed_desc_ptr = tx_queue->ptr_mask;
+ efx_xmit_done_single(tx_queue);
break;
default:
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index f1bdb04..95395d6 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -20,6 +20,7 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
struct net_device *net_dev);
netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
+void efx_xmit_done_single(struct efx_tx_queue *tx_queue);
int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
void *type_data);
extern unsigned int efx_piobuf_size;
diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c
index aeb5e8a..73d4e39 100644
--- a/drivers/net/ethernet/sfc/efx_channels.c
+++ b/drivers/net/ethernet/sfc/efx_channels.c
@@ -583,6 +583,7 @@ struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel)
if (tx_queue->channel)
tx_queue->channel = channel;
tx_queue->buffer = NULL;
+ tx_queue->cb_page = NULL;
memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
}
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index 2713300..15c731d 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -212,12 +212,14 @@ static void efx_mcdi_send_request(struct efx_nic *efx, unsigned cmd,
* progress on a NIC at any one time. So no need for locking.
*/
for (i = 0; i < hdr_len / 4 && bytes < PAGE_SIZE; i++)
- bytes += snprintf(buf + bytes, PAGE_SIZE - bytes,
- " %08x", le32_to_cpu(hdr[i].u32[0]));
+ bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes,
+ " %08x",
+ le32_to_cpu(hdr[i].u32[0]));
for (i = 0; i < inlen / 4 && bytes < PAGE_SIZE; i++)
- bytes += snprintf(buf + bytes, PAGE_SIZE - bytes,
- " %08x", le32_to_cpu(inbuf[i].u32[0]));
+ bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes,
+ " %08x",
+ le32_to_cpu(inbuf[i].u32[0]));
netif_info(efx, hw, efx->net_dev, "MCDI RPC REQ:%s\n", buf);
}
@@ -302,15 +304,15 @@ static void efx_mcdi_read_response_header(struct efx_nic *efx)
*/
for (i = 0; i < hdr_len && bytes < PAGE_SIZE; i++) {
efx->type->mcdi_read_response(efx, &hdr, (i * 4), 4);
- bytes += snprintf(buf + bytes, PAGE_SIZE - bytes,
- " %08x", le32_to_cpu(hdr.u32[0]));
+ bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes,
+ " %08x", le32_to_cpu(hdr.u32[0]));
}
for (i = 0; i < data_len && bytes < PAGE_SIZE; i++) {
efx->type->mcdi_read_response(efx, &hdr,
mcdi->resp_hdr_len + (i * 4), 4);
- bytes += snprintf(buf + bytes, PAGE_SIZE - bytes,
- " %08x", le32_to_cpu(hdr.u32[0]));
+ bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes,
+ " %08x", le32_to_cpu(hdr.u32[0]));
}
netif_info(efx, hw, efx->net_dev, "MCDI RPC RESP:%s\n", buf);
@@ -1417,9 +1419,11 @@ void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len)
}
ver_words = (__le16 *)MCDI_PTR(outbuf, GET_VERSION_OUT_VERSION);
- offset = snprintf(buf, len, "%u.%u.%u.%u",
- le16_to_cpu(ver_words[0]), le16_to_cpu(ver_words[1]),
- le16_to_cpu(ver_words[2]), le16_to_cpu(ver_words[3]));
+ offset = scnprintf(buf, len, "%u.%u.%u.%u",
+ le16_to_cpu(ver_words[0]),
+ le16_to_cpu(ver_words[1]),
+ le16_to_cpu(ver_words[2]),
+ le16_to_cpu(ver_words[3]));
/* EF10 may have multiple datapath firmware variants within a
* single version. Report which variants are running.
@@ -1427,9 +1431,9 @@ void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len)
if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) {
struct efx_ef10_nic_data *nic_data = efx->nic_data;
- offset += snprintf(buf + offset, len - offset, " rx%x tx%x",
- nic_data->rx_dpcpu_fw_id,
- nic_data->tx_dpcpu_fw_id);
+ offset += scnprintf(buf + offset, len - offset, " rx%x tx%x",
+ nic_data->rx_dpcpu_fw_id,
+ nic_data->tx_dpcpu_fw_id);
/* It's theoretically possible for the string to exceed 31
* characters, though in practice the first three version
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 9f9886f..8164f0e 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -208,8 +208,6 @@ struct efx_tx_buffer {
* avoid cache-line ping-pong between the xmit path and the
* completion path.
* @merge_events: Number of TX merged completion events
- * @completed_desc_ptr: Most recent completed pointer - only used with
- * timestamping.
* @completed_timestamp_major: Top part of the most recent tx timestamp.
* @completed_timestamp_minor: Low part of the most recent tx timestamp.
* @insert_count: Current insert pointer
@@ -269,7 +267,6 @@ struct efx_tx_queue {
unsigned int merge_events;
unsigned int bytes_compl;
unsigned int pkts_compl;
- unsigned int completed_desc_ptr;
u32 completed_timestamp_major;
u32 completed_timestamp_minor;
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index af15a73..59b4f16 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -560,13 +560,45 @@ efx_ptp_mac_nic_to_ktime_correction(struct efx_nic *efx,
u32 nic_major, u32 nic_minor,
s32 correction)
{
+ u32 sync_timestamp;
ktime_t kt = { 0 };
+ s16 delta;
if (!(nic_major & 0x80000000)) {
WARN_ON_ONCE(nic_major >> 16);
- /* Use the top bits from the latest sync event. */
- nic_major &= 0xffff;
- nic_major |= (last_sync_timestamp_major(efx) & 0xffff0000);
+
+ /* Medford provides 48 bits of timestamp, so we must get the top
+ * 16 bits from the timesync event state.
+ *
+ * We only have the lower 16 bits of the time now, but we do
+ * have a full resolution timestamp at some point in past. As
+ * long as the difference between the (real) now and the sync
+ * is less than 2^15, then we can reconstruct the difference
+ * between those two numbers using only the lower 16 bits of
+ * each.
+ *
+ * Put another way
+ *
+ * a - b = ((a mod k) - b) mod k
+ *
+ * when -k/2 < (a-b) < k/2. In our case k is 2^16. We know
+ * (a mod k) and b, so can calculate the delta, a - b.
+ *
+ */
+ sync_timestamp = last_sync_timestamp_major(efx);
+
+ /* Because delta is s16 this does an implicit mask down to
+ * 16 bits which is what we need, assuming
+ * MEDFORD_TX_SECS_EVENT_BITS is 16. delta is signed so that
+ * we can deal with the (unlikely) case of sync timestamps
+ * arriving from the future.
+ */
+ delta = nic_major - sync_timestamp;
+
+ /* Recover the fully specified time now, by applying the offset
+ * to the (fully specified) sync time.
+ */
+ nic_major = sync_timestamp + delta;
kt = ptp->nic_to_kernel_time(nic_major, nic_minor,
correction);
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 04d7f41..8aafc54 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -535,6 +535,44 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
return efx_enqueue_skb(tx_queue, skb);
}
+void efx_xmit_done_single(struct efx_tx_queue *tx_queue)
+{
+ unsigned int pkts_compl = 0, bytes_compl = 0;
+ unsigned int read_ptr;
+ bool finished = false;
+
+ read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
+
+ while (!finished) {
+ struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
+
+ if (!efx_tx_buffer_in_use(buffer)) {
+ struct efx_nic *efx = tx_queue->efx;
+
+ netif_err(efx, hw, efx->net_dev,
+ "TX queue %d spurious single TX completion\n",
+ tx_queue->queue);
+ efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
+ return;
+ }
+
+ /* Need to check the flag before dequeueing. */
+ if (buffer->flags & EFX_TX_BUF_SKB)
+ finished = true;
+ efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
+
+ ++tx_queue->read_count;
+ read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
+ }
+
+ tx_queue->pkts_compl += pkts_compl;
+ tx_queue->bytes_compl += bytes_compl;
+
+ EFX_WARN_ON_PARANOID(pkts_compl != 1);
+
+ efx_xmit_done_check_empty(tx_queue);
+}
+
void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue)
{
struct efx_nic *efx = tx_queue->efx;
diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c
index b1571e9..70876df 100644
--- a/drivers/net/ethernet/sfc/tx_common.c
+++ b/drivers/net/ethernet/sfc/tx_common.c
@@ -80,7 +80,6 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
tx_queue->xmit_more_available = false;
tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) &&
tx_queue->channel == efx_ptp_channel(efx));
- tx_queue->completed_desc_ptr = tx_queue->ptr_mask;
tx_queue->completed_timestamp_major = 0;
tx_queue->completed_timestamp_minor = 0;
@@ -210,10 +209,9 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
while (read_ptr != stop_index) {
struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
- if (!(buffer->flags & EFX_TX_BUF_OPTION) &&
- unlikely(buffer->len == 0)) {
+ if (!efx_tx_buffer_in_use(buffer)) {
netif_err(efx, tx_err, efx->net_dev,
- "TX queue %d spurious TX completion id %x\n",
+ "TX queue %d spurious TX completion id %d\n",
tx_queue->queue, read_ptr);
efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
return;
@@ -226,6 +224,19 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
}
}
+void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue)
+{
+ if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
+ tx_queue->old_write_count = READ_ONCE(tx_queue->write_count);
+ if (tx_queue->read_count == tx_queue->old_write_count) {
+ /* Ensure that read_count is flushed. */
+ smp_mb();
+ tx_queue->empty_read_count =
+ tx_queue->read_count | EFX_EMPTY_COUNT_VALID;
+ }
+ }
+}
+
void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
unsigned int fill_level, pkts_compl = 0, bytes_compl = 0;
@@ -256,15 +267,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
netif_tx_wake_queue(tx_queue->core_txq);
}
- /* Check whether the hardware queue is now empty */
- if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
- tx_queue->old_write_count = READ_ONCE(tx_queue->write_count);
- if (tx_queue->read_count == tx_queue->old_write_count) {
- smp_mb();
- tx_queue->empty_read_count =
- tx_queue->read_count | EFX_EMPTY_COUNT_VALID;
- }
- }
+ efx_xmit_done_check_empty(tx_queue);
}
/* Remove buffers put into a tx_queue for the current packet.
diff --git a/drivers/net/ethernet/sfc/tx_common.h b/drivers/net/ethernet/sfc/tx_common.h
index f92f1fe..99cf7ce 100644
--- a/drivers/net/ethernet/sfc/tx_common.h
+++ b/drivers/net/ethernet/sfc/tx_common.h
@@ -21,6 +21,12 @@ void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
unsigned int *pkts_compl,
unsigned int *bytes_compl);
+static inline bool efx_tx_buffer_in_use(struct efx_tx_buffer *buffer)
+{
+ return buffer->len || (buffer->flags & EFX_TX_BUF_OPTION);
+}
+
+void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue);
void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index b703242..67ddf78 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -1810,6 +1810,9 @@ static int ave_pro4_get_pinmode(struct ave_private *priv,
break;
case PHY_INTERFACE_MODE_MII:
case PHY_INTERFACE_MODE_RGMII:
+ case PHY_INTERFACE_MODE_RGMII_ID:
+ case PHY_INTERFACE_MODE_RGMII_RXID:
+ case PHY_INTERFACE_MODE_RGMII_TXID:
priv->pinmode_val = 0;
break;
default:
@@ -1854,6 +1857,9 @@ static int ave_ld20_get_pinmode(struct ave_private *priv,
priv->pinmode_val = SG_ETPINMODE_RMII(0);
break;
case PHY_INTERFACE_MODE_RGMII:
+ case PHY_INTERFACE_MODE_RGMII_ID:
+ case PHY_INTERFACE_MODE_RGMII_RXID:
+ case PHY_INTERFACE_MODE_RGMII_TXID:
priv->pinmode_val = 0;
break;
default:
@@ -1876,6 +1882,9 @@ static int ave_pxs3_get_pinmode(struct ave_private *priv,
priv->pinmode_val = SG_ETPINMODE_RMII(arg);
break;
case PHY_INTERFACE_MODE_RGMII:
+ case PHY_INTERFACE_MODE_RGMII_ID:
+ case PHY_INTERFACE_MODE_RGMII_RXID:
+ case PHY_INTERFACE_MODE_RGMII_TXID:
priv->pinmode_val = 0;
break;
default:
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index dc50ba1..2d5573b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -1411,7 +1411,7 @@ static int rk_gmac_probe(struct platform_device *pdev)
ret = rk_gmac_clk_init(plat_dat);
if (ret)
- return ret;
+ goto err_remove_config_dt;
ret = rk_gmac_powerup(plat_dat->bsp_priv);
if (ret)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index d0356fb..5427843 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -24,6 +24,7 @@
static void dwmac1000_core_init(struct mac_device_info *hw,
struct net_device *dev)
{
+ struct stmmac_priv *priv = netdev_priv(dev);
void __iomem *ioaddr = hw->pcsr;
u32 value = readl(ioaddr + GMAC_CONTROL);
int mtu = dev->mtu;
@@ -35,7 +36,7 @@ static void dwmac1000_core_init(struct mac_device_info *hw,
* Broadcom tags can look like invalid LLC/SNAP packets and cause the
* hardware to truncate packets on reception.
*/
- if (netdev_uses_dsa(dev))
+ if (netdev_uses_dsa(dev) || !priv->plat->enh_desc)
value &= ~GMAC_CONTROL_ACS;
if (mtu > 1500)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 5836b21..7da18c9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -4405,6 +4405,8 @@ static void stmmac_init_fs(struct net_device *dev)
{
struct stmmac_priv *priv = netdev_priv(dev);
+ rtnl_lock();
+
/* Create per netdev entries */
priv->dbgfs_dir = debugfs_create_dir(dev->name, stmmac_fs_dir);
@@ -4416,14 +4418,13 @@ static void stmmac_init_fs(struct net_device *dev)
debugfs_create_file("dma_cap", 0444, priv->dbgfs_dir, dev,
&stmmac_dma_cap_fops);
- register_netdevice_notifier(&stmmac_notifier);
+ rtnl_unlock();
}
static void stmmac_exit_fs(struct net_device *dev)
{
struct stmmac_priv *priv = netdev_priv(dev);
- unregister_netdevice_notifier(&stmmac_notifier);
debugfs_remove_recursive(priv->dbgfs_dir);
}
#endif /* CONFIG_DEBUG_FS */
@@ -4940,14 +4941,14 @@ int stmmac_dvr_remove(struct device *dev)
netdev_info(priv->dev, "%s: removing driver", __func__);
-#ifdef CONFIG_DEBUG_FS
- stmmac_exit_fs(ndev);
-#endif
stmmac_stop_all_dma(priv);
stmmac_mac_set(priv, priv->ioaddr, false);
netif_carrier_off(ndev);
unregister_netdev(ndev);
+#ifdef CONFIG_DEBUG_FS
+ stmmac_exit_fs(ndev);
+#endif
phylink_destroy(priv->phylink);
if (priv->plat->stmmac_rst)
reset_control_assert(priv->plat->stmmac_rst);
@@ -5166,6 +5167,7 @@ static int __init stmmac_init(void)
/* Create debugfs main directory if it doesn't exist yet */
if (!stmmac_fs_dir)
stmmac_fs_dir = debugfs_create_dir(STMMAC_RESOURCE_NAME, NULL);
+ register_netdevice_notifier(&stmmac_notifier);
#endif
return 0;
@@ -5174,6 +5176,7 @@ static int __init stmmac_init(void)
static void __exit stmmac_exit(void)
{
#ifdef CONFIG_DEBUG_FS
+ unregister_netdevice_notifier(&stmmac_notifier);
debugfs_remove_recursive(stmmac_fs_dir);
#endif
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index d10ac54..13fafd9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -663,16 +663,22 @@ int stmmac_get_platform_resources(struct platform_device *pdev,
* In case the wake up interrupt is not passed from the platform
* so the driver will continue to use the mac irq (ndev->irq)
*/
- stmmac_res->wol_irq = platform_get_irq_byname(pdev, "eth_wake_irq");
+ stmmac_res->wol_irq =
+ platform_get_irq_byname_optional(pdev, "eth_wake_irq");
if (stmmac_res->wol_irq < 0) {
if (stmmac_res->wol_irq == -EPROBE_DEFER)
return -EPROBE_DEFER;
+ dev_info(&pdev->dev, "IRQ eth_wake_irq not found\n");
stmmac_res->wol_irq = stmmac_res->irq;
}
- stmmac_res->lpi_irq = platform_get_irq_byname(pdev, "eth_lpi");
- if (stmmac_res->lpi_irq == -EPROBE_DEFER)
- return -EPROBE_DEFER;
+ stmmac_res->lpi_irq =
+ platform_get_irq_byname_optional(pdev, "eth_lpi");
+ if (stmmac_res->lpi_irq < 0) {
+ if (stmmac_res->lpi_irq == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
+ dev_info(&pdev->dev, "IRQ eth_lpi not found\n");
+ }
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
stmmac_res->addr = devm_ioremap_resource(&pdev->dev, res);
diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c
index c23ce83..8dc6c9f 100644
--- a/drivers/net/ethernet/sun/sunvnet_common.c
+++ b/drivers/net/ethernet/sun/sunvnet_common.c
@@ -1350,27 +1350,12 @@ sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev,
if (vio_version_after_eq(&port->vio, 1, 3))
localmtu -= VLAN_HLEN;
- if (skb->protocol == htons(ETH_P_IP)) {
- struct flowi4 fl4;
- struct rtable *rt = NULL;
-
- memset(&fl4, 0, sizeof(fl4));
- fl4.flowi4_oif = dev->ifindex;
- fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
- fl4.daddr = ip_hdr(skb)->daddr;
- fl4.saddr = ip_hdr(skb)->saddr;
-
- rt = ip_route_output_key(dev_net(dev), &fl4);
- if (!IS_ERR(rt)) {
- skb_dst_set(skb, &rt->dst);
- icmp_send(skb, ICMP_DEST_UNREACH,
- ICMP_FRAG_NEEDED,
- htonl(localmtu));
- }
- }
+ if (skb->protocol == htons(ETH_P_IP))
+ icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(localmtu));
#if IS_ENABLED(CONFIG_IPV6)
else if (skb->protocol == htons(ETH_P_IPV6))
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, localmtu);
+ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, localmtu);
#endif
goto out_dropped;
}
diff --git a/drivers/net/ethernet/xilinx/ll_temac.h b/drivers/net/ethernet/xilinx/ll_temac.h
index 276292b..53fb814 100644
--- a/drivers/net/ethernet/xilinx/ll_temac.h
+++ b/drivers/net/ethernet/xilinx/ll_temac.h
@@ -375,10 +375,14 @@ struct temac_local {
int tx_bd_next;
int tx_bd_tail;
int rx_bd_ci;
+ int rx_bd_tail;
/* DMA channel control setup */
u32 tx_chnl_ctrl;
u32 rx_chnl_ctrl;
+ u8 coalesce_count_rx;
+
+ struct delayed_work restart_work;
};
/* Wrappers for temac_ior()/temac_iow() function pointers above */
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 6f11f52..9461acec 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -51,6 +51,7 @@
#include <linux/ip.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
+#include <linux/workqueue.h>
#include <linux/dma-mapping.h>
#include <linux/processor.h>
#include <linux/platform_data/xilinx-ll-temac.h>
@@ -367,6 +368,8 @@ static int temac_dma_bd_init(struct net_device *ndev)
skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data,
XTE_MAX_JUMBO_FRAME_SIZE,
DMA_FROM_DEVICE);
+ if (dma_mapping_error(ndev->dev.parent, skb_dma_addr))
+ goto out;
lp->rx_bd_v[i].phys = cpu_to_be32(skb_dma_addr);
lp->rx_bd_v[i].len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE);
lp->rx_bd_v[i].app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND);
@@ -387,12 +390,13 @@ static int temac_dma_bd_init(struct net_device *ndev)
lp->tx_bd_next = 0;
lp->tx_bd_tail = 0;
lp->rx_bd_ci = 0;
+ lp->rx_bd_tail = RX_BD_NUM - 1;
/* Enable RX DMA transfers */
wmb();
lp->dma_out(lp, RX_CURDESC_PTR, lp->rx_bd_p);
lp->dma_out(lp, RX_TAILDESC_PTR,
- lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1)));
+ lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * lp->rx_bd_tail));
/* Prepare for TX DMA transfer */
lp->dma_out(lp, TX_CURDESC_PTR, lp->tx_bd_p);
@@ -788,6 +792,9 @@ static void temac_start_xmit_done(struct net_device *ndev)
stat = be32_to_cpu(cur_p->app0);
}
+ /* Matches barrier in temac_start_xmit */
+ smp_mb();
+
netif_wake_queue(ndev);
}
@@ -830,9 +837,19 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
if (temac_check_tx_bd_space(lp, num_frag + 1)) {
- if (!netif_queue_stopped(ndev))
- netif_stop_queue(ndev);
- return NETDEV_TX_BUSY;
+ if (netif_queue_stopped(ndev))
+ return NETDEV_TX_BUSY;
+
+ netif_stop_queue(ndev);
+
+ /* Matches barrier in temac_start_xmit_done */
+ smp_mb();
+
+ /* Space might have just been freed - check again */
+ if (temac_check_tx_bd_space(lp, num_frag))
+ return NETDEV_TX_BUSY;
+
+ netif_wake_queue(ndev);
}
cur_p->app0 = 0;
@@ -850,12 +867,16 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data,
skb_headlen(skb), DMA_TO_DEVICE);
cur_p->len = cpu_to_be32(skb_headlen(skb));
+ if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, skb_dma_addr))) {
+ dev_kfree_skb_any(skb);
+ ndev->stats.tx_dropped++;
+ return NETDEV_TX_OK;
+ }
cur_p->phys = cpu_to_be32(skb_dma_addr);
ptr_to_txbd((void *)skb, cur_p);
for (ii = 0; ii < num_frag; ii++) {
- lp->tx_bd_tail++;
- if (lp->tx_bd_tail >= TX_BD_NUM)
+ if (++lp->tx_bd_tail >= TX_BD_NUM)
lp->tx_bd_tail = 0;
cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
@@ -863,6 +884,27 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
skb_frag_address(frag),
skb_frag_size(frag),
DMA_TO_DEVICE);
+ if (dma_mapping_error(ndev->dev.parent, skb_dma_addr)) {
+ if (--lp->tx_bd_tail < 0)
+ lp->tx_bd_tail = TX_BD_NUM - 1;
+ cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
+ while (--ii >= 0) {
+ --frag;
+ dma_unmap_single(ndev->dev.parent,
+ be32_to_cpu(cur_p->phys),
+ skb_frag_size(frag),
+ DMA_TO_DEVICE);
+ if (--lp->tx_bd_tail < 0)
+ lp->tx_bd_tail = TX_BD_NUM - 1;
+ cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
+ }
+ dma_unmap_single(ndev->dev.parent,
+ be32_to_cpu(cur_p->phys),
+ skb_headlen(skb), DMA_TO_DEVICE);
+ dev_kfree_skb_any(skb);
+ ndev->stats.tx_dropped++;
+ return NETDEV_TX_OK;
+ }
cur_p->phys = cpu_to_be32(skb_dma_addr);
cur_p->len = cpu_to_be32(skb_frag_size(frag));
cur_p->app0 = 0;
@@ -884,31 +926,56 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
return NETDEV_TX_OK;
}
+static int ll_temac_recv_buffers_available(struct temac_local *lp)
+{
+ int available;
+
+ if (!lp->rx_skb[lp->rx_bd_ci])
+ return 0;
+ available = 1 + lp->rx_bd_tail - lp->rx_bd_ci;
+ if (available <= 0)
+ available += RX_BD_NUM;
+ return available;
+}
static void ll_temac_recv(struct net_device *ndev)
{
struct temac_local *lp = netdev_priv(ndev);
- struct sk_buff *skb, *new_skb;
- unsigned int bdstat;
- struct cdmac_bd *cur_p;
- dma_addr_t tail_p, skb_dma_addr;
- int length;
unsigned long flags;
+ int rx_bd;
+ bool update_tail = false;
spin_lock_irqsave(&lp->rx_lock, flags);
- tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci;
- cur_p = &lp->rx_bd_v[lp->rx_bd_ci];
+ /* Process all received buffers, passing them on network
+ * stack. After this, the buffer descriptors will be in an
+ * un-allocated stage, where no skb is allocated for it, and
+ * they are therefore not available for TEMAC/DMA.
+ */
+ do {
+ struct cdmac_bd *bd = &lp->rx_bd_v[lp->rx_bd_ci];
+ struct sk_buff *skb = lp->rx_skb[lp->rx_bd_ci];
+ unsigned int bdstat = be32_to_cpu(bd->app0);
+ int length;
- bdstat = be32_to_cpu(cur_p->app0);
- while ((bdstat & STS_CTRL_APP0_CMPLT)) {
+ /* While this should not normally happen, we can end
+ * here when GFP_ATOMIC allocations fail, and we
+ * therefore have un-allocated buffers.
+ */
+ if (!skb)
+ break;
- skb = lp->rx_skb[lp->rx_bd_ci];
- length = be32_to_cpu(cur_p->app4) & 0x3FFF;
+ /* Loop over all completed buffer descriptors */
+ if (!(bdstat & STS_CTRL_APP0_CMPLT))
+ break;
- dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys),
+ dma_unmap_single(ndev->dev.parent, be32_to_cpu(bd->phys),
XTE_MAX_JUMBO_FRAME_SIZE, DMA_FROM_DEVICE);
+ /* The buffer is not valid for DMA anymore */
+ bd->phys = 0;
+ bd->len = 0;
+ length = be32_to_cpu(bd->app4) & 0x3FFF;
skb_put(skb, length);
skb->protocol = eth_type_trans(skb, ndev);
skb_checksum_none_assert(skb);
@@ -923,43 +990,102 @@ static void ll_temac_recv(struct net_device *ndev)
* (back) for proper IP checksum byte order
* (be16).
*/
- skb->csum = htons(be32_to_cpu(cur_p->app3) & 0xFFFF);
+ skb->csum = htons(be32_to_cpu(bd->app3) & 0xFFFF);
skb->ip_summed = CHECKSUM_COMPLETE;
}
if (!skb_defer_rx_timestamp(skb))
netif_rx(skb);
+ /* The skb buffer is now owned by network stack above */
+ lp->rx_skb[lp->rx_bd_ci] = NULL;
ndev->stats.rx_packets++;
ndev->stats.rx_bytes += length;
- new_skb = netdev_alloc_skb_ip_align(ndev,
- XTE_MAX_JUMBO_FRAME_SIZE);
- if (!new_skb) {
- spin_unlock_irqrestore(&lp->rx_lock, flags);
- return;
+ rx_bd = lp->rx_bd_ci;
+ if (++lp->rx_bd_ci >= RX_BD_NUM)
+ lp->rx_bd_ci = 0;
+ } while (rx_bd != lp->rx_bd_tail);
+
+ /* DMA operations will halt when the last buffer descriptor is
+ * processed (ie. the one pointed to by RX_TAILDESC_PTR).
+ * When that happens, no more interrupt events will be
+ * generated. No IRQ_COAL or IRQ_DLY, and not even an
+ * IRQ_ERR. To avoid stalling, we schedule a delayed work
+ * when there is a potential risk of that happening. The work
+ * will call this function, and thus re-schedule itself until
+ * enough buffers are available again.
+ */
+ if (ll_temac_recv_buffers_available(lp) < lp->coalesce_count_rx)
+ schedule_delayed_work(&lp->restart_work, HZ / 1000);
+
+ /* Allocate new buffers for those buffer descriptors that were
+ * passed to network stack. Note that GFP_ATOMIC allocations
+ * can fail (e.g. when a larger burst of GFP_ATOMIC
+ * allocations occurs), so while we try to allocate all
+ * buffers in the same interrupt where they were processed, we
+ * continue with what we could get in case of allocation
+ * failure. Allocation of remaining buffers will be retried
+ * in following calls.
+ */
+ while (1) {
+ struct sk_buff *skb;
+ struct cdmac_bd *bd;
+ dma_addr_t skb_dma_addr;
+
+ rx_bd = lp->rx_bd_tail + 1;
+ if (rx_bd >= RX_BD_NUM)
+ rx_bd = 0;
+ bd = &lp->rx_bd_v[rx_bd];
+
+ if (bd->phys)
+ break; /* All skb's allocated */
+
+ skb = netdev_alloc_skb_ip_align(ndev, XTE_MAX_JUMBO_FRAME_SIZE);
+ if (!skb) {
+ dev_warn(&ndev->dev, "skb alloc failed\n");
+ break;
}
- cur_p->app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND);
- skb_dma_addr = dma_map_single(ndev->dev.parent, new_skb->data,
+ skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data,
XTE_MAX_JUMBO_FRAME_SIZE,
DMA_FROM_DEVICE);
- cur_p->phys = cpu_to_be32(skb_dma_addr);
- cur_p->len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE);
- lp->rx_skb[lp->rx_bd_ci] = new_skb;
+ if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent,
+ skb_dma_addr))) {
+ dev_kfree_skb_any(skb);
+ break;
+ }
- lp->rx_bd_ci++;
- if (lp->rx_bd_ci >= RX_BD_NUM)
- lp->rx_bd_ci = 0;
+ bd->phys = cpu_to_be32(skb_dma_addr);
+ bd->len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE);
+ bd->app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND);
+ lp->rx_skb[rx_bd] = skb;
- cur_p = &lp->rx_bd_v[lp->rx_bd_ci];
- bdstat = be32_to_cpu(cur_p->app0);
+ lp->rx_bd_tail = rx_bd;
+ update_tail = true;
}
- lp->dma_out(lp, RX_TAILDESC_PTR, tail_p);
+
+ /* Move tail pointer when buffers have been allocated */
+ if (update_tail) {
+ lp->dma_out(lp, RX_TAILDESC_PTR,
+ lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_tail);
+ }
spin_unlock_irqrestore(&lp->rx_lock, flags);
}
+/* Function scheduled to ensure a restart in case of DMA halt
+ * condition caused by running out of buffer descriptors.
+ */
+static void ll_temac_restart_work_func(struct work_struct *work)
+{
+ struct temac_local *lp = container_of(work, struct temac_local,
+ restart_work.work);
+ struct net_device *ndev = lp->ndev;
+
+ ll_temac_recv(ndev);
+}
+
static irqreturn_t ll_temac_tx_irq(int irq, void *_ndev)
{
struct net_device *ndev = _ndev;
@@ -1052,6 +1178,8 @@ static int temac_stop(struct net_device *ndev)
dev_dbg(&ndev->dev, "temac_close()\n");
+ cancel_delayed_work_sync(&lp->restart_work);
+
free_irq(lp->tx_irq, ndev);
free_irq(lp->rx_irq, ndev);
@@ -1173,6 +1301,7 @@ static int temac_probe(struct platform_device *pdev)
lp->dev = &pdev->dev;
lp->options = XTE_OPTION_DEFAULTS;
spin_lock_init(&lp->rx_lock);
+ INIT_DELAYED_WORK(&lp->restart_work, ll_temac_restart_work_func);
/* Setup mutex for synchronization of indirect register access */
if (pdata) {
@@ -1279,6 +1408,7 @@ static int temac_probe(struct platform_device *pdev)
*/
lp->tx_chnl_ctrl = 0x10220000;
lp->rx_chnl_ctrl = 0xff070000;
+ lp->coalesce_count_rx = 0x07;
/* Finished with the DMA node; drop the reference */
of_node_put(dma_np);
@@ -1310,11 +1440,14 @@ static int temac_probe(struct platform_device *pdev)
(pdata->tx_irq_count << 16);
else
lp->tx_chnl_ctrl = 0x10220000;
- if (pdata->rx_irq_timeout || pdata->rx_irq_count)
+ if (pdata->rx_irq_timeout || pdata->rx_irq_count) {
lp->rx_chnl_ctrl = (pdata->rx_irq_timeout << 24) |
(pdata->rx_irq_count << 16);
- else
+ lp->coalesce_count_rx = pdata->rx_irq_count;
+ } else {
lp->rx_chnl_ctrl = 0xff070000;
+ lp->coalesce_count_rx = 0x07;
+ }
}
/* Error handle returned DMA RX and TX interrupts */
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 75757e9..09f279c 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1845,8 +1845,6 @@ static void geneve_destroy_tunnels(struct net *net, struct list_head *head)
if (!net_eq(dev_net(geneve->dev), net))
unregister_netdevice_queue(geneve->dev, head);
}
-
- WARN_ON_ONCE(!list_empty(&gn->sock_list));
}
static void __net_exit geneve_exit_batch_net(struct list_head *net_list)
@@ -1861,6 +1859,12 @@ static void __net_exit geneve_exit_batch_net(struct list_head *net_list)
/* unregister the devices gathered above */
unregister_netdevice_many(&list);
rtnl_unlock();
+
+ list_for_each_entry(net, net_list, exit_list) {
+ const struct geneve_net *gn = net_generic(net, geneve_net_id);
+
+ WARN_ON_ONCE(!list_empty(&gn->sock_list));
+ }
}
static struct pernet_operations geneve_net_ops = {
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index af07ea7..672cd2c 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -546,8 +546,8 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
mtu < ntohs(iph->tot_len)) {
netdev_dbg(dev, "packet too big, fragmentation needed\n");
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(mtu));
+ icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
goto err_rt;
}
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index ae3f308..1b320bc 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -99,7 +99,7 @@ static struct netvsc_device *alloc_net_device(void)
init_waitqueue_head(&net_device->wait_drain);
net_device->destroy = false;
- net_device->tx_disable = false;
+ net_device->tx_disable = true;
net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 65e12cb..2c0a24c 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1068,6 +1068,7 @@ static int netvsc_attach(struct net_device *ndev,
}
/* In any case device is now ready */
+ nvdev->tx_disable = false;
netif_device_attach(ndev);
/* Note: enable and attach happen when sub-channels setup */
@@ -2476,6 +2477,8 @@ static int netvsc_probe(struct hv_device *dev,
else
net->max_mtu = ETH_DATA_LEN;
+ nvdev->tx_disable = false;
+
ret = register_netdevice(net);
if (ret != 0) {
pr_err("Unable to register netdev.\n");
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 242b9b0..7fe306e 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -75,7 +75,7 @@ static void ifb_ri_tasklet(unsigned long _txp)
}
while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
- skb->tc_redirected = 0;
+ skb->redirected = 0;
skb->tc_skip_classify = 1;
u64_stats_update_begin(&txp->tsync);
@@ -96,7 +96,7 @@ static void ifb_ri_tasklet(unsigned long _txp)
rcu_read_unlock();
skb->skb_iif = txp->dev->ifindex;
- if (!skb->tc_from_ingress) {
+ if (!skb->from_ingress) {
dev_queue_xmit(skb);
} else {
skb_pull_rcsum(skb, skb->mac_len);
@@ -243,7 +243,7 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
txp->rx_bytes += skb->len;
u64_stats_update_end(&txp->rsync);
- if (!skb->tc_redirected || !skb->skb_iif) {
+ if (!skb->redirected || !skb->skb_iif) {
dev_kfree_skb(skb);
dev->stats.rx_dropped++;
return NETDEV_TX_OK;
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 30cd0c4f..8801d09 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -293,6 +293,7 @@ void ipvlan_process_multicast(struct work_struct *work)
}
if (dev)
dev_put(dev);
+ cond_resched();
}
}
@@ -498,19 +499,21 @@ static int ipvlan_process_outbound(struct sk_buff *skb)
struct ethhdr *ethh = eth_hdr(skb);
int ret = NET_XMIT_DROP;
- /* In this mode we dont care about multicast and broadcast traffic */
- if (is_multicast_ether_addr(ethh->h_dest)) {
- pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n",
- ntohs(skb->protocol));
- kfree_skb(skb);
- goto out;
- }
-
/* The ipvlan is a pseudo-L2 device, so the packets that we receive
* will have L2; which need to discarded and processed further
* in the net-ns of the main-device.
*/
if (skb_mac_header_was_set(skb)) {
+ /* In this mode we dont care about
+ * multicast and broadcast traffic */
+ if (is_multicast_ether_addr(ethh->h_dest)) {
+ pr_debug_ratelimited(
+ "Dropped {multi|broad}cast of type=[%x]\n",
+ ntohs(skb->protocol));
+ kfree_skb(skb);
+ goto out;
+ }
+
skb_pull(skb, sizeof(*ethh));
skb->mac_header = (typeof(skb->mac_header))~0U;
skb_reset_network_header(skb);
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index a706622..f195f27 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -164,7 +164,6 @@ static void ipvlan_uninit(struct net_device *dev)
static int ipvlan_open(struct net_device *dev)
{
struct ipvl_dev *ipvlan = netdev_priv(dev);
- struct net_device *phy_dev = ipvlan->phy_dev;
struct ipvl_addr *addr;
if (ipvlan->port->mode == IPVLAN_MODE_L3 ||
@@ -178,7 +177,7 @@ static int ipvlan_open(struct net_device *dev)
ipvlan_ht_addr_add(ipvlan, addr);
rcu_read_unlock();
- return dev_uc_add(phy_dev, phy_dev->dev_addr);
+ return 0;
}
static int ipvlan_stop(struct net_device *dev)
@@ -190,8 +189,6 @@ static int ipvlan_stop(struct net_device *dev)
dev_uc_unsync(phy_dev, dev);
dev_mc_unsync(phy_dev, dev);
- dev_uc_del(phy_dev, phy_dev->dev_addr);
-
rcu_read_lock();
list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
ipvlan_ht_addr_del(addr);
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 45bfd99..92bc2b2 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -19,6 +19,7 @@
#include <net/gro_cells.h>
#include <net/macsec.h>
#include <linux/phy.h>
+#include <linux/if_arp.h>
#include <uapi/linux/if_macsec.h>
@@ -424,6 +425,11 @@ static struct macsec_eth_header *macsec_ethhdr(struct sk_buff *skb)
return (struct macsec_eth_header *)skb_mac_header(skb);
}
+static sci_t dev_to_sci(struct net_device *dev, __be16 port)
+{
+ return make_sci(dev->dev_addr, port);
+}
+
static void __macsec_pn_wrapped(struct macsec_secy *secy,
struct macsec_tx_sa *tx_sa)
{
@@ -3268,6 +3274,20 @@ static int macsec_set_mac_address(struct net_device *dev, void *p)
out:
ether_addr_copy(dev->dev_addr, addr->sa_data);
+ macsec->secy.sci = dev_to_sci(dev, MACSEC_PORT_ES);
+
+ /* If h/w offloading is available, propagate to the device */
+ if (macsec_is_offloaded(macsec)) {
+ const struct macsec_ops *ops;
+ struct macsec_context ctx;
+
+ ops = macsec_get_ops(macsec, &ctx);
+ if (ops) {
+ ctx.secy = &macsec->secy;
+ macsec_offload(ops->mdo_upd_secy, &ctx);
+ }
+ }
+
return 0;
}
@@ -3342,6 +3362,7 @@ static const struct device_type macsec_type = {
static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = {
[IFLA_MACSEC_SCI] = { .type = NLA_U64 },
+ [IFLA_MACSEC_PORT] = { .type = NLA_U16 },
[IFLA_MACSEC_ICV_LEN] = { .type = NLA_U8 },
[IFLA_MACSEC_CIPHER_SUITE] = { .type = NLA_U64 },
[IFLA_MACSEC_WINDOW] = { .type = NLA_U32 },
@@ -3592,11 +3613,6 @@ static bool sci_exists(struct net_device *dev, sci_t sci)
return false;
}
-static sci_t dev_to_sci(struct net_device *dev, __be16 port)
-{
- return make_sci(dev->dev_addr, port);
-}
-
static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len)
{
struct macsec_dev *macsec = macsec_priv(dev);
@@ -3650,6 +3666,8 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK]));
if (!real_dev)
return -ENODEV;
+ if (real_dev->type != ARPHRD_ETHER)
+ return -EINVAL;
dev->priv_flags |= IFF_MACSEC;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 81aa7ad..e7289d6 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -334,6 +334,8 @@ static void macvlan_process_broadcast(struct work_struct *w)
if (src)
dev_put(src->dev);
consume_skb(skb);
+
+ cond_resched();
}
}
diff --git a/drivers/net/netdevsim/ipsec.c b/drivers/net/netdevsim/ipsec.c
index e27fc1a..3811f1b 100644
--- a/drivers/net/netdevsim/ipsec.c
+++ b/drivers/net/netdevsim/ipsec.c
@@ -29,9 +29,9 @@ static ssize_t nsim_dbg_netdev_ops_read(struct file *filp,
return -ENOMEM;
p = buf;
- p += snprintf(p, bufsize - (p - buf),
- "SA count=%u tx=%u\n",
- ipsec->count, ipsec->tx);
+ p += scnprintf(p, bufsize - (p - buf),
+ "SA count=%u tx=%u\n",
+ ipsec->count, ipsec->tx);
for (i = 0; i < NSIM_IPSEC_MAX_SA_COUNT; i++) {
struct nsim_sa *sap = &ipsec->sa[i];
@@ -39,18 +39,18 @@ static ssize_t nsim_dbg_netdev_ops_read(struct file *filp,
if (!sap->used)
continue;
- p += snprintf(p, bufsize - (p - buf),
- "sa[%i] %cx ipaddr=0x%08x %08x %08x %08x\n",
- i, (sap->rx ? 'r' : 't'), sap->ipaddr[0],
- sap->ipaddr[1], sap->ipaddr[2], sap->ipaddr[3]);
- p += snprintf(p, bufsize - (p - buf),
- "sa[%i] spi=0x%08x proto=0x%x salt=0x%08x crypt=%d\n",
- i, be32_to_cpu(sap->xs->id.spi),
- sap->xs->id.proto, sap->salt, sap->crypt);
- p += snprintf(p, bufsize - (p - buf),
- "sa[%i] key=0x%08x %08x %08x %08x\n",
- i, sap->key[0], sap->key[1],
- sap->key[2], sap->key[3]);
+ p += scnprintf(p, bufsize - (p - buf),
+ "sa[%i] %cx ipaddr=0x%08x %08x %08x %08x\n",
+ i, (sap->rx ? 'r' : 't'), sap->ipaddr[0],
+ sap->ipaddr[1], sap->ipaddr[2], sap->ipaddr[3]);
+ p += scnprintf(p, bufsize - (p - buf),
+ "sa[%i] spi=0x%08x proto=0x%x salt=0x%08x crypt=%d\n",
+ i, be32_to_cpu(sap->xs->id.spi),
+ sap->xs->id.proto, sap->salt, sap->crypt);
+ p += scnprintf(p, bufsize - (p - buf),
+ "sa[%i] key=0x%08x %08x %08x %08x\n",
+ i, sap->key[0], sap->key[1],
+ sap->key[2], sap->key[3]);
}
len = simple_read_from_buffer(buffer, count, ppos, buf, p - buf);
diff --git a/drivers/net/phy/bcm63xx.c b/drivers/net/phy/bcm63xx.c
index 23f1958..459fb20 100644
--- a/drivers/net/phy/bcm63xx.c
+++ b/drivers/net/phy/bcm63xx.c
@@ -73,6 +73,7 @@ static struct phy_driver bcm63xx_driver[] = {
/* same phy as above, with just a different OUI */
.phy_id = 0x002bdc00,
.phy_id_mask = 0xfffffc00,
+ .name = "Broadcom BCM63XX (2)",
/* PHY_BASIC_FEATURES */
.flags = PHY_IS_INTERNAL,
.config_init = bcm63xx_config_init,
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 7d68b28..a62229a 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -410,7 +410,7 @@ static int bcm5481_config_aneg(struct phy_device *phydev)
struct device_node *np = phydev->mdio.dev.of_node;
int ret;
- /* Aneg firsly. */
+ /* Aneg firstly. */
ret = genphy_config_aneg(phydev);
/* Then we can set up the delay. */
@@ -463,7 +463,7 @@ static int bcm54616s_config_aneg(struct phy_device *phydev)
{
int ret;
- /* Aneg firsly. */
+ /* Aneg firstly. */
if (phydev->dev_flags & PHY_BCM_FLAGS_MODE_1000BX)
ret = genphy_c37_config_aneg(phydev);
else
diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index 967f57e..9a07ad1 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -28,7 +28,8 @@
#define DP83867_CTRL 0x1f
/* Extended Registers */
-#define DP83867_CFG4 0x0031
+#define DP83867_FLD_THR_CFG 0x002e
+#define DP83867_CFG4 0x0031
#define DP83867_CFG4_SGMII_ANEG_MASK (BIT(5) | BIT(6))
#define DP83867_CFG4_SGMII_ANEG_TIMER_11MS (3 << 5)
#define DP83867_CFG4_SGMII_ANEG_TIMER_800US (2 << 5)
@@ -91,6 +92,7 @@
#define DP83867_STRAP_STS2_CLK_SKEW_RX_MASK GENMASK(2, 0)
#define DP83867_STRAP_STS2_CLK_SKEW_RX_SHIFT 0
#define DP83867_STRAP_STS2_CLK_SKEW_NONE BIT(2)
+#define DP83867_STRAP_STS2_STRAP_FLD BIT(10)
/* PHY CTRL bits */
#define DP83867_PHYCR_TX_FIFO_DEPTH_SHIFT 14
@@ -125,6 +127,9 @@
/* CFG4 bits */
#define DP83867_CFG4_PORT_MIRROR_EN BIT(0)
+/* FLD_THR_CFG */
+#define DP83867_FLD_THR_CFG_ENERGY_LOST_THR_MASK 0x7
+
enum {
DP83867_PORT_MIRROING_KEEP,
DP83867_PORT_MIRROING_EN,
@@ -476,6 +481,20 @@ static int dp83867_config_init(struct phy_device *phydev)
phy_clear_bits_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4,
BIT(7));
+ bs = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_STRAP_STS2);
+ if (bs & DP83867_STRAP_STS2_STRAP_FLD) {
+ /* When using strap to enable FLD, the ENERGY_LOST_FLD_THR will
+ * be set to 0x2. This may causes the PHY link to be unstable -
+ * the default value 0x1 need to be restored.
+ */
+ ret = phy_modify_mmd(phydev, DP83867_DEVADDR,
+ DP83867_FLD_THR_CFG,
+ DP83867_FLD_THR_CFG_ENERGY_LOST_THR_MASK,
+ 0x1);
+ if (ret)
+ return ret;
+ }
+
if (phy_interface_is_rgmii(phydev) ||
phydev->interface == PHY_INTERFACE_MODE_SGMII) {
val = phy_read(phydev, MII_DP83867_PHYCTRL);
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 28e33ec..9a8bada 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1306,6 +1306,9 @@ static int marvell_read_status_page_an(struct phy_device *phydev,
}
}
+ if (!(status & MII_M1011_PHY_STATUS_RESOLVED))
+ return 0;
+
if (status & MII_M1011_PHY_STATUS_FULLDUPLEX)
phydev->duplex = DUPLEX_FULL;
else
@@ -1365,6 +1368,8 @@ static int marvell_read_status_page(struct phy_device *phydev, int page)
linkmode_zero(phydev->lp_advertising);
phydev->pause = 0;
phydev->asym_pause = 0;
+ phydev->speed = SPEED_UNKNOWN;
+ phydev->duplex = DUPLEX_UNKNOWN;
if (phydev->autoneg == AUTONEG_ENABLE)
err = marvell_read_status_page_an(phydev, fiber, status);
diff --git a/drivers/net/phy/mdio-bcm-iproc.c b/drivers/net/phy/mdio-bcm-iproc.c
index 7e9975d..f1ded03 100644
--- a/drivers/net/phy/mdio-bcm-iproc.c
+++ b/drivers/net/phy/mdio-bcm-iproc.c
@@ -178,6 +178,23 @@ static int iproc_mdio_remove(struct platform_device *pdev)
return 0;
}
+#ifdef CONFIG_PM_SLEEP
+int iproc_mdio_resume(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct iproc_mdio_priv *priv = platform_get_drvdata(pdev);
+
+ /* restore the mii clock configuration */
+ iproc_mdio_config_clk(priv->base);
+
+ return 0;
+}
+
+static const struct dev_pm_ops iproc_mdio_pm_ops = {
+ .resume = iproc_mdio_resume
+};
+#endif /* CONFIG_PM_SLEEP */
+
static const struct of_device_id iproc_mdio_of_match[] = {
{ .compatible = "brcm,iproc-mdio", },
{ /* sentinel */ },
@@ -188,6 +205,9 @@ static struct platform_driver iproc_mdio_driver = {
.driver = {
.name = "iproc-mdio",
.of_match_table = iproc_mdio_of_match,
+#ifdef CONFIG_PM_SLEEP
+ .pm = &iproc_mdio_pm_ops,
+#endif
},
.probe = iproc_mdio_probe,
.remove = iproc_mdio_remove,
diff --git a/drivers/net/phy/mdio-bcm-unimac.c b/drivers/net/phy/mdio-bcm-unimac.c
index 4a28fb2..fbd3689 100644
--- a/drivers/net/phy/mdio-bcm-unimac.c
+++ b/drivers/net/phy/mdio-bcm-unimac.c
@@ -242,11 +242,9 @@ static int unimac_mdio_probe(struct platform_device *pdev)
return -ENOMEM;
}
- priv->clk = devm_clk_get(&pdev->dev, NULL);
- if (PTR_ERR(priv->clk) == -EPROBE_DEFER)
+ priv->clk = devm_clk_get_optional(&pdev->dev, NULL);
+ if (IS_ERR(priv->clk))
return PTR_ERR(priv->clk);
- else
- priv->clk = NULL;
ret = clk_prepare_enable(priv->clk);
if (ret)
diff --git a/drivers/net/phy/mdio-mux-bcm-iproc.c b/drivers/net/phy/mdio-mux-bcm-iproc.c
index 88d409e..aad6809 100644
--- a/drivers/net/phy/mdio-mux-bcm-iproc.c
+++ b/drivers/net/phy/mdio-mux-bcm-iproc.c
@@ -288,8 +288,13 @@ static int mdio_mux_iproc_suspend(struct device *dev)
static int mdio_mux_iproc_resume(struct device *dev)
{
struct iproc_mdiomux_desc *md = dev_get_drvdata(dev);
+ int rc;
- clk_prepare_enable(md->core_clk);
+ rc = clk_prepare_enable(md->core_clk);
+ if (rc) {
+ dev_err(md->dev, "failed to enable core clk\n");
+ return rc;
+ }
mdio_mux_iproc_config(md);
return 0;
diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c
index 937ac7d..f686f40 100644
--- a/drivers/net/phy/mscc.c
+++ b/drivers/net/phy/mscc.c
@@ -345,11 +345,11 @@ enum macsec_bank {
BIT(VSC8531_FORCE_LED_OFF) | \
BIT(VSC8531_FORCE_LED_ON))
-#define MSCC_VSC8584_REVB_INT8051_FW "mscc_vsc8584_revb_int8051_fb48.bin"
+#define MSCC_VSC8584_REVB_INT8051_FW "microchip/mscc_vsc8584_revb_int8051_fb48.bin"
#define MSCC_VSC8584_REVB_INT8051_FW_START_ADDR 0xe800
#define MSCC_VSC8584_REVB_INT8051_FW_CRC 0xfb48
-#define MSCC_VSC8574_REVB_INT8051_FW "mscc_vsc8574_revb_int8051_29e8.bin"
+#define MSCC_VSC8574_REVB_INT8051_FW "microchip/mscc_vsc8574_revb_int8051_29e8.bin"
#define MSCC_VSC8574_REVB_INT8051_FW_START_ADDR 0x4000
#define MSCC_VSC8574_REVB_INT8051_FW_CRC 0x29e8
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index a1caeee..dd2e23f 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -167,7 +167,7 @@ EXPORT_SYMBOL_GPL(genphy_c45_restart_aneg);
*/
int genphy_c45_check_and_restart_aneg(struct phy_device *phydev, bool restart)
{
- int ret = 0;
+ int ret;
if (!restart) {
/* Configure and restart aneg if it wasn't set before */
@@ -180,9 +180,9 @@ int genphy_c45_check_and_restart_aneg(struct phy_device *phydev, bool restart)
}
if (restart)
- ret = genphy_c45_restart_aneg(phydev);
+ return genphy_c45_restart_aneg(phydev);
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(genphy_c45_check_and_restart_aneg);
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index d76e038..355bfde 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -727,7 +727,8 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
phy_trigger_machine(phydev);
}
- if (phy_clear_interrupt(phydev))
+ /* did_interrupt() may have cleared the interrupt already */
+ if (!phydev->drv->did_interrupt && phy_clear_interrupt(phydev))
goto phy_err;
return IRQ_HANDLED;
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 6a5056e..28e3c5c 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -247,7 +247,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
* MDIO bus driver and clock gated at this point.
*/
if (!netdev)
- return !phydev->suspended;
+ goto out;
if (netdev->wol_enabled)
return false;
@@ -267,7 +267,8 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
if (device_may_wakeup(&netdev->dev))
return false;
- return true;
+out:
+ return !phydev->suspended;
}
static int mdio_bus_phy_suspend(struct device *dev)
@@ -285,6 +286,8 @@ static int mdio_bus_phy_suspend(struct device *dev)
if (!mdio_bus_phy_may_suspend(phydev))
return 0;
+ phydev->suspended_by_mdio_bus = 1;
+
return phy_suspend(phydev);
}
@@ -293,9 +296,11 @@ static int mdio_bus_phy_resume(struct device *dev)
struct phy_device *phydev = to_phy_device(dev);
int ret;
- if (!mdio_bus_phy_may_suspend(phydev))
+ if (!phydev->suspended_by_mdio_bus)
goto no_resume;
+ phydev->suspended_by_mdio_bus = 0;
+
ret = phy_resume(phydev);
if (ret < 0)
return ret;
@@ -1792,7 +1797,7 @@ EXPORT_SYMBOL(genphy_restart_aneg);
*/
int genphy_check_and_restart_aneg(struct phy_device *phydev, bool restart)
{
- int ret = 0;
+ int ret;
if (!restart) {
/* Advertisement hasn't changed, but maybe aneg was never on to
@@ -1807,9 +1812,9 @@ int genphy_check_and_restart_aneg(struct phy_device *phydev, bool restart)
}
if (restart)
- ret = genphy_restart_aneg(phydev);
+ return genphy_restart_aneg(phydev);
- return ret;
+ return 0;
}
EXPORT_SYMBOL(genphy_check_and_restart_aneg);
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 70b9a14..6e66b8e 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -761,8 +761,14 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy,
config.interface = interface;
ret = phylink_validate(pl, supported, &config);
- if (ret)
+ if (ret) {
+ phylink_warn(pl, "validation of %s with support %*pb and advertisement %*pb failed: %d\n",
+ phy_modes(config.interface),
+ __ETHTOOL_LINK_MODE_MASK_NBITS, phy->supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising,
+ ret);
return ret;
+ }
phy->phylink = pl;
phy->phy_link_change = phylink_phy_change;
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index d949ea7..6900c68 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -572,13 +572,15 @@ static void sfp_upstream_clear(struct sfp_bus *bus)
* the sfp_bus structure, incrementing its reference count. This must
* be put via sfp_bus_put() when done.
*
- * Returns: on success, a pointer to the sfp_bus structure,
- * %NULL if no SFP is specified,
- * on failure, an error pointer value:
- * corresponding to the errors detailed for
- * fwnode_property_get_reference_args().
- * %-ENOMEM if we failed to allocate the bus.
- * an error from the upstream's connect_phy() method.
+ * Returns:
+ * - on success, a pointer to the sfp_bus structure,
+ * - %NULL if no SFP is specified,
+ * - on failure, an error pointer value:
+ *
+ * - corresponding to the errors detailed for
+ * fwnode_property_get_reference_args().
+ * - %-ENOMEM if we failed to allocate the bus.
+ * - an error from the upstream's connect_phy() method.
*/
struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode)
{
@@ -612,13 +614,15 @@ EXPORT_SYMBOL_GPL(sfp_bus_find_fwnode);
* the SFP bus using sfp_register_upstream(). This takes a reference on the
* bus, so it is safe to put the bus after this call.
*
- * Returns: on success, a pointer to the sfp_bus structure,
- * %NULL if no SFP is specified,
- * on failure, an error pointer value:
- * corresponding to the errors detailed for
- * fwnode_property_get_reference_args().
- * %-ENOMEM if we failed to allocate the bus.
- * an error from the upstream's connect_phy() method.
+ * Returns:
+ * - on success, a pointer to the sfp_bus structure,
+ * - %NULL if no SFP is specified,
+ * - on failure, an error pointer value:
+ *
+ * - corresponding to the errors detailed for
+ * fwnode_property_get_reference_args().
+ * - %-ENOMEM if we failed to allocate the bus.
+ * - an error from the upstream's connect_phy() method.
*/
int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream,
const struct sfp_upstream_ops *ops)
diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c
index 58a69f8..f78ceba 100644
--- a/drivers/net/slip/slhc.c
+++ b/drivers/net/slip/slhc.c
@@ -232,7 +232,7 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize,
struct cstate *cs = lcs->next;
unsigned long deltaS, deltaA;
short changes = 0;
- int hlen;
+ int nlen, hlen;
unsigned char new_seq[16];
unsigned char *cp = new_seq;
struct iphdr *ip;
@@ -248,6 +248,8 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize,
return isize;
ip = (struct iphdr *) icp;
+ if (ip->version != 4 || ip->ihl < 5)
+ return isize;
/* Bail if this packet isn't TCP, or is an IP fragment */
if (ip->protocol != IPPROTO_TCP || (ntohs(ip->frag_off) & 0x3fff)) {
@@ -258,10 +260,14 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize,
comp->sls_o_tcp++;
return isize;
}
- /* Extract TCP header */
+ nlen = ip->ihl * 4;
+ if (isize < nlen + sizeof(*th))
+ return isize;
- th = (struct tcphdr *)(((unsigned char *)ip) + ip->ihl*4);
- hlen = ip->ihl*4 + th->doff*4;
+ th = (struct tcphdr *)(icp + nlen);
+ if (th->doff < sizeof(struct tcphdr) / 4)
+ return isize;
+ hlen = nlen + th->doff * 4;
/* Bail if the TCP packet isn't `compressible' (i.e., ACK isn't set or
* some other control bit is set). Also uncompressible if
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index 6f4d7ba..babb018 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -863,7 +863,10 @@ static int slip_open(struct tty_struct *tty)
tty->disc_data = NULL;
clear_bit(SLF_INUSE, &sl->flags);
sl_free_netdev(sl->dev);
+ /* do not call free_netdev before rtnl_unlock */
+ rtnl_unlock();
free_netdev(sl->dev);
+ return err;
err_exit:
rtnl_unlock();
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index ca70a1d..4004f98 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2240,6 +2240,8 @@ team_nl_option_policy[TEAM_ATTR_OPTION_MAX + 1] = {
[TEAM_ATTR_OPTION_CHANGED] = { .type = NLA_FLAG },
[TEAM_ATTR_OPTION_TYPE] = { .type = NLA_U8 },
[TEAM_ATTR_OPTION_DATA] = { .type = NLA_BINARY },
+ [TEAM_ATTR_OPTION_PORT_IFINDEX] = { .type = NLA_U32 },
+ [TEAM_ATTR_OPTION_ARRAY_INDEX] = { .type = NLA_U32 },
};
static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info)
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 9485c8d..6c738a2 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -61,7 +61,6 @@ enum qmi_wwan_flags {
enum qmi_wwan_quirks {
QMI_WWAN_QUIRK_DTR = 1 << 0, /* needs "set DTR" request */
- QMI_WWAN_QUIRK_QUECTEL_DYNCFG = 1 << 1, /* check num. endpoints */
};
struct qmimux_hdr {
@@ -338,6 +337,9 @@ static void qmi_wwan_netdev_setup(struct net_device *net)
netdev_dbg(net, "mode: raw IP\n");
} else if (!net->header_ops) { /* don't bother if already set */
ether_setup(net);
+ /* Restoring min/max mtu values set originally by usbnet */
+ net->min_mtu = 0;
+ net->max_mtu = ETH_MAX_MTU;
clear_bit(EVENT_NO_IP_ALIGN, &dev->flags);
netdev_dbg(net, "mode: Ethernet\n");
}
@@ -916,16 +918,6 @@ static const struct driver_info qmi_wwan_info_quirk_dtr = {
.data = QMI_WWAN_QUIRK_DTR,
};
-static const struct driver_info qmi_wwan_info_quirk_quectel_dyncfg = {
- .description = "WWAN/QMI device",
- .flags = FLAG_WWAN | FLAG_SEND_ZLP,
- .bind = qmi_wwan_bind,
- .unbind = qmi_wwan_unbind,
- .manage_power = qmi_wwan_manage_power,
- .rx_fixup = qmi_wwan_rx_fixup,
- .data = QMI_WWAN_QUIRK_DTR | QMI_WWAN_QUIRK_QUECTEL_DYNCFG,
-};
-
#define HUAWEI_VENDOR_ID 0x12D1
/* map QMI/wwan function by a fixed interface number */
@@ -946,14 +938,18 @@ static const struct driver_info qmi_wwan_info_quirk_quectel_dyncfg = {
#define QMI_GOBI_DEVICE(vend, prod) \
QMI_FIXED_INTF(vend, prod, 0)
-/* Quectel does not use fixed interface numbers on at least some of their
- * devices. We need to check the number of endpoints to ensure that we bind to
- * the correct interface.
+/* Many devices have QMI and DIAG functions which are distinguishable
+ * from other vendor specific functions by class, subclass and
+ * protocol all being 0xff. The DIAG function has exactly 2 endpoints
+ * and is silently rejected when probed.
+ *
+ * This makes it possible to match dynamically numbered QMI functions
+ * as seen on e.g. many Quectel modems.
*/
-#define QMI_QUIRK_QUECTEL_DYNCFG(vend, prod) \
+#define QMI_MATCH_FF_FF_FF(vend, prod) \
USB_DEVICE_AND_INTERFACE_INFO(vend, prod, USB_CLASS_VENDOR_SPEC, \
USB_SUBCLASS_VENDOR_SPEC, 0xff), \
- .driver_info = (unsigned long)&qmi_wwan_info_quirk_quectel_dyncfg
+ .driver_info = (unsigned long)&qmi_wwan_info_quirk_dtr
static const struct usb_device_id products[] = {
/* 1. CDC ECM like devices match on the control interface */
@@ -1059,10 +1055,10 @@ static const struct usb_device_id products[] = {
USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x581d, USB_CLASS_VENDOR_SPEC, 1, 7),
.driver_info = (unsigned long)&qmi_wwan_info,
},
- {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0125)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */
- {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0306)}, /* Quectel EP06/EG06/EM06 */
- {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */
- {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0800)}, /* Quectel RM500Q-GL */
+ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0125)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */
+ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0306)}, /* Quectel EP06/EG06/EM06 */
+ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */
+ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0800)}, /* Quectel RM500Q-GL */
/* 3. Combined interface devices matching on interface number */
{QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */
@@ -1214,6 +1210,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x1435, 0xd182, 5)}, /* Wistron NeWeb D18 */
{QMI_FIXED_INTF(0x1435, 0xd191, 4)}, /* Wistron NeWeb D19Q1 */
{QMI_QUIRK_SET_DTR(0x1508, 0x1001, 4)}, /* Fibocom NL668 series */
+ {QMI_FIXED_INTF(0x1690, 0x7588, 4)}, /* ASKEY WWHC050 */
{QMI_FIXED_INTF(0x16d8, 0x6003, 0)}, /* CMOTech 6003 */
{QMI_FIXED_INTF(0x16d8, 0x6007, 0)}, /* CMOTech CHE-628S */
{QMI_FIXED_INTF(0x16d8, 0x6008, 0)}, /* CMOTech CMU-301 */
@@ -1363,6 +1360,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */
{QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */
{QMI_FIXED_INTF(0x413c, 0x81d7, 0)}, /* Dell Wireless 5821e */
+ {QMI_FIXED_INTF(0x413c, 0x81d7, 1)}, /* Dell Wireless 5821e preproduction config */
{QMI_FIXED_INTF(0x413c, 0x81e0, 0)}, /* Dell Wireless 5821e with eSIM support*/
{QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
{QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */
@@ -1454,7 +1452,6 @@ static int qmi_wwan_probe(struct usb_interface *intf,
{
struct usb_device_id *id = (struct usb_device_id *)prod;
struct usb_interface_descriptor *desc = &intf->cur_altsetting->desc;
- const struct driver_info *info;
/* Workaround to enable dynamic IDs. This disables usbnet
* blacklisting functionality. Which, if required, can be
@@ -1490,12 +1487,8 @@ static int qmi_wwan_probe(struct usb_interface *intf,
* different. Ignore the current interface if the number of endpoints
* equals the number for the diag interface (two).
*/
- info = (void *)id->driver_info;
-
- if (info->data & QMI_WWAN_QUIRK_QUECTEL_DYNCFG) {
- if (desc->bNumEndpoints == 2)
- return -ENODEV;
- }
+ if (desc->bNumEndpoints == 2)
+ return -ENODEV;
return usbnet_probe(intf, id);
}
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 78ddbaf..95b19ce 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -3221,6 +3221,8 @@ static u16 r8153_phy_status(struct r8152 *tp, u16 desired)
}
msleep(20);
+ if (test_bit(RTL8152_UNPLUG, &tp->flags))
+ break;
}
return data;
@@ -5402,7 +5404,10 @@ static void r8153_init(struct r8152 *tp)
if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) &
AUTOLOAD_DONE)
break;
+
msleep(20);
+ if (test_bit(RTL8152_UNPLUG, &tp->flags))
+ break;
}
data = r8153_phy_status(tp, 0);
@@ -5539,7 +5544,10 @@ static void r8153b_init(struct r8152 *tp)
if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) &
AUTOLOAD_DONE)
break;
+
msleep(20);
+ if (test_bit(RTL8152_UNPLUG, &tp->flags))
+ break;
}
data = r8153_phy_status(tp, 0);
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 8cdc441..d4cbb9e 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -328,7 +328,7 @@ static void veth_get_stats64(struct net_device *dev,
rcu_read_lock();
peer = rcu_dereference(priv->peer);
if (peer) {
- tot->rx_dropped += veth_stats_tx(peer, &packets, &bytes);
+ veth_stats_tx(peer, &packets, &bytes);
tot->rx_bytes += bytes;
tot->rx_packets += packets;
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index d3b08b7..45308b3 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2779,10 +2779,19 @@ static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan,
/* Setup stats when device is created */
static int vxlan_init(struct net_device *dev)
{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ int err;
+
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
+ err = gro_cells_init(&vxlan->gro_cells, dev);
+ if (err) {
+ free_percpu(dev->tstats);
+ return err;
+ }
+
return 0;
}
@@ -3043,8 +3052,6 @@ static void vxlan_setup(struct net_device *dev)
vxlan->dev = dev;
- gro_cells_init(&vxlan->gro_cells, dev);
-
for (h = 0; h < FDB_HASH_SIZE; ++h) {
spin_lock_init(&vxlan->hash_lock[h]);
INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
index 16b1982..3ac3f85 100644
--- a/drivers/net/wireguard/device.c
+++ b/drivers/net/wireguard/device.c
@@ -122,7 +122,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
u32 mtu;
int ret;
- if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol)) {
+ if (unlikely(!wg_check_packet_protocol(skb))) {
ret = -EPROTONOSUPPORT;
net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name);
goto err;
@@ -203,9 +203,9 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
err:
++dev->stats.tx_errors;
if (skb->protocol == htons(ETH_P_IP))
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+ icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
else if (skb->protocol == htons(ETH_P_IPV6))
- icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
+ icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
kfree_skb(skb);
return ret;
}
@@ -258,6 +258,8 @@ static void wg_setup(struct net_device *dev)
enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
NETIF_F_SG | NETIF_F_GSO |
NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA };
+ const int overhead = MESSAGE_MINIMUM_LENGTH + sizeof(struct udphdr) +
+ max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
dev->netdev_ops = &netdev_ops;
dev->hard_header_len = 0;
@@ -271,9 +273,8 @@ static void wg_setup(struct net_device *dev)
dev->features |= WG_NETDEV_FEATURES;
dev->hw_features |= WG_NETDEV_FEATURES;
dev->hw_enc_features |= WG_NETDEV_FEATURES;
- dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH -
- sizeof(struct udphdr) -
- max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
+ dev->mtu = ETH_DATA_LEN - overhead;
+ dev->max_mtu = round_down(INT_MAX, MESSAGE_PADDING_MULTIPLE) - overhead;
SET_NETDEV_DEVTYPE(dev, &device_type);
diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c
index bda2640..802099c 100644
--- a/drivers/net/wireguard/netlink.c
+++ b/drivers/net/wireguard/netlink.c
@@ -411,11 +411,7 @@ static int set_peer(struct wg_device *wg, struct nlattr **attrs)
peer = wg_peer_create(wg, public_key, preshared_key);
if (IS_ERR(peer)) {
- /* Similar to the above, if the key is invalid, we skip
- * it without fanfare, so that services don't need to
- * worry about doing key validation themselves.
- */
- ret = PTR_ERR(peer) == -EKEYREJECTED ? 0 : PTR_ERR(peer);
+ ret = PTR_ERR(peer);
peer = NULL;
goto out;
}
@@ -569,7 +565,7 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info)
private_key);
list_for_each_entry_safe(peer, temp, &wg->peer_list,
peer_list) {
- BUG_ON(!wg_noise_precompute_static_static(peer));
+ wg_noise_precompute_static_static(peer);
wg_noise_expire_current_peer_keypairs(peer);
}
wg_cookie_checker_precompute_device_keys(&wg->cookie_checker);
diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
index 919d9d8..708dc61 100644
--- a/drivers/net/wireguard/noise.c
+++ b/drivers/net/wireguard/noise.c
@@ -44,32 +44,23 @@ void __init wg_noise_init(void)
}
/* Must hold peer->handshake.static_identity->lock */
-bool wg_noise_precompute_static_static(struct wg_peer *peer)
+void wg_noise_precompute_static_static(struct wg_peer *peer)
{
- bool ret;
-
down_write(&peer->handshake.lock);
- if (peer->handshake.static_identity->has_identity) {
- ret = curve25519(
- peer->handshake.precomputed_static_static,
+ if (!peer->handshake.static_identity->has_identity ||
+ !curve25519(peer->handshake.precomputed_static_static,
peer->handshake.static_identity->static_private,
- peer->handshake.remote_static);
- } else {
- u8 empty[NOISE_PUBLIC_KEY_LEN] = { 0 };
-
- ret = curve25519(empty, empty, peer->handshake.remote_static);
+ peer->handshake.remote_static))
memset(peer->handshake.precomputed_static_static, 0,
NOISE_PUBLIC_KEY_LEN);
- }
up_write(&peer->handshake.lock);
- return ret;
}
-bool wg_noise_handshake_init(struct noise_handshake *handshake,
- struct noise_static_identity *static_identity,
- const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
- const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
- struct wg_peer *peer)
+void wg_noise_handshake_init(struct noise_handshake *handshake,
+ struct noise_static_identity *static_identity,
+ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
+ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
+ struct wg_peer *peer)
{
memset(handshake, 0, sizeof(*handshake));
init_rwsem(&handshake->lock);
@@ -81,7 +72,7 @@ bool wg_noise_handshake_init(struct noise_handshake *handshake,
NOISE_SYMMETRIC_KEY_LEN);
handshake->static_identity = static_identity;
handshake->state = HANDSHAKE_ZEROED;
- return wg_noise_precompute_static_static(peer);
+ wg_noise_precompute_static_static(peer);
}
static void handshake_zero(struct noise_handshake *handshake)
@@ -403,6 +394,19 @@ static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN],
return true;
}
+static bool __must_check mix_precomputed_dh(u8 chaining_key[NOISE_HASH_LEN],
+ u8 key[NOISE_SYMMETRIC_KEY_LEN],
+ const u8 precomputed[NOISE_PUBLIC_KEY_LEN])
+{
+ static u8 zero_point[NOISE_PUBLIC_KEY_LEN];
+ if (unlikely(!crypto_memneq(precomputed, zero_point, NOISE_PUBLIC_KEY_LEN)))
+ return false;
+ kdf(chaining_key, key, NULL, precomputed, NOISE_HASH_LEN,
+ NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
+ chaining_key);
+ return true;
+}
+
static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len)
{
struct blake2s_state blake;
@@ -531,10 +535,9 @@ wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
NOISE_PUBLIC_KEY_LEN, key, handshake->hash);
/* ss */
- kdf(handshake->chaining_key, key, NULL,
- handshake->precomputed_static_static, NOISE_HASH_LEN,
- NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
- handshake->chaining_key);
+ if (!mix_precomputed_dh(handshake->chaining_key, key,
+ handshake->precomputed_static_static))
+ goto out;
/* {t} */
tai64n_now(timestamp);
@@ -595,9 +598,9 @@ wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src,
handshake = &peer->handshake;
/* ss */
- kdf(chaining_key, key, NULL, handshake->precomputed_static_static,
- NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
- chaining_key);
+ if (!mix_precomputed_dh(chaining_key, key,
+ handshake->precomputed_static_static))
+ goto out;
/* {t} */
if (!message_decrypt(t, src->encrypted_timestamp,
diff --git a/drivers/net/wireguard/noise.h b/drivers/net/wireguard/noise.h
index 138a07b..f532d59 100644
--- a/drivers/net/wireguard/noise.h
+++ b/drivers/net/wireguard/noise.h
@@ -94,11 +94,11 @@ struct noise_handshake {
struct wg_device;
void wg_noise_init(void);
-bool wg_noise_handshake_init(struct noise_handshake *handshake,
- struct noise_static_identity *static_identity,
- const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
- const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
- struct wg_peer *peer);
+void wg_noise_handshake_init(struct noise_handshake *handshake,
+ struct noise_static_identity *static_identity,
+ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
+ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
+ struct wg_peer *peer);
void wg_noise_handshake_clear(struct noise_handshake *handshake);
static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns)
{
@@ -116,7 +116,7 @@ void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer);
void wg_noise_set_static_identity_private_key(
struct noise_static_identity *static_identity,
const u8 private_key[NOISE_PUBLIC_KEY_LEN]);
-bool wg_noise_precompute_static_static(struct wg_peer *peer);
+void wg_noise_precompute_static_static(struct wg_peer *peer);
bool
wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c
index 071eedf..1d634bd 100644
--- a/drivers/net/wireguard/peer.c
+++ b/drivers/net/wireguard/peer.c
@@ -34,11 +34,8 @@ struct wg_peer *wg_peer_create(struct wg_device *wg,
return ERR_PTR(ret);
peer->device = wg;
- if (!wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
- public_key, preshared_key, peer)) {
- ret = -EKEYREJECTED;
- goto err_1;
- }
+ wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
+ public_key, preshared_key, peer);
if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
goto err_1;
if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false,
diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
index fecb559..3432232 100644
--- a/drivers/net/wireguard/queueing.h
+++ b/drivers/net/wireguard/queueing.h
@@ -66,7 +66,7 @@ struct packet_cb {
#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer)
/* Returns either the correct skb->protocol value, or 0 if invalid. */
-static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb)
+static inline __be16 wg_examine_packet_protocol(struct sk_buff *skb)
{
if (skb_network_header(skb) >= skb->head &&
(skb_network_header(skb) + sizeof(struct iphdr)) <=
@@ -81,6 +81,12 @@ static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb)
return 0;
}
+static inline bool wg_check_packet_protocol(struct sk_buff *skb)
+{
+ __be16 real_protocol = wg_examine_packet_protocol(skb);
+ return real_protocol && skb->protocol == real_protocol;
+}
+
static inline void wg_reset_packet(struct sk_buff *skb)
{
skb_scrub_packet(skb, true);
@@ -94,8 +100,8 @@ static inline void wg_reset_packet(struct sk_buff *skb)
skb->dev = NULL;
#ifdef CONFIG_NET_SCHED
skb->tc_index = 0;
- skb_reset_tc(skb);
#endif
+ skb_reset_redirect(skb);
skb->hdr_len = skb_headroom(skb);
skb_reset_mac_header(skb);
skb_reset_network_header(skb);
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
index 9c6bab9..da3b782 100644
--- a/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
@@ -56,7 +56,7 @@ static int prepare_skb_header(struct sk_buff *skb, struct wg_device *wg)
size_t data_offset, data_len, header_len;
struct udphdr *udp;
- if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol ||
+ if (unlikely(!wg_check_packet_protocol(skb) ||
skb_transport_header(skb) < skb->head ||
(skb_transport_header(skb) + sizeof(struct udphdr)) >
skb_tail_pointer(skb)))
@@ -118,10 +118,13 @@ static void wg_receive_handshake_packet(struct wg_device *wg,
under_load = skb_queue_len(&wg->incoming_handshakes) >=
MAX_QUEUED_INCOMING_HANDSHAKES / 8;
- if (under_load)
+ if (under_load) {
last_under_load = ktime_get_coarse_boottime_ns();
- else if (last_under_load)
+ } else if (last_under_load) {
under_load = !wg_birthdate_has_expired(last_under_load, 1);
+ if (!under_load)
+ last_under_load = 0;
+ }
mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb,
under_load);
if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) ||
@@ -385,7 +388,7 @@ static void wg_packet_consume_data_done(struct wg_peer *peer,
*/
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->csum_level = ~0; /* All levels */
- skb->protocol = wg_skb_examine_untrusted_ip_hdr(skb);
+ skb->protocol = wg_examine_packet_protocol(skb);
if (skb->protocol == htons(ETH_P_IP)) {
len = ntohs(ip_hdr(skb)->tot_len);
if (unlikely(len < sizeof(struct iphdr)))
@@ -584,8 +587,7 @@ void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb)
wg_packet_consume_data(wg, skb);
break;
default:
- net_dbg_skb_ratelimited("%s: Invalid packet from %pISpfsc\n",
- wg->dev->name, skb);
+ WARN(1, "Non-exhaustive parsing of packet header lead to unknown packet type!\n");
goto err;
}
return;
diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
index c132605..7348c10 100644
--- a/drivers/net/wireguard/send.c
+++ b/drivers/net/wireguard/send.c
@@ -143,16 +143,22 @@ static void keep_key_fresh(struct wg_peer *peer)
static unsigned int calculate_skb_padding(struct sk_buff *skb)
{
+ unsigned int padded_size, last_unit = skb->len;
+
+ if (unlikely(!PACKET_CB(skb)->mtu))
+ return ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE) - last_unit;
+
/* We do this modulo business with the MTU, just in case the networking
* layer gives us a packet that's bigger than the MTU. In that case, we
* wouldn't want the final subtraction to overflow in the case of the
- * padded_size being clamped.
+ * padded_size being clamped. Fortunately, that's very rarely the case,
+ * so we optimize for that not happening.
*/
- unsigned int last_unit = skb->len % PACKET_CB(skb)->mtu;
- unsigned int padded_size = ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE);
+ if (unlikely(last_unit > PACKET_CB(skb)->mtu))
+ last_unit %= PACKET_CB(skb)->mtu;
- if (padded_size > PACKET_CB(skb)->mtu)
- padded_size = PACKET_CB(skb)->mtu;
+ padded_size = min(PACKET_CB(skb)->mtu,
+ ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE));
return padded_size - last_unit;
}
diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c
index 262f3b5..b0d6541 100644
--- a/drivers/net/wireguard/socket.c
+++ b/drivers/net/wireguard/socket.c
@@ -432,7 +432,6 @@ void wg_socket_reinit(struct wg_device *wg, struct sock *new4,
wg->incoming_port = ntohs(inet_sk(new4)->inet_sport);
mutex_unlock(&wg->socket_update_lock);
synchronize_rcu();
- synchronize_net();
sock_free(old4);
sock_free(old6);
}
diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index a22a830..355af47 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -283,6 +283,7 @@ const struct iwl_cfg iwl_ax101_cfg_qu_c0_hr_b0 = {
* HT size; mac80211 would otherwise pick the HE max (256) by default.
*/
.max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT,
+ .tx_with_siso_diversity = true,
.num_rbds = IWL_NUM_RBDS_22000_HE,
};
@@ -309,6 +310,7 @@ const struct iwl_cfg iwl_ax101_cfg_quz_hr = {
* HT size; mac80211 would otherwise pick the HE max (256) by default.
*/
.max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT,
+ .tx_with_siso_diversity = true,
.num_rbds = IWL_NUM_RBDS_22000_HE,
};
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
index 48d375a..ba2aff3 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
@@ -6,7 +6,7 @@
* GPL LICENSE SUMMARY
*
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright (C) 2019 Intel Corporation
+ * Copyright (C) 2019 - 2020 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -27,7 +27,7 @@
* BSD LICENSE
*
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright (C) 2019 Intel Corporation
+ * Copyright (C) 2019 - 2020 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -491,13 +491,13 @@ int iwl_validate_sar_geo_profile(struct iwl_fw_runtime *fwrt,
}
IWL_EXPORT_SYMBOL(iwl_validate_sar_geo_profile);
-void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
- struct iwl_per_chain_offset_group *table)
+int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
+ struct iwl_per_chain_offset_group *table)
{
int ret, i, j;
if (!iwl_sar_geo_support(fwrt))
- return;
+ return -EOPNOTSUPP;
ret = iwl_sar_get_wgds_table(fwrt);
if (ret < 0) {
@@ -505,7 +505,7 @@ void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
"Geo SAR BIOS table invalid or unavailable. (%d)\n",
ret);
/* we don't fail if the table is not available */
- return;
+ return -ENOENT;
}
BUILD_BUG_ON(ACPI_NUM_GEO_PROFILES * ACPI_WGDS_NUM_BANDS *
@@ -530,5 +530,7 @@ void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
i, j, value[1], value[2], value[0]);
}
}
+
+ return 0;
}
IWL_EXPORT_SYMBOL(iwl_sar_geo_init);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
index 4a6e826..5590e5c 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
@@ -6,7 +6,7 @@
* GPL LICENSE SUMMARY
*
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -27,7 +27,7 @@
* BSD LICENSE
*
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -171,8 +171,9 @@ bool iwl_sar_geo_support(struct iwl_fw_runtime *fwrt);
int iwl_validate_sar_geo_profile(struct iwl_fw_runtime *fwrt,
struct iwl_host_cmd *cmd);
-void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
- struct iwl_per_chain_offset_group *table);
+int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
+ struct iwl_per_chain_offset_group *table);
+
#else /* CONFIG_ACPI */
static inline void *iwl_acpi_get_object(struct device *dev, acpi_string method)
@@ -243,9 +244,10 @@ static inline int iwl_validate_sar_geo_profile(struct iwl_fw_runtime *fwrt,
return -ENOENT;
}
-static inline void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
- struct iwl_per_chain_offset_group *table)
+static inline int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
+ struct iwl_per_chain_offset_group *table)
{
+ return -ENOENT;
}
#endif /* CONFIG_ACPI */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index 91df1ee..8796ab8 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -8,7 +8,7 @@
* Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -31,7 +31,7 @@
* Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -1409,11 +1409,7 @@ static int iwl_dump_ini_rxf_iter(struct iwl_fw_runtime *fwrt,
goto out;
}
- /*
- * region register have absolute value so apply rxf offset after
- * reading the registers
- */
- offs += rxf_data.offset;
+ offs = rxf_data.offset;
/* Lock fence */
iwl_write_prph_no_grab(fwrt->trans, RXF_SET_FENCE_MODE + offs, 0x1);
@@ -2494,10 +2490,7 @@ static void iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx)
goto out;
}
- if (iwl_fw_dbg_stop_restart_recording(fwrt, ¶ms, true)) {
- IWL_ERR(fwrt, "Failed to stop DBGC recording, aborting dump\n");
- goto out;
- }
+ iwl_fw_dbg_stop_restart_recording(fwrt, ¶ms, true);
IWL_DEBUG_FW_INFO(fwrt, "WRT: Data collection start\n");
if (iwl_trans_dbg_ini_valid(fwrt->trans))
@@ -2662,14 +2655,14 @@ static int iwl_fw_dbg_restart_recording(struct iwl_trans *trans,
return 0;
}
-int iwl_fw_dbg_stop_restart_recording(struct iwl_fw_runtime *fwrt,
- struct iwl_fw_dbg_params *params,
- bool stop)
+void iwl_fw_dbg_stop_restart_recording(struct iwl_fw_runtime *fwrt,
+ struct iwl_fw_dbg_params *params,
+ bool stop)
{
int ret = 0;
if (test_bit(STATUS_FW_ERROR, &fwrt->trans->status))
- return 0;
+ return;
if (fw_has_capa(&fwrt->fw->ucode_capa,
IWL_UCODE_TLV_CAPA_DBG_SUSPEND_RESUME_CMD_SUPP))
@@ -2686,7 +2679,5 @@ int iwl_fw_dbg_stop_restart_recording(struct iwl_fw_runtime *fwrt,
iwl_fw_set_dbg_rec_on(fwrt);
}
#endif
-
- return ret;
}
IWL_EXPORT_SYMBOL(iwl_fw_dbg_stop_restart_recording);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
index 179f290..9d35132 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
@@ -239,9 +239,9 @@ _iwl_fw_dbg_trigger_simple_stop(struct iwl_fw_runtime *fwrt,
_iwl_fw_dbg_trigger_simple_stop((fwrt), (wdev), \
iwl_fw_dbg_get_trigger((fwrt)->fw,\
(trig)))
-int iwl_fw_dbg_stop_restart_recording(struct iwl_fw_runtime *fwrt,
- struct iwl_fw_dbg_params *params,
- bool stop);
+void iwl_fw_dbg_stop_restart_recording(struct iwl_fw_runtime *fwrt,
+ struct iwl_fw_dbg_params *params,
+ bool stop);
#ifdef CONFIG_IWLWIFI_DEBUGFS
static inline void iwl_fw_set_dbg_rec_on(struct iwl_fw_runtime *fwrt)
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
index 2d1cb46..0481796 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
@@ -1467,7 +1467,7 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context)
kmemdup(pieces->dbg_conf_tlv[i],
pieces->dbg_conf_tlv_len[i],
GFP_KERNEL);
- if (!pieces->dbg_conf_tlv_len[i])
+ if (!pieces->dbg_conf_tlv[i])
goto out_free_fw;
}
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 54c094e..98263cd 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -762,10 +762,17 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm)
u16 cmd_wide_id = WIDE_ID(PHY_OPS_GROUP, GEO_TX_POWER_LIMIT);
union geo_tx_power_profiles_cmd cmd;
u16 len;
+ int ret;
cmd.geo_cmd.ops = cpu_to_le32(IWL_PER_CHAIN_OFFSET_SET_TABLES);
- iwl_sar_geo_init(&mvm->fwrt, cmd.geo_cmd.table);
+ ret = iwl_sar_geo_init(&mvm->fwrt, cmd.geo_cmd.table);
+ /*
+ * It is a valid scenario to not support SAR, or miss wgds table,
+ * but in that case there is no need to send the command.
+ */
+ if (ret)
+ return 0;
cmd.geo_cmd.table_revision = cpu_to_le32(mvm->fwrt.geo_rev);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
index 70b29bf..60296a7 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
@@ -308,7 +308,8 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm)
}
/* PHY_SKU section is mandatory in B0 */
- if (!mvm->nvm_sections[NVM_SECTION_TYPE_PHY_SKU].data) {
+ if (mvm->trans->cfg->nvm_type == IWL_NVM_EXT &&
+ !mvm->nvm_sections[NVM_SECTION_TYPE_PHY_SKU].data) {
IWL_ERR(mvm,
"Can't parse phy_sku in B0, empty sections\n");
return NULL;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
index e2cf9e0..ca99a9c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
@@ -6,7 +6,7 @@
* GPL LICENSE SUMMARY
*
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -27,7 +27,7 @@
* BSD LICENSE
*
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -147,7 +147,11 @@ static u16 rs_fw_get_config_flags(struct iwl_mvm *mvm,
(vht_ena && (vht_cap->cap & IEEE80211_VHT_CAP_RXLDPC))))
flags |= IWL_TLC_MNG_CFG_FLAGS_LDPC_MSK;
- /* consider our LDPC support in case of HE */
+ /* consider LDPC support in case of HE */
+ if (he_cap->has_he && (he_cap->he_cap_elem.phy_cap_info[1] &
+ IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD))
+ flags |= IWL_TLC_MNG_CFG_FLAGS_LDPC_MSK;
+
if (sband->iftype_data && sband->iftype_data->he_cap.has_he &&
!(sband->iftype_data->he_cap.he_cap_elem.phy_cap_info[1] &
IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD))
@@ -191,11 +195,13 @@ rs_fw_vht_set_enabled_rates(const struct ieee80211_sta *sta,
{
u16 supp;
int i, highest_mcs;
+ u8 nss = sta->rx_nss;
- for (i = 0; i < sta->rx_nss; i++) {
- if (i == IWL_TLC_NSS_MAX)
- break;
+ /* the station support only a single receive chain */
+ if (sta->smps_mode == IEEE80211_SMPS_STATIC)
+ nss = 1;
+ for (i = 0; i < nss && i < IWL_TLC_NSS_MAX; i++) {
highest_mcs = rs_fw_vht_highest_rx_mcs_index(vht_cap, i + 1);
if (!highest_mcs)
continue;
@@ -241,8 +247,13 @@ rs_fw_he_set_enabled_rates(const struct ieee80211_sta *sta,
u16 tx_mcs_160 =
le16_to_cpu(sband->iftype_data->he_cap.he_mcs_nss_supp.tx_mcs_160);
int i;
+ u8 nss = sta->rx_nss;
- for (i = 0; i < sta->rx_nss && i < IWL_TLC_NSS_MAX; i++) {
+ /* the station support only a single receive chain */
+ if (sta->smps_mode == IEEE80211_SMPS_STATIC)
+ nss = 1;
+
+ for (i = 0; i < nss && i < IWL_TLC_NSS_MAX; i++) {
u16 _mcs_160 = (mcs_160 >> (2 * i)) & 0x3;
u16 _mcs_80 = (mcs_80 >> (2 * i)) & 0x3;
u16 _tx_mcs_160 = (tx_mcs_160 >> (2 * i)) & 0x3;
@@ -303,8 +314,14 @@ static void rs_fw_set_supp_rates(struct ieee80211_sta *sta,
cmd->mode = IWL_TLC_MNG_MODE_HT;
cmd->ht_rates[IWL_TLC_NSS_1][IWL_TLC_HT_BW_NONE_160] =
cpu_to_le16(ht_cap->mcs.rx_mask[0]);
- cmd->ht_rates[IWL_TLC_NSS_2][IWL_TLC_HT_BW_NONE_160] =
- cpu_to_le16(ht_cap->mcs.rx_mask[1]);
+
+ /* the station support only a single receive chain */
+ if (sta->smps_mode == IEEE80211_SMPS_STATIC)
+ cmd->ht_rates[IWL_TLC_NSS_2][IWL_TLC_HT_BW_NONE_160] =
+ 0;
+ else
+ cmd->ht_rates[IWL_TLC_NSS_2][IWL_TLC_HT_BW_NONE_160] =
+ cpu_to_le16(ht_cap->mcs.rx_mask[1]);
}
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
index c0b420f..1babc4b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
@@ -785,7 +785,9 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm,
if (!le32_to_cpu(notif->status)) {
iwl_mvm_te_check_disconnect(mvm, vif,
"Session protection failure");
+ spin_lock_bh(&mvm->time_event_lock);
iwl_mvm_te_clear_data(mvm, te_data);
+ spin_unlock_bh(&mvm->time_event_lock);
}
if (le32_to_cpu(notif->start)) {
@@ -801,7 +803,9 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm,
*/
iwl_mvm_te_check_disconnect(mvm, vif,
"No beacon heard and the session protection is over already...");
+ spin_lock_bh(&mvm->time_event_lock);
iwl_mvm_te_clear_data(mvm, te_data);
+ spin_unlock_bh(&mvm->time_event_lock);
}
goto out_unlock;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 97f227f..f441b20 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -981,6 +981,9 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
IWL_DEV_INFO(0x2526, 0x0014, iwl9260_2ac_160_cfg, iwl9260_160_name),
IWL_DEV_INFO(0x2526, 0x0018, iwl9260_2ac_160_cfg, iwl9260_160_name),
IWL_DEV_INFO(0x2526, 0x001C, iwl9260_2ac_160_cfg, iwl9260_160_name),
+ IWL_DEV_INFO(0x2526, 0x4010, iwl9260_2ac_160_cfg, iwl9260_160_name),
+ IWL_DEV_INFO(0x2526, 0x4018, iwl9260_2ac_160_cfg, iwl9260_160_name),
+ IWL_DEV_INFO(0x2526, 0x401C, iwl9260_2ac_160_cfg, iwl9260_160_name),
IWL_DEV_INFO(0x2526, 0x6010, iwl9260_2ac_160_cfg, iwl9260_160_name),
IWL_DEV_INFO(0x2526, 0x6014, iwl9260_2ac_160_cfg, iwl9260_160_name),
IWL_DEV_INFO(0x2526, 0x8014, iwl9260_2ac_160_cfg, iwl9260_160_name),
diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 6173c80..1847f55 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -447,10 +447,13 @@ mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data,
struct page *page = virt_to_head_page(data);
int offset = data - page_address(page);
struct sk_buff *skb = q->rx_head;
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
- offset += q->buf_offset;
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, len,
- q->buf_size);
+ if (shinfo->nr_frags < ARRAY_SIZE(shinfo->frags)) {
+ offset += q->buf_offset;
+ skb_add_rx_frag(skb, shinfo->nr_frags, page, offset, len,
+ q->buf_size);
+ }
if (more)
return;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h
index 9177298..e17f70b 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h
@@ -561,6 +561,7 @@ static inline void clear_pci_tx_desc_content(__le32 *__pdesc, int _size)
rxmcs == DESC92C_RATE11M)
struct phy_status_rpt {
+ u8 padding[2];
u8 ch_corr[2];
u8 cck_sig_qual_ofdm_pwdb_all;
u8 cck_agc_rpt_ofdm_cfosho_a;
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index ed049c9..f140f7d 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -6274,7 +6274,7 @@ static int wl1271_init_ieee80211(struct wl1271 *wl)
wl->hw->wiphy->flags |= WIPHY_FLAG_AP_UAPSD |
WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL |
WIPHY_FLAG_HAS_CHANNEL_SWITCH |
-+ WIPHY_FLAG_IBSS_RSN;
+ WIPHY_FLAG_IBSS_RSN;
wl->hw->wiphy->features |= NL80211_FEATURE_AP_SCAN;
diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c
index 0cc9ac85..ed21231 100644
--- a/drivers/nfc/fdp/fdp.c
+++ b/drivers/nfc/fdp/fdp.c
@@ -184,7 +184,7 @@ static int fdp_nci_send_patch(struct nci_dev *ndev, u8 conn_id, u8 type)
const struct firmware *fw;
struct sk_buff *skb;
unsigned long len;
- u8 max_size, payload_size;
+ int max_size, payload_size;
int rc = 0;
if ((type == NCI_PATCH_TYPE_OTP && !info->otp_patch) ||
@@ -207,8 +207,7 @@ static int fdp_nci_send_patch(struct nci_dev *ndev, u8 conn_id, u8 type)
while (len) {
- payload_size = min_t(unsigned long, (unsigned long) max_size,
- len);
+ payload_size = min_t(unsigned long, max_size, len);
skb = nci_skb_alloc(ndev, (NCI_CTRL_HDR_SIZE + payload_size),
GFP_KERNEL);
diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c
index 720c89d..4ac8cb2 100644
--- a/drivers/nfc/pn544/i2c.c
+++ b/drivers/nfc/pn544/i2c.c
@@ -225,6 +225,7 @@ static void pn544_hci_i2c_platform_init(struct pn544_i2c_phy *phy)
out:
gpiod_set_value_cansleep(phy->gpiod_en, !phy->en_polarity);
+ usleep_range(10000, 15000);
}
static void pn544_hci_i2c_enable_mode(struct pn544_i2c_phy *phy, int run_mode)
diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c
index 2b83156..b788870 100644
--- a/drivers/nfc/pn544/pn544.c
+++ b/drivers/nfc/pn544/pn544.c
@@ -682,7 +682,7 @@ static int pn544_hci_tm_send(struct nfc_hci_dev *hdev, struct sk_buff *skb)
static int pn544_hci_check_presence(struct nfc_hci_dev *hdev,
struct nfc_target *target)
{
- pr_debug("supported protocol %d\b", target->supported_protocols);
+ pr_debug("supported protocol %d\n", target->supported_protocols);
if (target->supported_protocols & (NFC_PROTO_ISO14443_MASK |
NFC_PROTO_ISO14443_B_MASK)) {
return nfc_hci_send_cmd(hdev, target->hci_reader_gate,
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 677d6f4..43751fa 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -249,13 +249,12 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
internal_nlba = div_u64(nsblk->size, nsblk_internal_lbasize(nsblk));
available_disk_size = internal_nlba * nsblk_sector_size(nsblk);
- q = blk_alloc_queue(GFP_KERNEL);
+ q = blk_alloc_queue(nd_blk_make_request, NUMA_NO_NODE);
if (!q)
return -ENOMEM;
if (devm_add_action_or_reset(dev, nd_blk_release_queue, q))
return -ENOMEM;
- blk_queue_make_request(q, nd_blk_make_request);
blk_queue_max_hw_sectors(q, UINT_MAX);
blk_queue_logical_block_size(q, nsblk_sector_size(nsblk));
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 0d04ea3..3b09419 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1521,7 +1521,7 @@ static int btt_blk_init(struct btt *btt)
struct nd_namespace_common *ndns = nd_btt->ndns;
/* create a new disk and request queue for btt */
- btt->btt_queue = blk_alloc_queue(GFP_KERNEL);
+ btt->btt_queue = blk_alloc_queue(btt_make_request, NUMA_NO_NODE);
if (!btt->btt_queue)
return -ENOMEM;
@@ -1540,7 +1540,6 @@ static int btt_blk_init(struct btt *btt)
btt->btt_disk->queue->backing_dev_info->capabilities |=
BDI_CAP_SYNCHRONOUS_IO;
- blk_queue_make_request(btt->btt_queue, btt_make_request);
blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_queue);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 4eae441..4ffc6f7 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -395,7 +395,7 @@ static int pmem_attach_disk(struct device *dev,
return -EBUSY;
}
- q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
+ q = blk_alloc_queue(pmem_make_request, dev_to_node(dev));
if (!q)
return -ENOMEM;
@@ -433,7 +433,6 @@ static int pmem_attach_disk(struct device *dev,
pmem->virt_addr = addr;
blk_queue_write_cache(q, true, fua);
- blk_queue_make_request(q, pmem_make_request);
blk_queue_physical_block_size(q, PAGE_SIZE);
blk_queue_logical_block_size(q, pmem_sector_size(ndns));
blk_queue_max_hw_sectors(q, UINT_MAX);
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index b9358db..9c17ed3 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -32,8 +32,6 @@
a hardware monitoring device will be created for each NVMe drive
in the system.
- If unsure, say N.
-
config NVME_FABRICS
tristate
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 5dc32b7..4f907e3 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -66,8 +66,8 @@ MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
* nvme_reset_wq - hosts nvme reset works
* nvme_delete_wq - hosts nvme delete works
*
- * nvme_wq will host works such are scan, aen handling, fw activation,
- * keep-alive error recovery, periodic reconnects etc. nvme_reset_wq
+ * nvme_wq will host works such as scan, aen handling, fw activation,
+ * keep-alive, periodic reconnects etc. nvme_reset_wq
* runs reset works which also flush works hosted on nvme_wq for
* serialization purposes. nvme_delete_wq host controller deletion
* works which flush reset works for serialization.
@@ -171,7 +171,6 @@ static void nvme_do_delete_ctrl(struct nvme_ctrl *ctrl)
nvme_remove_namespaces(ctrl);
ctrl->ops->delete_ctrl(ctrl);
nvme_uninit_ctrl(ctrl);
- nvme_put_ctrl(ctrl);
}
static void nvme_delete_ctrl_work(struct work_struct *work)
@@ -192,21 +191,16 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_delete_ctrl);
-static int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
+static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
{
- int ret = 0;
-
/*
* Keep a reference until nvme_do_delete_ctrl() complete,
* since ->delete_ctrl can free the controller.
*/
nvme_get_ctrl(ctrl);
- if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING))
- ret = -EBUSY;
- if (!ret)
+ if (nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING))
nvme_do_delete_ctrl(ctrl);
nvme_put_ctrl(ctrl);
- return ret;
}
static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
@@ -291,11 +285,8 @@ void nvme_complete_rq(struct request *req)
nvme_req(req)->ctrl->comp_seen = true;
if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
- if ((req->cmd_flags & REQ_NVME_MPATH) &&
- blk_path_error(status)) {
- nvme_failover_req(req);
+ if ((req->cmd_flags & REQ_NVME_MPATH) && nvme_failover_req(req))
return;
- }
if (!blk_queue_dying(req->q)) {
nvme_retry_req(req);
@@ -976,7 +967,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
startka = true;
spin_unlock_irqrestore(&ctrl->lock, flags);
if (startka)
- schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
+ queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
}
static int nvme_keep_alive(struct nvme_ctrl *ctrl)
@@ -1006,7 +997,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
dev_dbg(ctrl->device,
"reschedule traffic based keep-alive timer\n");
ctrl->comp_seen = false;
- schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
+ queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
return;
}
@@ -1023,7 +1014,7 @@ static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
if (unlikely(ctrl->kato == 0))
return;
- schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
+ queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
}
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
@@ -1055,6 +1046,43 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
return error;
}
+static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
+ struct nvme_ns_id_desc *cur)
+{
+ const char *warn_str = "ctrl returned bogus length:";
+ void *data = cur;
+
+ switch (cur->nidt) {
+ case NVME_NIDT_EUI64:
+ if (cur->nidl != NVME_NIDT_EUI64_LEN) {
+ dev_warn(ctrl->device, "%s %d for NVME_NIDT_EUI64\n",
+ warn_str, cur->nidl);
+ return -1;
+ }
+ memcpy(ids->eui64, data + sizeof(*cur), NVME_NIDT_EUI64_LEN);
+ return NVME_NIDT_EUI64_LEN;
+ case NVME_NIDT_NGUID:
+ if (cur->nidl != NVME_NIDT_NGUID_LEN) {
+ dev_warn(ctrl->device, "%s %d for NVME_NIDT_NGUID\n",
+ warn_str, cur->nidl);
+ return -1;
+ }
+ memcpy(ids->nguid, data + sizeof(*cur), NVME_NIDT_NGUID_LEN);
+ return NVME_NIDT_NGUID_LEN;
+ case NVME_NIDT_UUID:
+ if (cur->nidl != NVME_NIDT_UUID_LEN) {
+ dev_warn(ctrl->device, "%s %d for NVME_NIDT_UUID\n",
+ warn_str, cur->nidl);
+ return -1;
+ }
+ uuid_copy(&ids->uuid, data + sizeof(*cur));
+ return NVME_NIDT_UUID_LEN;
+ default:
+ /* Skip unknown types */
+ return cur->nidl;
+ }
+}
+
static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
struct nvme_ns_ids *ids)
{
@@ -1074,8 +1102,17 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
status = nvme_submit_sync_cmd(ctrl->admin_q, &c, data,
NVME_IDENTIFY_DATA_SIZE);
- if (status)
+ if (status) {
+ dev_warn(ctrl->device,
+ "Identify Descriptors failed (%d)\n", status);
+ /*
+ * Don't treat an error as fatal, as we potentially already
+ * have a NGUID or EUI-64.
+ */
+ if (status > 0)
+ status = 0;
goto free_data;
+ }
for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) {
struct nvme_ns_id_desc *cur = data + pos;
@@ -1083,42 +1120,9 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
if (cur->nidl == 0)
break;
- switch (cur->nidt) {
- case NVME_NIDT_EUI64:
- if (cur->nidl != NVME_NIDT_EUI64_LEN) {
- dev_warn(ctrl->device,
- "ctrl returned bogus length: %d for NVME_NIDT_EUI64\n",
- cur->nidl);
- goto free_data;
- }
- len = NVME_NIDT_EUI64_LEN;
- memcpy(ids->eui64, data + pos + sizeof(*cur), len);
- break;
- case NVME_NIDT_NGUID:
- if (cur->nidl != NVME_NIDT_NGUID_LEN) {
- dev_warn(ctrl->device,
- "ctrl returned bogus length: %d for NVME_NIDT_NGUID\n",
- cur->nidl);
- goto free_data;
- }
- len = NVME_NIDT_NGUID_LEN;
- memcpy(ids->nguid, data + pos + sizeof(*cur), len);
- break;
- case NVME_NIDT_UUID:
- if (cur->nidl != NVME_NIDT_UUID_LEN) {
- dev_warn(ctrl->device,
- "ctrl returned bogus length: %d for NVME_NIDT_UUID\n",
- cur->nidl);
- goto free_data;
- }
- len = NVME_NIDT_UUID_LEN;
- uuid_copy(&ids->uuid, data + pos + sizeof(*cur));
- break;
- default:
- /* Skip unknown types */
- len = cur->nidl;
- break;
- }
+ len = nvme_process_ns_desc(ctrl, ids, cur);
+ if (len < 0)
+ goto free_data;
len += sizeof(*cur);
}
@@ -1165,8 +1169,8 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl,
static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
unsigned int dword11, void *buffer, size_t buflen, u32 *result)
{
+ union nvme_result res = { 0 };
struct nvme_command c;
- union nvme_result res;
int ret;
memset(&c, 0, sizeof(c));
@@ -1584,6 +1588,47 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
return ret;
}
+#ifdef CONFIG_COMPAT
+struct nvme_user_io32 {
+ __u8 opcode;
+ __u8 flags;
+ __u16 control;
+ __u16 nblocks;
+ __u16 rsvd;
+ __u64 metadata;
+ __u64 addr;
+ __u64 slba;
+ __u32 dsmgmt;
+ __u32 reftag;
+ __u16 apptag;
+ __u16 appmask;
+} __attribute__((__packed__));
+
+#define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32)
+
+static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ /*
+ * Corresponds to the difference of NVME_IOCTL_SUBMIT_IO
+ * between 32 bit programs and 64 bit kernel.
+ * The cause is that the results of sizeof(struct nvme_user_io),
+ * which is used to define NVME_IOCTL_SUBMIT_IO,
+ * are not same between 32 bit compiler and 64 bit compiler.
+ * NVME_IOCTL_SUBMIT_IO32 is for 64 bit kernel handling
+ * NVME_IOCTL_SUBMIT_IO issued from 32 bit programs.
+ * Other IOCTL numbers are same between 32 bit and 64 bit.
+ * So there is nothing to do regarding to other IOCTL numbers.
+ */
+ if (cmd == NVME_IOCTL_SUBMIT_IO32)
+ return nvme_ioctl(bdev, mode, NVME_IOCTL_SUBMIT_IO, arg);
+
+ return nvme_ioctl(bdev, mode, cmd, arg);
+}
+#else
+#define nvme_compat_ioctl NULL
+#endif /* CONFIG_COMPAT */
+
static int nvme_open(struct block_device *bdev, fmode_t mode)
{
struct nvme_ns *ns = bdev->bd_disk->private_data;
@@ -1721,26 +1766,15 @@ static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
struct nvme_id_ns *id, struct nvme_ns_ids *ids)
{
- int ret = 0;
-
memset(ids, 0, sizeof(*ids));
if (ctrl->vs >= NVME_VS(1, 1, 0))
memcpy(ids->eui64, id->eui64, sizeof(id->eui64));
if (ctrl->vs >= NVME_VS(1, 2, 0))
memcpy(ids->nguid, id->nguid, sizeof(id->nguid));
- if (ctrl->vs >= NVME_VS(1, 3, 0)) {
- /* Don't treat error as fatal we potentially
- * already have a NGUID or EUI-64
- */
- ret = nvme_identify_ns_descs(ctrl, nsid, ids);
- if (ret)
- dev_warn(ctrl->device,
- "Identify Descriptors failed (%d)\n", ret);
- if (ret > 0)
- ret = 0;
- }
- return ret;
+ if (ctrl->vs >= NVME_VS(1, 3, 0))
+ return nvme_identify_ns_descs(ctrl, nsid, ids);
+ return 0;
}
static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids)
@@ -1810,7 +1844,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
ns->lba_shift > PAGE_SHIFT)
capacity = 0;
- set_capacity(disk, capacity);
+ set_capacity_revalidate_and_notify(disk, capacity, false);
nvme_config_discard(disk, ns);
nvme_config_write_zeroes(disk, ns);
@@ -2027,7 +2061,7 @@ EXPORT_SYMBOL_GPL(nvme_sec_submit);
static const struct block_device_operations nvme_fops = {
.owner = THIS_MODULE,
.ioctl = nvme_ioctl,
- .compat_ioctl = nvme_ioctl,
+ .compat_ioctl = nvme_compat_ioctl,
.open = nvme_open,
.release = nvme_release,
.getgeo = nvme_getgeo,
@@ -2055,7 +2089,7 @@ const struct block_device_operations nvme_ns_head_ops = {
.open = nvme_ns_head_open,
.release = nvme_ns_head_release,
.ioctl = nvme_ioctl,
- .compat_ioctl = nvme_ioctl,
+ .compat_ioctl = nvme_compat_ioctl,
.getgeo = nvme_getgeo,
.pr_ops = &nvme_pr_ops,
};
@@ -2074,13 +2108,13 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
if ((csts & NVME_CSTS_RDY) == bit)
break;
- msleep(100);
+ usleep_range(1000, 2000);
if (fatal_signal_pending(current))
return -EINTR;
if (time_after(jiffies, timeout)) {
dev_err(ctrl->device,
- "Device not ready; aborting %s\n", enabled ?
- "initialisation" : "reset");
+ "Device not ready; aborting %s, CSTS=0x%x\n",
+ enabled ? "initialisation" : "reset", csts);
return -ENODEV;
}
}
@@ -2591,8 +2625,7 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
lockdep_assert_held(&nvme_subsystems_lock);
list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) {
- if (tmp->state == NVME_CTRL_DELETING ||
- tmp->state == NVME_CTRL_DEAD)
+ if (nvme_state_terminal(tmp))
continue;
if (tmp->cntlid == ctrl->cntlid) {
@@ -3193,6 +3226,10 @@ static ssize_t nvme_sysfs_delete(struct device *dev,
{
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ /* Can't delete non-created controllers */
+ if (!ctrl->created)
+ return -EBUSY;
+
if (device_remove_file_self(dev, attr))
nvme_delete_ctrl_sync(ctrl);
return count;
@@ -3242,6 +3279,26 @@ static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev,
}
static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL);
+static ssize_t nvme_sysfs_show_hostnqn(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->opts->host->nqn);
+}
+static DEVICE_ATTR(hostnqn, S_IRUGO, nvme_sysfs_show_hostnqn, NULL);
+
+static ssize_t nvme_sysfs_show_hostid(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ return snprintf(buf, PAGE_SIZE, "%pU\n", &ctrl->opts->host->id);
+}
+static DEVICE_ATTR(hostid, S_IRUGO, nvme_sysfs_show_hostid, NULL);
+
static ssize_t nvme_sysfs_show_address(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -3267,6 +3324,8 @@ static struct attribute *nvme_dev_attrs[] = {
&dev_attr_numa_node.attr,
&dev_attr_queue_count.attr,
&dev_attr_sqsize.attr,
+ &dev_attr_hostnqn.attr,
+ &dev_attr_hostid.attr,
NULL
};
@@ -3280,6 +3339,10 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
return 0;
if (a == &dev_attr_address.attr && !ctrl->ops->get_address)
return 0;
+ if (a == &dev_attr_hostnqn.attr && !ctrl->opts)
+ return 0;
+ if (a == &dev_attr_hostid.attr && !ctrl->opts)
+ return 0;
return a->mode;
}
@@ -3294,7 +3357,7 @@ static const struct attribute_group *nvme_dev_attr_groups[] = {
NULL,
};
-static struct nvme_ns_head *__nvme_find_ns_head(struct nvme_subsystem *subsys,
+static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys,
unsigned nsid)
{
struct nvme_ns_head *h;
@@ -3327,7 +3390,8 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
}
static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
- unsigned nsid, struct nvme_id_ns *id)
+ unsigned nsid, struct nvme_id_ns *id,
+ struct nvme_ns_ids *ids)
{
struct nvme_ns_head *head;
size_t size = sizeof(*head);
@@ -3350,12 +3414,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
goto out_ida_remove;
head->subsys = ctrl->subsys;
head->ns_id = nsid;
+ head->ids = *ids;
kref_init(&head->ref);
- ret = nvme_report_ns_ids(ctrl, nsid, id, &head->ids);
- if (ret)
- goto out_cleanup_srcu;
-
ret = __nvme_check_ids(ctrl->subsys, head);
if (ret) {
dev_err(ctrl->device,
@@ -3390,24 +3451,23 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
struct nvme_ctrl *ctrl = ns->ctrl;
bool is_shared = id->nmic & (1 << 0);
struct nvme_ns_head *head = NULL;
+ struct nvme_ns_ids ids;
int ret = 0;
+ ret = nvme_report_ns_ids(ctrl, nsid, id, &ids);
+ if (ret)
+ goto out;
+
mutex_lock(&ctrl->subsys->lock);
if (is_shared)
- head = __nvme_find_ns_head(ctrl->subsys, nsid);
+ head = nvme_find_ns_head(ctrl->subsys, nsid);
if (!head) {
- head = nvme_alloc_ns_head(ctrl, nsid, id);
+ head = nvme_alloc_ns_head(ctrl, nsid, id, &ids);
if (IS_ERR(head)) {
ret = PTR_ERR(head);
goto out_unlock;
}
} else {
- struct nvme_ns_ids ids;
-
- ret = nvme_report_ns_ids(ctrl, nsid, id, &ids);
- if (ret)
- goto out_unlock;
-
if (!nvme_ns_ids_equal(&head->ids, &ids)) {
dev_err(ctrl->device,
"IDs don't match for shared namespace %d\n",
@@ -3422,6 +3482,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
out_unlock:
mutex_unlock(&ctrl->subsys->lock);
+out:
if (ret > 0)
ret = blk_status_to_errno(nvme_error_status(ret));
return ret;
@@ -3480,7 +3541,7 @@ static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
return 0;
}
-static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns *ns;
struct gendisk *disk;
@@ -3490,13 +3551,11 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
if (!ns)
- return -ENOMEM;
+ return;
ns->queue = blk_mq_init_queue(ctrl->tagset);
- if (IS_ERR(ns->queue)) {
- ret = PTR_ERR(ns->queue);
+ if (IS_ERR(ns->queue))
goto out_free_ns;
- }
if (ctrl->opts && ctrl->opts->data_digest)
ns->queue->backing_dev_info->capabilities
@@ -3519,10 +3578,8 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
if (ret)
goto out_free_queue;
- if (id->ncap == 0) {
- ret = -EINVAL;
+ if (id->ncap == 0) /* no namespace (legacy quirk) */
goto out_free_id;
- }
ret = nvme_init_ns_head(ns, nsid, id);
if (ret)
@@ -3531,10 +3588,8 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
nvme_set_disk_name(disk_name, ns, ctrl, &flags);
disk = alloc_disk_node(0, node);
- if (!disk) {
- ret = -ENOMEM;
+ if (!disk)
goto out_unlink_ns;
- }
disk->fops = &nvme_fops;
disk->private_data = ns;
@@ -3565,7 +3620,7 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
kfree(id);
- return 0;
+ return;
out_put_disk:
put_disk(ns->disk);
out_unlink_ns:
@@ -3579,9 +3634,6 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
blk_cleanup_queue(ns->queue);
out_free_ns:
kfree(ns);
- if (ret > 0)
- ret = blk_status_to_errno(nvme_error_status(ret));
- return ret;
}
static void nvme_ns_remove(struct nvme_ns *ns)
@@ -3867,7 +3919,7 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
if (!log)
return;
- if (nvme_get_log(ctrl, NVME_NSID_ALL, 0, NVME_LOG_FW_SLOT, log,
+ if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, log,
sizeof(*log), 0))
dev_warn(ctrl->device, "Get FW SLOT INFO log error\n");
kfree(log);
@@ -3987,6 +4039,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
nvme_queue_scan(ctrl);
nvme_start_queues(ctrl);
}
+ ctrl->created = true;
}
EXPORT_SYMBOL_GPL(nvme_start_ctrl);
@@ -3995,6 +4048,7 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
nvme_fault_inject_fini(&ctrl->fault_inject);
dev_pm_qos_hide_latency_tolerance(ctrl->device);
cdev_device_del(&ctrl->cdev, ctrl->device);
+ nvme_put_ctrl(ctrl);
}
EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
@@ -4077,6 +4131,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
if (ret)
goto out_release_instance;
+ nvme_get_ctrl(ctrl);
cdev_init(&ctrl->cdev, &nvme_dev_fops);
ctrl->cdev.owner = ops->module;
ret = cdev_device_add(&ctrl->cdev, ctrl->device);
@@ -4095,6 +4150,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
return 0;
out_free_name:
+ nvme_put_ctrl(ctrl);
kfree_const(ctrl->device->kobj.name);
out_release_instance:
ida_simple_remove(&nvme_instance_ida, ctrl->instance);
@@ -4299,6 +4355,7 @@ static void __exit nvme_core_exit(void)
destroy_workqueue(nvme_delete_wq);
destroy_workqueue(nvme_reset_wq);
destroy_workqueue(nvme_wq);
+ ida_destroy(&nvme_instance_ida);
}
MODULE_LICENSE("GPL");
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 74b8818..2a6c819 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -105,14 +105,14 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
int len = 0;
if (ctrl->opts->mask & NVMF_OPT_TRADDR)
- len += snprintf(buf, size, "traddr=%s", ctrl->opts->traddr);
+ len += scnprintf(buf, size, "traddr=%s", ctrl->opts->traddr);
if (ctrl->opts->mask & NVMF_OPT_TRSVCID)
- len += snprintf(buf + len, size - len, "%strsvcid=%s",
+ len += scnprintf(buf + len, size - len, "%strsvcid=%s",
(len) ? "," : "", ctrl->opts->trsvcid);
if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)
- len += snprintf(buf + len, size - len, "%shost_traddr=%s",
+ len += scnprintf(buf + len, size - len, "%shost_traddr=%s",
(len) ? "," : "", ctrl->opts->host_traddr);
- len += snprintf(buf + len, size - len, "\n");
+ len += scnprintf(buf + len, size - len, "\n");
return len;
}
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5a70ac3..a8bf2fb 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3181,10 +3181,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
goto fail_ctrl;
}
- nvme_get_ctrl(&ctrl->ctrl);
-
if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
- nvme_put_ctrl(&ctrl->ctrl);
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: failed to schedule initial connect\n",
ctrl->cnum);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 797c183..61bf875 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -64,17 +64,12 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
}
}
-void nvme_failover_req(struct request *req)
+bool nvme_failover_req(struct request *req)
{
struct nvme_ns *ns = req->q->queuedata;
u16 status = nvme_req(req)->status;
unsigned long flags;
- spin_lock_irqsave(&ns->head->requeue_lock, flags);
- blk_steal_bios(&ns->head->requeue_list, req);
- spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
- blk_mq_end_request(req, 0);
-
switch (status & 0x7ff) {
case NVME_SC_ANA_TRANSITION:
case NVME_SC_ANA_INACCESSIBLE:
@@ -103,15 +98,17 @@ void nvme_failover_req(struct request *req)
nvme_mpath_clear_current_path(ns);
break;
default:
- /*
- * Reset the controller for any non-ANA error as we don't know
- * what caused the error.
- */
- nvme_reset_ctrl(ns->ctrl);
- break;
+ /* This was a non-ANA error so follow the normal error path. */
+ return false;
}
+ spin_lock_irqsave(&ns->head->requeue_lock, flags);
+ blk_steal_bios(&ns->head->requeue_list, req);
+ spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
+ blk_mq_end_request(req, 0);
+
kblockd_schedule_work(&ns->head->requeue_work);
+ return true;
}
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
@@ -377,11 +374,10 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath)
return 0;
- q = blk_alloc_queue_node(GFP_KERNEL, ctrl->numa_node);
+ q = blk_alloc_queue(nvme_ns_head_make_request, ctrl->numa_node);
if (!q)
goto out;
q->queuedata = head;
- blk_queue_make_request(q, nvme_ns_head_make_request);
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
/* set to a default value for 512 until disk is validated */
blk_queue_logical_block_size(q, 512);
@@ -715,6 +711,7 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
}
INIT_WORK(&ctrl->ana_work, nvme_ana_work);
+ kfree(ctrl->ana_log_buf);
ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL);
if (!ctrl->ana_log_buf) {
error = -ENOMEM;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 1024fec..2e04a36 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -259,6 +259,7 @@ struct nvme_ctrl {
struct nvme_command ka_cmd;
struct work_struct fw_act_work;
unsigned long events;
+ bool created;
#ifdef CONFIG_NVME_MULTIPATH
/* asymmetric namespace access: */
@@ -550,7 +551,7 @@ void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
struct nvme_ctrl *ctrl, int *flags);
-void nvme_failover_req(struct request *req);
+bool nvme_failover_req(struct request *req);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
@@ -599,8 +600,9 @@ static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
}
-static inline void nvme_failover_req(struct request *req)
+static inline bool nvme_failover_req(struct request *req)
{
+ return false;
}
static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index da392b5..4e79e41 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -971,39 +971,25 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
nvme_end_request(req, cqe->status, cqe->result);
}
-static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end)
-{
- while (start != end) {
- nvme_handle_cqe(nvmeq, start);
- if (++start == nvmeq->q_depth)
- start = 0;
- }
-}
-
static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
{
- if (nvmeq->cq_head == nvmeq->q_depth - 1) {
+ if (++nvmeq->cq_head == nvmeq->q_depth) {
nvmeq->cq_head = 0;
- nvmeq->cq_phase = !nvmeq->cq_phase;
- } else {
- nvmeq->cq_head++;
+ nvmeq->cq_phase ^= 1;
}
}
-static inline int nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
- u16 *end, unsigned int tag)
+static inline int nvme_process_cq(struct nvme_queue *nvmeq)
{
int found = 0;
- *start = nvmeq->cq_head;
while (nvme_cqe_pending(nvmeq)) {
- if (tag == -1U || nvmeq->cqes[nvmeq->cq_head].command_id == tag)
- found++;
+ found++;
+ nvme_handle_cqe(nvmeq, nvmeq->cq_head);
nvme_update_cq_head(nvmeq);
}
- *end = nvmeq->cq_head;
- if (*start != *end)
+ if (found)
nvme_ring_cq_doorbell(nvmeq);
return found;
}
@@ -1012,21 +998,16 @@ static irqreturn_t nvme_irq(int irq, void *data)
{
struct nvme_queue *nvmeq = data;
irqreturn_t ret = IRQ_NONE;
- u16 start, end;
/*
* The rmb/wmb pair ensures we see all updates from a previous run of
* the irq handler, even if that was on another CPU.
*/
rmb();
- nvme_process_cq(nvmeq, &start, &end, -1);
+ if (nvme_process_cq(nvmeq))
+ ret = IRQ_HANDLED;
wmb();
- if (start != end) {
- nvme_complete_cqes(nvmeq, start, end);
- return IRQ_HANDLED;
- }
-
return ret;
}
@@ -1039,48 +1020,32 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
}
/*
- * Poll for completions any queue, including those not dedicated to polling.
+ * Poll for completions for any interrupt driven queue
* Can be called from any context.
*/
-static int nvme_poll_irqdisable(struct nvme_queue *nvmeq, unsigned int tag)
+static void nvme_poll_irqdisable(struct nvme_queue *nvmeq)
{
struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev);
- u16 start, end;
- int found;
- /*
- * For a poll queue we need to protect against the polling thread
- * using the CQ lock. For normal interrupt driven threads we have
- * to disable the interrupt to avoid racing with it.
- */
- if (test_bit(NVMEQ_POLLED, &nvmeq->flags)) {
- spin_lock(&nvmeq->cq_poll_lock);
- found = nvme_process_cq(nvmeq, &start, &end, tag);
- spin_unlock(&nvmeq->cq_poll_lock);
- } else {
- disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
- found = nvme_process_cq(nvmeq, &start, &end, tag);
- enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
- }
+ WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags));
- nvme_complete_cqes(nvmeq, start, end);
- return found;
+ disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
+ nvme_process_cq(nvmeq);
+ enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
}
static int nvme_poll(struct blk_mq_hw_ctx *hctx)
{
struct nvme_queue *nvmeq = hctx->driver_data;
- u16 start, end;
bool found;
if (!nvme_cqe_pending(nvmeq))
return 0;
spin_lock(&nvmeq->cq_poll_lock);
- found = nvme_process_cq(nvmeq, &start, &end, -1);
+ found = nvme_process_cq(nvmeq);
spin_unlock(&nvmeq->cq_poll_lock);
- nvme_complete_cqes(nvmeq, start, end);
return found;
}
@@ -1255,7 +1220,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
/*
* Did we miss an interrupt?
*/
- if (nvme_poll_irqdisable(nvmeq, req->tag)) {
+ if (test_bit(NVMEQ_POLLED, &nvmeq->flags))
+ nvme_poll(req->mq_hctx);
+ else
+ nvme_poll_irqdisable(nvmeq);
+
+ if (blk_mq_request_completed(req)) {
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, completion polled\n",
req->tag, nvmeq->qid);
@@ -1398,7 +1368,21 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
else
nvme_disable_ctrl(&dev->ctrl);
- nvme_poll_irqdisable(nvmeq, -1);
+ nvme_poll_irqdisable(nvmeq);
+}
+
+/*
+ * Called only on a device that has been disabled and after all other threads
+ * that can check this device's completion queues have synced. This is the
+ * last chance for the driver to see a natural completion before
+ * nvme_cancel_request() terminates all incomplete requests.
+ */
+static void nvme_reap_pending_cqes(struct nvme_dev *dev)
+{
+ int i;
+
+ for (i = dev->ctrl.queue_count - 1; i > 0; i--)
+ nvme_process_cq(&dev->queues[i]);
}
static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
@@ -2235,11 +2219,6 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
if (timeout == 0)
return false;
- /* handle any remaining CQEs */
- if (opcode == nvme_admin_delete_cq &&
- !test_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags))
- nvme_poll_irqdisable(nvmeq, -1);
-
sent--;
if (nr_queues)
goto retry;
@@ -2428,6 +2407,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
nvme_suspend_io_queues(dev);
nvme_suspend_queue(&dev->queues[0]);
nvme_pci_disable(dev);
+ nvme_reap_pending_cqes(dev);
blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
@@ -2490,13 +2470,13 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
struct nvme_dev *dev = to_nvme_dev(ctrl);
nvme_dbbuf_dma_free(dev);
- put_device(dev->dev);
nvme_free_tagset(dev);
if (dev->ctrl.admin_q)
blk_put_queue(dev->ctrl.admin_q);
- kfree(dev->queues);
free_opal_dev(dev->ctrl.opal_dev);
mempool_destroy(dev->iod_mempool);
+ put_device(dev->dev);
+ kfree(dev->queues);
kfree(dev);
}
@@ -2676,7 +2656,7 @@ static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
{
struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev);
- return snprintf(buf, size, "%s", dev_name(&pdev->dev));
+ return snprintf(buf, size, "%s\n", dev_name(&pdev->dev));
}
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
@@ -2734,6 +2714,18 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
(dmi_match(DMI_BOARD_NAME, "PRIME B350M-A") ||
dmi_match(DMI_BOARD_NAME, "PRIME Z370-A")))
return NVME_QUIRK_NO_APST;
+ } else if ((pdev->vendor == 0x144d && (pdev->device == 0xa801 ||
+ pdev->device == 0xa808 || pdev->device == 0xa809)) ||
+ (pdev->vendor == 0x1e0f && pdev->device == 0x0001)) {
+ /*
+ * Forcing to use host managed nvme power settings for
+ * lowest idle power with quick resume latency on
+ * Samsung and Toshiba SSDs based on suspend behavior
+ * on Coffee Lake board for LENOVO C640
+ */
+ if ((dmi_match(DMI_BOARD_VENDOR, "LENOVO")) &&
+ dmi_match(DMI_BOARD_NAME, "LNVNB161216"))
+ return NVME_QUIRK_SIMPLE_SUSPEND;
}
return 0;
@@ -2810,7 +2802,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
nvme_reset_ctrl(&dev->ctrl);
- nvme_get_ctrl(&dev->ctrl);
async_schedule(nvme_async_probe, dev);
return 0;
@@ -2882,10 +2873,9 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev);
nvme_free_queues(dev, 0);
- nvme_uninit_ctrl(&dev->ctrl);
nvme_release_prp_pools(dev);
nvme_dev_unmap(dev);
- nvme_put_ctrl(&dev->ctrl);
+ nvme_uninit_ctrl(&dev->ctrl);
}
#ifdef CONFIG_PM_SLEEP
@@ -3096,7 +3086,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
- { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
+ { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001),
+ .driver_data = NVME_QUIRK_SINGLE_VECTOR },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005),
.driver_data = NVME_QUIRK_SINGLE_VECTOR |
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 2a47c6c..86603d9 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -850,9 +850,11 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
if (new)
blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
out_free_async_qe:
- nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
- sizeof(struct nvme_command), DMA_TO_DEVICE);
- ctrl->async_event_sqe.data = NULL;
+ if (ctrl->async_event_sqe.data) {
+ nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+ sizeof(struct nvme_command), DMA_TO_DEVICE);
+ ctrl->async_event_sqe.data = NULL;
+ }
out_free_queue:
nvme_rdma_free_queue(&ctrl->queues[0]);
return error;
@@ -1022,8 +1024,13 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
if (!changed) {
- /* state change failure is ok if we're in DELETING state */
+ /*
+ * state change failure is ok if we're in DELETING state,
+ * unless we're during creation of a new controller to
+ * avoid races with teardown flow.
+ */
WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
+ WARN_ON_ONCE(new);
ret = -EINVAL;
goto destroy_io;
}
@@ -1088,7 +1095,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
return;
- queue_work(nvme_wq, &ctrl->err_work);
+ queue_work(nvme_reset_wq, &ctrl->err_work);
}
static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
@@ -2043,8 +2050,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
- nvme_get_ctrl(&ctrl->ctrl);
-
mutex_lock(&nvme_rdma_ctrl_mutex);
list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
mutex_unlock(&nvme_rdma_ctrl_mutex);
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 6d43b23a..0ef14f0 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -20,6 +20,16 @@
struct nvme_tcp_queue;
+/* Define the socket priority to use for connections were it is desirable
+ * that the NIC consider performing optimized packet processing or filtering.
+ * A non-zero value being sufficient to indicate general consideration of any
+ * possible optimization. Making it a module param allows for alternative
+ * values that may be unique for some NIC implementations.
+ */
+static int so_priority;
+module_param(so_priority, int, 0644);
+MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
+
enum nvme_tcp_send_state {
NVME_TCP_SEND_CMD_PDU = 0,
NVME_TCP_SEND_H2C_PDU,
@@ -422,7 +432,7 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
return;
- queue_work(nvme_wq, &to_tcp_ctrl(ctrl)->err_work);
+ queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work);
}
static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
@@ -1017,8 +1027,15 @@ static int nvme_tcp_try_send(struct nvme_tcp_queue *queue)
if (req->state == NVME_TCP_SEND_DDGST)
ret = nvme_tcp_try_send_ddgst(req);
done:
- if (ret == -EAGAIN)
+ if (ret == -EAGAIN) {
ret = 0;
+ } else if (ret < 0) {
+ dev_err(queue->ctrl->ctrl.device,
+ "failed to send request %d\n", ret);
+ if (ret != -EPIPE && ret != -ECONNRESET)
+ nvme_tcp_fail_request(queue->request);
+ nvme_tcp_done_send_req(queue);
+ }
return ret;
}
@@ -1049,20 +1066,16 @@ static void nvme_tcp_io_work(struct work_struct *w)
int result;
result = nvme_tcp_try_send(queue);
- if (result > 0) {
+ if (result > 0)
pending = true;
- } else if (unlikely(result < 0)) {
- dev_err(queue->ctrl->ctrl.device,
- "failed to send request %d\n", result);
- if (result != -EPIPE)
- nvme_tcp_fail_request(queue->request);
- nvme_tcp_done_send_req(queue);
- return;
- }
+ else if (unlikely(result < 0))
+ break;
result = nvme_tcp_try_recv(queue);
if (result > 0)
pending = true;
+ else if (unlikely(result < 0))
+ break;
if (!pending)
return;
@@ -1243,13 +1256,67 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
return ret;
}
+static bool nvme_tcp_admin_queue(struct nvme_tcp_queue *queue)
+{
+ return nvme_tcp_queue_id(queue) == 0;
+}
+
+static bool nvme_tcp_default_queue(struct nvme_tcp_queue *queue)
+{
+ struct nvme_tcp_ctrl *ctrl = queue->ctrl;
+ int qid = nvme_tcp_queue_id(queue);
+
+ return !nvme_tcp_admin_queue(queue) &&
+ qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT];
+}
+
+static bool nvme_tcp_read_queue(struct nvme_tcp_queue *queue)
+{
+ struct nvme_tcp_ctrl *ctrl = queue->ctrl;
+ int qid = nvme_tcp_queue_id(queue);
+
+ return !nvme_tcp_admin_queue(queue) &&
+ !nvme_tcp_default_queue(queue) &&
+ qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] +
+ ctrl->io_queues[HCTX_TYPE_READ];
+}
+
+static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue)
+{
+ struct nvme_tcp_ctrl *ctrl = queue->ctrl;
+ int qid = nvme_tcp_queue_id(queue);
+
+ return !nvme_tcp_admin_queue(queue) &&
+ !nvme_tcp_default_queue(queue) &&
+ !nvme_tcp_read_queue(queue) &&
+ qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] +
+ ctrl->io_queues[HCTX_TYPE_READ] +
+ ctrl->io_queues[HCTX_TYPE_POLL];
+}
+
+static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
+{
+ struct nvme_tcp_ctrl *ctrl = queue->ctrl;
+ int qid = nvme_tcp_queue_id(queue);
+ int n = 0;
+
+ if (nvme_tcp_default_queue(queue))
+ n = qid - 1;
+ else if (nvme_tcp_read_queue(queue))
+ n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 1;
+ else if (nvme_tcp_poll_queue(queue))
+ n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] -
+ ctrl->io_queues[HCTX_TYPE_READ] - 1;
+ queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
+}
+
static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
int qid, size_t queue_size)
{
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[qid];
struct linger sol = { .l_onoff = 1, .l_linger = 0 };
- int ret, opt, rcv_pdu_size, n;
+ int ret, opt, rcv_pdu_size;
queue->ctrl = ctrl;
INIT_LIST_HEAD(&queue->send_list);
@@ -1304,6 +1371,17 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
goto err_sock;
}
+ if (so_priority > 0) {
+ ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_PRIORITY,
+ (char *)&so_priority, sizeof(so_priority));
+ if (ret) {
+ dev_err(ctrl->ctrl.device,
+ "failed to set SO_PRIORITY sock opt, ret %d\n",
+ ret);
+ goto err_sock;
+ }
+ }
+
/* Set socket type of service */
if (nctrl->opts->tos >= 0) {
opt = nctrl->opts->tos;
@@ -1317,11 +1395,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
}
queue->sock->sk->sk_allocation = GFP_ATOMIC;
- if (!qid)
- n = 0;
- else
- n = (qid - 1) % num_online_cpus();
- queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
+ nvme_tcp_set_queue_io_cpu(queue);
queue->request = NULL;
queue->data_remaining = 0;
queue->ddgst_remaining = 0;
@@ -1856,8 +1930,13 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
}
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) {
- /* state change failure is ok if we're in DELETING state */
+ /*
+ * state change failure is ok if we're in DELETING state,
+ * unless we're during creation of a new controller to
+ * avoid races with teardown flow.
+ */
WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING);
+ WARN_ON_ONCE(new);
ret = -EINVAL;
goto destroy_io;
}
@@ -2354,8 +2433,6 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
- nvme_get_ctrl(&ctrl->ctrl);
-
mutex_lock(&nvme_tcp_ctrl_mutex);
list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list);
mutex_unlock(&nvme_tcp_ctrl_mutex);
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 72a7e41..9d6f75c 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -6,6 +6,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/rculist.h>
+#include <linux/part_stat.h>
#include <generated/utsrelease.h>
#include <asm/unaligned.h>
@@ -322,12 +323,25 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
}
+static void nvmet_id_set_model_number(struct nvme_id_ctrl *id,
+ struct nvmet_subsys *subsys)
+{
+ const char *model = NVMET_DEFAULT_CTRL_MODEL;
+ struct nvmet_subsys_model *subsys_model;
+
+ rcu_read_lock();
+ subsys_model = rcu_dereference(subsys->model);
+ if (subsys_model)
+ model = subsys_model->number;
+ memcpy_and_pad(id->mn, sizeof(id->mn), model, strlen(model), ' ');
+ rcu_read_unlock();
+}
+
static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ctrl *id;
u16 status = 0;
- const char model[] = "Linux";
id = kzalloc(sizeof(*id), GFP_KERNEL);
if (!id) {
@@ -342,7 +356,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
memset(id->sn, ' ', sizeof(id->sn));
bin2hex(id->sn, &ctrl->subsys->serial,
min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
- memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' ');
+ nvmet_id_set_model_number(id, ctrl->subsys);
memcpy_and_pad(id->fr, sizeof(id->fr),
UTS_RELEASE, strlen(UTS_RELEASE), ' ');
@@ -356,8 +370,12 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
/* we support multiple ports, multiples hosts and ANA: */
id->cmic = (1 << 0) | (1 << 1) | (1 << 3);
- /* no limit on data transfer sizes for now */
- id->mdts = 0;
+ /* Limit MDTS according to transport capability */
+ if (ctrl->ops->get_mdts)
+ id->mdts = ctrl->ops->get_mdts(ctrl);
+ else
+ id->mdts = 0;
+
id->cntlid = cpu_to_le16(ctrl->cntlid);
id->ver = cpu_to_le32(ctrl->subsys->ver);
@@ -720,13 +738,22 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
{
struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
+ u32 cdw11 = le32_to_cpu(req->cmd->common.cdw11);
u16 status = 0;
+ u16 nsqr;
+ u16 ncqr;
if (!nvmet_check_data_len(req, 0))
return;
switch (cdw10 & 0xff) {
case NVME_FEAT_NUM_QUEUES:
+ ncqr = (cdw11 >> 16) & 0xffff;
+ nsqr = cdw11 & 0xffff;
+ if (ncqr == 0xffff || nsqr == 0xffff) {
+ status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ break;
+ }
nvmet_set_result(req,
(subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16));
break;
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 98613a4..7aa1078 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -395,14 +395,12 @@ static ssize_t nvmet_ns_device_uuid_store(struct config_item *item,
struct nvmet_subsys *subsys = ns->subsys;
int ret = 0;
-
mutex_lock(&subsys->lock);
if (ns->enabled) {
ret = -EBUSY;
goto out_unlock;
}
-
if (uuid_parse(page, &ns->uuid))
ret = -EINVAL;
@@ -815,10 +813,10 @@ static ssize_t nvmet_subsys_attr_version_show(struct config_item *item,
(int)NVME_MAJOR(subsys->ver),
(int)NVME_MINOR(subsys->ver),
(int)NVME_TERTIARY(subsys->ver));
- else
- return snprintf(page, PAGE_SIZE, "%d.%d\n",
- (int)NVME_MAJOR(subsys->ver),
- (int)NVME_MINOR(subsys->ver));
+
+ return snprintf(page, PAGE_SIZE, "%d.%d\n",
+ (int)NVME_MAJOR(subsys->ver),
+ (int)NVME_MINOR(subsys->ver));
}
static ssize_t nvmet_subsys_attr_version_store(struct config_item *item,
@@ -828,7 +826,6 @@ static ssize_t nvmet_subsys_attr_version_store(struct config_item *item,
int major, minor, tertiary = 0;
int ret;
-
ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary);
if (ret != 2 && ret != 3)
return -EINVAL;
@@ -852,20 +849,151 @@ static ssize_t nvmet_subsys_attr_serial_show(struct config_item *item,
static ssize_t nvmet_subsys_attr_serial_store(struct config_item *item,
const char *page, size_t count)
{
- struct nvmet_subsys *subsys = to_subsys(item);
+ u64 serial;
+
+ if (sscanf(page, "%llx\n", &serial) != 1)
+ return -EINVAL;
down_write(&nvmet_config_sem);
- sscanf(page, "%llx\n", &subsys->serial);
+ to_subsys(item)->serial = serial;
up_write(&nvmet_config_sem);
return count;
}
CONFIGFS_ATTR(nvmet_subsys_, attr_serial);
+static ssize_t nvmet_subsys_attr_cntlid_min_show(struct config_item *item,
+ char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%u\n", to_subsys(item)->cntlid_min);
+}
+
+static ssize_t nvmet_subsys_attr_cntlid_min_store(struct config_item *item,
+ const char *page, size_t cnt)
+{
+ u16 cntlid_min;
+
+ if (sscanf(page, "%hu\n", &cntlid_min) != 1)
+ return -EINVAL;
+
+ if (cntlid_min == 0)
+ return -EINVAL;
+
+ down_write(&nvmet_config_sem);
+ if (cntlid_min >= to_subsys(item)->cntlid_max)
+ goto out_unlock;
+ to_subsys(item)->cntlid_min = cntlid_min;
+ up_write(&nvmet_config_sem);
+ return cnt;
+
+out_unlock:
+ up_write(&nvmet_config_sem);
+ return -EINVAL;
+}
+CONFIGFS_ATTR(nvmet_subsys_, attr_cntlid_min);
+
+static ssize_t nvmet_subsys_attr_cntlid_max_show(struct config_item *item,
+ char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%u\n", to_subsys(item)->cntlid_max);
+}
+
+static ssize_t nvmet_subsys_attr_cntlid_max_store(struct config_item *item,
+ const char *page, size_t cnt)
+{
+ u16 cntlid_max;
+
+ if (sscanf(page, "%hu\n", &cntlid_max) != 1)
+ return -EINVAL;
+
+ if (cntlid_max == 0)
+ return -EINVAL;
+
+ down_write(&nvmet_config_sem);
+ if (cntlid_max <= to_subsys(item)->cntlid_min)
+ goto out_unlock;
+ to_subsys(item)->cntlid_max = cntlid_max;
+ up_write(&nvmet_config_sem);
+ return cnt;
+
+out_unlock:
+ up_write(&nvmet_config_sem);
+ return -EINVAL;
+}
+CONFIGFS_ATTR(nvmet_subsys_, attr_cntlid_max);
+
+static ssize_t nvmet_subsys_attr_model_show(struct config_item *item,
+ char *page)
+{
+ struct nvmet_subsys *subsys = to_subsys(item);
+ struct nvmet_subsys_model *subsys_model;
+ char *model = NVMET_DEFAULT_CTRL_MODEL;
+ int ret;
+
+ rcu_read_lock();
+ subsys_model = rcu_dereference(subsys->model);
+ if (subsys_model)
+ model = subsys_model->number;
+ ret = snprintf(page, PAGE_SIZE, "%s\n", model);
+ rcu_read_unlock();
+
+ return ret;
+}
+
+/* See Section 1.5 of NVMe 1.4 */
+static bool nvmet_is_ascii(const char c)
+{
+ return c >= 0x20 && c <= 0x7e;
+}
+
+static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_subsys *subsys = to_subsys(item);
+ struct nvmet_subsys_model *new_model;
+ char *new_model_number;
+ int pos = 0, len;
+
+ len = strcspn(page, "\n");
+ if (!len)
+ return -EINVAL;
+
+ for (pos = 0; pos < len; pos++) {
+ if (!nvmet_is_ascii(page[pos]))
+ return -EINVAL;
+ }
+
+ new_model_number = kstrndup(page, len, GFP_KERNEL);
+ if (!new_model_number)
+ return -ENOMEM;
+
+ new_model = kzalloc(sizeof(*new_model) + len + 1, GFP_KERNEL);
+ if (!new_model) {
+ kfree(new_model_number);
+ return -ENOMEM;
+ }
+ memcpy(new_model->number, new_model_number, len);
+
+ down_write(&nvmet_config_sem);
+ mutex_lock(&subsys->lock);
+ new_model = rcu_replace_pointer(subsys->model, new_model,
+ mutex_is_locked(&subsys->lock));
+ mutex_unlock(&subsys->lock);
+ up_write(&nvmet_config_sem);
+
+ kfree_rcu(new_model, rcuhead);
+
+ return count;
+}
+CONFIGFS_ATTR(nvmet_subsys_, attr_model);
+
static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_allow_any_host,
&nvmet_subsys_attr_attr_version,
&nvmet_subsys_attr_attr_serial,
+ &nvmet_subsys_attr_attr_cntlid_min,
+ &nvmet_subsys_attr_attr_cntlid_max,
+ &nvmet_subsys_attr_attr_model,
NULL,
};
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 576de77..b685f99 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1289,8 +1289,11 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
if (!ctrl->sqs)
goto out_free_cqs;
+ if (subsys->cntlid_min > subsys->cntlid_max)
+ goto out_free_cqs;
+
ret = ida_simple_get(&cntlid_ida,
- NVME_CNTLID_MIN, NVME_CNTLID_MAX,
+ subsys->cntlid_min, subsys->cntlid_max,
GFP_KERNEL);
if (ret < 0) {
status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
@@ -1438,7 +1441,8 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
kfree(subsys);
return ERR_PTR(-ENOMEM);
}
-
+ subsys->cntlid_min = NVME_CNTLID_MIN;
+ subsys->cntlid_max = NVME_CNTLID_MAX;
kref_init(&subsys->ref);
mutex_init(&subsys->lock);
@@ -1457,6 +1461,7 @@ static void nvmet_subsys_free(struct kref *ref)
WARN_ON_ONCE(!list_empty(&subsys->namespaces));
kfree(subsys->subsysnqn);
+ kfree_rcu(subsys->model, rcuhead);
kfree(subsys);
}
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 4df4ebd..0d54e73 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -485,7 +485,6 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
out_disable:
dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
nvme_uninit_ctrl(&ctrl->ctrl);
- nvme_put_ctrl(&ctrl->ctrl);
}
static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
@@ -618,8 +617,6 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
dev_info(ctrl->ctrl.device,
"new ctrl: \"%s\"\n", ctrl->ctrl.opts->subsysnqn);
- nvme_get_ctrl(&ctrl->ctrl);
-
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index eda28b2..421dff3 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -23,6 +23,7 @@
#define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128
#define NVMET_NO_ERROR_LOC ((u16)-1)
+#define NVMET_DEFAULT_CTRL_MODEL "Linux"
/*
* Supported optional AENs:
@@ -202,6 +203,11 @@ struct nvmet_ctrl {
struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS];
};
+struct nvmet_subsys_model {
+ struct rcu_head rcuhead;
+ char number[];
+};
+
struct nvmet_subsys {
enum nvme_subsys_type type;
@@ -211,6 +217,8 @@ struct nvmet_subsys {
struct list_head namespaces;
unsigned int nr_namespaces;
unsigned int max_nsid;
+ u16 cntlid_min;
+ u16 cntlid_max;
struct list_head ctrls;
@@ -227,6 +235,8 @@ struct nvmet_subsys {
struct config_group namespaces_group;
struct config_group allowed_hosts_group;
+
+ struct nvmet_subsys_model __rcu *model;
};
static inline struct nvmet_subsys *to_subsys(struct config_item *item)
@@ -279,6 +289,7 @@ struct nvmet_fabrics_ops {
struct nvmet_port *port, char *traddr);
u16 (*install_queue)(struct nvmet_sq *nvme_sq);
void (*discovery_chg)(struct nvmet_port *port);
+ u8 (*get_mdts)(const struct nvmet_ctrl *ctrl);
};
#define NVMET_MAX_INLINE_BIOVEC 8
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 37d262a..9e1b8c6 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -31,6 +31,9 @@
#define NVMET_RDMA_MAX_INLINE_SGE 4
#define NVMET_RDMA_MAX_INLINE_DATA_SIZE max_t(int, SZ_16K, PAGE_SIZE)
+/* Assume mpsmin == device_page_size == 4KB */
+#define NVMET_RDMA_MAX_MDTS 8
+
struct nvmet_rdma_cmd {
struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1];
struct ib_cqe cqe;
@@ -975,7 +978,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
{
struct ib_qp_init_attr qp_attr;
struct nvmet_rdma_device *ndev = queue->dev;
- int comp_vector, nr_cqe, ret, i;
+ int comp_vector, nr_cqe, ret, i, factor;
/*
* Spread the io queues across completion vectors,
@@ -1008,7 +1011,9 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
qp_attr.qp_type = IB_QPT_RC;
/* +1 for drain */
qp_attr.cap.max_send_wr = queue->send_queue_size + 1;
- qp_attr.cap.max_rdma_ctxs = queue->send_queue_size;
+ factor = rdma_rw_mr_factor(ndev->device, queue->cm_id->port_num,
+ 1 << NVMET_RDMA_MAX_MDTS);
+ qp_attr.cap.max_rdma_ctxs = queue->send_queue_size * factor;
qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd,
ndev->device->attrs.max_send_sge);
@@ -1602,6 +1607,11 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
}
}
+static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl)
+{
+ return NVMET_RDMA_MAX_MDTS;
+}
+
static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.owner = THIS_MODULE,
.type = NVMF_TRTYPE_RDMA,
@@ -1612,6 +1622,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.queue_response = nvmet_rdma_queue_response,
.delete_ctrl = nvmet_rdma_delete_ctrl,
.disc_traddr = nvmet_rdma_disc_port_addr,
+ .get_mdts = nvmet_rdma_get_mdts,
};
static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index af674fc..f0da04e 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -19,6 +19,16 @@
#define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE)
+/* Define the socket priority to use for connections were it is desirable
+ * that the NIC consider performing optimized packet processing or filtering.
+ * A non-zero value being sufficient to indicate general consideration of any
+ * possible optimization. Making it a module param allows for alternative
+ * values that may be unique for some NIC implementations.
+ */
+static int so_priority;
+module_param(so_priority, int, 0644);
+MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority");
+
#define NVMET_TCP_RECV_BUDGET 8
#define NVMET_TCP_SEND_BUDGET 8
#define NVMET_TCP_IO_WORK_BUDGET 64
@@ -515,7 +525,7 @@ static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
return 1;
}
-static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd)
+static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
{
struct nvmet_tcp_queue *queue = cmd->queue;
int ret;
@@ -523,9 +533,15 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd)
while (cmd->cur_sg) {
struct page *page = sg_page(cmd->cur_sg);
u32 left = cmd->cur_sg->length - cmd->offset;
+ int flags = MSG_DONTWAIT;
+
+ if ((!last_in_batch && cmd->queue->send_list_len) ||
+ cmd->wbytes_done + left < cmd->req.transfer_len ||
+ queue->data_digest || !queue->nvme_sq.sqhd_disabled)
+ flags |= MSG_MORE;
ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset,
- left, MSG_DONTWAIT | MSG_MORE);
+ left, flags);
if (ret <= 0)
return ret;
@@ -616,7 +632,7 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
return 1;
}
-static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd)
+static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
{
struct nvmet_tcp_queue *queue = cmd->queue;
struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
@@ -626,6 +642,9 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd)
};
int ret;
+ if (!last_in_batch && cmd->queue->send_list_len)
+ msg.msg_flags |= MSG_MORE;
+
ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
if (unlikely(ret <= 0))
return ret;
@@ -660,13 +679,13 @@ static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue,
}
if (cmd->state == NVMET_TCP_SEND_DATA) {
- ret = nvmet_try_send_data(cmd);
+ ret = nvmet_try_send_data(cmd, last_in_batch);
if (ret <= 0)
goto done_send;
}
if (cmd->state == NVMET_TCP_SEND_DDGST) {
- ret = nvmet_try_send_ddgst(cmd);
+ ret = nvmet_try_send_ddgst(cmd, last_in_batch);
if (ret <= 0)
goto done_send;
}
@@ -788,7 +807,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
icresp->hdr.pdo = 0;
icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen);
icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0);
- icresp->maxdata = cpu_to_le32(0xffff); /* FIXME: support r2t */
+ icresp->maxdata = cpu_to_le32(0x400000); /* 16M arbitrary limit */
icresp->cpda = 0;
if (queue->hdr_digest)
icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE;
@@ -1433,6 +1452,13 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
if (ret)
return ret;
+ if (so_priority > 0) {
+ ret = kernel_setsockopt(sock, SOL_SOCKET, SO_PRIORITY,
+ (char *)&so_priority, sizeof(so_priority));
+ if (ret)
+ return ret;
+ }
+
/* Set socket type of service */
if (inet->rcv_tos > 0) {
int tos = inet->rcv_tos;
@@ -1622,6 +1648,15 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
goto err_sock;
}
+ if (so_priority > 0) {
+ ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_PRIORITY,
+ (char *)&so_priority, sizeof(so_priority));
+ if (ret) {
+ pr_err("failed to set SO_PRIORITY sock opt %d\n", ret);
+ goto err_sock;
+ }
+ }
+
ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr,
sizeof(port->addr));
if (ret) {
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 8270bbf..9f982c0 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -306,6 +306,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
rc = of_mdiobus_register_phy(mdio, child, addr);
if (rc && rc != -ENODEV)
goto unregister;
+ break;
}
}
}
diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c
index d20aabc..3a10e67 100644
--- a/drivers/pci/controller/pcie-brcmstb.c
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -670,7 +670,7 @@ static inline int brcm_pcie_get_rc_bar2_size_and_offset(struct brcm_pcie *pcie,
* outbound memory @ 3GB). So instead it will start at the 1x
* multiple of its size
*/
- if (!*rc_bar2_size || *rc_bar2_offset % *rc_bar2_size ||
+ if (!*rc_bar2_size || (*rc_bar2_offset & (*rc_bar2_size - 1)) ||
(*rc_bar2_offset < SZ_4G && *rc_bar2_offset > SZ_2G)) {
dev_err(dev, "Invalid rc_bar2_offset/size: size 0x%llx, off 0x%llx\n",
*rc_bar2_size, *rc_bar2_offset);
diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index acce878..f5c7a84 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -24,8 +24,6 @@ static int arm_pmu_acpi_register_irq(int cpu)
int gsi, trigger;
gicc = acpi_cpu_get_madt_gicc(cpu);
- if (WARN_ON(!gicc))
- return -EINVAL;
gsi = gicc->performance_interrupt;
@@ -64,11 +62,10 @@ static void arm_pmu_acpi_unregister_irq(int cpu)
int gsi;
gicc = acpi_cpu_get_madt_gicc(cpu);
- if (!gicc)
- return;
gsi = gicc->performance_interrupt;
- acpi_unregister_gsi(gsi);
+ if (gsi)
+ acpi_unregister_gsi(gsi);
}
#if IS_ENABLED(CONFIG_ARM_SPE_PMU)
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index d704ecc..f01a57e 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -771,7 +771,7 @@ static int smmu_pmu_probe(struct platform_device *pdev)
smmu_pmu->reloc_base = smmu_pmu->reg_base;
}
- irq = platform_get_irq(pdev, 0);
+ irq = platform_get_irq_optional(pdev, 0);
if (irq > 0)
smmu_pmu->irq = irq;
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 95dca2c..90884d1 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -388,9 +388,10 @@ static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
if (enable) {
/*
- * must disable first, then enable again
- * otherwise, cycle counter will not work
- * if previous state is enabled.
+ * cycle counter is special which should firstly write 0 then
+ * write 1 into CLEAR bit to clear it. Other counters only
+ * need write 0 into CLEAR bit and it turns out to be 1 by
+ * hardware. Below enable flow is harmless for all counters.
*/
writel(0, pmu->base + reg);
val = CNTL_EN | CNTL_CLEAR;
@@ -398,7 +399,8 @@ static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
writel(val, pmu->base + reg);
} else {
/* Disable counter */
- writel(0, pmu->base + reg);
+ val = readl_relaxed(pmu->base + reg) & CNTL_EN_MASK;
+ writel(val, pmu->base + reg);
}
}
diff --git a/drivers/phy/allwinner/phy-sun50i-usb3.c b/drivers/phy/allwinner/phy-sun50i-usb3.c
index 1169f3e..b1c04f71 100644
--- a/drivers/phy/allwinner/phy-sun50i-usb3.c
+++ b/drivers/phy/allwinner/phy-sun50i-usb3.c
@@ -49,7 +49,7 @@
#define SUNXI_LOS_BIAS(n) ((n) << 3)
#define SUNXI_LOS_BIAS_MASK GENMASK(5, 3)
#define SUNXI_TXVBOOSTLVL(n) ((n) << 0)
-#define SUNXI_TXVBOOSTLVL_MASK GENMASK(0, 2)
+#define SUNXI_TXVBOOSTLVL_MASK GENMASK(2, 0)
struct sun50i_usb3_phy {
struct phy *phy;
diff --git a/drivers/phy/broadcom/phy-brcm-sata.c b/drivers/phy/broadcom/phy-brcm-sata.c
index 4710cfc..18251f2 100644
--- a/drivers/phy/broadcom/phy-brcm-sata.c
+++ b/drivers/phy/broadcom/phy-brcm-sata.c
@@ -186,29 +186,6 @@ enum sata_phy_ctrl_regs {
PHY_CTRL_1_RESET = BIT(0),
};
-static inline void __iomem *brcm_sata_pcb_base(struct brcm_sata_port *port)
-{
- struct brcm_sata_phy *priv = port->phy_priv;
- u32 size = 0;
-
- switch (priv->version) {
- case BRCM_SATA_PHY_STB_16NM:
- case BRCM_SATA_PHY_STB_28NM:
- case BRCM_SATA_PHY_IPROC_NS2:
- case BRCM_SATA_PHY_DSL_28NM:
- size = SATA_PCB_REG_28NM_SPACE_SIZE;
- break;
- case BRCM_SATA_PHY_STB_40NM:
- size = SATA_PCB_REG_40NM_SPACE_SIZE;
- break;
- default:
- dev_err(priv->dev, "invalid phy version\n");
- break;
- }
-
- return priv->phy_base + (port->portnum * size);
-}
-
static inline void __iomem *brcm_sata_ctrl_base(struct brcm_sata_port *port)
{
struct brcm_sata_phy *priv = port->phy_priv;
@@ -226,19 +203,34 @@ static inline void __iomem *brcm_sata_ctrl_base(struct brcm_sata_port *port)
return priv->ctrl_base + (port->portnum * size);
}
-static void brcm_sata_phy_wr(void __iomem *pcb_base, u32 bank,
+static void brcm_sata_phy_wr(struct brcm_sata_port *port, u32 bank,
u32 ofs, u32 msk, u32 value)
{
+ struct brcm_sata_phy *priv = port->phy_priv;
+ void __iomem *pcb_base = priv->phy_base;
u32 tmp;
+ if (priv->version == BRCM_SATA_PHY_STB_40NM)
+ bank += (port->portnum * SATA_PCB_REG_40NM_SPACE_SIZE);
+ else
+ pcb_base += (port->portnum * SATA_PCB_REG_28NM_SPACE_SIZE);
+
writel(bank, pcb_base + SATA_PCB_BANK_OFFSET);
tmp = readl(pcb_base + SATA_PCB_REG_OFFSET(ofs));
tmp = (tmp & msk) | value;
writel(tmp, pcb_base + SATA_PCB_REG_OFFSET(ofs));
}
-static u32 brcm_sata_phy_rd(void __iomem *pcb_base, u32 bank, u32 ofs)
+static u32 brcm_sata_phy_rd(struct brcm_sata_port *port, u32 bank, u32 ofs)
{
+ struct brcm_sata_phy *priv = port->phy_priv;
+ void __iomem *pcb_base = priv->phy_base;
+
+ if (priv->version == BRCM_SATA_PHY_STB_40NM)
+ bank += (port->portnum * SATA_PCB_REG_40NM_SPACE_SIZE);
+ else
+ pcb_base += (port->portnum * SATA_PCB_REG_28NM_SPACE_SIZE);
+
writel(bank, pcb_base + SATA_PCB_BANK_OFFSET);
return readl(pcb_base + SATA_PCB_REG_OFFSET(ofs));
}
@@ -250,16 +242,15 @@ static u32 brcm_sata_phy_rd(void __iomem *pcb_base, u32 bank, u32 ofs)
static void brcm_stb_sata_ssc_init(struct brcm_sata_port *port)
{
- void __iomem *base = brcm_sata_pcb_base(port);
struct brcm_sata_phy *priv = port->phy_priv;
u32 tmp;
/* override the TX spread spectrum setting */
tmp = TXPMD_CONTROL1_TX_SSC_EN_FRC_VAL | TXPMD_CONTROL1_TX_SSC_EN_FRC;
- brcm_sata_phy_wr(base, TXPMD_REG_BANK, TXPMD_CONTROL1, ~tmp, tmp);
+ brcm_sata_phy_wr(port, TXPMD_REG_BANK, TXPMD_CONTROL1, ~tmp, tmp);
/* set fixed min freq */
- brcm_sata_phy_wr(base, TXPMD_REG_BANK, TXPMD_TX_FREQ_CTRL_CONTROL2,
+ brcm_sata_phy_wr(port, TXPMD_REG_BANK, TXPMD_TX_FREQ_CTRL_CONTROL2,
~TXPMD_TX_FREQ_CTRL_CONTROL2_FMIN_MASK,
STB_FMIN_VAL_DEFAULT);
@@ -271,7 +262,7 @@ static void brcm_stb_sata_ssc_init(struct brcm_sata_port *port)
tmp = STB_FMAX_VAL_DEFAULT;
}
- brcm_sata_phy_wr(base, TXPMD_REG_BANK, TXPMD_TX_FREQ_CTRL_CONTROL3,
+ brcm_sata_phy_wr(port, TXPMD_REG_BANK, TXPMD_TX_FREQ_CTRL_CONTROL3,
~TXPMD_TX_FREQ_CTRL_CONTROL3_FMAX_MASK, tmp);
}
@@ -280,7 +271,6 @@ static void brcm_stb_sata_ssc_init(struct brcm_sata_port *port)
static int brcm_stb_sata_rxaeq_init(struct brcm_sata_port *port)
{
- void __iomem *base = brcm_sata_pcb_base(port);
u32 tmp = 0, reg = 0;
switch (port->rxaeq_mode) {
@@ -301,8 +291,8 @@ static int brcm_stb_sata_rxaeq_init(struct brcm_sata_port *port)
break;
}
- brcm_sata_phy_wr(base, AEQRX_REG_BANK_0, reg, ~tmp, tmp);
- brcm_sata_phy_wr(base, AEQRX_REG_BANK_1, reg, ~tmp, tmp);
+ brcm_sata_phy_wr(port, AEQRX_REG_BANK_0, reg, ~tmp, tmp);
+ brcm_sata_phy_wr(port, AEQRX_REG_BANK_1, reg, ~tmp, tmp);
return 0;
}
@@ -316,18 +306,17 @@ static int brcm_stb_sata_init(struct brcm_sata_port *port)
static int brcm_stb_sata_16nm_ssc_init(struct brcm_sata_port *port)
{
- void __iomem *base = brcm_sata_pcb_base(port);
u32 tmp, value;
/* Reduce CP tail current to 1/16th of its default value */
- brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL6, 0, 0x141);
+ brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL6, 0, 0x141);
/* Turn off CP tail current boost */
- brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL8, 0, 0xc006);
+ brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL8, 0, 0xc006);
/* Set a specific AEQ equalizer value */
tmp = AEQ_FRC_EQ_FORCE_VAL | AEQ_FRC_EQ_FORCE;
- brcm_sata_phy_wr(base, AEQRX_REG_BANK_0, AEQ_FRC_EQ,
+ brcm_sata_phy_wr(port, AEQRX_REG_BANK_0, AEQ_FRC_EQ,
~(tmp | AEQ_RFZ_FRC_VAL |
AEQ_FRC_EQ_VAL_MASK << AEQ_FRC_EQ_VAL_SHIFT),
tmp | 32 << AEQ_FRC_EQ_VAL_SHIFT);
@@ -337,7 +326,7 @@ static int brcm_stb_sata_16nm_ssc_init(struct brcm_sata_port *port)
value = 0x52;
else
value = 0;
- brcm_sata_phy_wr(base, RXPMD_REG_BANK, RXPMD_RX_CDR_CONTROL1,
+ brcm_sata_phy_wr(port, RXPMD_REG_BANK, RXPMD_RX_CDR_CONTROL1,
~RXPMD_RX_PPM_VAL_MASK, value);
/* Set proportional loop bandwith Gen1/2/3 */
@@ -352,7 +341,7 @@ static int brcm_stb_sata_16nm_ssc_init(struct brcm_sata_port *port)
value = 1 << RXPMD_G1_CDR_PROP_BW_SHIFT |
1 << RXPMD_G2_CDR_PROP_BW_SHIFT |
1 << RXPMD_G3_CDR_PROB_BW_SHIFT;
- brcm_sata_phy_wr(base, RXPMD_REG_BANK, RXPMD_RX_CDR_CDR_PROP_BW, ~tmp,
+ brcm_sata_phy_wr(port, RXPMD_REG_BANK, RXPMD_RX_CDR_CDR_PROP_BW, ~tmp,
value);
/* Set CDR integral loop acquisition bandwidth for Gen1/2/3 */
@@ -365,7 +354,7 @@ static int brcm_stb_sata_16nm_ssc_init(struct brcm_sata_port *port)
1 << RXPMD_G3_CDR_ACQ_INT_BW_SHIFT;
else
value = 0;
- brcm_sata_phy_wr(base, RXPMD_REG_BANK, RXPMD_RX_CDR_CDR_ACQ_INTEG_BW,
+ brcm_sata_phy_wr(port, RXPMD_REG_BANK, RXPMD_RX_CDR_CDR_ACQ_INTEG_BW,
~tmp, value);
/* Set CDR integral loop locking bandwidth to 1 for Gen 1/2/3 */
@@ -378,7 +367,7 @@ static int brcm_stb_sata_16nm_ssc_init(struct brcm_sata_port *port)
1 << RXPMD_G3_CDR_LOCK_INT_BW_SHIFT;
else
value = 0;
- brcm_sata_phy_wr(base, RXPMD_REG_BANK, RXPMD_RX_CDR_CDR_LOCK_INTEG_BW,
+ brcm_sata_phy_wr(port, RXPMD_REG_BANK, RXPMD_RX_CDR_CDR_LOCK_INTEG_BW,
~tmp, value);
/* Set no guard band and clamp CDR */
@@ -387,11 +376,11 @@ static int brcm_stb_sata_16nm_ssc_init(struct brcm_sata_port *port)
value = 0x51;
else
value = 0;
- brcm_sata_phy_wr(base, RXPMD_REG_BANK, RXPMD_RX_FREQ_MON_CONTROL1,
+ brcm_sata_phy_wr(port, RXPMD_REG_BANK, RXPMD_RX_FREQ_MON_CONTROL1,
~tmp, RXPMD_MON_CORRECT_EN | value);
/* Turn on/off SSC */
- brcm_sata_phy_wr(base, TX_REG_BANK, TX_ACTRL5, ~TX_ACTRL5_SSC_EN,
+ brcm_sata_phy_wr(port, TX_REG_BANK, TX_ACTRL5, ~TX_ACTRL5_SSC_EN,
port->ssc_en ? TX_ACTRL5_SSC_EN : 0);
return 0;
@@ -411,7 +400,6 @@ static int brcm_ns2_sata_init(struct brcm_sata_port *port)
{
int try;
unsigned int val;
- void __iomem *base = brcm_sata_pcb_base(port);
void __iomem *ctrl_base = brcm_sata_ctrl_base(port);
struct device *dev = port->phy_priv->dev;
@@ -421,24 +409,24 @@ static int brcm_ns2_sata_init(struct brcm_sata_port *port)
val |= (0x4 << OOB_CTRL1_BURST_MIN_SHIFT);
val |= (0x9 << OOB_CTRL1_WAKE_IDLE_MAX_SHIFT);
val |= (0x3 << OOB_CTRL1_WAKE_IDLE_MIN_SHIFT);
- brcm_sata_phy_wr(base, OOB_REG_BANK, OOB_CTRL1, 0x0, val);
+ brcm_sata_phy_wr(port, OOB_REG_BANK, OOB_CTRL1, 0x0, val);
val = 0x0;
val |= (0x1b << OOB_CTRL2_RESET_IDLE_MAX_SHIFT);
val |= (0x2 << OOB_CTRL2_BURST_CNT_SHIFT);
val |= (0x9 << OOB_CTRL2_RESET_IDLE_MIN_SHIFT);
- brcm_sata_phy_wr(base, OOB_REG_BANK, OOB_CTRL2, 0x0, val);
+ brcm_sata_phy_wr(port, OOB_REG_BANK, OOB_CTRL2, 0x0, val);
/* Configure PHY PLL register bank 1 */
val = NS2_PLL1_ACTRL2_MAGIC;
- brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL2, 0x0, val);
+ brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL2, 0x0, val);
val = NS2_PLL1_ACTRL3_MAGIC;
- brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL3, 0x0, val);
+ brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL3, 0x0, val);
val = NS2_PLL1_ACTRL4_MAGIC;
- brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL4, 0x0, val);
+ brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL4, 0x0, val);
/* Configure PHY BLOCK0 register bank */
/* Set oob_clk_sel to refclk/2 */
- brcm_sata_phy_wr(base, BLOCK0_REG_BANK, BLOCK0_SPARE,
+ brcm_sata_phy_wr(port, BLOCK0_REG_BANK, BLOCK0_SPARE,
~BLOCK0_SPARE_OOB_CLK_SEL_MASK,
BLOCK0_SPARE_OOB_CLK_SEL_REFBY2);
@@ -451,7 +439,7 @@ static int brcm_ns2_sata_init(struct brcm_sata_port *port)
/* Wait for PHY PLL lock by polling pll_lock bit */
try = 50;
while (try) {
- val = brcm_sata_phy_rd(base, BLOCK0_REG_BANK,
+ val = brcm_sata_phy_rd(port, BLOCK0_REG_BANK,
BLOCK0_XGXSSTATUS);
if (val & BLOCK0_XGXSSTATUS_PLL_LOCK)
break;
@@ -471,9 +459,7 @@ static int brcm_ns2_sata_init(struct brcm_sata_port *port)
static int brcm_nsp_sata_init(struct brcm_sata_port *port)
{
- struct brcm_sata_phy *priv = port->phy_priv;
struct device *dev = port->phy_priv->dev;
- void __iomem *base = priv->phy_base;
unsigned int oob_bank;
unsigned int val, try;
@@ -490,36 +476,36 @@ static int brcm_nsp_sata_init(struct brcm_sata_port *port)
val |= (0x06 << OOB_CTRL1_BURST_MIN_SHIFT);
val |= (0x0f << OOB_CTRL1_WAKE_IDLE_MAX_SHIFT);
val |= (0x06 << OOB_CTRL1_WAKE_IDLE_MIN_SHIFT);
- brcm_sata_phy_wr(base, oob_bank, OOB_CTRL1, 0x0, val);
+ brcm_sata_phy_wr(port, oob_bank, OOB_CTRL1, 0x0, val);
val = 0x0;
val |= (0x2e << OOB_CTRL2_RESET_IDLE_MAX_SHIFT);
val |= (0x02 << OOB_CTRL2_BURST_CNT_SHIFT);
val |= (0x16 << OOB_CTRL2_RESET_IDLE_MIN_SHIFT);
- brcm_sata_phy_wr(base, oob_bank, OOB_CTRL2, 0x0, val);
+ brcm_sata_phy_wr(port, oob_bank, OOB_CTRL2, 0x0, val);
- brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_ACTRL2,
+ brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_ACTRL2,
~(PLL_ACTRL2_SELDIV_MASK << PLL_ACTRL2_SELDIV_SHIFT),
0x0c << PLL_ACTRL2_SELDIV_SHIFT);
- brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_CAP_CONTROL,
+ brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_CAP_CONTROL,
0xff0, 0x4f0);
val = PLLCONTROL_0_FREQ_DET_RESTART | PLLCONTROL_0_FREQ_MONITOR;
- brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0,
+ brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0,
~val, val);
val = PLLCONTROL_0_SEQ_START;
- brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0,
+ brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0,
~val, 0);
mdelay(10);
- brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0,
+ brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0,
~val, val);
/* Wait for pll_seq_done bit */
try = 50;
while (--try) {
- val = brcm_sata_phy_rd(base, BLOCK0_REG_BANK,
+ val = brcm_sata_phy_rd(port, BLOCK0_REG_BANK,
BLOCK0_XGXSSTATUS);
if (val & BLOCK0_XGXSSTATUS_PLL_LOCK)
break;
@@ -546,27 +532,25 @@ static int brcm_nsp_sata_init(struct brcm_sata_port *port)
static int brcm_sr_sata_init(struct brcm_sata_port *port)
{
- struct brcm_sata_phy *priv = port->phy_priv;
struct device *dev = port->phy_priv->dev;
- void __iomem *base = priv->phy_base;
unsigned int val, try;
/* Configure PHY PLL register bank 1 */
val = SR_PLL1_ACTRL2_MAGIC;
- brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL2, 0x0, val);
+ brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL2, 0x0, val);
val = SR_PLL1_ACTRL3_MAGIC;
- brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL3, 0x0, val);
+ brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL3, 0x0, val);
val = SR_PLL1_ACTRL4_MAGIC;
- brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL4, 0x0, val);
+ brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL4, 0x0, val);
/* Configure PHY PLL register bank 0 */
val = SR_PLL0_ACTRL6_MAGIC;
- brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_ACTRL6, 0x0, val);
+ brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_ACTRL6, 0x0, val);
/* Wait for PHY PLL lock by polling pll_lock bit */
try = 50;
do {
- val = brcm_sata_phy_rd(base, BLOCK0_REG_BANK,
+ val = brcm_sata_phy_rd(port, BLOCK0_REG_BANK,
BLOCK0_XGXSSTATUS);
if (val & BLOCK0_XGXSSTATUS_PLL_LOCK)
break;
@@ -581,7 +565,7 @@ static int brcm_sr_sata_init(struct brcm_sata_port *port)
}
/* Invert Tx polarity */
- brcm_sata_phy_wr(base, TX_REG_BANK, TX_ACTRL0,
+ brcm_sata_phy_wr(port, TX_REG_BANK, TX_ACTRL0,
~TX_ACTRL0_TXPOL_FLIP, TX_ACTRL0_TXPOL_FLIP);
/* Configure OOB control to handle 100MHz reference clock */
@@ -589,52 +573,51 @@ static int brcm_sr_sata_init(struct brcm_sata_port *port)
(0x4 << OOB_CTRL1_BURST_MIN_SHIFT) |
(0x8 << OOB_CTRL1_WAKE_IDLE_MAX_SHIFT) |
(0x3 << OOB_CTRL1_WAKE_IDLE_MIN_SHIFT));
- brcm_sata_phy_wr(base, OOB_REG_BANK, OOB_CTRL1, 0x0, val);
+ brcm_sata_phy_wr(port, OOB_REG_BANK, OOB_CTRL1, 0x0, val);
val = ((0x1b << OOB_CTRL2_RESET_IDLE_MAX_SHIFT) |
(0x2 << OOB_CTRL2_BURST_CNT_SHIFT) |
(0x9 << OOB_CTRL2_RESET_IDLE_MIN_SHIFT));
- brcm_sata_phy_wr(base, OOB_REG_BANK, OOB_CTRL2, 0x0, val);
+ brcm_sata_phy_wr(port, OOB_REG_BANK, OOB_CTRL2, 0x0, val);
return 0;
}
static int brcm_dsl_sata_init(struct brcm_sata_port *port)
{
- void __iomem *base = brcm_sata_pcb_base(port);
struct device *dev = port->phy_priv->dev;
unsigned int try;
u32 tmp;
- brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL7, 0, 0x873);
+ brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL7, 0, 0x873);
- brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL6, 0, 0xc000);
+ brcm_sata_phy_wr(port, PLL1_REG_BANK, PLL1_ACTRL6, 0, 0xc000);
- brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0,
+ brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0,
0, 0x3089);
usleep_range(1000, 2000);
- brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0,
+ brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0,
0, 0x3088);
usleep_range(1000, 2000);
- brcm_sata_phy_wr(base, AEQRX_REG_BANK_1, AEQRX_SLCAL0_CTRL0,
+ brcm_sata_phy_wr(port, AEQRX_REG_BANK_1, AEQRX_SLCAL0_CTRL0,
0, 0x3000);
- brcm_sata_phy_wr(base, AEQRX_REG_BANK_1, AEQRX_SLCAL1_CTRL0,
+ brcm_sata_phy_wr(port, AEQRX_REG_BANK_1, AEQRX_SLCAL1_CTRL0,
0, 0x3000);
usleep_range(1000, 2000);
- brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_CAP_CHARGE_TIME, 0, 0x32);
+ brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_CAP_CHARGE_TIME, 0, 0x32);
- brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_VCO_CAL_THRESH, 0, 0xa);
+ brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_VCO_CAL_THRESH, 0, 0xa);
- brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_FREQ_DET_TIME, 0, 0x64);
+ brcm_sata_phy_wr(port, PLL_REG_BANK_0, PLL_FREQ_DET_TIME, 0, 0x64);
usleep_range(1000, 2000);
/* Acquire PLL lock */
try = 50;
while (try) {
- tmp = brcm_sata_phy_rd(base, BLOCK0_REG_BANK,
+ tmp = brcm_sata_phy_rd(port, BLOCK0_REG_BANK,
BLOCK0_XGXSSTATUS);
if (tmp & BLOCK0_XGXSSTATUS_PLL_LOCK)
break;
@@ -687,10 +670,9 @@ static int brcm_sata_phy_init(struct phy *phy)
static void brcm_stb_sata_calibrate(struct brcm_sata_port *port)
{
- void __iomem *base = brcm_sata_pcb_base(port);
u32 tmp = BIT(8);
- brcm_sata_phy_wr(base, RXPMD_REG_BANK, RXPMD_RX_FREQ_MON_CONTROL1,
+ brcm_sata_phy_wr(port, RXPMD_REG_BANK, RXPMD_RX_FREQ_MON_CONTROL1,
~tmp, tmp);
}
diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c
index f20524f..94a34cf 100644
--- a/drivers/phy/motorola/phy-mapphone-mdm6600.c
+++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c
@@ -20,6 +20,7 @@
#define PHY_MDM6600_PHY_DELAY_MS 4000 /* PHY enable 2.2s to 3.5s */
#define PHY_MDM6600_ENABLED_DELAY_MS 8000 /* 8s more total for MDM6600 */
+#define PHY_MDM6600_WAKE_KICK_MS 600 /* time on after GPIO toggle */
#define MDM6600_MODEM_IDLE_DELAY_MS 1000 /* modem after USB suspend */
#define MDM6600_MODEM_WAKE_DELAY_MS 200 /* modem response after idle */
@@ -243,10 +244,24 @@ static irqreturn_t phy_mdm6600_wakeirq_thread(int irq, void *data)
{
struct phy_mdm6600 *ddata = data;
struct gpio_desc *mode_gpio1;
+ int error, wakeup;
mode_gpio1 = ddata->mode_gpios->desc[PHY_MDM6600_MODE1];
- dev_dbg(ddata->dev, "OOB wake on mode_gpio1: %i\n",
- gpiod_get_value(mode_gpio1));
+ wakeup = gpiod_get_value(mode_gpio1);
+ if (!wakeup)
+ return IRQ_NONE;
+
+ dev_dbg(ddata->dev, "OOB wake on mode_gpio1: %i\n", wakeup);
+ error = pm_runtime_get_sync(ddata->dev);
+ if (error < 0) {
+ pm_runtime_put_noidle(ddata->dev);
+
+ return IRQ_NONE;
+ }
+
+ /* Just wake-up and kick the autosuspend timer */
+ pm_runtime_mark_last_busy(ddata->dev);
+ pm_runtime_put_autosuspend(ddata->dev);
return IRQ_HANDLED;
}
@@ -496,8 +511,14 @@ static void phy_mdm6600_modem_wake(struct work_struct *work)
ddata = container_of(work, struct phy_mdm6600, modem_wake_work.work);
phy_mdm6600_wake_modem(ddata);
+
+ /*
+ * The modem does not always stay awake 1.2 seconds after toggling
+ * the wake GPIO, and sometimes it idles after about some 600 ms
+ * making writes time out.
+ */
schedule_delayed_work(&ddata->modem_wake_work,
- msecs_to_jiffies(MDM6600_MODEM_IDLE_DELAY_MS));
+ msecs_to_jiffies(PHY_MDM6600_WAKE_KICK_MS));
}
static int __maybe_unused phy_mdm6600_runtime_suspend(struct device *dev)
diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c
index cd5a6c9..a27b8d5 100644
--- a/drivers/phy/phy-core.c
+++ b/drivers/phy/phy-core.c
@@ -688,11 +688,9 @@ struct phy *phy_get(struct device *dev, const char *string)
get_device(&phy->dev);
link = device_link_add(dev, &phy->dev, DL_FLAG_STATELESS);
- if (!link) {
- dev_err(dev, "failed to create device link to %s\n",
+ if (!link)
+ dev_dbg(dev, "failed to create device link to %s\n",
dev_name(phy->dev.parent));
- return ERR_PTR(-EINVAL);
- }
return phy;
}
@@ -803,11 +801,9 @@ struct phy *devm_of_phy_get(struct device *dev, struct device_node *np,
}
link = device_link_add(dev, &phy->dev, DL_FLAG_STATELESS);
- if (!link) {
- dev_err(dev, "failed to create device link to %s\n",
+ if (!link)
+ dev_dbg(dev, "failed to create device link to %s\n",
dev_name(phy->dev.parent));
- return ERR_PTR(-EINVAL);
- }
return phy;
}
@@ -852,11 +848,9 @@ struct phy *devm_of_phy_get_by_index(struct device *dev, struct device_node *np,
devres_add(dev, ptr);
link = device_link_add(dev, &phy->dev, DL_FLAG_STATELESS);
- if (!link) {
- dev_err(dev, "failed to create device link to %s\n",
+ if (!link)
+ dev_dbg(dev, "failed to create device link to %s\n",
dev_name(phy->dev.parent));
- return ERR_PTR(-EINVAL);
- }
return phy;
}
diff --git a/drivers/phy/ti/phy-gmii-sel.c b/drivers/phy/ti/phy-gmii-sel.c
index a28bd15..1c536fc 100644
--- a/drivers/phy/ti/phy-gmii-sel.c
+++ b/drivers/phy/ti/phy-gmii-sel.c
@@ -80,20 +80,20 @@ static int phy_gmii_sel_mode(struct phy *phy, enum phy_mode mode, int submode)
break;
case PHY_INTERFACE_MODE_MII:
- mode = AM33XX_GMII_SEL_MODE_MII;
+ case PHY_INTERFACE_MODE_GMII:
+ gmii_sel_mode = AM33XX_GMII_SEL_MODE_MII;
break;
default:
- dev_warn(dev,
- "port%u: unsupported mode: \"%s\". Defaulting to MII.\n",
- if_phy->id, phy_modes(rgmii_id));
+ dev_warn(dev, "port%u: unsupported mode: \"%s\"\n",
+ if_phy->id, phy_modes(submode));
return -EINVAL;
}
if_phy->phy_if_mode = submode;
dev_dbg(dev, "%s id:%u mode:%u rgmii_id:%d rmii_clk_ext:%d\n",
- __func__, if_phy->id, mode, rgmii_id,
+ __func__, if_phy->id, submode, rgmii_id,
if_phy->rmii_clock_external);
regfield = if_phy->fields[PHY_GMII_SEL_PORT_MODE];
diff --git a/drivers/pinctrl/cirrus/pinctrl-madera-core.c b/drivers/pinctrl/cirrus/pinctrl-madera-core.c
index 7b6409e..dce2626 100644
--- a/drivers/pinctrl/cirrus/pinctrl-madera-core.c
+++ b/drivers/pinctrl/cirrus/pinctrl-madera-core.c
@@ -1073,13 +1073,26 @@ static int madera_pin_probe(struct platform_device *pdev)
return ret;
}
+ platform_set_drvdata(pdev, priv);
+
dev_dbg(priv->dev, "pinctrl probed ok\n");
return 0;
}
+static int madera_pin_remove(struct platform_device *pdev)
+{
+ struct madera_pin_private *priv = platform_get_drvdata(pdev);
+
+ if (priv->madera->pdata.gpio_configs)
+ pinctrl_unregister_mappings(priv->madera->pdata.gpio_configs);
+
+ return 0;
+}
+
static struct platform_driver madera_pin_driver = {
.probe = madera_pin_probe,
+ .remove = madera_pin_remove,
.driver = {
.name = "madera-pinctrl",
},
diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index 446d84f..f23c55e 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -2021,7 +2021,6 @@ static int pinctrl_claim_hogs(struct pinctrl_dev *pctldev)
return PTR_ERR(pctldev->p);
}
- kref_get(&pctldev->p->users);
pctldev->hog_default =
pinctrl_lookup_state(pctldev->p, PINCTRL_STATE_DEFAULT);
if (IS_ERR(pctldev->hog_default)) {
diff --git a/drivers/pinctrl/freescale/pinctrl-scu.c b/drivers/pinctrl/freescale/pinctrl-scu.c
index 73bf1d9..23cf04b 100644
--- a/drivers/pinctrl/freescale/pinctrl-scu.c
+++ b/drivers/pinctrl/freescale/pinctrl-scu.c
@@ -23,12 +23,12 @@ struct imx_sc_msg_req_pad_set {
struct imx_sc_rpc_msg hdr;
u32 val;
u16 pad;
-} __packed;
+} __packed __aligned(4);
struct imx_sc_msg_req_pad_get {
struct imx_sc_rpc_msg hdr;
u16 pad;
-} __packed;
+} __packed __aligned(4);
struct imx_sc_msg_resp_pad_get {
struct imx_sc_rpc_msg hdr;
diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxl.c b/drivers/pinctrl/meson/pinctrl-meson-gxl.c
index 1b6e864..2ac921c8 100644
--- a/drivers/pinctrl/meson/pinctrl-meson-gxl.c
+++ b/drivers/pinctrl/meson/pinctrl-meson-gxl.c
@@ -147,8 +147,8 @@ static const unsigned int sdio_d0_pins[] = { GPIOX_0 };
static const unsigned int sdio_d1_pins[] = { GPIOX_1 };
static const unsigned int sdio_d2_pins[] = { GPIOX_2 };
static const unsigned int sdio_d3_pins[] = { GPIOX_3 };
-static const unsigned int sdio_cmd_pins[] = { GPIOX_4 };
-static const unsigned int sdio_clk_pins[] = { GPIOX_5 };
+static const unsigned int sdio_clk_pins[] = { GPIOX_4 };
+static const unsigned int sdio_cmd_pins[] = { GPIOX_5 };
static const unsigned int sdio_irq_pins[] = { GPIOX_7 };
static const unsigned int nand_ce0_pins[] = { BOOT_8 };
diff --git a/drivers/pinctrl/pinctrl-falcon.c b/drivers/pinctrl/pinctrl-falcon.c
index a454f57..62c02b9 100644
--- a/drivers/pinctrl/pinctrl-falcon.c
+++ b/drivers/pinctrl/pinctrl-falcon.c
@@ -451,7 +451,7 @@ static int pinctrl_falcon_probe(struct platform_device *pdev)
falcon_info.clk[*bank] = clk_get(&ppdev->dev, NULL);
if (IS_ERR(falcon_info.clk[*bank])) {
dev_err(&ppdev->dev, "failed to get clock\n");
- of_node_put(np)
+ of_node_put(np);
return PTR_ERR(falcon_info.clk[*bank]);
}
falcon_info.membase[*bank] = devm_ioremap_resource(&pdev->dev,
diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c
index 9a8daa2..1a948c3 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm.c
@@ -1104,7 +1104,6 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl)
pctrl->irq_chip.irq_mask = msm_gpio_irq_mask;
pctrl->irq_chip.irq_unmask = msm_gpio_irq_unmask;
pctrl->irq_chip.irq_ack = msm_gpio_irq_ack;
- pctrl->irq_chip.irq_eoi = irq_chip_eoi_parent;
pctrl->irq_chip.irq_set_type = msm_gpio_irq_set_type;
pctrl->irq_chip.irq_set_wake = msm_gpio_irq_set_wake;
pctrl->irq_chip.irq_request_resources = msm_gpio_irq_reqres;
@@ -1118,7 +1117,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl)
if (!chip->irq.parent_domain)
return -EPROBE_DEFER;
chip->irq.child_to_parent_hwirq = msm_gpio_wakeirq;
-
+ pctrl->irq_chip.irq_eoi = irq_chip_eoi_parent;
/*
* Let's skip handling the GPIOs, if the parent irqchip
* is handling the direct connect IRQ of the GPIO.
diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
index fba1d41..338a15d 100644
--- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
+++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
@@ -794,7 +794,7 @@ static int pm8xxx_gpio_probe(struct platform_device *pdev)
girq->fwnode = of_node_to_fwnode(pctrl->dev->of_node);
girq->parent_domain = parent_domain;
girq->child_to_parent_hwirq = pm8xxx_child_to_parent_hwirq;
- girq->populate_parent_alloc_arg = gpiochip_populate_parent_fwspec_fourcell;
+ girq->populate_parent_alloc_arg = gpiochip_populate_parent_fwspec_twocell;
girq->child_offset_to_irq = pm8xxx_child_offset_to_irq;
girq->child_irq_domain_ops.translate = pm8xxx_domain_translate;
diff --git a/drivers/platform/chrome/wilco_ec/properties.c b/drivers/platform/chrome/wilco_ec/properties.c
index e69682c..62f2761 100644
--- a/drivers/platform/chrome/wilco_ec/properties.c
+++ b/drivers/platform/chrome/wilco_ec/properties.c
@@ -5,7 +5,7 @@
#include <linux/platform_data/wilco-ec.h>
#include <linux/string.h>
-#include <linux/unaligned/le_memmove.h>
+#include <asm/unaligned.h>
/* Operation code; what the EC should do with the property */
enum ec_property_op {
diff --git a/drivers/regulator/stm32-vrefbuf.c b/drivers/regulator/stm32-vrefbuf.c
index bdfaf7e..992bc18 100644
--- a/drivers/regulator/stm32-vrefbuf.c
+++ b/drivers/regulator/stm32-vrefbuf.c
@@ -88,7 +88,7 @@ static int stm32_vrefbuf_disable(struct regulator_dev *rdev)
}
val = readl_relaxed(priv->base + STM32_VREFBUF_CSR);
- val = (val & ~STM32_ENVR) | STM32_HIZ;
+ val &= ~STM32_ENVR;
writel_relaxed(val, priv->base + STM32_VREFBUF_CSR);
pm_runtime_mark_last_busy(priv->dev);
@@ -175,6 +175,7 @@ static const struct regulator_desc stm32_vrefbuf_regu = {
.volt_table = stm32_vrefbuf_voltages,
.n_voltages = ARRAY_SIZE(stm32_vrefbuf_voltages),
.ops = &stm32_vrefbuf_volt_ops,
+ .off_on_delay = 1000,
.type = REGULATOR_VOLTAGE,
.owner = THIS_MODULE,
};
diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig
index 461b0e5..d9efbfd 100644
--- a/drivers/reset/Kconfig
+++ b/drivers/reset/Kconfig
@@ -51,6 +51,7 @@
config RESET_BRCMSTB_RESCAL
bool "Broadcom STB RESCAL reset controller"
+ depends on HAS_IOMEM
default ARCH_BRCMSTB || COMPILE_TEST
help
This enables the RESCAL reset controller for SATA, PCIe0, or PCIe1 on
@@ -73,7 +74,7 @@
config RESET_INTEL_GW
bool "Intel Reset Controller Driver"
- depends on OF
+ depends on OF && HAS_IOMEM
select REGMAP_MMIO
help
This enables the reset controller driver for Intel Gateway SoCs.
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 34c8b6c..8e50388 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -327,6 +327,7 @@
config RTC_DRV_MAX8907
tristate "Maxim MAX8907"
depends on MFD_MAX8907 || COMPILE_TEST
+ select REGMAP_IRQ
help
If you say yes here you will get support for the
RTC of Maxim MAX8907 PMIC.
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 6cca727..cf87eb2 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -178,6 +178,8 @@ struct dasd_block *dasd_alloc_block(void)
(unsigned long) block);
INIT_LIST_HEAD(&block->ccw_queue);
spin_lock_init(&block->queue_lock);
+ INIT_LIST_HEAD(&block->format_list);
+ spin_lock_init(&block->format_lock);
timer_setup(&block->timer, dasd_block_timeout, 0);
spin_lock_init(&block->profile.lock);
@@ -1779,20 +1781,26 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
if (dasd_ese_needs_format(cqr->block, irb)) {
if (rq_data_dir((struct request *)cqr->callback_data) == READ) {
- device->discipline->ese_read(cqr);
+ device->discipline->ese_read(cqr, irb);
cqr->status = DASD_CQR_SUCCESS;
cqr->stopclk = now;
dasd_device_clear_timer(device);
dasd_schedule_device_bh(device);
return;
}
- fcqr = device->discipline->ese_format(device, cqr);
+ fcqr = device->discipline->ese_format(device, cqr, irb);
if (IS_ERR(fcqr)) {
+ if (PTR_ERR(fcqr) == -EINVAL) {
+ cqr->status = DASD_CQR_ERROR;
+ return;
+ }
/*
* If we can't format now, let the request go
* one extra round. Maybe we can format later.
*/
cqr->status = DASD_CQR_QUEUED;
+ dasd_schedule_device_bh(device);
+ return;
} else {
fcqr->status = DASD_CQR_QUEUED;
cqr->status = DASD_CQR_QUEUED;
@@ -2748,11 +2756,13 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
{
struct request *req;
blk_status_t error = BLK_STS_OK;
+ unsigned int proc_bytes;
int status;
req = (struct request *) cqr->callback_data;
dasd_profile_end(cqr->block, cqr, req);
+ proc_bytes = cqr->proc_bytes;
status = cqr->block->base->discipline->free_cp(cqr, req);
if (status < 0)
error = errno_to_blk_status(status);
@@ -2783,7 +2793,18 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
blk_mq_end_request(req, error);
blk_mq_run_hw_queues(req->q, true);
} else {
- blk_mq_complete_request(req);
+ /*
+ * Partial completed requests can happen with ESE devices.
+ * During read we might have gotten a NRF error and have to
+ * complete a request partially.
+ */
+ if (proc_bytes) {
+ blk_update_request(req, BLK_STS_OK,
+ blk_rq_bytes(req) - proc_bytes);
+ blk_mq_requeue_request(req, true);
+ } else {
+ blk_mq_complete_request(req);
+ }
}
}
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index a28b9ff..ad44d22 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -207,6 +207,45 @@ static void set_ch_t(struct ch_t *geo, __u32 cyl, __u8 head)
geo->head |= head;
}
+/*
+ * calculate failing track from sense data depending if
+ * it is an EAV device or not
+ */
+static int dasd_eckd_track_from_irb(struct irb *irb, struct dasd_device *device,
+ sector_t *track)
+{
+ struct dasd_eckd_private *private = device->private;
+ u8 *sense = NULL;
+ u32 cyl;
+ u8 head;
+
+ sense = dasd_get_sense(irb);
+ if (!sense) {
+ DBF_DEV_EVENT(DBF_WARNING, device, "%s",
+ "ESE error no sense data\n");
+ return -EINVAL;
+ }
+ if (!(sense[27] & DASD_SENSE_BIT_2)) {
+ DBF_DEV_EVENT(DBF_WARNING, device, "%s",
+ "ESE error no valid track data\n");
+ return -EINVAL;
+ }
+
+ if (sense[27] & DASD_SENSE_BIT_3) {
+ /* enhanced addressing */
+ cyl = sense[30] << 20;
+ cyl |= (sense[31] & 0xF0) << 12;
+ cyl |= sense[28] << 8;
+ cyl |= sense[29];
+ } else {
+ cyl = sense[29] << 8;
+ cyl |= sense[30];
+ }
+ head = sense[31] & 0x0F;
+ *track = cyl * private->rdc_data.trk_per_cyl + head;
+ return 0;
+}
+
static int set_timestamp(struct ccw1 *ccw, struct DE_eckd_data *data,
struct dasd_device *device)
{
@@ -2986,6 +3025,37 @@ static int dasd_eckd_format_device(struct dasd_device *base,
0, NULL);
}
+static bool test_and_set_format_track(struct dasd_format_entry *to_format,
+ struct dasd_block *block)
+{
+ struct dasd_format_entry *format;
+ unsigned long flags;
+ bool rc = false;
+
+ spin_lock_irqsave(&block->format_lock, flags);
+ list_for_each_entry(format, &block->format_list, list) {
+ if (format->track == to_format->track) {
+ rc = true;
+ goto out;
+ }
+ }
+ list_add_tail(&to_format->list, &block->format_list);
+
+out:
+ spin_unlock_irqrestore(&block->format_lock, flags);
+ return rc;
+}
+
+static void clear_format_track(struct dasd_format_entry *format,
+ struct dasd_block *block)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&block->format_lock, flags);
+ list_del_init(&format->list);
+ spin_unlock_irqrestore(&block->format_lock, flags);
+}
+
/*
* Callback function to free ESE format requests.
*/
@@ -2993,15 +3063,19 @@ static void dasd_eckd_ese_format_cb(struct dasd_ccw_req *cqr, void *data)
{
struct dasd_device *device = cqr->startdev;
struct dasd_eckd_private *private = device->private;
+ struct dasd_format_entry *format = data;
+ clear_format_track(format, cqr->basedev->block);
private->count--;
dasd_ffree_request(cqr, device);
}
static struct dasd_ccw_req *
-dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr)
+dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr,
+ struct irb *irb)
{
struct dasd_eckd_private *private;
+ struct dasd_format_entry *format;
struct format_data_t fdata;
unsigned int recs_per_trk;
struct dasd_ccw_req *fcqr;
@@ -3011,23 +3085,39 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr)
struct request *req;
sector_t first_trk;
sector_t last_trk;
+ sector_t curr_trk;
int rc;
req = cqr->callback_data;
- base = cqr->block->base;
+ block = cqr->block;
+ base = block->base;
private = base->private;
- block = base->block;
blksize = block->bp_block;
recs_per_trk = recs_per_track(&private->rdc_data, 0, blksize);
+ format = &startdev->format_entry;
first_trk = blk_rq_pos(req) >> block->s2b_shift;
sector_div(first_trk, recs_per_trk);
last_trk =
(blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
sector_div(last_trk, recs_per_trk);
+ rc = dasd_eckd_track_from_irb(irb, base, &curr_trk);
+ if (rc)
+ return ERR_PTR(rc);
- fdata.start_unit = first_trk;
- fdata.stop_unit = last_trk;
+ if (curr_trk < first_trk || curr_trk > last_trk) {
+ DBF_DEV_EVENT(DBF_WARNING, startdev,
+ "ESE error track %llu not within range %llu - %llu\n",
+ curr_trk, first_trk, last_trk);
+ return ERR_PTR(-EINVAL);
+ }
+ format->track = curr_trk;
+ /* test if track is already in formatting by another thread */
+ if (test_and_set_format_track(format, block))
+ return ERR_PTR(-EEXIST);
+
+ fdata.start_unit = curr_trk;
+ fdata.stop_unit = curr_trk;
fdata.blksize = blksize;
fdata.intensity = private->uses_cdl ? DASD_FMT_INT_COMPAT : 0;
@@ -3044,6 +3134,7 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr)
return fcqr;
fcqr->callback = dasd_eckd_ese_format_cb;
+ fcqr->callback_data = (void *) format;
return fcqr;
}
@@ -3051,29 +3142,87 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr)
/*
* When data is read from an unformatted area of an ESE volume, this function
* returns zeroed data and thereby mimics a read of zero data.
+ *
+ * The first unformatted track is the one that got the NRF error, the address is
+ * encoded in the sense data.
+ *
+ * All tracks before have returned valid data and should not be touched.
+ * All tracks after the unformatted track might be formatted or not. This is
+ * currently not known, remember the processed data and return the remainder of
+ * the request to the blocklayer in __dasd_cleanup_cqr().
*/
-static void dasd_eckd_ese_read(struct dasd_ccw_req *cqr)
+static int dasd_eckd_ese_read(struct dasd_ccw_req *cqr, struct irb *irb)
{
+ struct dasd_eckd_private *private;
+ sector_t first_trk, last_trk;
+ sector_t first_blk, last_blk;
unsigned int blksize, off;
+ unsigned int recs_per_trk;
struct dasd_device *base;
struct req_iterator iter;
+ struct dasd_block *block;
+ unsigned int skip_block;
+ unsigned int blk_count;
struct request *req;
struct bio_vec bv;
+ sector_t curr_trk;
+ sector_t end_blk;
char *dst;
+ int rc;
req = (struct request *) cqr->callback_data;
base = cqr->block->base;
blksize = base->block->bp_block;
+ block = cqr->block;
+ private = base->private;
+ skip_block = 0;
+ blk_count = 0;
+
+ recs_per_trk = recs_per_track(&private->rdc_data, 0, blksize);
+ first_trk = first_blk = blk_rq_pos(req) >> block->s2b_shift;
+ sector_div(first_trk, recs_per_trk);
+ last_trk = last_blk =
+ (blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
+ sector_div(last_trk, recs_per_trk);
+ rc = dasd_eckd_track_from_irb(irb, base, &curr_trk);
+ if (rc)
+ return rc;
+
+ /* sanity check if the current track from sense data is valid */
+ if (curr_trk < first_trk || curr_trk > last_trk) {
+ DBF_DEV_EVENT(DBF_WARNING, base,
+ "ESE error track %llu not within range %llu - %llu\n",
+ curr_trk, first_trk, last_trk);
+ return -EINVAL;
+ }
+
+ /*
+ * if not the first track got the NRF error we have to skip over valid
+ * blocks
+ */
+ if (curr_trk != first_trk)
+ skip_block = curr_trk * recs_per_trk - first_blk;
+
+ /* we have no information beyond the current track */
+ end_blk = (curr_trk + 1) * recs_per_trk;
rq_for_each_segment(bv, req, iter) {
dst = page_address(bv.bv_page) + bv.bv_offset;
for (off = 0; off < bv.bv_len; off += blksize) {
- if (dst && rq_data_dir(req) == READ) {
+ if (first_blk + blk_count >= end_blk) {
+ cqr->proc_bytes = blk_count * blksize;
+ return 0;
+ }
+ if (dst && !skip_block) {
dst += off;
memset(dst, 0, blksize);
+ } else {
+ skip_block--;
}
+ blk_count++;
}
}
+ return 0;
}
/*
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 91c9f95..fa552f9 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -187,6 +187,7 @@ struct dasd_ccw_req {
void (*callback)(struct dasd_ccw_req *, void *data);
void *callback_data;
+ unsigned int proc_bytes; /* bytes for partial completion */
};
/*
@@ -387,8 +388,9 @@ struct dasd_discipline {
int (*ext_pool_warn_thrshld)(struct dasd_device *);
int (*ext_pool_oos)(struct dasd_device *);
int (*ext_pool_exhaust)(struct dasd_device *, struct dasd_ccw_req *);
- struct dasd_ccw_req *(*ese_format)(struct dasd_device *, struct dasd_ccw_req *);
- void (*ese_read)(struct dasd_ccw_req *);
+ struct dasd_ccw_req *(*ese_format)(struct dasd_device *,
+ struct dasd_ccw_req *, struct irb *);
+ int (*ese_read)(struct dasd_ccw_req *, struct irb *);
};
extern struct dasd_discipline *dasd_diag_discipline_pointer;
@@ -474,6 +476,11 @@ struct dasd_profile {
spinlock_t lock;
};
+struct dasd_format_entry {
+ struct list_head list;
+ sector_t track;
+};
+
struct dasd_device {
/* Block device stuff. */
struct dasd_block *block;
@@ -539,6 +546,7 @@ struct dasd_device {
struct dentry *debugfs_dentry;
struct dentry *hosts_dentry;
struct dasd_profile profile;
+ struct dasd_format_entry format_entry;
};
struct dasd_block {
@@ -564,6 +572,9 @@ struct dasd_block {
struct dentry *debugfs_dentry;
struct dasd_profile profile;
+
+ struct list_head format_list;
+ spinlock_t format_lock;
};
struct dasd_attention_data {
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 63502ca..80d22290 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -636,10 +636,10 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
}
dev_info->gd->major = dcssblk_major;
dev_info->gd->fops = &dcssblk_devops;
- dev_info->dcssblk_queue = blk_alloc_queue(GFP_KERNEL);
+ dev_info->dcssblk_queue =
+ blk_alloc_queue(dcssblk_make_request, NUMA_NO_NODE);
dev_info->gd->queue = dev_info->dcssblk_queue;
dev_info->gd->private_data = dev_info;
- blk_queue_make_request(dev_info->dcssblk_queue, dcssblk_make_request);
blk_queue_logical_block_size(dev_info->dcssblk_queue, 4096);
blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->dcssblk_queue);
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index 3df5d68..45a04da 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -343,14 +343,14 @@ static int __init xpram_setup_blkdev(void)
xpram_disks[i] = alloc_disk(1);
if (!xpram_disks[i])
goto out;
- xpram_queues[i] = blk_alloc_queue(GFP_KERNEL);
+ xpram_queues[i] = blk_alloc_queue(xpram_make_request,
+ NUMA_NO_NODE);
if (!xpram_queues[i]) {
put_disk(xpram_disks[i]);
goto out;
}
blk_queue_flag_set(QUEUE_FLAG_NONROT, xpram_queues[i]);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, xpram_queues[i]);
- blk_queue_make_request(xpram_queues[i], xpram_make_request);
blk_queue_logical_block_size(xpram_queues[i], 4096);
}
diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c
index da642e8..4dd2eb634 100644
--- a/drivers/s390/cio/blacklist.c
+++ b/drivers/s390/cio/blacklist.c
@@ -303,8 +303,10 @@ static void *
cio_ignore_proc_seq_next(struct seq_file *s, void *it, loff_t *offset)
{
struct ccwdev_iter *iter;
+ loff_t p = *offset;
- if (*offset >= (__MAX_SUBCHANNEL + 1) * (__MAX_SSID + 1))
+ (*offset)++;
+ if (p >= (__MAX_SUBCHANNEL + 1) * (__MAX_SSID + 1))
return NULL;
iter = it;
if (iter->devno == __MAX_SUBCHANNEL) {
@@ -314,7 +316,6 @@ cio_ignore_proc_seq_next(struct seq_file *s, void *it, loff_t *offset)
return NULL;
} else
iter->devno++;
- (*offset)++;
return iter;
}
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index 51038ec..dfcbe54 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -135,7 +135,7 @@ static ssize_t chp_measurement_chars_read(struct file *filp,
struct channel_path *chp;
struct device *device;
- device = container_of(kobj, struct device, kobj);
+ device = kobj_to_dev(kobj);
chp = to_channelpath(device);
if (chp->cmg == -1)
return 0;
@@ -184,7 +184,7 @@ static ssize_t chp_measurement_read(struct file *filp, struct kobject *kobj,
struct device *device;
unsigned int size;
- device = container_of(kobj, struct device, kobj);
+ device = kobj_to_dev(kobj);
chp = to_channelpath(device);
css = to_css(chp->dev.parent);
diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h
index 4b07984..ff74eb5 100644
--- a/drivers/s390/cio/qdio.h
+++ b/drivers/s390/cio/qdio.h
@@ -182,11 +182,9 @@ enum qdio_queue_irq_states {
};
struct qdio_input_q {
- /* input buffer acknowledgement flag */
- int polling;
/* first ACK'ed buffer */
int ack_start;
- /* how much sbals are acknowledged with qebsm */
+ /* how many SBALs are acknowledged */
int ack_count;
/* last time of noticing incoming data */
u64 timestamp;
diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c
index 35410e6..9c0370b 100644
--- a/drivers/s390/cio/qdio_debug.c
+++ b/drivers/s390/cio/qdio_debug.c
@@ -124,9 +124,8 @@ static int qstat_show(struct seq_file *m, void *v)
seq_printf(m, "nr_used: %d ftc: %d\n",
atomic_read(&q->nr_buf_used), q->first_to_check);
if (q->is_input_q) {
- seq_printf(m, "polling: %d ack start: %d ack count: %d\n",
- q->u.in.polling, q->u.in.ack_start,
- q->u.in.ack_count);
+ seq_printf(m, "ack start: %d ack count: %d\n",
+ q->u.in.ack_start, q->u.in.ack_count);
seq_printf(m, "DSCI: %x IRQs disabled: %u\n",
*(u8 *)q->irq_ptr->dsci,
test_bit(QDIO_QUEUE_IRQS_DISABLED,
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index f8b897b..3475317c 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -393,19 +393,15 @@ int debug_get_buf_state(struct qdio_q *q, unsigned int bufnr,
static inline void qdio_stop_polling(struct qdio_q *q)
{
- if (!q->u.in.polling)
+ if (!q->u.in.ack_count)
return;
- q->u.in.polling = 0;
qperf_inc(q, stop_polling);
/* show the card that we are not polling anymore */
- if (is_qebsm(q)) {
- set_buf_states(q, q->u.in.ack_start, SLSB_P_INPUT_NOT_INIT,
- q->u.in.ack_count);
- q->u.in.ack_count = 0;
- } else
- set_buf_state(q, q->u.in.ack_start, SLSB_P_INPUT_NOT_INIT);
+ set_buf_states(q, q->u.in.ack_start, SLSB_P_INPUT_NOT_INIT,
+ q->u.in.ack_count);
+ q->u.in.ack_count = 0;
}
static inline void account_sbals(struct qdio_q *q, unsigned int count)
@@ -451,8 +447,7 @@ static inline void inbound_primed(struct qdio_q *q, unsigned int start,
/* for QEBSM the ACK was already set by EQBS */
if (is_qebsm(q)) {
- if (!q->u.in.polling) {
- q->u.in.polling = 1;
+ if (!q->u.in.ack_count) {
q->u.in.ack_count = count;
q->u.in.ack_start = start;
return;
@@ -471,12 +466,12 @@ static inline void inbound_primed(struct qdio_q *q, unsigned int start,
* or by the next inbound run.
*/
new = add_buf(start, count - 1);
- if (q->u.in.polling) {
+ if (q->u.in.ack_count) {
/* reset the previous ACK but first set the new one */
set_buf_state(q, new, SLSB_P_INPUT_ACK);
set_buf_state(q, q->u.in.ack_start, SLSB_P_INPUT_NOT_INIT);
} else {
- q->u.in.polling = 1;
+ q->u.in.ack_count = 1;
set_buf_state(q, new, SLSB_P_INPUT_ACK);
}
@@ -1479,13 +1474,12 @@ static int handle_inbound(struct qdio_q *q, unsigned int callflags,
qperf_inc(q, inbound_call);
- if (!q->u.in.polling)
+ if (!q->u.in.ack_count)
goto set;
/* protect against stop polling setting an ACK for an emptied slsb */
if (count == QDIO_MAX_BUFFERS_PER_Q) {
/* overwriting everything, just delete polling status */
- q->u.in.polling = 0;
q->u.in.ack_count = 0;
goto set;
} else if (buf_in_between(q->u.in.ack_start, bufnr, count)) {
@@ -1495,15 +1489,14 @@ static int handle_inbound(struct qdio_q *q, unsigned int callflags,
diff = sub_buf(diff, q->u.in.ack_start);
q->u.in.ack_count -= diff;
if (q->u.in.ack_count <= 0) {
- q->u.in.polling = 0;
q->u.in.ack_count = 0;
goto set;
}
q->u.in.ack_start = add_buf(q->u.in.ack_start, diff);
+ } else {
+ /* the only ACK will be deleted */
+ q->u.in.ack_count = 0;
}
- else
- /* the only ACK will be deleted, so stop polling */
- q->u.in.polling = 0;
}
set:
diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c
index dc430bd..e115623 100644
--- a/drivers/s390/cio/qdio_setup.c
+++ b/drivers/s390/cio/qdio_setup.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/io.h>
#include <asm/qdio.h>
#include "cio.h"
@@ -205,7 +206,7 @@ static void setup_storage_lists(struct qdio_q *q, struct qdio_irq *irq_ptr,
/* fill in sl */
for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++)
- q->sl->element[j].sbal = (unsigned long)q->sbal[j];
+ q->sl->element[j].sbal = virt_to_phys(q->sbal[j]);
}
static void setup_queues(struct qdio_irq *irq_ptr,
@@ -536,7 +537,7 @@ void qdio_print_subchannel_info(struct qdio_irq *irq_ptr,
int qdio_enable_async_operation(struct qdio_output_q *outq)
{
outq->aobs = kcalloc(QDIO_MAX_BUFFERS_PER_Q, sizeof(struct qaob *),
- GFP_ATOMIC);
+ GFP_KERNEL);
if (!outq->aobs) {
outq->use_cq = 0;
return -ENOMEM;
diff --git a/drivers/s390/cio/vfio_ccw_trace.h b/drivers/s390/cio/vfio_ccw_trace.h
index 30162a3..f5d3188 100644
--- a/drivers/s390/cio/vfio_ccw_trace.h
+++ b/drivers/s390/cio/vfio_ccw_trace.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Tracepoints for vfio_ccw driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Tracepoints for vfio_ccw driver
*
* Copyright IBM Corp. 2018
*
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index bb35ba4..4348fdf 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -162,7 +162,7 @@ struct ap_card {
unsigned int functions; /* AP device function bitfield. */
int queue_depth; /* AP queue depth.*/
int id; /* AP card number. */
- atomic_t total_request_count; /* # requests ever for this AP device.*/
+ atomic64_t total_request_count; /* # requests ever for this AP device.*/
};
#define to_ap_card(x) container_of((x), struct ap_card, ap_dev.device)
@@ -179,7 +179,7 @@ struct ap_queue {
enum ap_state state; /* State of the AP device. */
int pendingq_count; /* # requests on pendingq list. */
int requestq_count; /* # requests on requestq list. */
- int total_request_count; /* # requests ever for this AP device.*/
+ u64 total_request_count; /* # requests ever for this AP device.*/
int request_timeout; /* Request timeout in jiffies. */
struct timer_list timeout; /* Timer for request timeouts. */
struct list_head pendingq; /* List of message sent to AP queue. */
diff --git a/drivers/s390/crypto/ap_card.c b/drivers/s390/crypto/ap_card.c
index 63b4cc6..e85bfca 100644
--- a/drivers/s390/crypto/ap_card.c
+++ b/drivers/s390/crypto/ap_card.c
@@ -63,13 +63,13 @@ static ssize_t request_count_show(struct device *dev,
char *buf)
{
struct ap_card *ac = to_ap_card(dev);
- unsigned int req_cnt;
+ u64 req_cnt;
req_cnt = 0;
spin_lock_bh(&ap_list_lock);
- req_cnt = atomic_read(&ac->total_request_count);
+ req_cnt = atomic64_read(&ac->total_request_count);
spin_unlock_bh(&ap_list_lock);
- return snprintf(buf, PAGE_SIZE, "%d\n", req_cnt);
+ return snprintf(buf, PAGE_SIZE, "%llu\n", req_cnt);
}
static ssize_t request_count_store(struct device *dev,
@@ -83,7 +83,7 @@ static ssize_t request_count_store(struct device *dev,
for_each_ap_queue(aq, ac)
aq->total_request_count = 0;
spin_unlock_bh(&ap_list_lock);
- atomic_set(&ac->total_request_count, 0);
+ atomic64_set(&ac->total_request_count, 0);
return count;
}
diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c
index 37c3bdc..a317ab4 100644
--- a/drivers/s390/crypto/ap_queue.c
+++ b/drivers/s390/crypto/ap_queue.c
@@ -479,12 +479,12 @@ static ssize_t request_count_show(struct device *dev,
char *buf)
{
struct ap_queue *aq = to_ap_queue(dev);
- unsigned int req_cnt;
+ u64 req_cnt;
spin_lock_bh(&aq->lock);
req_cnt = aq->total_request_count;
spin_unlock_bh(&aq->lock);
- return snprintf(buf, PAGE_SIZE, "%d\n", req_cnt);
+ return snprintf(buf, PAGE_SIZE, "%llu\n", req_cnt);
}
static ssize_t request_count_store(struct device *dev,
@@ -676,7 +676,7 @@ void ap_queue_message(struct ap_queue *aq, struct ap_message *ap_msg)
list_add_tail(&ap_msg->list, &aq->requestq);
aq->requestq_count++;
aq->total_request_count++;
- atomic_inc(&aq->card->total_request_count);
+ atomic64_inc(&aq->card->total_request_count);
/* Send/receive as many request from the queue as possible. */
ap_wait(ap_sm_event_loop(aq, AP_EVENT_POLL));
spin_unlock_bh(&aq->lock);
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index 71dae64..2f33c5f 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -994,7 +994,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
return -EFAULT;
rc = cca_sec2protkey(ksp.cardnr, ksp.domain,
ksp.seckey.seckey, ksp.protkey.protkey,
- NULL, &ksp.protkey.type);
+ &ksp.protkey.len, &ksp.protkey.type);
DEBUG_DBG("%s cca_sec2protkey()=%d\n", __func__, rc);
if (rc)
break;
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index a42257d..56a405d 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -606,8 +606,8 @@ static inline bool zcrypt_card_compare(struct zcrypt_card *zc,
weight += atomic_read(&zc->load);
pref_weight += atomic_read(&pref_zc->load);
if (weight == pref_weight)
- return atomic_read(&zc->card->total_request_count) >
- atomic_read(&pref_zc->card->total_request_count);
+ return atomic64_read(&zc->card->total_request_count) >
+ atomic64_read(&pref_zc->card->total_request_count);
return weight > pref_weight;
}
@@ -1226,11 +1226,12 @@ static void zcrypt_qdepth_mask(char qdepth[], size_t max_adapters)
spin_unlock(&zcrypt_list_lock);
}
-static void zcrypt_perdev_reqcnt(int reqcnt[], size_t max_adapters)
+static void zcrypt_perdev_reqcnt(u32 reqcnt[], size_t max_adapters)
{
struct zcrypt_card *zc;
struct zcrypt_queue *zq;
int card;
+ u64 cnt;
memset(reqcnt, 0, sizeof(int) * max_adapters);
spin_lock(&zcrypt_list_lock);
@@ -1242,8 +1243,9 @@ static void zcrypt_perdev_reqcnt(int reqcnt[], size_t max_adapters)
|| card >= max_adapters)
continue;
spin_lock(&zq->queue->lock);
- reqcnt[card] = zq->queue->total_request_count;
+ cnt = zq->queue->total_request_count;
spin_unlock(&zq->queue->lock);
+ reqcnt[card] = (cnt < UINT_MAX) ? (u32) cnt : UINT_MAX;
}
}
local_bh_enable();
@@ -1421,9 +1423,9 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
return 0;
}
case ZCRYPT_PERDEV_REQCNT: {
- int *reqcnt;
+ u32 *reqcnt;
- reqcnt = kcalloc(AP_DEVICES, sizeof(int), GFP_KERNEL);
+ reqcnt = kcalloc(AP_DEVICES, sizeof(u32), GFP_KERNEL);
if (!reqcnt)
return -ENOMEM;
zcrypt_perdev_reqcnt(reqcnt, AP_DEVICES);
@@ -1480,7 +1482,7 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
}
case Z90STAT_PERDEV_REQCNT: {
/* the old ioctl supports only 64 adapters */
- int reqcnt[MAX_ZDEV_CARDIDS];
+ u32 reqcnt[MAX_ZDEV_CARDIDS];
zcrypt_perdev_reqcnt(reqcnt, MAX_ZDEV_CARDIDS);
if (copy_to_user((int __user *) arg, reqcnt, sizeof(reqcnt)))
diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c
index d4caf46..2afe215 100644
--- a/drivers/s390/crypto/zcrypt_ep11misc.c
+++ b/drivers/s390/crypto/zcrypt_ep11misc.c
@@ -887,7 +887,7 @@ static int ep11_unwrapkey(u16 card, u16 domain,
/* empty pin tag */
*p++ = 0x04;
*p++ = 0;
- /* encrytped key value tag and bytes */
+ /* encrypted key value tag and bytes */
p += asn1tag_write(p, 0x04, enckey, enckeysize);
/* reply cprb and payload */
@@ -1095,7 +1095,7 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
/* Step 1: generate AES 256 bit random kek key */
rc = ep11_genaeskey(card, domain, 256,
- 0x00006c00, /* EN/DECRYTP, WRAP/UNWRAP */
+ 0x00006c00, /* EN/DECRYPT, WRAP/UNWRAP */
kek, &keklen);
if (rc) {
DEBUG_ERR(
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 9575a62..468cada 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -369,7 +369,7 @@ enum qeth_qdio_info_states {
struct qeth_buffer_pool_entry {
struct list_head list;
struct list_head init_list;
- void *elements[QDIO_MAX_ELEMENTS_PER_BUFFER];
+ struct page *elements[QDIO_MAX_ELEMENTS_PER_BUFFER];
};
struct qeth_qdio_buffer_pool {
@@ -983,7 +983,7 @@ extern const struct attribute_group qeth_device_blkt_group;
extern const struct device_type qeth_generic_devtype;
const char *qeth_get_cardname_short(struct qeth_card *);
-int qeth_realloc_buffer_pool(struct qeth_card *, int);
+int qeth_resize_buffer_pool(struct qeth_card *card, unsigned int count);
int qeth_core_load_discipline(struct qeth_card *, enum qeth_discipline_id);
void qeth_core_free_discipline(struct qeth_card *);
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 9639938..6d3f2f1 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -65,7 +65,6 @@ static struct lock_class_key qdio_out_skb_queue_key;
static void qeth_issue_next_read_cb(struct qeth_card *card,
struct qeth_cmd_buffer *iob,
unsigned int data_length);
-static void qeth_free_buffer_pool(struct qeth_card *);
static int qeth_qdio_establish(struct qeth_card *);
static void qeth_free_qdio_queues(struct qeth_card *card);
static void qeth_notify_skbs(struct qeth_qdio_out_q *queue,
@@ -212,49 +211,121 @@ void qeth_clear_working_pool_list(struct qeth_card *card)
}
EXPORT_SYMBOL_GPL(qeth_clear_working_pool_list);
+static void qeth_free_pool_entry(struct qeth_buffer_pool_entry *entry)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(entry->elements); i++) {
+ if (entry->elements[i])
+ __free_page(entry->elements[i]);
+ }
+
+ kfree(entry);
+}
+
+static void qeth_free_buffer_pool(struct qeth_card *card)
+{
+ struct qeth_buffer_pool_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &card->qdio.init_pool.entry_list,
+ init_list) {
+ list_del(&entry->init_list);
+ qeth_free_pool_entry(entry);
+ }
+}
+
+static struct qeth_buffer_pool_entry *qeth_alloc_pool_entry(unsigned int pages)
+{
+ struct qeth_buffer_pool_entry *entry;
+ unsigned int i;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return NULL;
+
+ for (i = 0; i < pages; i++) {
+ entry->elements[i] = alloc_page(GFP_KERNEL);
+
+ if (!entry->elements[i]) {
+ qeth_free_pool_entry(entry);
+ return NULL;
+ }
+ }
+
+ return entry;
+}
+
static int qeth_alloc_buffer_pool(struct qeth_card *card)
{
- struct qeth_buffer_pool_entry *pool_entry;
- void *ptr;
- int i, j;
+ unsigned int buf_elements = QETH_MAX_BUFFER_ELEMENTS(card);
+ unsigned int i;
QETH_CARD_TEXT(card, 5, "alocpool");
for (i = 0; i < card->qdio.init_pool.buf_count; ++i) {
- pool_entry = kzalloc(sizeof(*pool_entry), GFP_KERNEL);
- if (!pool_entry) {
+ struct qeth_buffer_pool_entry *entry;
+
+ entry = qeth_alloc_pool_entry(buf_elements);
+ if (!entry) {
qeth_free_buffer_pool(card);
return -ENOMEM;
}
- for (j = 0; j < QETH_MAX_BUFFER_ELEMENTS(card); ++j) {
- ptr = (void *) __get_free_page(GFP_KERNEL);
- if (!ptr) {
- while (j > 0)
- free_page((unsigned long)
- pool_entry->elements[--j]);
- kfree(pool_entry);
- qeth_free_buffer_pool(card);
- return -ENOMEM;
- }
- pool_entry->elements[j] = ptr;
- }
- list_add(&pool_entry->init_list,
- &card->qdio.init_pool.entry_list);
+
+ list_add(&entry->init_list, &card->qdio.init_pool.entry_list);
}
return 0;
}
-int qeth_realloc_buffer_pool(struct qeth_card *card, int bufcnt)
+int qeth_resize_buffer_pool(struct qeth_card *card, unsigned int count)
{
+ unsigned int buf_elements = QETH_MAX_BUFFER_ELEMENTS(card);
+ struct qeth_qdio_buffer_pool *pool = &card->qdio.init_pool;
+ struct qeth_buffer_pool_entry *entry, *tmp;
+ int delta = count - pool->buf_count;
+ LIST_HEAD(entries);
+
QETH_CARD_TEXT(card, 2, "realcbp");
- /* TODO: steel/add buffers from/to a running card's buffer pool (?) */
- qeth_clear_working_pool_list(card);
- qeth_free_buffer_pool(card);
- card->qdio.in_buf_pool.buf_count = bufcnt;
- card->qdio.init_pool.buf_count = bufcnt;
- return qeth_alloc_buffer_pool(card);
+ /* Defer until queue is allocated: */
+ if (!card->qdio.in_q)
+ goto out;
+
+ /* Remove entries from the pool: */
+ while (delta < 0) {
+ entry = list_first_entry(&pool->entry_list,
+ struct qeth_buffer_pool_entry,
+ init_list);
+ list_del(&entry->init_list);
+ qeth_free_pool_entry(entry);
+
+ delta++;
+ }
+
+ /* Allocate additional entries: */
+ while (delta > 0) {
+ entry = qeth_alloc_pool_entry(buf_elements);
+ if (!entry) {
+ list_for_each_entry_safe(entry, tmp, &entries,
+ init_list) {
+ list_del(&entry->init_list);
+ qeth_free_pool_entry(entry);
+ }
+
+ return -ENOMEM;
+ }
+
+ list_add(&entry->init_list, &entries);
+
+ delta--;
+ }
+
+ list_splice(&entries, &pool->entry_list);
+
+out:
+ card->qdio.in_buf_pool.buf_count = count;
+ pool->buf_count = count;
+ return 0;
}
-EXPORT_SYMBOL_GPL(qeth_realloc_buffer_pool);
+EXPORT_SYMBOL_GPL(qeth_resize_buffer_pool);
static void qeth_free_qdio_queue(struct qeth_qdio_q *q)
{
@@ -1128,9 +1199,10 @@ static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
qeth_tx_complete_buf(buf, error, budget);
for (i = 0; i < queue->max_elements; ++i) {
- if (buf->buffer->element[i].addr && buf->is_header[i])
- kmem_cache_free(qeth_core_header_cache,
- buf->buffer->element[i].addr);
+ void *data = phys_to_virt(buf->buffer->element[i].addr);
+
+ if (data && buf->is_header[i])
+ kmem_cache_free(qeth_core_header_cache, data);
buf->is_header[i] = 0;
}
@@ -1169,19 +1241,6 @@ void qeth_drain_output_queues(struct qeth_card *card)
}
EXPORT_SYMBOL_GPL(qeth_drain_output_queues);
-static void qeth_free_buffer_pool(struct qeth_card *card)
-{
- struct qeth_buffer_pool_entry *pool_entry, *tmp;
- int i = 0;
- list_for_each_entry_safe(pool_entry, tmp,
- &card->qdio.init_pool.entry_list, init_list){
- for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i)
- free_page((unsigned long)pool_entry->elements[i]);
- list_del(&pool_entry->init_list);
- kfree(pool_entry);
- }
-}
-
static int qeth_osa_set_output_queues(struct qeth_card *card, bool single)
{
unsigned int count = single ? 1 : card->dev->num_tx_queues;
@@ -1203,7 +1262,6 @@ static int qeth_osa_set_output_queues(struct qeth_card *card, bool single)
if (count == 1)
dev_info(&card->gdev->dev, "Priority Queueing not supported\n");
- card->qdio.default_out_queue = single ? 0 : QETH_DEFAULT_QUEUE;
card->qdio.no_out_queues = count;
return 0;
}
@@ -2392,7 +2450,6 @@ static void qeth_free_qdio_queues(struct qeth_card *card)
return;
qeth_free_cq(card);
- cancel_delayed_work_sync(&card->buffer_reclaim_work);
for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
if (card->qdio.in_q->bufs[j].rx_skb)
dev_kfree_skb_any(card->qdio.in_q->bufs[j].rx_skb);
@@ -2574,7 +2631,6 @@ static struct qeth_buffer_pool_entry *qeth_find_free_buffer_pool_entry(
struct list_head *plh;
struct qeth_buffer_pool_entry *entry;
int i, free;
- struct page *page;
if (list_empty(&card->qdio.in_buf_pool.entry_list))
return NULL;
@@ -2583,7 +2639,7 @@ static struct qeth_buffer_pool_entry *qeth_find_free_buffer_pool_entry(
entry = list_entry(plh, struct qeth_buffer_pool_entry, list);
free = 1;
for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i) {
- if (page_count(virt_to_page(entry->elements[i])) > 1) {
+ if (page_count(entry->elements[i]) > 1) {
free = 0;
break;
}
@@ -2598,15 +2654,15 @@ static struct qeth_buffer_pool_entry *qeth_find_free_buffer_pool_entry(
entry = list_entry(card->qdio.in_buf_pool.entry_list.next,
struct qeth_buffer_pool_entry, list);
for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i) {
- if (page_count(virt_to_page(entry->elements[i])) > 1) {
- page = alloc_page(GFP_ATOMIC);
- if (!page) {
+ if (page_count(entry->elements[i]) > 1) {
+ struct page *page = alloc_page(GFP_ATOMIC);
+
+ if (!page)
return NULL;
- } else {
- free_page((unsigned long)entry->elements[i]);
- entry->elements[i] = page_address(page);
- QETH_CARD_STAT_INC(card, rx_sg_alloc_page);
- }
+
+ __free_page(entry->elements[i]);
+ entry->elements[i] = page;
+ QETH_CARD_STAT_INC(card, rx_sg_alloc_page);
}
}
list_del_init(&entry->list);
@@ -2624,12 +2680,12 @@ static int qeth_init_input_buffer(struct qeth_card *card,
ETH_HLEN +
sizeof(struct ipv6hdr));
if (!buf->rx_skb)
- return 1;
+ return -ENOMEM;
}
pool_entry = qeth_find_free_buffer_pool_entry(card);
if (!pool_entry)
- return 1;
+ return -ENOBUFS;
/*
* since the buffer is accessed only from the input_tasklet
@@ -2641,7 +2697,8 @@ static int qeth_init_input_buffer(struct qeth_card *card,
buf->pool_entry = pool_entry;
for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i) {
buf->buffer->element[i].length = PAGE_SIZE;
- buf->buffer->element[i].addr = pool_entry->elements[i];
+ buf->buffer->element[i].addr =
+ page_to_phys(pool_entry->elements[i]);
if (i == QETH_MAX_BUFFER_ELEMENTS(card) - 1)
buf->buffer->element[i].eflags = SBAL_EFLAGS_LAST_ENTRY;
else
@@ -2673,10 +2730,15 @@ static int qeth_init_qdio_queues(struct qeth_card *card)
/* inbound queue */
qdio_reset_buffers(card->qdio.in_q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q);
memset(&card->rx, 0, sizeof(struct qeth_rx));
+
qeth_initialize_working_pool_list(card);
/*give only as many buffers to hardware as we have buffer pool entries*/
- for (i = 0; i < card->qdio.in_buf_pool.buf_count - 1; ++i)
- qeth_init_input_buffer(card, &card->qdio.in_q->bufs[i]);
+ for (i = 0; i < card->qdio.in_buf_pool.buf_count - 1; i++) {
+ rc = qeth_init_input_buffer(card, &card->qdio.in_q->bufs[i]);
+ if (rc)
+ return rc;
+ }
+
card->qdio.in_q->next_buf_to_init =
card->qdio.in_buf_pool.buf_count - 1;
rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, 0,
@@ -3459,9 +3521,8 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, unsigned int qdio_err,
while ((e < QDIO_MAX_ELEMENTS_PER_BUFFER) &&
buffer->element[e].addr) {
- unsigned long phys_aob_addr;
+ unsigned long phys_aob_addr = buffer->element[e].addr;
- phys_aob_addr = (unsigned long) buffer->element[e].addr;
qeth_qdio_handle_aob(card, phys_aob_addr);
++e;
}
@@ -3750,7 +3811,7 @@ static unsigned int __qeth_fill_buffer(struct sk_buff *skb,
elem_length = min_t(unsigned int, length,
PAGE_SIZE - offset_in_page(data));
- buffer->element[element].addr = data;
+ buffer->element[element].addr = virt_to_phys(data);
buffer->element[element].length = elem_length;
length -= elem_length;
if (is_first_elem) {
@@ -3780,7 +3841,7 @@ static unsigned int __qeth_fill_buffer(struct sk_buff *skb,
elem_length = min_t(unsigned int, length,
PAGE_SIZE - offset_in_page(data));
- buffer->element[element].addr = data;
+ buffer->element[element].addr = virt_to_phys(data);
buffer->element[element].length = elem_length;
buffer->element[element].eflags =
SBAL_EFLAGS_MIDDLE_FRAG;
@@ -3820,7 +3881,7 @@ static unsigned int qeth_fill_buffer(struct qeth_qdio_out_buffer *buf,
int element = buf->next_element_to_fill;
is_first_elem = false;
- buffer->element[element].addr = hdr;
+ buffer->element[element].addr = virt_to_phys(hdr);
buffer->element[element].length = hd_len;
buffer->element[element].eflags = SBAL_EFLAGS_FIRST_FRAG;
/* remember to free cache-allocated qeth_hdr: */
@@ -4746,10 +4807,10 @@ static void qeth_qdio_establish_cq(struct qeth_card *card,
if (card->options.cq == QETH_CQ_ENABLED) {
int offset = QDIO_MAX_BUFFERS_PER_Q *
(card->qdio.no_in_queues - 1);
- for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) {
- in_sbal_ptrs[offset + i] = (struct qdio_buffer *)
- virt_to_phys(card->qdio.c_q->bufs[i].buffer);
- }
+
+ for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; i++)
+ in_sbal_ptrs[offset + i] =
+ card->qdio.c_q->bufs[i].buffer;
queue_start_poll[card->qdio.no_in_queues - 1] = NULL;
}
@@ -4783,10 +4844,9 @@ static int qeth_qdio_establish(struct qeth_card *card)
rc = -ENOMEM;
goto out_free_qib_param;
}
- for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) {
- in_sbal_ptrs[i] = (struct qdio_buffer *)
- virt_to_phys(card->qdio.in_q->bufs[i].buffer);
- }
+
+ for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; i++)
+ in_sbal_ptrs[i] = card->qdio.in_q->bufs[i].buffer;
queue_start_poll = kcalloc(card->qdio.no_in_queues, sizeof(void *),
GFP_KERNEL);
@@ -4807,11 +4867,11 @@ static int qeth_qdio_establish(struct qeth_card *card)
rc = -ENOMEM;
goto out_free_queue_start_poll;
}
+
for (i = 0, k = 0; i < card->qdio.no_out_queues; ++i)
- for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j, ++k) {
- out_sbal_ptrs[k] = (struct qdio_buffer *)virt_to_phys(
- card->qdio.out_qs[i]->bufs[j]->buffer);
- }
+ for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++, k++)
+ out_sbal_ptrs[k] =
+ card->qdio.out_qs[i]->bufs[j]->buffer;
memset(&init_data, 0, sizeof(struct qdio_initialize));
init_data.cdev = CARD_DDEV(card);
@@ -5289,7 +5349,7 @@ static int qeth_extract_skb(struct qeth_card *card,
offset = 0;
}
- hdr = element->addr + offset;
+ hdr = phys_to_virt(element->addr) + offset;
offset += sizeof(*hdr);
skb = NULL;
@@ -5344,7 +5404,7 @@ static int qeth_extract_skb(struct qeth_card *card,
}
use_rx_sg = (card->options.cq == QETH_CQ_ENABLED) ||
- ((skb_len >= card->options.rx_sg_cb) &&
+ (skb_len > card->options.rx_sg_cb &&
!atomic_read(&card->force_alloc_skb) &&
!IS_OSN(card));
@@ -5388,7 +5448,7 @@ static int qeth_extract_skb(struct qeth_card *card,
walk_packet:
while (skb_len) {
int data_len = min(skb_len, (int)(element->length - offset));
- char *data = element->addr + offset;
+ char *data = phys_to_virt(element->addr) + offset;
skb_len -= data_len;
offset += data_len;
@@ -5447,7 +5507,6 @@ static int qeth_extract_skbs(struct qeth_card *card, int budget,
{
int work_done = 0;
- WARN_ON_ONCE(!budget);
*done = false;
while (budget) {
diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c
index 2bd9993..78cae61 100644
--- a/drivers/s390/net/qeth_core_sys.c
+++ b/drivers/s390/net/qeth_core_sys.c
@@ -247,8 +247,8 @@ static ssize_t qeth_dev_bufcnt_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct qeth_card *card = dev_get_drvdata(dev);
+ unsigned int cnt;
char *tmp;
- int cnt, old_cnt;
int rc = 0;
mutex_lock(&card->conf_mutex);
@@ -257,13 +257,12 @@ static ssize_t qeth_dev_bufcnt_store(struct device *dev,
goto out;
}
- old_cnt = card->qdio.in_buf_pool.buf_count;
cnt = simple_strtoul(buf, &tmp, 10);
cnt = (cnt < QETH_IN_BUF_COUNT_MIN) ? QETH_IN_BUF_COUNT_MIN :
((cnt > QETH_IN_BUF_COUNT_MAX) ? QETH_IN_BUF_COUNT_MAX : cnt);
- if (old_cnt != cnt) {
- rc = qeth_realloc_buffer_pool(card, cnt);
- }
+
+ rc = qeth_resize_buffer_pool(card, cnt);
+
out:
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 692bd26..8fb2937 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -284,6 +284,7 @@ static void qeth_l2_stop_card(struct qeth_card *card)
if (card->state == CARD_STATE_SOFTSETUP) {
qeth_clear_ipacmd_list(card);
qeth_drain_output_queues(card);
+ cancel_delayed_work_sync(&card->buffer_reclaim_work);
card->state = CARD_STATE_DOWN;
}
@@ -1707,15 +1708,14 @@ int qeth_l2_vnicc_set_state(struct qeth_card *card, u32 vnicc, bool state)
QETH_CARD_TEXT(card, 2, "vniccsch");
- /* do not change anything if BridgePort is enabled */
- if (qeth_bridgeport_is_in_use(card))
- return -EBUSY;
-
/* check if characteristic and enable/disable are supported */
if (!(card->options.vnicc.sup_chars & vnicc) ||
!(card->options.vnicc.set_char_sup & vnicc))
return -EOPNOTSUPP;
+ if (qeth_bridgeport_is_in_use(card))
+ return -EBUSY;
+
/* set enable/disable command and store wanted characteristic */
if (state) {
cmd = IPA_VNICC_ENABLE;
@@ -1761,14 +1761,13 @@ int qeth_l2_vnicc_get_state(struct qeth_card *card, u32 vnicc, bool *state)
QETH_CARD_TEXT(card, 2, "vniccgch");
- /* do not get anything if BridgePort is enabled */
- if (qeth_bridgeport_is_in_use(card))
- return -EBUSY;
-
/* check if characteristic is supported */
if (!(card->options.vnicc.sup_chars & vnicc))
return -EOPNOTSUPP;
+ if (qeth_bridgeport_is_in_use(card))
+ return -EBUSY;
+
/* if card is ready, query current VNICC state */
if (qeth_card_hw_is_reachable(card))
rc = qeth_l2_vnicc_query_chars(card);
@@ -1786,15 +1785,14 @@ int qeth_l2_vnicc_set_timeout(struct qeth_card *card, u32 timeout)
QETH_CARD_TEXT(card, 2, "vniccsto");
- /* do not change anything if BridgePort is enabled */
- if (qeth_bridgeport_is_in_use(card))
- return -EBUSY;
-
/* check if characteristic and set_timeout are supported */
if (!(card->options.vnicc.sup_chars & QETH_VNICC_LEARNING) ||
!(card->options.vnicc.getset_timeout_sup & QETH_VNICC_LEARNING))
return -EOPNOTSUPP;
+ if (qeth_bridgeport_is_in_use(card))
+ return -EBUSY;
+
/* do we need to do anything? */
if (card->options.vnicc.learning_timeout == timeout)
return rc;
@@ -1823,14 +1821,14 @@ int qeth_l2_vnicc_get_timeout(struct qeth_card *card, u32 *timeout)
QETH_CARD_TEXT(card, 2, "vniccgto");
- /* do not get anything if BridgePort is enabled */
- if (qeth_bridgeport_is_in_use(card))
- return -EBUSY;
-
/* check if characteristic and get_timeout are supported */
if (!(card->options.vnicc.sup_chars & QETH_VNICC_LEARNING) ||
!(card->options.vnicc.getset_timeout_sup & QETH_VNICC_LEARNING))
return -EOPNOTSUPP;
+
+ if (qeth_bridgeport_is_in_use(card))
+ return -EBUSY;
+
/* if card is ready, get timeout. Otherwise, just return stored value */
*timeout = card->options.vnicc.learning_timeout;
if (qeth_card_hw_is_reachable(card))
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 317d5664..82f800d 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1178,6 +1178,7 @@ static void qeth_l3_stop_card(struct qeth_card *card)
qeth_l3_clear_ip_htable(card, 1);
qeth_clear_ipacmd_list(card);
qeth_drain_output_queues(card);
+ cancel_delayed_work_sync(&card->buffer_reclaim_work);
card->state = CARD_STATE_DOWN;
}
diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c
index 29f2517..a3d1c3b 100644
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -206,12 +206,11 @@ static ssize_t qeth_l3_dev_sniffer_store(struct device *dev,
qdio_get_ssqd_desc(CARD_DDEV(card), &card->ssqd);
if (card->ssqd.qdioac2 & CHSC_AC2_SNIFFER_AVAILABLE) {
card->options.sniffer = i;
- if (card->qdio.init_pool.buf_count !=
- QETH_IN_BUF_COUNT_MAX)
- qeth_realloc_buffer_pool(card,
- QETH_IN_BUF_COUNT_MAX);
- } else
+ qeth_resize_buffer_pool(card, QETH_IN_BUF_COUNT_MAX);
+ } else {
rc = -EPERM;
+ }
+
break;
default:
rc = -EINVAL;
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index 223a805..cae9b7ff 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -2510,7 +2510,7 @@ void zfcp_fsf_reqid_check(struct zfcp_qdio *qdio, int sbal_idx)
for (idx = 0; idx < QDIO_MAX_ELEMENTS_PER_BUFFER; idx++) {
sbale = &sbal->element[idx];
- req_id = (unsigned long) sbale->addr;
+ req_id = sbale->addr;
fsf_req = zfcp_reqlist_find_rm(adapter->req_list, req_id);
if (!fsf_req) {
diff --git a/drivers/s390/scsi/zfcp_fsf.h b/drivers/s390/scsi/zfcp_fsf.h
index 2b1e4da..4bfb79f 100644
--- a/drivers/s390/scsi/zfcp_fsf.h
+++ b/drivers/s390/scsi/zfcp_fsf.h
@@ -410,7 +410,7 @@ struct fsf_qtcb_bottom_port {
u8 cb_util;
u8 a_util;
u8 res2;
- u16 temperature;
+ s16 temperature;
u16 vcc;
u16 tx_bias;
u16 tx_power;
diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c
index 661436a..f0d6296e 100644
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -98,7 +98,7 @@ static void zfcp_qdio_int_resp(struct ccw_device *cdev, unsigned int qdio_err,
memset(pl, 0,
ZFCP_QDIO_MAX_SBALS_PER_REQ * sizeof(void *));
sbale = qdio->res_q[idx]->element;
- req_id = (u64) sbale->addr;
+ req_id = sbale->addr;
scount = min(sbale->scount + 1,
ZFCP_QDIO_MAX_SBALS_PER_REQ + 1);
/* incl. signaling SBAL */
@@ -199,7 +199,7 @@ int zfcp_qdio_sbals_from_sg(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req,
q_req->sbal_number);
return -EINVAL;
}
- sbale->addr = sg_virt(sg);
+ sbale->addr = sg_phys(sg);
sbale->length = sg->length;
}
return 0;
@@ -418,7 +418,7 @@ int zfcp_qdio_open(struct zfcp_qdio *qdio)
sbale->length = 0;
sbale->eflags = SBAL_EFLAGS_LAST_ENTRY;
sbale->sflags = 0;
- sbale->addr = NULL;
+ sbale->addr = 0;
}
if (do_QDIO(cdev, QDIO_FLAG_SYNC_INPUT, 0, 0, QDIO_MAX_BUFFERS_PER_Q))
diff --git a/drivers/s390/scsi/zfcp_qdio.h b/drivers/s390/scsi/zfcp_qdio.h
index 2a816a3..6b43d6b 100644
--- a/drivers/s390/scsi/zfcp_qdio.h
+++ b/drivers/s390/scsi/zfcp_qdio.h
@@ -122,14 +122,14 @@ void zfcp_qdio_req_init(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req,
% QDIO_MAX_BUFFERS_PER_Q;
sbale = zfcp_qdio_sbale_req(qdio, q_req);
- sbale->addr = (void *) req_id;
+ sbale->addr = req_id;
sbale->eflags = 0;
sbale->sflags = SBAL_SFLAGS0_COMMAND | sbtype;
if (unlikely(!data))
return;
sbale++;
- sbale->addr = data;
+ sbale->addr = virt_to_phys(data);
sbale->length = len;
}
@@ -152,7 +152,7 @@ void zfcp_qdio_fill_next(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req,
BUG_ON(q_req->sbale_curr == qdio->max_sbale_per_sbal - 1);
q_req->sbale_curr++;
sbale = zfcp_qdio_sbale_curr(qdio, q_req);
- sbale->addr = data;
+ sbale->addr = virt_to_phys(data);
sbale->length = len;
}
diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c
index 494b9fe..a711a0d 100644
--- a/drivers/s390/scsi/zfcp_sysfs.c
+++ b/drivers/s390/scsi/zfcp_sysfs.c
@@ -800,7 +800,7 @@ static ZFCP_DEV_ATTR(adapter_diag, b2b_credit, 0400,
static ZFCP_DEV_ATTR(adapter_diag_sfp, _name, 0400, \
zfcp_sysfs_adapter_diag_sfp_##_name##_show, NULL)
-ZFCP_DEFINE_DIAG_SFP_ATTR(temperature, temperature, 5, "%hu");
+ZFCP_DEFINE_DIAG_SFP_ATTR(temperature, temperature, 6, "%hd");
ZFCP_DEFINE_DIAG_SFP_ATTR(vcc, vcc, 5, "%hu");
ZFCP_DEFINE_DIAG_SFP_ATTR(tx_bias, tx_bias, 5, "%hu");
ZFCP_DEFINE_DIAG_SFP_ATTR(tx_power, tx_power, 5, "%hu");
diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c
index 3170b29..1862594 100644
--- a/drivers/scsi/BusLogic.c
+++ b/drivers/scsi/BusLogic.c
@@ -36,6 +36,7 @@
#include <linux/jiffies.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
+#include <linux/msdos_partition.h>
#include <scsi/scsicam.h>
#include <asm/dma.h>
@@ -3410,9 +3411,10 @@ static int blogic_diskparam(struct scsi_device *sdev, struct block_device *dev,
a partition table entry whose end_head matches one of the
standard BusLogic geometry translations (64/32, 128/32, or 255/63).
*/
- if (*(unsigned short *) (buf + 64) == 0xAA55) {
- struct partition *part1_entry = (struct partition *) buf;
- struct partition *part_entry = part1_entry;
+ if (*(unsigned short *) (buf + 64) == MSDOS_LABEL_MAGIC) {
+ struct msdos_partition *part1_entry =
+ (struct msdos_partition *)buf;
+ struct msdos_partition *part_entry = part1_entry;
int saved_cyl = diskparam->cylinders, part_no;
unsigned char part_end_head = 0, part_end_sector = 0;
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index a7881f8..1b6eaf8 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -989,6 +989,7 @@
config SCSI_IPR
tristate "IBM Power Linux RAID adapter support"
depends on PCI && SCSI && ATA
+ select SATA_HOST
select FW_LOADER
select IRQ_POLL
select SGL_ALLOC
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index ee6bc2f..0443b74 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -33,6 +33,7 @@
#include <linux/syscalls.h>
#include <linux/delay.h>
#include <linux/kthread.h>
+#include <linux/msdos_partition.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
@@ -328,9 +329,9 @@ static int aac_biosparm(struct scsi_device *sdev, struct block_device *bdev,
buf = scsi_bios_ptable(bdev);
if (!buf)
return 0;
- if(*(__le16 *)(buf + 0x40) == cpu_to_le16(0xaa55)) {
- struct partition *first = (struct partition * )buf;
- struct partition *entry = first;
+ if (*(__le16 *)(buf + 0x40) == cpu_to_le16(MSDOS_LABEL_MAGIC)) {
+ struct msdos_partition *first = (struct msdos_partition *)buf;
+ struct msdos_partition *entry = first;
int saved_cylinders = param->cylinders;
int num;
unsigned char end_head, end_sec;
diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c
index 5799251..dc4fe33 100644
--- a/drivers/scsi/aic7xxx/aic79xx_osm.c
+++ b/drivers/scsi/aic7xxx/aic79xx_osm.c
@@ -723,24 +723,17 @@ static int
ahd_linux_biosparam(struct scsi_device *sdev, struct block_device *bdev,
sector_t capacity, int geom[])
{
- uint8_t *bh;
int heads;
int sectors;
int cylinders;
- int ret;
int extended;
struct ahd_softc *ahd;
ahd = *((struct ahd_softc **)sdev->host->hostdata);
- bh = scsi_bios_ptable(bdev);
- if (bh) {
- ret = scsi_partsize(bh, capacity,
- &geom[2], &geom[0], &geom[1]);
- kfree(bh);
- if (ret != -1)
- return (ret);
- }
+ if (scsi_partsize(bdev, capacity, geom))
+ return 0;
+
heads = 64;
sectors = 32;
cylinders = aic_sector_div(capacity, heads, sectors);
diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c
index d5c4a0d..2edfa05 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_osm.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c
@@ -695,11 +695,9 @@ static int
ahc_linux_biosparam(struct scsi_device *sdev, struct block_device *bdev,
sector_t capacity, int geom[])
{
- uint8_t *bh;
int heads;
int sectors;
int cylinders;
- int ret;
int extended;
struct ahc_softc *ahc;
u_int channel;
@@ -707,14 +705,9 @@ ahc_linux_biosparam(struct scsi_device *sdev, struct block_device *bdev,
ahc = *((struct ahc_softc **)sdev->host->hostdata);
channel = sdev_channel(sdev);
- bh = scsi_bios_ptable(bdev);
- if (bh) {
- ret = scsi_partsize(bh, capacity,
- &geom[2], &geom[0], &geom[1]);
- kfree(bh);
- if (ret != -1)
- return (ret);
- }
+ if (scsi_partsize(bdev, capacity, geom))
+ return 0;
+
heads = 64;
sectors = 32;
cylinders = aic_sector_div(capacity, heads, sectors);
diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c
index 40dc8ea..c2c79a37 100644
--- a/drivers/scsi/arcmsr/arcmsr_hba.c
+++ b/drivers/scsi/arcmsr/arcmsr_hba.c
@@ -353,16 +353,11 @@ static irqreturn_t arcmsr_do_interrupt(int irq, void *dev_id)
static int arcmsr_bios_param(struct scsi_device *sdev,
struct block_device *bdev, sector_t capacity, int *geom)
{
- int ret, heads, sectors, cylinders, total_capacity;
- unsigned char *buffer;/* return copy of block device's partition table */
+ int heads, sectors, cylinders, total_capacity;
- buffer = scsi_bios_ptable(bdev);
- if (buffer) {
- ret = scsi_partsize(buffer, capacity, &geom[2], &geom[0], &geom[1]);
- kfree(buffer);
- if (ret != -1)
- return ret;
- }
+ if (scsi_partsize(bdev, capacity, geom))
+ return 0;
+
total_capacity = capacity;
heads = 64;
sectors = 32;
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index ae45cbe..cd8db13 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -9950,6 +9950,7 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg,
ioa_cfg->max_devs_supported = ipr_max_devs;
if (ioa_cfg->sis64) {
+ host->max_channel = IPR_MAX_SIS64_BUSES;
host->max_id = IPR_MAX_SIS64_TARGETS_PER_BUS;
host->max_lun = IPR_MAX_SIS64_LUNS_PER_TARGET;
if (ipr_max_devs > IPR_MAX_SIS64_DEVS)
@@ -9958,6 +9959,7 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg,
+ ((sizeof(struct ipr_config_table_entry64)
* ioa_cfg->max_devs_supported)));
} else {
+ host->max_channel = IPR_VSET_BUS;
host->max_id = IPR_MAX_NUM_TARGETS_PER_BUS;
host->max_lun = IPR_MAX_NUM_LUNS_PER_TARGET;
if (ipr_max_devs > IPR_MAX_PHYSICAL_DEVS)
@@ -9967,7 +9969,6 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg,
* ioa_cfg->max_devs_supported)));
}
- host->max_channel = IPR_VSET_BUS;
host->unique_id = host->host_no;
host->max_cmd_len = IPR_MAX_CDB_LEN;
host->can_queue = ioa_cfg->max_cmds;
diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h
index a67baeb3..b97aa9a 100644
--- a/drivers/scsi/ipr.h
+++ b/drivers/scsi/ipr.h
@@ -1300,6 +1300,7 @@ struct ipr_resource_entry {
#define IPR_ARRAY_VIRTUAL_BUS 0x1
#define IPR_VSET_VIRTUAL_BUS 0x2
#define IPR_IOAFP_VIRTUAL_BUS 0x3
+#define IPR_MAX_SIS64_BUSES 0x4
#define IPR_GET_RES_PHYS_LOC(res) \
(((res)->bus << 24) | ((res)->target << 8) | (res)->lun)
diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c
index 9c5f7c9..2b865c6 100644
--- a/drivers/scsi/libfc/fc_disc.c
+++ b/drivers/scsi/libfc/fc_disc.c
@@ -628,6 +628,8 @@ static void fc_disc_gpn_id_resp(struct fc_seq *sp, struct fc_frame *fp,
}
out:
kref_put(&rdata->kref, fc_rport_destroy);
+ if (!IS_ERR(fp))
+ fc_frame_free(fp);
}
/**
diff --git a/drivers/scsi/libsas/Kconfig b/drivers/scsi/libsas/Kconfig
index 5c6a5ef..052ee3a 100644
--- a/drivers/scsi/libsas/Kconfig
+++ b/drivers/scsi/libsas/Kconfig
@@ -19,6 +19,7 @@
bool "ATA support for libsas (requires libata)"
depends on SCSI_SAS_LIBSAS
depends on ATA = y || ATA = SCSI_SAS_LIBSAS
+ select SATA_HOST
help
Builds in ATA support into libsas. Will necessitate
the loading of libata along with libsas.
diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index ff6d4aa..f27ffd0 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -2795,11 +2795,9 @@ megaraid_biosparam(struct scsi_device *sdev, struct block_device *bdev,
sector_t capacity, int geom[])
{
adapter_t *adapter;
- unsigned char *bh;
int heads;
int sectors;
int cylinders;
- int rval;
/* Get pointer to host config structure */
adapter = (adapter_t *)sdev->host->hostdata;
@@ -2826,15 +2824,8 @@ megaraid_biosparam(struct scsi_device *sdev, struct block_device *bdev,
geom[2] = cylinders;
}
else {
- bh = scsi_bios_ptable(bdev);
-
- if( bh ) {
- rval = scsi_partsize(bh, capacity,
- &geom[2], &geom[0], &geom[1]);
- kfree(bh);
- if( rval != -1 )
- return rval;
- }
+ if (scsi_partsize(bdev, capacity, geom))
+ return 0;
dev_info(&adapter->dev->dev,
"invalid partition on this disk on channel %d\n",
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index f3b36fd..b2ad965 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -623,7 +623,8 @@ megasas_alloc_request_fusion(struct megasas_instance *instance)
fusion->io_request_frames =
dma_pool_alloc(fusion->io_request_frames_pool,
- GFP_KERNEL, &fusion->io_request_frames_phys);
+ GFP_KERNEL | __GFP_NOWARN,
+ &fusion->io_request_frames_phys);
if (!fusion->io_request_frames) {
if (instance->max_fw_cmds >= (MEGASAS_REDUCE_QD_COUNT * 2)) {
instance->max_fw_cmds -= MEGASAS_REDUCE_QD_COUNT;
@@ -661,7 +662,7 @@ megasas_alloc_request_fusion(struct megasas_instance *instance)
fusion->io_request_frames =
dma_pool_alloc(fusion->io_request_frames_pool,
- GFP_KERNEL,
+ GFP_KERNEL | __GFP_NOWARN,
&fusion->io_request_frames_phys);
if (!fusion->io_request_frames) {
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index b520a98..7a94e11 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -864,7 +864,7 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
goto qc24_fail_command;
}
- if (atomic_read(&fcport->state) != FCS_ONLINE) {
+ if (atomic_read(&fcport->state) != FCS_ONLINE || fcport->deleted) {
if (atomic_read(&fcport->state) == FCS_DEVICE_DEAD ||
atomic_read(&base_vha->loop_state) == LOOP_DEAD) {
ql_dbg(ql_dbg_io, vha, 0x3005,
@@ -946,7 +946,7 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd,
goto qc24_fail_command;
}
- if (atomic_read(&fcport->state) != FCS_ONLINE) {
+ if (atomic_read(&fcport->state) != FCS_ONLINE || fcport->deleted) {
if (atomic_read(&fcport->state) == FCS_DEVICE_DEAD ||
atomic_read(&base_vha->loop_state) == LOOP_DEAD) {
ql_dbg(ql_dbg_io, vha, 0x3077,
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 44cb054..4c6c448 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -38,6 +38,7 @@
#include <linux/hrtimer.h>
#include <linux/uuid.h>
#include <linux/t10-pi.h>
+#include <linux/msdos_partition.h>
#include <net/checksum.h>
@@ -4146,7 +4147,7 @@ static int scsi_debug_host_reset(struct scsi_cmnd *SCpnt)
static void __init sdebug_build_parts(unsigned char *ramp,
unsigned long store_size)
{
- struct partition *pp;
+ struct msdos_partition *pp;
int starts[SDEBUG_MAX_PARTS + 2];
int sectors_per_part, num_sectors, k;
int heads_by_sects, start_sec, end_sec;
@@ -4171,7 +4172,7 @@ static void __init sdebug_build_parts(unsigned char *ramp,
ramp[510] = 0x55; /* magic partition markings */
ramp[511] = 0xAA;
- pp = (struct partition *)(ramp + 0x1be);
+ pp = (struct msdos_partition *)(ramp + 0x1be);
for (k = 0; starts[k + 1]; ++k, ++pp) {
start_sec = starts[k];
end_sec = starts[k + 1] - 1;
diff --git a/drivers/scsi/scsicam.c b/drivers/scsi/scsicam.c
index e969138..682cf08 100644
--- a/drivers/scsi/scsicam.c
+++ b/drivers/scsi/scsicam.c
@@ -17,14 +17,11 @@
#include <linux/genhd.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
+#include <linux/msdos_partition.h>
#include <asm/unaligned.h>
#include <scsi/scsicam.h>
-
-static int setsize(unsigned long capacity, unsigned int *cyls, unsigned int *hds,
- unsigned int *secs);
-
/**
* scsi_bios_ptable - Read PC partition table out of first sector of device.
* @dev: from this device
@@ -35,105 +32,48 @@ static int setsize(unsigned long capacity, unsigned int *cyls, unsigned int *hds
*/
unsigned char *scsi_bios_ptable(struct block_device *dev)
{
- unsigned char *res = kmalloc(66, GFP_KERNEL);
- if (res) {
- struct block_device *bdev = dev->bd_contains;
- Sector sect;
- void *data = read_dev_sector(bdev, 0, §);
- if (data) {
- memcpy(res, data + 0x1be, 66);
- put_dev_sector(sect);
- } else {
- kfree(res);
- res = NULL;
- }
- }
+ struct address_space *mapping = dev->bd_contains->bd_inode->i_mapping;
+ unsigned char *res = NULL;
+ struct page *page;
+
+ page = read_mapping_page(mapping, 0, NULL);
+ if (IS_ERR(page))
+ return NULL;
+
+ if (!PageError(page))
+ res = kmemdup(page_address(page) + 0x1be, 66, GFP_KERNEL);
+ put_page(page);
return res;
}
EXPORT_SYMBOL(scsi_bios_ptable);
/**
- * scsicam_bios_param - Determine geometry of a disk in cylinders/heads/sectors.
- * @bdev: which device
- * @capacity: size of the disk in sectors
- * @ip: return value: ip[0]=heads, ip[1]=sectors, ip[2]=cylinders
- *
- * Description : determine the BIOS mapping/geometry used for a drive in a
- * SCSI-CAM system, storing the results in ip as required
- * by the HDIO_GETGEO ioctl().
- *
- * Returns : -1 on failure, 0 on success.
- */
-
-int scsicam_bios_param(struct block_device *bdev, sector_t capacity, int *ip)
-{
- unsigned char *p;
- u64 capacity64 = capacity; /* Suppress gcc warning */
- int ret;
-
- p = scsi_bios_ptable(bdev);
- if (!p)
- return -1;
-
- /* try to infer mapping from partition table */
- ret = scsi_partsize(p, (unsigned long)capacity, (unsigned int *)ip + 2,
- (unsigned int *)ip + 0, (unsigned int *)ip + 1);
- kfree(p);
-
- if (ret == -1 && capacity64 < (1ULL << 32)) {
- /* pick some standard mapping with at most 1024 cylinders,
- and at most 62 sectors per track - this works up to
- 7905 MB */
- ret = setsize((unsigned long)capacity, (unsigned int *)ip + 2,
- (unsigned int *)ip + 0, (unsigned int *)ip + 1);
- }
-
- /* if something went wrong, then apparently we have to return
- a geometry with more than 1024 cylinders */
- if (ret || ip[0] > 255 || ip[1] > 63) {
- if ((capacity >> 11) > 65534) {
- ip[0] = 255;
- ip[1] = 63;
- } else {
- ip[0] = 64;
- ip[1] = 32;
- }
-
- if (capacity > 65535*63*255)
- ip[2] = 65535;
- else
- ip[2] = (unsigned long)capacity / (ip[0] * ip[1]);
- }
-
- return 0;
-}
-EXPORT_SYMBOL(scsicam_bios_param);
-
-/**
* scsi_partsize - Parse cylinders/heads/sectors from PC partition table
- * @buf: partition table, see scsi_bios_ptable()
+ * @bdev: block device to parse
* @capacity: size of the disk in sectors
- * @cyls: put cylinders here
- * @hds: put heads here
- * @secs: put sectors here
+ * @geom: output in form of [hds, cylinders, sectors]
*
* Determine the BIOS mapping/geometry used to create the partition
- * table, storing the results in @cyls, @hds, and @secs
+ * table, storing the results in @geom.
*
- * Returns: -1 on failure, 0 on success.
+ * Returns: %false on failure, %true on success.
*/
-
-int scsi_partsize(unsigned char *buf, unsigned long capacity,
- unsigned int *cyls, unsigned int *hds, unsigned int *secs)
+bool scsi_partsize(struct block_device *bdev, sector_t capacity, int geom[3])
{
- struct partition *p = (struct partition *)buf, *largest = NULL;
- int i, largest_cyl;
int cyl, ext_cyl, end_head, end_cyl, end_sector;
unsigned int logical_end, physical_end, ext_physical_end;
+ struct msdos_partition *p, *largest = NULL;
+ void *buf;
+ int ret = false;
+ buf = scsi_bios_ptable(bdev);
+ if (!buf)
+ return false;
if (*(unsigned short *) (buf + 64) == 0xAA55) {
- for (largest_cyl = -1, i = 0; i < 4; ++i, ++p) {
+ int largest_cyl = -1, i;
+
+ for (i = 0, p = buf; i < 4; i++, p++) {
if (!p->sys_ind)
continue;
#ifdef DEBUG
@@ -153,7 +93,7 @@ int scsi_partsize(unsigned char *buf, unsigned long capacity,
end_sector = largest->end_sector & 0x3f;
if (end_head + 1 == 0 || end_sector == 0)
- return -1;
+ goto out_free_buf;
#ifdef DEBUG
printk("scsicam_bios_param : end at h = %d, c = %d, s = %d\n",
@@ -178,19 +118,24 @@ int scsi_partsize(unsigned char *buf, unsigned long capacity,
,logical_end, physical_end, ext_physical_end, ext_cyl);
#endif
- if ((logical_end == physical_end) ||
- (end_cyl == 1023 && ext_physical_end == logical_end)) {
- *secs = end_sector;
- *hds = end_head + 1;
- *cyls = capacity / ((end_head + 1) * end_sector);
- return 0;
+ if (logical_end == physical_end ||
+ (end_cyl == 1023 && ext_physical_end == logical_end)) {
+ geom[0] = end_head + 1;
+ geom[1] = end_sector;
+ geom[2] = (unsigned long)capacity /
+ ((end_head + 1) * end_sector);
+ ret = true;
+ goto out_free_buf;
}
#ifdef DEBUG
printk("scsicam_bios_param : logical (%u) != physical (%u)\n",
logical_end, physical_end);
#endif
}
- return -1;
+
+out_free_buf:
+ kfree(buf);
+ return ret;
}
EXPORT_SYMBOL(scsi_partsize);
@@ -258,3 +203,56 @@ static int setsize(unsigned long capacity, unsigned int *cyls, unsigned int *hds
*hds = (unsigned int) heads;
return (rv);
}
+
+/**
+ * scsicam_bios_param - Determine geometry of a disk in cylinders/heads/sectors.
+ * @bdev: which device
+ * @capacity: size of the disk in sectors
+ * @ip: return value: ip[0]=heads, ip[1]=sectors, ip[2]=cylinders
+ *
+ * Description : determine the BIOS mapping/geometry used for a drive in a
+ * SCSI-CAM system, storing the results in ip as required
+ * by the HDIO_GETGEO ioctl().
+ *
+ * Returns : -1 on failure, 0 on success.
+ */
+int scsicam_bios_param(struct block_device *bdev, sector_t capacity, int *ip)
+{
+ u64 capacity64 = capacity; /* Suppress gcc warning */
+ int ret = 0;
+
+ /* try to infer mapping from partition table */
+ if (scsi_partsize(bdev, capacity, ip))
+ return 0;
+
+ if (capacity64 < (1ULL << 32)) {
+ /*
+ * Pick some standard mapping with at most 1024 cylinders, and
+ * at most 62 sectors per track - this works up to 7905 MB.
+ */
+ ret = setsize((unsigned long)capacity, (unsigned int *)ip + 2,
+ (unsigned int *)ip + 0, (unsigned int *)ip + 1);
+ }
+
+ /*
+ * If something went wrong, then apparently we have to return a geometry
+ * with more than 1024 cylinders.
+ */
+ if (ret || ip[0] > 255 || ip[1] > 63) {
+ if ((capacity >> 11) > 65534) {
+ ip[0] = 255;
+ ip[1] = 63;
+ } else {
+ ip[0] = 64;
+ ip[1] = 32;
+ }
+
+ if (capacity > 65535*63*255)
+ ip[2] = 65535;
+ else
+ ip[2] = (unsigned long)capacity / (ip[0] * ip[1]);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(scsicam_bios_param);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 8ca9299..a793cb0 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3169,9 +3169,11 @@ static int sd_revalidate_disk(struct gendisk *disk)
if (sd_validate_opt_xfer_size(sdkp, dev_max)) {
q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
- } else
+ } else {
+ q->limits.io_opt = 0;
rw_max = min_not_zero(logical_to_sectors(sdp, dev_max),
(sector_t)BLK_DEF_MAX_SECTORS);
+ }
/* Do not exceed controller limit */
rw_max = min(rw_max, queue_max_hw_sectors(q));
@@ -3187,7 +3189,8 @@ static int sd_revalidate_disk(struct gendisk *disk)
sdkp->first_scan = 0;
- set_capacity(disk, logical_to_sectors(sdp, sdkp->capacity));
+ set_capacity_revalidate_and_notify(disk,
+ logical_to_sectors(sdp, sdkp->capacity), false);
sd_config_write_same(sdkp);
kfree(buffer);
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index e4282bc..f45c22b 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -161,6 +161,7 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data)
{
struct scsi_disk *sdkp = scsi_disk(disk);
+ sector_t capacity = logical_to_sectors(sdkp->device, sdkp->capacity);
unsigned int nr, i;
unsigned char *buf;
size_t offset, buflen = 0;
@@ -171,11 +172,15 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
/* Not a zoned device */
return -EOPNOTSUPP;
+ if (!capacity)
+ /* Device gone or invalid */
+ return -ENODEV;
+
buf = sd_zbc_alloc_report_buffer(sdkp, nr_zones, &buflen);
if (!buf)
return -ENOMEM;
- while (zone_idx < nr_zones && sector < get_capacity(disk)) {
+ while (zone_idx < nr_zones && sector < capacity) {
ret = sd_zbc_do_report_zones(sdkp, buf, buflen,
sectors_to_logical(sdkp->device, sector), true);
if (ret)
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 0fbb8fe..e4240e4 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -688,7 +688,7 @@ static const struct block_device_operations sr_bdops =
.release = sr_block_release,
.ioctl = sr_block_ioctl,
#ifdef CONFIG_COMPAT
- .ioctl = sr_block_compat_ioctl,
+ .compat_ioctl = sr_block_compat_ioctl,
#endif
.check_events = sr_block_check_events,
.revalidate_disk = sr_block_revalidate_disk,
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index abd0e6b..2d705694 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -3884,18 +3884,25 @@ EXPORT_SYMBOL_GPL(ufshcd_uic_hibern8_exit);
void ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit)
{
unsigned long flags;
+ bool update = false;
- if (!(hba->capabilities & MASK_AUTO_HIBERN8_SUPPORT))
+ if (!ufshcd_is_auto_hibern8_supported(hba))
return;
spin_lock_irqsave(hba->host->host_lock, flags);
- if (hba->ahit == ahit)
- goto out_unlock;
- hba->ahit = ahit;
- if (!pm_runtime_suspended(hba->dev))
- ufshcd_writel(hba, hba->ahit, REG_AUTO_HIBERNATE_IDLE_TIMER);
-out_unlock:
+ if (hba->ahit != ahit) {
+ hba->ahit = ahit;
+ update = true;
+ }
spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+ if (update && !pm_runtime_suspended(hba->dev)) {
+ pm_runtime_get_sync(hba->dev);
+ ufshcd_hold(hba, false);
+ ufshcd_auto_hibern8_enable(hba);
+ ufshcd_release(hba);
+ pm_runtime_put(hba->dev);
+ }
}
EXPORT_SYMBOL_GPL(ufshcd_auto_hibern8_update);
diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c
index e3f5ebc..fc2575f 100644
--- a/drivers/slimbus/qcom-ngd-ctrl.c
+++ b/drivers/slimbus/qcom-ngd-ctrl.c
@@ -1320,6 +1320,9 @@ static const struct of_device_id qcom_slim_ngd_dt_match[] = {
{
.compatible = "qcom,slim-ngd-v1.5.0",
.data = &ngd_v1_5_offset_info,
+ },{
+ .compatible = "qcom,slim-ngd-v2.1.0",
+ .data = &ngd_v1_5_offset_info,
},
{}
};
diff --git a/drivers/soc/fsl/dpio/dpio-driver.c b/drivers/soc/fsl/dpio/dpio-driver.c
index 70014ec..7b642c3 100644
--- a/drivers/soc/fsl/dpio/dpio-driver.c
+++ b/drivers/soc/fsl/dpio/dpio-driver.c
@@ -233,10 +233,6 @@ static int dpaa2_dpio_probe(struct fsl_mc_device *dpio_dev)
goto err_allocate_irqs;
}
- err = register_dpio_irq_handlers(dpio_dev, desc.cpu);
- if (err)
- goto err_register_dpio_irq;
-
priv->io = dpaa2_io_create(&desc, dev);
if (!priv->io) {
dev_err(dev, "dpaa2_io_create failed\n");
@@ -244,6 +240,10 @@ static int dpaa2_dpio_probe(struct fsl_mc_device *dpio_dev)
goto err_dpaa2_io_create;
}
+ err = register_dpio_irq_handlers(dpio_dev, desc.cpu);
+ if (err)
+ goto err_register_dpio_irq;
+
dev_info(dev, "probed\n");
dev_dbg(dev, " receives_notifications = %d\n",
desc.receives_notifications);
diff --git a/drivers/soc/imx/soc-imx-scu.c b/drivers/soc/imx/soc-imx-scu.c
index fb70b8a..20d37ea 100644
--- a/drivers/soc/imx/soc-imx-scu.c
+++ b/drivers/soc/imx/soc-imx-scu.c
@@ -25,7 +25,7 @@ struct imx_sc_msg_misc_get_soc_id {
u32 id;
} resp;
} data;
-} __packed;
+} __packed __aligned(4);
struct imx_sc_msg_misc_get_soc_uid {
struct imx_sc_rpc_msg hdr;
diff --git a/drivers/soc/samsung/exynos-chipid.c b/drivers/soc/samsung/exynos-chipid.c
index 2dad496..8d4d050 100644
--- a/drivers/soc/samsung/exynos-chipid.c
+++ b/drivers/soc/samsung/exynos-chipid.c
@@ -59,7 +59,7 @@ static int __init exynos_chipid_early_init(void)
syscon = of_find_compatible_node(NULL, NULL,
"samsung,exynos4210-chipid");
if (!syscon)
- return ENODEV;
+ return -ENODEV;
regmap = device_node_to_regmap(syscon);
of_node_put(syscon);
diff --git a/drivers/soc/tegra/fuse/fuse-tegra30.c b/drivers/soc/tegra/fuse/fuse-tegra30.c
index f68f4e1..e6037f9 100644
--- a/drivers/soc/tegra/fuse/fuse-tegra30.c
+++ b/drivers/soc/tegra/fuse/fuse-tegra30.c
@@ -36,7 +36,8 @@
defined(CONFIG_ARCH_TEGRA_124_SOC) || \
defined(CONFIG_ARCH_TEGRA_132_SOC) || \
defined(CONFIG_ARCH_TEGRA_210_SOC) || \
- defined(CONFIG_ARCH_TEGRA_186_SOC)
+ defined(CONFIG_ARCH_TEGRA_186_SOC) || \
+ defined(CONFIG_ARCH_TEGRA_194_SOC)
static u32 tegra30_fuse_read_early(struct tegra_fuse *fuse, unsigned int offset)
{
if (WARN_ON(!fuse->base))
diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c
index fd8007e..13def7f 100644
--- a/drivers/spi/atmel-quadspi.c
+++ b/drivers/spi/atmel-quadspi.c
@@ -149,6 +149,7 @@ struct atmel_qspi {
struct clk *qspick;
struct platform_device *pdev;
const struct atmel_qspi_caps *caps;
+ resource_size_t mmap_size;
u32 pending;
u32 mr;
u32 scr;
@@ -329,6 +330,14 @@ static int atmel_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
u32 sr, offset;
int err;
+ /*
+ * Check if the address exceeds the MMIO window size. An improvement
+ * would be to add support for regular SPI mode and fall back to it
+ * when the flash memories overrun the controller's memory space.
+ */
+ if (op->addr.val + op->data.nbytes > aq->mmap_size)
+ return -ENOTSUPP;
+
err = atmel_qspi_set_cfg(aq, op, &offset);
if (err)
return err;
@@ -480,6 +489,8 @@ static int atmel_qspi_probe(struct platform_device *pdev)
goto exit;
}
+ aq->mmap_size = resource_size(res);
+
/* Get the peripheral clock */
aq->pclk = devm_clk_get(&pdev->dev, "pclk");
if (IS_ERR(aq->pclk))
diff --git a/drivers/spi/spi-bcm63xx-hsspi.c b/drivers/spi/spi-bcm63xx-hsspi.c
index 7327309..6c23530 100644
--- a/drivers/spi/spi-bcm63xx-hsspi.c
+++ b/drivers/spi/spi-bcm63xx-hsspi.c
@@ -366,7 +366,6 @@ static int bcm63xx_hsspi_probe(struct platform_device *pdev)
goto out_disable_clk;
rate = clk_get_rate(pll_clk);
- clk_disable_unprepare(pll_clk);
if (!rate) {
ret = -EINVAL;
goto out_disable_pll_clk;
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 7e2292c..e9e2567 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -130,6 +130,7 @@ struct omap2_mcspi {
int fifo_depth;
bool slave_aborted;
unsigned int pin_dir:1;
+ size_t max_xfer_len;
};
struct omap2_mcspi_cs {
@@ -974,20 +975,12 @@ static int omap2_mcspi_setup_transfer(struct spi_device *spi,
* Note that we currently allow DMA only if we get a channel
* for both rx and tx. Otherwise we'll do PIO for both rx and tx.
*/
-static int omap2_mcspi_request_dma(struct spi_device *spi)
+static int omap2_mcspi_request_dma(struct omap2_mcspi *mcspi,
+ struct omap2_mcspi_dma *mcspi_dma)
{
- struct spi_master *master = spi->master;
- struct omap2_mcspi *mcspi;
- struct omap2_mcspi_dma *mcspi_dma;
int ret = 0;
- mcspi = spi_master_get_devdata(master);
- mcspi_dma = mcspi->dma_channels + spi->chip_select;
-
- init_completion(&mcspi_dma->dma_rx_completion);
- init_completion(&mcspi_dma->dma_tx_completion);
-
- mcspi_dma->dma_rx = dma_request_chan(&master->dev,
+ mcspi_dma->dma_rx = dma_request_chan(mcspi->dev,
mcspi_dma->dma_rx_ch_name);
if (IS_ERR(mcspi_dma->dma_rx)) {
ret = PTR_ERR(mcspi_dma->dma_rx);
@@ -995,7 +988,7 @@ static int omap2_mcspi_request_dma(struct spi_device *spi)
goto no_dma;
}
- mcspi_dma->dma_tx = dma_request_chan(&master->dev,
+ mcspi_dma->dma_tx = dma_request_chan(mcspi->dev,
mcspi_dma->dma_tx_ch_name);
if (IS_ERR(mcspi_dma->dma_tx)) {
ret = PTR_ERR(mcspi_dma->dma_tx);
@@ -1004,20 +997,40 @@ static int omap2_mcspi_request_dma(struct spi_device *spi)
mcspi_dma->dma_rx = NULL;
}
+ init_completion(&mcspi_dma->dma_rx_completion);
+ init_completion(&mcspi_dma->dma_tx_completion);
+
no_dma:
return ret;
}
+static void omap2_mcspi_release_dma(struct spi_master *master)
+{
+ struct omap2_mcspi *mcspi = spi_master_get_devdata(master);
+ struct omap2_mcspi_dma *mcspi_dma;
+ int i;
+
+ for (i = 0; i < master->num_chipselect; i++) {
+ mcspi_dma = &mcspi->dma_channels[i];
+
+ if (mcspi_dma->dma_rx) {
+ dma_release_channel(mcspi_dma->dma_rx);
+ mcspi_dma->dma_rx = NULL;
+ }
+ if (mcspi_dma->dma_tx) {
+ dma_release_channel(mcspi_dma->dma_tx);
+ mcspi_dma->dma_tx = NULL;
+ }
+ }
+}
+
static int omap2_mcspi_setup(struct spi_device *spi)
{
int ret;
struct omap2_mcspi *mcspi = spi_master_get_devdata(spi->master);
struct omap2_mcspi_regs *ctx = &mcspi->ctx;
- struct omap2_mcspi_dma *mcspi_dma;
struct omap2_mcspi_cs *cs = spi->controller_state;
- mcspi_dma = &mcspi->dma_channels[spi->chip_select];
-
if (!cs) {
cs = kzalloc(sizeof *cs, GFP_KERNEL);
if (!cs)
@@ -1042,13 +1055,6 @@ static int omap2_mcspi_setup(struct spi_device *spi)
}
}
- if (!mcspi_dma->dma_rx || !mcspi_dma->dma_tx) {
- ret = omap2_mcspi_request_dma(spi);
- if (ret)
- dev_warn(&spi->dev, "not using DMA for McSPI (%d)\n",
- ret);
- }
-
ret = pm_runtime_get_sync(mcspi->dev);
if (ret < 0) {
pm_runtime_put_noidle(mcspi->dev);
@@ -1065,12 +1071,8 @@ static int omap2_mcspi_setup(struct spi_device *spi)
static void omap2_mcspi_cleanup(struct spi_device *spi)
{
- struct omap2_mcspi *mcspi;
- struct omap2_mcspi_dma *mcspi_dma;
struct omap2_mcspi_cs *cs;
- mcspi = spi_master_get_devdata(spi->master);
-
if (spi->controller_state) {
/* Unlink controller state from context save list */
cs = spi->controller_state;
@@ -1079,19 +1081,6 @@ static void omap2_mcspi_cleanup(struct spi_device *spi)
kfree(cs);
}
- if (spi->chip_select < spi->master->num_chipselect) {
- mcspi_dma = &mcspi->dma_channels[spi->chip_select];
-
- if (mcspi_dma->dma_rx) {
- dma_release_channel(mcspi_dma->dma_rx);
- mcspi_dma->dma_rx = NULL;
- }
- if (mcspi_dma->dma_tx) {
- dma_release_channel(mcspi_dma->dma_tx);
- mcspi_dma->dma_tx = NULL;
- }
- }
-
if (gpio_is_valid(spi->cs_gpio))
gpio_free(spi->cs_gpio);
}
@@ -1302,9 +1291,24 @@ static bool omap2_mcspi_can_dma(struct spi_master *master,
if (spi_controller_is_slave(master))
return true;
+ master->dma_rx = mcspi_dma->dma_rx;
+ master->dma_tx = mcspi_dma->dma_tx;
+
return (xfer->len >= DMA_MIN_BYTES);
}
+static size_t omap2_mcspi_max_xfer_size(struct spi_device *spi)
+{
+ struct omap2_mcspi *mcspi = spi_master_get_devdata(spi->master);
+ struct omap2_mcspi_dma *mcspi_dma =
+ &mcspi->dma_channels[spi->chip_select];
+
+ if (mcspi->max_xfer_len && mcspi_dma->dma_rx)
+ return mcspi->max_xfer_len;
+
+ return SIZE_MAX;
+}
+
static int omap2_mcspi_controller_setup(struct omap2_mcspi *mcspi)
{
struct spi_master *master = mcspi->master;
@@ -1373,6 +1377,11 @@ static struct omap2_mcspi_platform_config omap4_pdata = {
.regs_offset = OMAP4_MCSPI_REG_OFFSET,
};
+static struct omap2_mcspi_platform_config am654_pdata = {
+ .regs_offset = OMAP4_MCSPI_REG_OFFSET,
+ .max_xfer_len = SZ_4K - 1,
+};
+
static const struct of_device_id omap_mcspi_of_match[] = {
{
.compatible = "ti,omap2-mcspi",
@@ -1382,6 +1391,10 @@ static const struct of_device_id omap_mcspi_of_match[] = {
.compatible = "ti,omap4-mcspi",
.data = &omap4_pdata,
},
+ {
+ .compatible = "ti,am654-mcspi",
+ .data = &am654_pdata,
+ },
{ },
};
MODULE_DEVICE_TABLE(of, omap_mcspi_of_match);
@@ -1439,6 +1452,10 @@ static int omap2_mcspi_probe(struct platform_device *pdev)
mcspi->pin_dir = pdata->pin_dir;
}
regs_offset = pdata->regs_offset;
+ if (pdata->max_xfer_len) {
+ mcspi->max_xfer_len = pdata->max_xfer_len;
+ master->max_transfer_size = omap2_mcspi_max_xfer_size;
+ }
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
mcspi->base = devm_ioremap_resource(&pdev->dev, r);
@@ -1464,6 +1481,11 @@ static int omap2_mcspi_probe(struct platform_device *pdev)
for (i = 0; i < master->num_chipselect; i++) {
sprintf(mcspi->dma_channels[i].dma_rx_ch_name, "rx%d", i);
sprintf(mcspi->dma_channels[i].dma_tx_ch_name, "tx%d", i);
+
+ status = omap2_mcspi_request_dma(mcspi,
+ &mcspi->dma_channels[i]);
+ if (status == -EPROBE_DEFER)
+ goto free_master;
}
status = platform_get_irq(pdev, 0);
@@ -1501,6 +1523,7 @@ static int omap2_mcspi_probe(struct platform_device *pdev)
pm_runtime_put_sync(&pdev->dev);
pm_runtime_disable(&pdev->dev);
free_master:
+ omap2_mcspi_release_dma(master);
spi_master_put(master);
return status;
}
@@ -1510,6 +1533,8 @@ static int omap2_mcspi_remove(struct platform_device *pdev)
struct spi_master *master = platform_get_drvdata(pdev);
struct omap2_mcspi *mcspi = spi_master_get_devdata(master);
+ omap2_mcspi_release_dma(master);
+
pm_runtime_dont_use_autosuspend(mcspi->dev);
pm_runtime_put_sync(mcspi->dev);
pm_runtime_disable(&pdev->dev);
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 4c7a71f..2e31815 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -70,6 +70,10 @@ MODULE_ALIAS("platform:pxa2xx-spi");
#define LPSS_CAPS_CS_EN_SHIFT 9
#define LPSS_CAPS_CS_EN_MASK (0xf << LPSS_CAPS_CS_EN_SHIFT)
+#define LPSS_PRIV_CLOCK_GATE 0x38
+#define LPSS_PRIV_CLOCK_GATE_CLK_CTL_MASK 0x3
+#define LPSS_PRIV_CLOCK_GATE_CLK_CTL_FORCE_ON 0x3
+
struct lpss_config {
/* LPSS offset from drv_data->ioaddr */
unsigned offset;
@@ -86,6 +90,8 @@ struct lpss_config {
unsigned cs_sel_shift;
unsigned cs_sel_mask;
unsigned cs_num;
+ /* Quirks */
+ unsigned cs_clk_stays_gated : 1;
};
/* Keep these sorted with enum pxa_ssp_type */
@@ -156,6 +162,7 @@ static const struct lpss_config lpss_platforms[] = {
.tx_threshold_hi = 56,
.cs_sel_shift = 8,
.cs_sel_mask = 3 << 8,
+ .cs_clk_stays_gated = true,
},
};
@@ -383,6 +390,22 @@ static void lpss_ssp_cs_control(struct spi_device *spi, bool enable)
else
value |= LPSS_CS_CONTROL_CS_HIGH;
__lpss_ssp_write_priv(drv_data, config->reg_cs_ctrl, value);
+ if (config->cs_clk_stays_gated) {
+ u32 clkgate;
+
+ /*
+ * Changing CS alone when dynamic clock gating is on won't
+ * actually flip CS at that time. This ruins SPI transfers
+ * that specify delays, or have no data. Toggle the clock mode
+ * to force on briefly to poke the CS pin to move.
+ */
+ clkgate = __lpss_ssp_read_priv(drv_data, LPSS_PRIV_CLOCK_GATE);
+ value = (clkgate & ~LPSS_PRIV_CLOCK_GATE_CLK_CTL_MASK) |
+ LPSS_PRIV_CLOCK_GATE_CLK_CTL_FORCE_ON;
+
+ __lpss_ssp_write_priv(drv_data, LPSS_PRIV_CLOCK_GATE, value);
+ __lpss_ssp_write_priv(drv_data, LPSS_PRIV_CLOCK_GATE, clkgate);
+ }
}
static void cs_assert(struct spi_device *spi)
diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c
index dd3434a..a364b99 100644
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c
@@ -1217,6 +1217,11 @@ static int spi_qup_suspend(struct device *device)
struct spi_qup *controller = spi_master_get_devdata(master);
int ret;
+ if (pm_runtime_suspended(device)) {
+ ret = spi_qup_pm_resume_runtime(device);
+ if (ret)
+ return ret;
+ }
ret = spi_master_suspend(master);
if (ret)
return ret;
@@ -1225,10 +1230,8 @@ static int spi_qup_suspend(struct device *device)
if (ret)
return ret;
- if (!pm_runtime_suspended(device)) {
- clk_disable_unprepare(controller->cclk);
- clk_disable_unprepare(controller->iclk);
- }
+ clk_disable_unprepare(controller->cclk);
+ clk_disable_unprepare(controller->iclk);
return 0;
}
diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c
index 60c4de4..7412a30 100644
--- a/drivers/spi/spi-zynqmp-gqspi.c
+++ b/drivers/spi/spi-zynqmp-gqspi.c
@@ -401,9 +401,6 @@ static void zynqmp_qspi_chipselect(struct spi_device *qspi, bool is_high)
zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, genfifoentry);
- /* Dummy generic FIFO entry */
- zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, 0x0);
-
/* Manually start the generic FIFO command */
zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST) |
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 38b4c78..755221b 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -2639,7 +2639,7 @@ int spi_register_controller(struct spi_controller *ctlr)
if (ctlr->use_gpio_descriptors) {
status = spi_get_gpio_descs(ctlr);
if (status)
- return status;
+ goto free_bus_id;
/*
* A controller using GPIO descriptors always
* supports SPI_CS_HIGH if need be.
@@ -2649,7 +2649,7 @@ int spi_register_controller(struct spi_controller *ctlr)
/* Legacy code path for GPIOs from DT */
status = of_spi_get_gpio_numbers(ctlr);
if (status)
- return status;
+ goto free_bus_id;
}
}
@@ -2657,17 +2657,14 @@ int spi_register_controller(struct spi_controller *ctlr)
* Even if it's just one always-selected device, there must
* be at least one chipselect.
*/
- if (!ctlr->num_chipselect)
- return -EINVAL;
+ if (!ctlr->num_chipselect) {
+ status = -EINVAL;
+ goto free_bus_id;
+ }
status = device_add(&ctlr->dev);
- if (status < 0) {
- /* free bus id */
- mutex_lock(&board_lock);
- idr_remove(&spi_master_idr, ctlr->bus_num);
- mutex_unlock(&board_lock);
- goto done;
- }
+ if (status < 0)
+ goto free_bus_id;
dev_dbg(dev, "registered %s %s\n",
spi_controller_is_slave(ctlr) ? "slave" : "master",
dev_name(&ctlr->dev));
@@ -2683,11 +2680,7 @@ int spi_register_controller(struct spi_controller *ctlr)
status = spi_controller_initialize_queue(ctlr);
if (status) {
device_del(&ctlr->dev);
- /* free bus id */
- mutex_lock(&board_lock);
- idr_remove(&spi_master_idr, ctlr->bus_num);
- mutex_unlock(&board_lock);
- goto done;
+ goto free_bus_id;
}
}
/* add statistics */
@@ -2702,7 +2695,12 @@ int spi_register_controller(struct spi_controller *ctlr)
/* Register devices from the device tree and ACPI */
of_register_spi_devices(ctlr);
acpi_register_spi_devices(ctlr);
-done:
+ return status;
+
+free_bus_id:
+ mutex_lock(&board_lock);
+ idr_remove(&spi_master_idr, ctlr->bus_num);
+ mutex_unlock(&board_lock);
return status;
}
EXPORT_SYMBOL_GPL(spi_register_controller);
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index 1e217e3..2ab6e78 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -396,6 +396,7 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
else
retval = get_user(tmp, (u32 __user *)arg);
if (retval == 0) {
+ struct spi_controller *ctlr = spi->controller;
u32 save = spi->mode;
if (tmp & ~SPI_MODE_MASK) {
@@ -403,6 +404,10 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
break;
}
+ if (ctlr->use_gpio_descriptors && ctlr->cs_gpiods &&
+ ctlr->cs_gpiods[spi->chip_select])
+ tmp |= SPI_CS_HIGH;
+
tmp |= spi->mode & ~SPI_MODE_MASK;
spi->mode = (u16)tmp;
retval = spi_setup(spi);
diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c
index 97acc2b..de844b4 100644
--- a/drivers/spmi/spmi-pmic-arb.c
+++ b/drivers/spmi/spmi-pmic-arb.c
@@ -731,6 +731,7 @@ static int qpnpint_irq_domain_translate(struct irq_domain *d,
return 0;
}
+static struct lock_class_key qpnpint_irq_lock_class, qpnpint_irq_request_class;
static void qpnpint_irq_domain_map(struct spmi_pmic_arb *pmic_arb,
struct irq_domain *domain, unsigned int virq,
@@ -746,6 +747,9 @@ static void qpnpint_irq_domain_map(struct spmi_pmic_arb *pmic_arb,
else
handler = handle_level_irq;
+
+ irq_set_lockdep_class(virq, &qpnpint_irq_lock_class,
+ &qpnpint_irq_request_class);
irq_domain_set_info(domain, virq, hwirq, &pmic_arb_irqchip, pmic_arb,
handler, NULL, NULL);
}
diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig
index d6d605d..8d8fd5c 100644
--- a/drivers/staging/android/Kconfig
+++ b/drivers/staging/android/Kconfig
@@ -14,14 +14,6 @@
It is, in theory, a good memory allocator for low-memory devices,
because it can discard shared memory units when under memory pressure.
-config ANDROID_VSOC
- tristate "Android Virtual SoC support"
- depends on PCI_MSI
- help
- This option adds support for the Virtual SoC driver needed to boot
- a 'cuttlefish' Android image inside QEmu. The driver interacts with
- a QEmu ivshmem device. If built as a module, it will be called vsoc.
-
source "drivers/staging/android/ion/Kconfig"
endif # if ANDROID
diff --git a/drivers/staging/android/Makefile b/drivers/staging/android/Makefile
index 14bd9c6..3b66cd0 100644
--- a/drivers/staging/android/Makefile
+++ b/drivers/staging/android/Makefile
@@ -4,4 +4,3 @@
obj-y += ion/
obj-$(CONFIG_ASHMEM) += ashmem.o
-obj-$(CONFIG_ANDROID_VSOC) += vsoc.o
diff --git a/drivers/staging/android/TODO b/drivers/staging/android/TODO
index 767dd98..80eccfa 100644
--- a/drivers/staging/android/TODO
+++ b/drivers/staging/android/TODO
@@ -9,14 +9,5 @@
- Split /dev/ion up into multiple nodes (e.g. /dev/ion/heap0)
- Better test framework (integration with VGEM was suggested)
-vsoc.c, uapi/vsoc_shm.h
- - The current driver uses the same wait queue for all of the futexes in a
- region. This will cause false wakeups in regions with a large number of
- waiting threads. We should eventually use multiple queues and select the
- queue based on the region.
- - Add debugfs support for examining the permissions of regions.
- - Remove VSOC_WAIT_FOR_INCOMING_INTERRUPT ioctl. This functionality has been
- superseded by the futex and is there for legacy reasons.
-
Please send patches to Greg Kroah-Hartman <greg@kroah.com> and Cc:
Arve Hjønnevåg <arve@android.com> and Riley Andrews <riandrews@android.com>
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index 5891d07..8044510 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -351,8 +351,23 @@ static inline vm_flags_t calc_vm_may_flags(unsigned long prot)
_calc_vm_trans(prot, PROT_EXEC, VM_MAYEXEC);
}
+static int ashmem_vmfile_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ /* do not allow to mmap ashmem backing shmem file directly */
+ return -EPERM;
+}
+
+static unsigned long
+ashmem_vmfile_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+}
+
static int ashmem_mmap(struct file *file, struct vm_area_struct *vma)
{
+ static struct file_operations vmfile_fops;
struct ashmem_area *asma = file->private_data;
int ret = 0;
@@ -393,6 +408,19 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma)
}
vmfile->f_mode |= FMODE_LSEEK;
asma->file = vmfile;
+ /*
+ * override mmap operation of the vmfile so that it can't be
+ * remapped which would lead to creation of a new vma with no
+ * asma permission checks. Have to override get_unmapped_area
+ * as well to prevent VM_BUG_ON check for f_ops modification.
+ */
+ if (!vmfile_fops.mmap) {
+ vmfile_fops = *vmfile->f_op;
+ vmfile_fops.mmap = ashmem_vmfile_mmap;
+ vmfile_fops.get_unmapped_area =
+ ashmem_vmfile_get_unmapped_area;
+ }
+ vmfile->f_op = &vmfile_fops;
}
get_file(asma->file);
diff --git a/drivers/staging/android/uapi/vsoc_shm.h b/drivers/staging/android/uapi/vsoc_shm.h
deleted file mode 100644
index 6291fb2..0000000
--- a/drivers/staging/android/uapi/vsoc_shm.h
+++ /dev/null
@@ -1,295 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2017 Google, Inc.
- *
- */
-
-#ifndef _UAPI_LINUX_VSOC_SHM_H
-#define _UAPI_LINUX_VSOC_SHM_H
-
-#include <linux/types.h>
-
-/**
- * A permission is a token that permits a receiver to read and/or write an area
- * of memory within a Vsoc region.
- *
- * An fd_scoped permission grants both read and write access, and can be
- * attached to a file description (see open(2)).
- * Ownership of the area can then be shared by passing a file descriptor
- * among processes.
- *
- * begin_offset and end_offset define the area of memory that is controlled by
- * the permission. owner_offset points to a word, also in shared memory, that
- * controls ownership of the area.
- *
- * ownership of the region expires when the associated file description is
- * released.
- *
- * At most one permission can be attached to each file description.
- *
- * This is useful when implementing HALs like gralloc that scope and pass
- * ownership of shared resources via file descriptors.
- *
- * The caller is responsibe for doing any fencing.
- *
- * The calling process will normally identify a currently free area of
- * memory. It will construct a proposed fd_scoped_permission_arg structure:
- *
- * begin_offset and end_offset describe the area being claimed
- *
- * owner_offset points to the location in shared memory that indicates the
- * owner of the area.
- *
- * owned_value is the value that will be stored in owner_offset iff the
- * permission can be granted. It must be different than VSOC_REGION_FREE.
- *
- * Two fd_scoped_permission structures are compatible if they vary only by
- * their owned_value fields.
- *
- * The driver ensures that, for any group of simultaneous callers proposing
- * compatible fd_scoped_permissions, it will accept exactly one of the
- * propopsals. The other callers will get a failure with errno of EAGAIN.
- *
- * A process receiving a file descriptor can identify the region being
- * granted using the VSOC_GET_FD_SCOPED_PERMISSION ioctl.
- */
-struct fd_scoped_permission {
- __u32 begin_offset;
- __u32 end_offset;
- __u32 owner_offset;
- __u32 owned_value;
-};
-
-/*
- * This value represents a free area of memory. The driver expects to see this
- * value at owner_offset when creating a permission otherwise it will not do it,
- * and will write this value back once the permission is no longer needed.
- */
-#define VSOC_REGION_FREE ((__u32)0)
-
-/**
- * ioctl argument for VSOC_CREATE_FD_SCOPE_PERMISSION
- */
-struct fd_scoped_permission_arg {
- struct fd_scoped_permission perm;
- __s32 managed_region_fd;
-};
-
-#define VSOC_NODE_FREE ((__u32)0)
-
-/*
- * Describes a signal table in shared memory. Each non-zero entry in the
- * table indicates that the receiver should signal the futex at the given
- * offset. Offsets are relative to the region, not the shared memory window.
- *
- * interrupt_signalled_offset is used to reliably signal interrupts across the
- * vmm boundary. There are two roles: transmitter and receiver. For example,
- * in the host_to_guest_signal_table the host is the transmitter and the
- * guest is the receiver. The protocol is as follows:
- *
- * 1. The transmitter should convert the offset of the futex to an offset
- * in the signal table [0, (1 << num_nodes_lg2))
- * The transmitter can choose any appropriate hashing algorithm, including
- * hash = futex_offset & ((1 << num_nodes_lg2) - 1)
- *
- * 3. The transmitter should atomically compare and swap futex_offset with 0
- * at hash. There are 3 possible outcomes
- * a. The swap fails because the futex_offset is already in the table.
- * The transmitter should stop.
- * b. Some other offset is in the table. This is a hash collision. The
- * transmitter should move to another table slot and try again. One
- * possible algorithm:
- * hash = (hash + 1) & ((1 << num_nodes_lg2) - 1)
- * c. The swap worked. Continue below.
- *
- * 3. The transmitter atomically swaps 1 with the value at the
- * interrupt_signalled_offset. There are two outcomes:
- * a. The prior value was 1. In this case an interrupt has already been
- * posted. The transmitter is done.
- * b. The prior value was 0, indicating that the receiver may be sleeping.
- * The transmitter will issue an interrupt.
- *
- * 4. On waking the receiver immediately exchanges a 0 with the
- * interrupt_signalled_offset. If it receives a 0 then this a spurious
- * interrupt. That may occasionally happen in the current protocol, but
- * should be rare.
- *
- * 5. The receiver scans the signal table by atomicaly exchanging 0 at each
- * location. If a non-zero offset is returned from the exchange the
- * receiver wakes all sleepers at the given offset:
- * futex((int*)(region_base + old_value), FUTEX_WAKE, MAX_INT);
- *
- * 6. The receiver thread then does a conditional wait, waking immediately
- * if the value at interrupt_signalled_offset is non-zero. This catches cases
- * here additional signals were posted while the table was being scanned.
- * On the guest the wait is handled via the VSOC_WAIT_FOR_INCOMING_INTERRUPT
- * ioctl.
- */
-struct vsoc_signal_table_layout {
- /* log_2(Number of signal table entries) */
- __u32 num_nodes_lg2;
- /*
- * Offset to the first signal table entry relative to the start of the
- * region
- */
- __u32 futex_uaddr_table_offset;
- /*
- * Offset to an atomic_t / atomic uint32_t. A non-zero value indicates
- * that one or more offsets are currently posted in the table.
- * semi-unique access to an entry in the table
- */
- __u32 interrupt_signalled_offset;
-};
-
-#define VSOC_REGION_WHOLE ((__s32)0)
-#define VSOC_DEVICE_NAME_SZ 16
-
-/**
- * Each HAL would (usually) talk to a single device region
- * Mulitple entities care about these regions:
- * - The ivshmem_server will populate the regions in shared memory
- * - The guest kernel will read the region, create minor device nodes, and
- * allow interested parties to register for FUTEX_WAKE events in the region
- * - HALs will access via the minor device nodes published by the guest kernel
- * - Host side processes will access the region via the ivshmem_server:
- * 1. Pass name to ivshmem_server at a UNIX socket
- * 2. ivshmemserver will reply with 2 fds:
- * - host->guest doorbell fd
- * - guest->host doorbell fd
- * - fd for the shared memory region
- * - region offset
- * 3. Start a futex receiver thread on the doorbell fd pointed at the
- * signal_nodes
- */
-struct vsoc_device_region {
- __u16 current_version;
- __u16 min_compatible_version;
- __u32 region_begin_offset;
- __u32 region_end_offset;
- __u32 offset_of_region_data;
- struct vsoc_signal_table_layout guest_to_host_signal_table;
- struct vsoc_signal_table_layout host_to_guest_signal_table;
- /* Name of the device. Must always be terminated with a '\0', so
- * the longest supported device name is 15 characters.
- */
- char device_name[VSOC_DEVICE_NAME_SZ];
- /* There are two ways that permissions to access regions are handled:
- * - When subdivided_by is VSOC_REGION_WHOLE, any process that can
- * open the device node for the region gains complete access to it.
- * - When subdivided is set processes that open the region cannot
- * access it. Access to a sub-region must be established by invoking
- * the VSOC_CREATE_FD_SCOPE_PERMISSION ioctl on the region
- * referenced in subdivided_by, providing a fileinstance
- * (represented by a fd) opened on this region.
- */
- __u32 managed_by;
-};
-
-/*
- * The vsoc layout descriptor.
- * The first 4K should be reserved for the shm header and region descriptors.
- * The regions should be page aligned.
- */
-
-struct vsoc_shm_layout_descriptor {
- __u16 major_version;
- __u16 minor_version;
-
- /* size of the shm. This may be redundant but nice to have */
- __u32 size;
-
- /* number of shared memory regions */
- __u32 region_count;
-
- /* The offset to the start of region descriptors */
- __u32 vsoc_region_desc_offset;
-};
-
-/*
- * This specifies the current version that should be stored in
- * vsoc_shm_layout_descriptor.major_version and
- * vsoc_shm_layout_descriptor.minor_version.
- * It should be updated only if the vsoc_device_region and
- * vsoc_shm_layout_descriptor structures have changed.
- * Versioning within each region is transferred
- * via the min_compatible_version and current_version fields in
- * vsoc_device_region. The driver does not consult these fields: they are left
- * for the HALs and host processes and will change independently of the layout
- * version.
- */
-#define CURRENT_VSOC_LAYOUT_MAJOR_VERSION 2
-#define CURRENT_VSOC_LAYOUT_MINOR_VERSION 0
-
-#define VSOC_CREATE_FD_SCOPED_PERMISSION \
- _IOW(0xF5, 0, struct fd_scoped_permission)
-#define VSOC_GET_FD_SCOPED_PERMISSION _IOR(0xF5, 1, struct fd_scoped_permission)
-
-/*
- * This is used to signal the host to scan the guest_to_host_signal_table
- * for new futexes to wake. This sends an interrupt if one is not already
- * in flight.
- */
-#define VSOC_MAYBE_SEND_INTERRUPT_TO_HOST _IO(0xF5, 2)
-
-/*
- * When this returns the guest will scan host_to_guest_signal_table to
- * check for new futexes to wake.
- */
-/* TODO(ghartman): Consider moving this to the bottom half */
-#define VSOC_WAIT_FOR_INCOMING_INTERRUPT _IO(0xF5, 3)
-
-/*
- * Guest HALs will use this to retrieve the region description after
- * opening their device node.
- */
-#define VSOC_DESCRIBE_REGION _IOR(0xF5, 4, struct vsoc_device_region)
-
-/*
- * Wake any threads that may be waiting for a host interrupt on this region.
- * This is mostly used during shutdown.
- */
-#define VSOC_SELF_INTERRUPT _IO(0xF5, 5)
-
-/*
- * This is used to signal the host to scan the guest_to_host_signal_table
- * for new futexes to wake. This sends an interrupt unconditionally.
- */
-#define VSOC_SEND_INTERRUPT_TO_HOST _IO(0xF5, 6)
-
-enum wait_types {
- VSOC_WAIT_UNDEFINED = 0,
- VSOC_WAIT_IF_EQUAL = 1,
- VSOC_WAIT_IF_EQUAL_TIMEOUT = 2
-};
-
-/*
- * Wait for a condition to be true
- *
- * Note, this is sized and aligned so the 32 bit and 64 bit layouts are
- * identical.
- */
-struct vsoc_cond_wait {
- /* Input: Offset of the 32 bit word to check */
- __u32 offset;
- /* Input: Value that will be compared with the offset */
- __u32 value;
- /* Monotonic time to wake at in seconds */
- __u64 wake_time_sec;
- /* Input: Monotonic time to wait in nanoseconds */
- __u32 wake_time_nsec;
- /* Input: Type of wait */
- __u32 wait_type;
- /* Output: Number of times the thread woke before returning. */
- __u32 wakes;
- /* Ensure that we're 8-byte aligned and 8 byte length for 32/64 bit
- * compatibility.
- */
- __u32 reserved_1;
-};
-
-#define VSOC_COND_WAIT _IOWR(0xF5, 7, struct vsoc_cond_wait)
-
-/* Wake any local threads waiting at the offset given in arg */
-#define VSOC_COND_WAKE _IO(0xF5, 8)
-
-#endif /* _UAPI_LINUX_VSOC_SHM_H */
diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c
deleted file mode 100644
index 1240bb0..0000000
--- a/drivers/staging/android/vsoc.c
+++ /dev/null
@@ -1,1149 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * drivers/android/staging/vsoc.c
- *
- * Android Virtual System on a Chip (VSoC) driver
- *
- * Copyright (C) 2017 Google, Inc.
- *
- * Author: ghartman@google.com
- *
- * Based on drivers/char/kvm_ivshmem.c - driver for KVM Inter-VM shared memory
- * Copyright 2009 Cam Macdonell <cam@cs.ualberta.ca>
- *
- * Based on cirrusfb.c and 8139cp.c:
- * Copyright 1999-2001 Jeff Garzik
- * Copyright 2001-2004 Jeff Garzik
- */
-
-#include <linux/dma-mapping.h>
-#include <linux/freezer.h>
-#include <linux/futex.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/pci.h>
-#include <linux/proc_fs.h>
-#include <linux/sched.h>
-#include <linux/syscalls.h>
-#include <linux/uaccess.h>
-#include <linux/interrupt.h>
-#include <linux/cdev.h>
-#include <linux/file.h>
-#include "uapi/vsoc_shm.h"
-
-#define VSOC_DEV_NAME "vsoc"
-
-/*
- * Description of the ivshmem-doorbell PCI device used by QEmu. These
- * constants follow docs/specs/ivshmem-spec.txt, which can be found in
- * the QEmu repository. This was last reconciled with the version that
- * came out with 2.8
- */
-
-/*
- * These constants are determined KVM Inter-VM shared memory device
- * register offsets
- */
-enum {
- INTR_MASK = 0x00, /* Interrupt Mask */
- INTR_STATUS = 0x04, /* Interrupt Status */
- IV_POSITION = 0x08, /* VM ID */
- DOORBELL = 0x0c, /* Doorbell */
-};
-
-static const int REGISTER_BAR; /* Equal to 0 */
-static const int MAX_REGISTER_BAR_LEN = 0x100;
-/*
- * The MSI-x BAR is not used directly.
- *
- * static const int MSI_X_BAR = 1;
- */
-static const int SHARED_MEMORY_BAR = 2;
-
-struct vsoc_region_data {
- char name[VSOC_DEVICE_NAME_SZ + 1];
- wait_queue_head_t interrupt_wait_queue;
- /* TODO(b/73664181): Use multiple futex wait queues */
- wait_queue_head_t futex_wait_queue;
- /* Flag indicating that an interrupt has been signalled by the host. */
- atomic_t *incoming_signalled;
- /* Flag indicating the guest has signalled the host. */
- atomic_t *outgoing_signalled;
- bool irq_requested;
- bool device_created;
-};
-
-struct vsoc_device {
- /* Kernel virtual address of REGISTER_BAR. */
- void __iomem *regs;
- /* Physical address of SHARED_MEMORY_BAR. */
- phys_addr_t shm_phys_start;
- /* Kernel virtual address of SHARED_MEMORY_BAR. */
- void __iomem *kernel_mapped_shm;
- /* Size of the entire shared memory window in bytes. */
- size_t shm_size;
- /*
- * Pointer to the virtual address of the shared memory layout structure.
- * This is probably identical to kernel_mapped_shm, but saving this
- * here saves a lot of annoying casts.
- */
- struct vsoc_shm_layout_descriptor *layout;
- /*
- * Points to a table of region descriptors in the kernel's virtual
- * address space. Calculated from
- * vsoc_shm_layout_descriptor.vsoc_region_desc_offset
- */
- struct vsoc_device_region *regions;
- /* Head of a list of permissions that have been granted. */
- struct list_head permissions;
- struct pci_dev *dev;
- /* Per-region (and therefore per-interrupt) information. */
- struct vsoc_region_data *regions_data;
- /*
- * Table of msi-x entries. This has to be separated from struct
- * vsoc_region_data because the kernel deals with them as an array.
- */
- struct msix_entry *msix_entries;
- /* Mutex that protectes the permission list */
- struct mutex mtx;
- /* Major number assigned by the kernel */
- int major;
- /* Character device assigned by the kernel */
- struct cdev cdev;
- /* Device class assigned by the kernel */
- struct class *class;
- /*
- * Flags that indicate what we've initialized. These are used to do an
- * orderly cleanup of the device.
- */
- bool enabled_device;
- bool requested_regions;
- bool cdev_added;
- bool class_added;
- bool msix_enabled;
-};
-
-static struct vsoc_device vsoc_dev;
-
-/*
- * TODO(ghartman): Add a /sys filesystem entry that summarizes the permissions.
- */
-
-struct fd_scoped_permission_node {
- struct fd_scoped_permission permission;
- struct list_head list;
-};
-
-struct vsoc_private_data {
- struct fd_scoped_permission_node *fd_scoped_permission_node;
-};
-
-static long vsoc_ioctl(struct file *, unsigned int, unsigned long);
-static int vsoc_mmap(struct file *, struct vm_area_struct *);
-static int vsoc_open(struct inode *, struct file *);
-static int vsoc_release(struct inode *, struct file *);
-static ssize_t vsoc_read(struct file *, char __user *, size_t, loff_t *);
-static ssize_t vsoc_write(struct file *, const char __user *, size_t, loff_t *);
-static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin);
-static int
-do_create_fd_scoped_permission(struct vsoc_device_region *region_p,
- struct fd_scoped_permission_node *np,
- struct fd_scoped_permission_arg __user *arg);
-static void
-do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p,
- struct fd_scoped_permission *perm);
-static long do_vsoc_describe_region(struct file *,
- struct vsoc_device_region __user *);
-static ssize_t vsoc_get_area(struct file *filp, __u32 *perm_off);
-
-/**
- * Validate arguments on entry points to the driver.
- */
-inline int vsoc_validate_inode(struct inode *inode)
-{
- if (iminor(inode) >= vsoc_dev.layout->region_count) {
- dev_err(&vsoc_dev.dev->dev,
- "describe_region: invalid region %d\n", iminor(inode));
- return -ENODEV;
- }
- return 0;
-}
-
-inline int vsoc_validate_filep(struct file *filp)
-{
- int ret = vsoc_validate_inode(file_inode(filp));
-
- if (ret)
- return ret;
- if (!filp->private_data) {
- dev_err(&vsoc_dev.dev->dev,
- "No private data on fd, region %d\n",
- iminor(file_inode(filp)));
- return -EBADFD;
- }
- return 0;
-}
-
-/* Converts from shared memory offset to virtual address */
-static inline void *shm_off_to_virtual_addr(__u32 offset)
-{
- return (void __force *)vsoc_dev.kernel_mapped_shm + offset;
-}
-
-/* Converts from shared memory offset to physical address */
-static inline phys_addr_t shm_off_to_phys_addr(__u32 offset)
-{
- return vsoc_dev.shm_phys_start + offset;
-}
-
-/**
- * Convenience functions to obtain the region from the inode or file.
- * Dangerous to call before validating the inode/file.
- */
-static
-inline struct vsoc_device_region *vsoc_region_from_inode(struct inode *inode)
-{
- return &vsoc_dev.regions[iminor(inode)];
-}
-
-static
-inline struct vsoc_device_region *vsoc_region_from_filep(struct file *inode)
-{
- return vsoc_region_from_inode(file_inode(inode));
-}
-
-static inline uint32_t vsoc_device_region_size(struct vsoc_device_region *r)
-{
- return r->region_end_offset - r->region_begin_offset;
-}
-
-static const struct file_operations vsoc_ops = {
- .owner = THIS_MODULE,
- .open = vsoc_open,
- .mmap = vsoc_mmap,
- .read = vsoc_read,
- .unlocked_ioctl = vsoc_ioctl,
- .compat_ioctl = vsoc_ioctl,
- .write = vsoc_write,
- .llseek = vsoc_lseek,
- .release = vsoc_release,
-};
-
-static struct pci_device_id vsoc_id_table[] = {
- {0x1af4, 0x1110, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
- {0},
-};
-
-MODULE_DEVICE_TABLE(pci, vsoc_id_table);
-
-static void vsoc_remove_device(struct pci_dev *pdev);
-static int vsoc_probe_device(struct pci_dev *pdev,
- const struct pci_device_id *ent);
-
-static struct pci_driver vsoc_pci_driver = {
- .name = "vsoc",
- .id_table = vsoc_id_table,
- .probe = vsoc_probe_device,
- .remove = vsoc_remove_device,
-};
-
-static int
-do_create_fd_scoped_permission(struct vsoc_device_region *region_p,
- struct fd_scoped_permission_node *np,
- struct fd_scoped_permission_arg __user *arg)
-{
- struct file *managed_filp;
- s32 managed_fd;
- atomic_t *owner_ptr = NULL;
- struct vsoc_device_region *managed_region_p;
-
- if (copy_from_user(&np->permission,
- &arg->perm, sizeof(np->permission)) ||
- copy_from_user(&managed_fd,
- &arg->managed_region_fd, sizeof(managed_fd))) {
- return -EFAULT;
- }
- managed_filp = fdget(managed_fd).file;
- /* Check that it's a valid fd, */
- if (!managed_filp || vsoc_validate_filep(managed_filp))
- return -EPERM;
- /* EEXIST if the given fd already has a permission. */
- if (((struct vsoc_private_data *)managed_filp->private_data)->
- fd_scoped_permission_node)
- return -EEXIST;
- managed_region_p = vsoc_region_from_filep(managed_filp);
- /* Check that the provided region is managed by this one */
- if (&vsoc_dev.regions[managed_region_p->managed_by] != region_p)
- return -EPERM;
- /* The area must be well formed and have non-zero size */
- if (np->permission.begin_offset >= np->permission.end_offset)
- return -EINVAL;
- /* The area must fit in the memory window */
- if (np->permission.end_offset >
- vsoc_device_region_size(managed_region_p))
- return -ERANGE;
- /* The area must be in the region data section */
- if (np->permission.begin_offset <
- managed_region_p->offset_of_region_data)
- return -ERANGE;
- /* The area must be page aligned */
- if (!PAGE_ALIGNED(np->permission.begin_offset) ||
- !PAGE_ALIGNED(np->permission.end_offset))
- return -EINVAL;
- /* Owner offset must be naturally aligned in the window */
- if (np->permission.owner_offset &
- (sizeof(np->permission.owner_offset) - 1))
- return -EINVAL;
- /* The owner flag must reside in the owner memory */
- if (np->permission.owner_offset + sizeof(np->permission.owner_offset) >
- vsoc_device_region_size(region_p))
- return -ERANGE;
- /* The owner flag must reside in the data section */
- if (np->permission.owner_offset < region_p->offset_of_region_data)
- return -EINVAL;
- /* The owner value must change to claim the memory */
- if (np->permission.owned_value == VSOC_REGION_FREE)
- return -EINVAL;
- owner_ptr =
- (atomic_t *)shm_off_to_virtual_addr(region_p->region_begin_offset +
- np->permission.owner_offset);
- /* We've already verified that this is in the shared memory window, so
- * it should be safe to write to this address.
- */
- if (atomic_cmpxchg(owner_ptr,
- VSOC_REGION_FREE,
- np->permission.owned_value) != VSOC_REGION_FREE) {
- return -EBUSY;
- }
- ((struct vsoc_private_data *)managed_filp->private_data)->
- fd_scoped_permission_node = np;
- /* The file offset needs to be adjusted if the calling
- * process did any read/write operations on the fd
- * before creating the permission.
- */
- if (managed_filp->f_pos) {
- if (managed_filp->f_pos > np->permission.end_offset) {
- /* If the offset is beyond the permission end, set it
- * to the end.
- */
- managed_filp->f_pos = np->permission.end_offset;
- } else {
- /* If the offset is within the permission interval
- * keep it there otherwise reset it to zero.
- */
- if (managed_filp->f_pos < np->permission.begin_offset) {
- managed_filp->f_pos = 0;
- } else {
- managed_filp->f_pos -=
- np->permission.begin_offset;
- }
- }
- }
- return 0;
-}
-
-static void
-do_destroy_fd_scoped_permission_node(struct vsoc_device_region *owner_region_p,
- struct fd_scoped_permission_node *node)
-{
- if (node) {
- do_destroy_fd_scoped_permission(owner_region_p,
- &node->permission);
- mutex_lock(&vsoc_dev.mtx);
- list_del(&node->list);
- mutex_unlock(&vsoc_dev.mtx);
- kfree(node);
- }
-}
-
-static void
-do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p,
- struct fd_scoped_permission *perm)
-{
- atomic_t *owner_ptr = NULL;
- int prev = 0;
-
- if (!perm)
- return;
- owner_ptr = (atomic_t *)shm_off_to_virtual_addr
- (owner_region_p->region_begin_offset + perm->owner_offset);
- prev = atomic_xchg(owner_ptr, VSOC_REGION_FREE);
- if (prev != perm->owned_value)
- dev_err(&vsoc_dev.dev->dev,
- "%x-%x: owner (%s) %x: expected to be %x was %x",
- perm->begin_offset, perm->end_offset,
- owner_region_p->device_name, perm->owner_offset,
- perm->owned_value, prev);
-}
-
-static long do_vsoc_describe_region(struct file *filp,
- struct vsoc_device_region __user *dest)
-{
- struct vsoc_device_region *region_p;
- int retval = vsoc_validate_filep(filp);
-
- if (retval)
- return retval;
- region_p = vsoc_region_from_filep(filp);
- if (copy_to_user(dest, region_p, sizeof(*region_p)))
- return -EFAULT;
- return 0;
-}
-
-/**
- * Implements the inner logic of cond_wait. Copies to and from userspace are
- * done in the helper function below.
- */
-static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
-{
- DEFINE_WAIT(wait);
- u32 region_number = iminor(file_inode(filp));
- struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;
- struct hrtimer_sleeper timeout, *to = NULL;
- int ret = 0;
- struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
- atomic_t *address = NULL;
- ktime_t wake_time;
-
- /* Ensure that the offset is aligned */
- if (arg->offset & (sizeof(uint32_t) - 1))
- return -EADDRNOTAVAIL;
- /* Ensure that the offset is within shared memory */
- if (((uint64_t)arg->offset) + region_p->region_begin_offset +
- sizeof(uint32_t) > region_p->region_end_offset)
- return -E2BIG;
- address = shm_off_to_virtual_addr(region_p->region_begin_offset +
- arg->offset);
-
- /* Ensure that the type of wait is valid */
- switch (arg->wait_type) {
- case VSOC_WAIT_IF_EQUAL:
- break;
- case VSOC_WAIT_IF_EQUAL_TIMEOUT:
- to = &timeout;
- break;
- default:
- return -EINVAL;
- }
-
- if (to) {
- /* Copy the user-supplied timesec into the kernel structure.
- * We do things this way to flatten differences between 32 bit
- * and 64 bit timespecs.
- */
- if (arg->wake_time_nsec >= NSEC_PER_SEC)
- return -EINVAL;
- wake_time = ktime_set(arg->wake_time_sec, arg->wake_time_nsec);
-
- hrtimer_init_sleeper_on_stack(to, CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS);
- hrtimer_set_expires_range_ns(&to->timer, wake_time,
- current->timer_slack_ns);
- }
-
- while (1) {
- prepare_to_wait(&data->futex_wait_queue, &wait,
- TASK_INTERRUPTIBLE);
- /*
- * Check the sentinel value after prepare_to_wait. If the value
- * changes after this check the writer will call signal,
- * changing the task state from INTERRUPTIBLE to RUNNING. That
- * will ensure that schedule() will eventually schedule this
- * task.
- */
- if (atomic_read(address) != arg->value) {
- ret = 0;
- break;
- }
- if (to) {
- hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
- if (likely(to->task))
- freezable_schedule();
- hrtimer_cancel(&to->timer);
- if (!to->task) {
- ret = -ETIMEDOUT;
- break;
- }
- } else {
- freezable_schedule();
- }
- /* Count the number of times that we woke up. This is useful
- * for unit testing.
- */
- ++arg->wakes;
- if (signal_pending(current)) {
- ret = -EINTR;
- break;
- }
- }
- finish_wait(&data->futex_wait_queue, &wait);
- if (to)
- destroy_hrtimer_on_stack(&to->timer);
- return ret;
-}
-
-/**
- * Handles the details of copying from/to userspace to ensure that the copies
- * happen on all of the return paths of cond_wait.
- */
-static int do_vsoc_cond_wait(struct file *filp,
- struct vsoc_cond_wait __user *untrusted_in)
-{
- struct vsoc_cond_wait arg;
- int rval = 0;
-
- if (copy_from_user(&arg, untrusted_in, sizeof(arg)))
- return -EFAULT;
- /* wakes is an out parameter. Initialize it to something sensible. */
- arg.wakes = 0;
- rval = handle_vsoc_cond_wait(filp, &arg);
- if (copy_to_user(untrusted_in, &arg, sizeof(arg)))
- return -EFAULT;
- return rval;
-}
-
-static int do_vsoc_cond_wake(struct file *filp, uint32_t offset)
-{
- struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
- u32 region_number = iminor(file_inode(filp));
- struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;
- /* Ensure that the offset is aligned */
- if (offset & (sizeof(uint32_t) - 1))
- return -EADDRNOTAVAIL;
- /* Ensure that the offset is within shared memory */
- if (((uint64_t)offset) + region_p->region_begin_offset +
- sizeof(uint32_t) > region_p->region_end_offset)
- return -E2BIG;
- /*
- * TODO(b/73664181): Use multiple futex wait queues.
- * We need to wake every sleeper when the condition changes. Typically
- * only a single thread will be waiting on the condition, but there
- * are exceptions. The worst case is about 10 threads.
- */
- wake_up_interruptible_all(&data->futex_wait_queue);
- return 0;
-}
-
-static long vsoc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
-{
- int rv = 0;
- struct vsoc_device_region *region_p;
- u32 reg_num;
- struct vsoc_region_data *reg_data;
- int retval = vsoc_validate_filep(filp);
-
- if (retval)
- return retval;
- region_p = vsoc_region_from_filep(filp);
- reg_num = iminor(file_inode(filp));
- reg_data = vsoc_dev.regions_data + reg_num;
- switch (cmd) {
- case VSOC_CREATE_FD_SCOPED_PERMISSION:
- {
- struct fd_scoped_permission_node *node = NULL;
-
- node = kzalloc(sizeof(*node), GFP_KERNEL);
- /* We can't allocate memory for the permission */
- if (!node)
- return -ENOMEM;
- INIT_LIST_HEAD(&node->list);
- rv = do_create_fd_scoped_permission
- (region_p,
- node,
- (struct fd_scoped_permission_arg __user *)arg);
- if (!rv) {
- mutex_lock(&vsoc_dev.mtx);
- list_add(&node->list, &vsoc_dev.permissions);
- mutex_unlock(&vsoc_dev.mtx);
- } else {
- kfree(node);
- return rv;
- }
- }
- break;
-
- case VSOC_GET_FD_SCOPED_PERMISSION:
- {
- struct fd_scoped_permission_node *node =
- ((struct vsoc_private_data *)filp->private_data)->
- fd_scoped_permission_node;
- if (!node)
- return -ENOENT;
- if (copy_to_user
- ((struct fd_scoped_permission __user *)arg,
- &node->permission, sizeof(node->permission)))
- return -EFAULT;
- }
- break;
-
- case VSOC_MAYBE_SEND_INTERRUPT_TO_HOST:
- if (!atomic_xchg(reg_data->outgoing_signalled, 1)) {
- writel(reg_num, vsoc_dev.regs + DOORBELL);
- return 0;
- } else {
- return -EBUSY;
- }
- break;
-
- case VSOC_SEND_INTERRUPT_TO_HOST:
- writel(reg_num, vsoc_dev.regs + DOORBELL);
- return 0;
- case VSOC_WAIT_FOR_INCOMING_INTERRUPT:
- wait_event_interruptible
- (reg_data->interrupt_wait_queue,
- (atomic_read(reg_data->incoming_signalled) != 0));
- break;
-
- case VSOC_DESCRIBE_REGION:
- return do_vsoc_describe_region
- (filp,
- (struct vsoc_device_region __user *)arg);
-
- case VSOC_SELF_INTERRUPT:
- atomic_set(reg_data->incoming_signalled, 1);
- wake_up_interruptible(®_data->interrupt_wait_queue);
- break;
-
- case VSOC_COND_WAIT:
- return do_vsoc_cond_wait(filp,
- (struct vsoc_cond_wait __user *)arg);
- case VSOC_COND_WAKE:
- return do_vsoc_cond_wake(filp, arg);
-
- default:
- return -EINVAL;
- }
- return 0;
-}
-
-static ssize_t vsoc_read(struct file *filp, char __user *buffer, size_t len,
- loff_t *poffset)
-{
- __u32 area_off;
- const void *area_p;
- ssize_t area_len;
- int retval = vsoc_validate_filep(filp);
-
- if (retval)
- return retval;
- area_len = vsoc_get_area(filp, &area_off);
- area_p = shm_off_to_virtual_addr(area_off);
- area_p += *poffset;
- area_len -= *poffset;
- if (area_len <= 0)
- return 0;
- if (area_len < len)
- len = area_len;
- if (copy_to_user(buffer, area_p, len))
- return -EFAULT;
- *poffset += len;
- return len;
-}
-
-static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin)
-{
- ssize_t area_len = 0;
- int retval = vsoc_validate_filep(filp);
-
- if (retval)
- return retval;
- area_len = vsoc_get_area(filp, NULL);
- switch (origin) {
- case SEEK_SET:
- break;
-
- case SEEK_CUR:
- if (offset > 0 && offset + filp->f_pos < 0)
- return -EOVERFLOW;
- offset += filp->f_pos;
- break;
-
- case SEEK_END:
- if (offset > 0 && offset + area_len < 0)
- return -EOVERFLOW;
- offset += area_len;
- break;
-
- case SEEK_DATA:
- if (offset >= area_len)
- return -EINVAL;
- if (offset < 0)
- offset = 0;
- break;
-
- case SEEK_HOLE:
- /* Next hole is always the end of the region, unless offset is
- * beyond that
- */
- if (offset < area_len)
- offset = area_len;
- break;
-
- default:
- return -EINVAL;
- }
-
- if (offset < 0 || offset > area_len)
- return -EINVAL;
- filp->f_pos = offset;
-
- return offset;
-}
-
-static ssize_t vsoc_write(struct file *filp, const char __user *buffer,
- size_t len, loff_t *poffset)
-{
- __u32 area_off;
- void *area_p;
- ssize_t area_len;
- int retval = vsoc_validate_filep(filp);
-
- if (retval)
- return retval;
- area_len = vsoc_get_area(filp, &area_off);
- area_p = shm_off_to_virtual_addr(area_off);
- area_p += *poffset;
- area_len -= *poffset;
- if (area_len <= 0)
- return 0;
- if (area_len < len)
- len = area_len;
- if (copy_from_user(area_p, buffer, len))
- return -EFAULT;
- *poffset += len;
- return len;
-}
-
-static irqreturn_t vsoc_interrupt(int irq, void *region_data_v)
-{
- struct vsoc_region_data *region_data =
- (struct vsoc_region_data *)region_data_v;
- int reg_num = region_data - vsoc_dev.regions_data;
-
- if (unlikely(!region_data))
- return IRQ_NONE;
-
- if (unlikely(reg_num < 0 ||
- reg_num >= vsoc_dev.layout->region_count)) {
- dev_err(&vsoc_dev.dev->dev,
- "invalid irq @%p reg_num=0x%04x\n",
- region_data, reg_num);
- return IRQ_NONE;
- }
- if (unlikely(vsoc_dev.regions_data + reg_num != region_data)) {
- dev_err(&vsoc_dev.dev->dev,
- "irq not aligned @%p reg_num=0x%04x\n",
- region_data, reg_num);
- return IRQ_NONE;
- }
- wake_up_interruptible(®ion_data->interrupt_wait_queue);
- return IRQ_HANDLED;
-}
-
-static int vsoc_probe_device(struct pci_dev *pdev,
- const struct pci_device_id *ent)
-{
- int result;
- int i;
- resource_size_t reg_size;
- dev_t devt;
-
- vsoc_dev.dev = pdev;
- result = pci_enable_device(pdev);
- if (result) {
- dev_err(&pdev->dev,
- "pci_enable_device failed %s: error %d\n",
- pci_name(pdev), result);
- return result;
- }
- vsoc_dev.enabled_device = true;
- result = pci_request_regions(pdev, "vsoc");
- if (result < 0) {
- dev_err(&pdev->dev, "pci_request_regions failed\n");
- vsoc_remove_device(pdev);
- return -EBUSY;
- }
- vsoc_dev.requested_regions = true;
- /* Set up the control registers in BAR 0 */
- reg_size = pci_resource_len(pdev, REGISTER_BAR);
- if (reg_size > MAX_REGISTER_BAR_LEN)
- vsoc_dev.regs =
- pci_iomap(pdev, REGISTER_BAR, MAX_REGISTER_BAR_LEN);
- else
- vsoc_dev.regs = pci_iomap(pdev, REGISTER_BAR, reg_size);
-
- if (!vsoc_dev.regs) {
- dev_err(&pdev->dev,
- "cannot map registers of size %zu\n",
- (size_t)reg_size);
- vsoc_remove_device(pdev);
- return -EBUSY;
- }
-
- /* Map the shared memory in BAR 2 */
- vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR);
- vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR);
-
- dev_info(&pdev->dev, "shared memory @ DMA %pa size=0x%zx\n",
- &vsoc_dev.shm_phys_start, vsoc_dev.shm_size);
- vsoc_dev.kernel_mapped_shm = pci_iomap_wc(pdev, SHARED_MEMORY_BAR, 0);
- if (!vsoc_dev.kernel_mapped_shm) {
- dev_err(&vsoc_dev.dev->dev, "cannot iomap region\n");
- vsoc_remove_device(pdev);
- return -EBUSY;
- }
-
- vsoc_dev.layout = (struct vsoc_shm_layout_descriptor __force *)
- vsoc_dev.kernel_mapped_shm;
- dev_info(&pdev->dev, "major_version: %d\n",
- vsoc_dev.layout->major_version);
- dev_info(&pdev->dev, "minor_version: %d\n",
- vsoc_dev.layout->minor_version);
- dev_info(&pdev->dev, "size: 0x%x\n", vsoc_dev.layout->size);
- dev_info(&pdev->dev, "regions: %d\n", vsoc_dev.layout->region_count);
- if (vsoc_dev.layout->major_version !=
- CURRENT_VSOC_LAYOUT_MAJOR_VERSION) {
- dev_err(&vsoc_dev.dev->dev,
- "driver supports only major_version %d\n",
- CURRENT_VSOC_LAYOUT_MAJOR_VERSION);
- vsoc_remove_device(pdev);
- return -EBUSY;
- }
- result = alloc_chrdev_region(&devt, 0, vsoc_dev.layout->region_count,
- VSOC_DEV_NAME);
- if (result) {
- dev_err(&vsoc_dev.dev->dev, "alloc_chrdev_region failed\n");
- vsoc_remove_device(pdev);
- return -EBUSY;
- }
- vsoc_dev.major = MAJOR(devt);
- cdev_init(&vsoc_dev.cdev, &vsoc_ops);
- vsoc_dev.cdev.owner = THIS_MODULE;
- result = cdev_add(&vsoc_dev.cdev, devt, vsoc_dev.layout->region_count);
- if (result) {
- dev_err(&vsoc_dev.dev->dev, "cdev_add error\n");
- vsoc_remove_device(pdev);
- return -EBUSY;
- }
- vsoc_dev.cdev_added = true;
- vsoc_dev.class = class_create(THIS_MODULE, VSOC_DEV_NAME);
- if (IS_ERR(vsoc_dev.class)) {
- dev_err(&vsoc_dev.dev->dev, "class_create failed\n");
- vsoc_remove_device(pdev);
- return PTR_ERR(vsoc_dev.class);
- }
- vsoc_dev.class_added = true;
- vsoc_dev.regions = (struct vsoc_device_region __force *)
- ((void *)vsoc_dev.layout +
- vsoc_dev.layout->vsoc_region_desc_offset);
- vsoc_dev.msix_entries =
- kcalloc(vsoc_dev.layout->region_count,
- sizeof(vsoc_dev.msix_entries[0]), GFP_KERNEL);
- if (!vsoc_dev.msix_entries) {
- dev_err(&vsoc_dev.dev->dev,
- "unable to allocate msix_entries\n");
- vsoc_remove_device(pdev);
- return -ENOSPC;
- }
- vsoc_dev.regions_data =
- kcalloc(vsoc_dev.layout->region_count,
- sizeof(vsoc_dev.regions_data[0]), GFP_KERNEL);
- if (!vsoc_dev.regions_data) {
- dev_err(&vsoc_dev.dev->dev,
- "unable to allocate regions' data\n");
- vsoc_remove_device(pdev);
- return -ENOSPC;
- }
- for (i = 0; i < vsoc_dev.layout->region_count; ++i)
- vsoc_dev.msix_entries[i].entry = i;
-
- result = pci_enable_msix_exact(vsoc_dev.dev, vsoc_dev.msix_entries,
- vsoc_dev.layout->region_count);
- if (result) {
- dev_info(&pdev->dev, "pci_enable_msix failed: %d\n", result);
- vsoc_remove_device(pdev);
- return -ENOSPC;
- }
- /* Check that all regions are well formed */
- for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
- const struct vsoc_device_region *region = vsoc_dev.regions + i;
-
- if (!PAGE_ALIGNED(region->region_begin_offset) ||
- !PAGE_ALIGNED(region->region_end_offset)) {
- dev_err(&vsoc_dev.dev->dev,
- "region %d not aligned (%x:%x)", i,
- region->region_begin_offset,
- region->region_end_offset);
- vsoc_remove_device(pdev);
- return -EFAULT;
- }
- if (region->region_begin_offset >= region->region_end_offset ||
- region->region_end_offset > vsoc_dev.shm_size) {
- dev_err(&vsoc_dev.dev->dev,
- "region %d offsets are wrong: %x %x %zx",
- i, region->region_begin_offset,
- region->region_end_offset, vsoc_dev.shm_size);
- vsoc_remove_device(pdev);
- return -EFAULT;
- }
- if (region->managed_by >= vsoc_dev.layout->region_count) {
- dev_err(&vsoc_dev.dev->dev,
- "region %d has invalid owner: %u",
- i, region->managed_by);
- vsoc_remove_device(pdev);
- return -EFAULT;
- }
- }
- vsoc_dev.msix_enabled = true;
- for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
- const struct vsoc_device_region *region = vsoc_dev.regions + i;
- size_t name_sz = sizeof(vsoc_dev.regions_data[i].name) - 1;
- const struct vsoc_signal_table_layout *h_to_g_signal_table =
- ®ion->host_to_guest_signal_table;
- const struct vsoc_signal_table_layout *g_to_h_signal_table =
- ®ion->guest_to_host_signal_table;
-
- vsoc_dev.regions_data[i].name[name_sz] = '\0';
- memcpy(vsoc_dev.regions_data[i].name, region->device_name,
- name_sz);
- dev_info(&pdev->dev, "region %d name=%s\n",
- i, vsoc_dev.regions_data[i].name);
- init_waitqueue_head
- (&vsoc_dev.regions_data[i].interrupt_wait_queue);
- init_waitqueue_head(&vsoc_dev.regions_data[i].futex_wait_queue);
- vsoc_dev.regions_data[i].incoming_signalled =
- shm_off_to_virtual_addr(region->region_begin_offset) +
- h_to_g_signal_table->interrupt_signalled_offset;
- vsoc_dev.regions_data[i].outgoing_signalled =
- shm_off_to_virtual_addr(region->region_begin_offset) +
- g_to_h_signal_table->interrupt_signalled_offset;
- result = request_irq(vsoc_dev.msix_entries[i].vector,
- vsoc_interrupt, 0,
- vsoc_dev.regions_data[i].name,
- vsoc_dev.regions_data + i);
- if (result) {
- dev_info(&pdev->dev,
- "request_irq failed irq=%d vector=%d\n",
- i, vsoc_dev.msix_entries[i].vector);
- vsoc_remove_device(pdev);
- return -ENOSPC;
- }
- vsoc_dev.regions_data[i].irq_requested = true;
- if (!device_create(vsoc_dev.class, NULL,
- MKDEV(vsoc_dev.major, i),
- NULL, vsoc_dev.regions_data[i].name)) {
- dev_err(&vsoc_dev.dev->dev, "device_create failed\n");
- vsoc_remove_device(pdev);
- return -EBUSY;
- }
- vsoc_dev.regions_data[i].device_created = true;
- }
- return 0;
-}
-
-/*
- * This should undo all of the allocations in the probe function in reverse
- * order.
- *
- * Notes:
- *
- * The device may have been partially initialized, so double check
- * that the allocations happened.
- *
- * This function may be called multiple times, so mark resources as freed
- * as they are deallocated.
- */
-static void vsoc_remove_device(struct pci_dev *pdev)
-{
- int i;
- /*
- * pdev is the first thing to be set on probe and the last thing
- * to be cleared here. If it's NULL then there is no cleanup.
- */
- if (!pdev || !vsoc_dev.dev)
- return;
- dev_info(&pdev->dev, "remove_device\n");
- if (vsoc_dev.regions_data) {
- for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
- if (vsoc_dev.regions_data[i].device_created) {
- device_destroy(vsoc_dev.class,
- MKDEV(vsoc_dev.major, i));
- vsoc_dev.regions_data[i].device_created = false;
- }
- if (vsoc_dev.regions_data[i].irq_requested)
- free_irq(vsoc_dev.msix_entries[i].vector, NULL);
- vsoc_dev.regions_data[i].irq_requested = false;
- }
- kfree(vsoc_dev.regions_data);
- vsoc_dev.regions_data = NULL;
- }
- if (vsoc_dev.msix_enabled) {
- pci_disable_msix(pdev);
- vsoc_dev.msix_enabled = false;
- }
- kfree(vsoc_dev.msix_entries);
- vsoc_dev.msix_entries = NULL;
- vsoc_dev.regions = NULL;
- if (vsoc_dev.class_added) {
- class_destroy(vsoc_dev.class);
- vsoc_dev.class_added = false;
- }
- if (vsoc_dev.cdev_added) {
- cdev_del(&vsoc_dev.cdev);
- vsoc_dev.cdev_added = false;
- }
- if (vsoc_dev.major && vsoc_dev.layout) {
- unregister_chrdev_region(MKDEV(vsoc_dev.major, 0),
- vsoc_dev.layout->region_count);
- vsoc_dev.major = 0;
- }
- vsoc_dev.layout = NULL;
- if (vsoc_dev.kernel_mapped_shm) {
- pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm);
- vsoc_dev.kernel_mapped_shm = NULL;
- }
- if (vsoc_dev.regs) {
- pci_iounmap(pdev, vsoc_dev.regs);
- vsoc_dev.regs = NULL;
- }
- if (vsoc_dev.requested_regions) {
- pci_release_regions(pdev);
- vsoc_dev.requested_regions = false;
- }
- if (vsoc_dev.enabled_device) {
- pci_disable_device(pdev);
- vsoc_dev.enabled_device = false;
- }
- /* Do this last: it indicates that the device is not initialized. */
- vsoc_dev.dev = NULL;
-}
-
-static void __exit vsoc_cleanup_module(void)
-{
- vsoc_remove_device(vsoc_dev.dev);
- pci_unregister_driver(&vsoc_pci_driver);
-}
-
-static int __init vsoc_init_module(void)
-{
- int err = -ENOMEM;
-
- INIT_LIST_HEAD(&vsoc_dev.permissions);
- mutex_init(&vsoc_dev.mtx);
-
- err = pci_register_driver(&vsoc_pci_driver);
- if (err < 0)
- return err;
- return 0;
-}
-
-static int vsoc_open(struct inode *inode, struct file *filp)
-{
- /* Can't use vsoc_validate_filep because filp is still incomplete */
- int ret = vsoc_validate_inode(inode);
-
- if (ret)
- return ret;
- filp->private_data =
- kzalloc(sizeof(struct vsoc_private_data), GFP_KERNEL);
- if (!filp->private_data)
- return -ENOMEM;
- return 0;
-}
-
-static int vsoc_release(struct inode *inode, struct file *filp)
-{
- struct vsoc_private_data *private_data = NULL;
- struct fd_scoped_permission_node *node = NULL;
- struct vsoc_device_region *owner_region_p = NULL;
- int retval = vsoc_validate_filep(filp);
-
- if (retval)
- return retval;
- private_data = (struct vsoc_private_data *)filp->private_data;
- if (!private_data)
- return 0;
-
- node = private_data->fd_scoped_permission_node;
- if (node) {
- owner_region_p = vsoc_region_from_inode(inode);
- if (owner_region_p->managed_by != VSOC_REGION_WHOLE) {
- owner_region_p =
- &vsoc_dev.regions[owner_region_p->managed_by];
- }
- do_destroy_fd_scoped_permission_node(owner_region_p, node);
- private_data->fd_scoped_permission_node = NULL;
- }
- kfree(private_data);
- filp->private_data = NULL;
-
- return 0;
-}
-
-/*
- * Returns the device relative offset and length of the area specified by the
- * fd scoped permission. If there is no fd scoped permission set, a default
- * permission covering the entire region is assumed, unless the region is owned
- * by another one, in which case the default is a permission with zero size.
- */
-static ssize_t vsoc_get_area(struct file *filp, __u32 *area_offset)
-{
- __u32 off = 0;
- ssize_t length = 0;
- struct vsoc_device_region *region_p;
- struct fd_scoped_permission *perm;
-
- region_p = vsoc_region_from_filep(filp);
- off = region_p->region_begin_offset;
- perm = &((struct vsoc_private_data *)filp->private_data)->
- fd_scoped_permission_node->permission;
- if (perm) {
- off += perm->begin_offset;
- length = perm->end_offset - perm->begin_offset;
- } else if (region_p->managed_by == VSOC_REGION_WHOLE) {
- /* No permission set and the regions is not owned by another,
- * default to full region access.
- */
- length = vsoc_device_region_size(region_p);
- } else {
- /* return zero length, access is denied. */
- length = 0;
- }
- if (area_offset)
- *area_offset = off;
- return length;
-}
-
-static int vsoc_mmap(struct file *filp, struct vm_area_struct *vma)
-{
- unsigned long len = vma->vm_end - vma->vm_start;
- __u32 area_off;
- phys_addr_t mem_off;
- ssize_t area_len;
- int retval = vsoc_validate_filep(filp);
-
- if (retval)
- return retval;
- area_len = vsoc_get_area(filp, &area_off);
- /* Add the requested offset */
- area_off += (vma->vm_pgoff << PAGE_SHIFT);
- area_len -= (vma->vm_pgoff << PAGE_SHIFT);
- if (area_len < len)
- return -EINVAL;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- mem_off = shm_off_to_phys_addr(area_off);
- if (io_remap_pfn_range(vma, vma->vm_start, mem_off >> PAGE_SHIFT,
- len, vma->vm_page_prot))
- return -EAGAIN;
- return 0;
-}
-
-module_init(vsoc_init_module);
-module_exit(vsoc_cleanup_module);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Greg Hartman <ghartman@google.com>");
-MODULE_DESCRIPTION("VSoC interpretation of QEmu's ivshmem device");
-MODULE_VERSION("1.0");
diff --git a/drivers/staging/greybus/audio_manager.c b/drivers/staging/greybus/audio_manager.c
index 9b19ea9..9a3f7c0 100644
--- a/drivers/staging/greybus/audio_manager.c
+++ b/drivers/staging/greybus/audio_manager.c
@@ -92,8 +92,8 @@ void gb_audio_manager_remove_all(void)
list_for_each_entry_safe(module, next, &modules_list, list) {
list_del(&module->list);
- kobject_put(&module->kobj);
ida_simple_remove(&module_id, module->id);
+ kobject_put(&module->kobj);
}
is_empty = list_empty(&modules_list);
diff --git a/drivers/staging/greybus/tools/loopback_test.c b/drivers/staging/greybus/tools/loopback_test.c
index ba6f905..69c6dce 100644
--- a/drivers/staging/greybus/tools/loopback_test.c
+++ b/drivers/staging/greybus/tools/loopback_test.c
@@ -19,6 +19,7 @@
#include <signal.h>
#define MAX_NUM_DEVICES 10
+#define MAX_SYSFS_PREFIX 0x80
#define MAX_SYSFS_PATH 0x200
#define CSV_MAX_LINE 0x1000
#define SYSFS_MAX_INT 0x20
@@ -67,7 +68,7 @@ struct loopback_results {
};
struct loopback_device {
- char name[MAX_SYSFS_PATH];
+ char name[MAX_STR_LEN];
char sysfs_entry[MAX_SYSFS_PATH];
char debugfs_entry[MAX_SYSFS_PATH];
struct loopback_results results;
@@ -93,8 +94,8 @@ struct loopback_test {
int stop_all;
int poll_count;
char test_name[MAX_STR_LEN];
- char sysfs_prefix[MAX_SYSFS_PATH];
- char debugfs_prefix[MAX_SYSFS_PATH];
+ char sysfs_prefix[MAX_SYSFS_PREFIX];
+ char debugfs_prefix[MAX_SYSFS_PREFIX];
struct timespec poll_timeout;
struct loopback_device devices[MAX_NUM_DEVICES];
struct loopback_results aggregate_results;
@@ -637,7 +638,7 @@ int find_loopback_devices(struct loopback_test *t)
static int open_poll_files(struct loopback_test *t)
{
struct loopback_device *dev;
- char buf[MAX_STR_LEN];
+ char buf[MAX_SYSFS_PATH + MAX_STR_LEN];
char dummy;
int fds_idx = 0;
int i;
@@ -655,7 +656,7 @@ static int open_poll_files(struct loopback_test *t)
goto err;
}
read(t->fds[fds_idx].fd, &dummy, 1);
- t->fds[fds_idx].events = EPOLLERR|EPOLLPRI;
+ t->fds[fds_idx].events = POLLERR | POLLPRI;
t->fds[fds_idx].revents = 0;
fds_idx++;
}
@@ -748,7 +749,7 @@ static int wait_for_complete(struct loopback_test *t)
}
for (i = 0; i < t->poll_count; i++) {
- if (t->fds[i].revents & EPOLLPRI) {
+ if (t->fds[i].revents & POLLPRI) {
/* Dummy read to clear the event */
read(t->fds[i].fd, &dummy, 1);
number_of_events++;
@@ -907,10 +908,10 @@ int main(int argc, char *argv[])
t.iteration_max = atoi(optarg);
break;
case 'S':
- snprintf(t.sysfs_prefix, MAX_SYSFS_PATH, "%s", optarg);
+ snprintf(t.sysfs_prefix, MAX_SYSFS_PREFIX, "%s", optarg);
break;
case 'D':
- snprintf(t.debugfs_prefix, MAX_SYSFS_PATH, "%s", optarg);
+ snprintf(t.debugfs_prefix, MAX_SYSFS_PREFIX, "%s", optarg);
break;
case 'm':
t.mask = atol(optarg);
@@ -961,10 +962,10 @@ int main(int argc, char *argv[])
}
if (!strcmp(t.sysfs_prefix, ""))
- snprintf(t.sysfs_prefix, MAX_SYSFS_PATH, "%s", sysfs_prefix);
+ snprintf(t.sysfs_prefix, MAX_SYSFS_PREFIX, "%s", sysfs_prefix);
if (!strcmp(t.debugfs_prefix, ""))
- snprintf(t.debugfs_prefix, MAX_SYSFS_PATH, "%s", debugfs_prefix);
+ snprintf(t.debugfs_prefix, MAX_SYSFS_PREFIX, "%s", debugfs_prefix);
ret = find_loopback_devices(&t);
if (ret)
diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
index 9b6ea86..ba53959 100644
--- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
+++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
@@ -2009,21 +2009,16 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p)
struct ieee_param *param;
uint ret = 0;
- if (p->length < sizeof(struct ieee_param) || !p->pointer) {
- ret = -EINVAL;
- goto out;
- }
+ if (!p->pointer || p->length != sizeof(struct ieee_param))
+ return -EINVAL;
param = (struct ieee_param *)rtw_malloc(p->length);
- if (!param) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!param)
+ return -ENOMEM;
if (copy_from_user(param, p->pointer, p->length)) {
kfree(param);
- ret = -EFAULT;
- goto out;
+ return -EFAULT;
}
switch (param->cmd) {
@@ -2054,9 +2049,6 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p)
ret = -EFAULT;
kfree(param);
-
-out:
-
return ret;
}
@@ -2791,26 +2783,19 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p)
* so, we just check hw_init_completed
*/
- if (!padapter->hw_init_completed) {
- ret = -EPERM;
- goto out;
- }
+ if (!padapter->hw_init_completed)
+ return -EPERM;
- if (!p->pointer) {
- ret = -EINVAL;
- goto out;
- }
+ if (!p->pointer || p->length != sizeof(struct ieee_param))
+ return -EINVAL;
param = (struct ieee_param *)rtw_malloc(p->length);
- if (!param) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!param)
+ return -ENOMEM;
if (copy_from_user(param, p->pointer, p->length)) {
kfree(param);
- ret = -EFAULT;
- goto out;
+ return -EFAULT;
}
switch (param->cmd) {
@@ -2865,7 +2850,6 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p)
if (ret == 0 && copy_to_user(p->pointer, param, p->length))
ret = -EFAULT;
kfree(param);
-out:
return ret;
}
#endif
diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c
index b5d42f4..845c881 100644
--- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c
+++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c
@@ -38,6 +38,7 @@ static const struct usb_device_id rtw_usb_id_tbl[] = {
{USB_DEVICE(0x2001, 0x331B)}, /* D-Link DWA-121 rev B1 */
{USB_DEVICE(0x2357, 0x010c)}, /* TP-Link TL-WN722N v2 */
{USB_DEVICE(0x2357, 0x0111)}, /* TP-Link TL-WN727N v5.21 */
+ {USB_DEVICE(0x2C4E, 0x0102)}, /* MERCUSYS MW150US v2 */
{USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */
{USB_DEVICE(USB_VENDER_ID_REALTEK, 0xffef)}, /* Rosewill RNX-N150NUB */
{} /* Terminating entry */
diff --git a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c
index b44e902..b6d56cf 100644
--- a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c
+++ b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c
@@ -476,14 +476,13 @@ int rtl8723bs_xmit_thread(void *context)
s32 ret;
struct adapter *padapter;
struct xmit_priv *pxmitpriv;
- u8 thread_name[20] = "RTWHALXT";
-
+ u8 thread_name[20];
ret = _SUCCESS;
padapter = context;
pxmitpriv = &padapter->xmitpriv;
- rtw_sprintf(thread_name, 20, "%s-"ADPT_FMT, thread_name, ADPT_ARG(padapter));
+ rtw_sprintf(thread_name, 20, "RTWHALXT-" ADPT_FMT, ADPT_ARG(padapter));
thread_enter(thread_name);
DBG_871X("start "FUNC_ADPT_FMT"\n", FUNC_ADPT_ARG(padapter));
diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
index db6528a..9b9038e 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
@@ -3373,21 +3373,16 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p)
/* down(&ieee->wx_sem); */
- if (p->length < sizeof(struct ieee_param) || !p->pointer) {
- ret = -EINVAL;
- goto out;
- }
+ if (!p->pointer || p->length != sizeof(struct ieee_param))
+ return -EINVAL;
param = rtw_malloc(p->length);
- if (param == NULL) {
- ret = -ENOMEM;
- goto out;
- }
+ if (param == NULL)
+ return -ENOMEM;
if (copy_from_user(param, p->pointer, p->length)) {
kfree(param);
- ret = -EFAULT;
- goto out;
+ return -EFAULT;
}
switch (param->cmd) {
@@ -3421,12 +3416,8 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p)
kfree(param);
-out:
-
/* up(&ieee->wx_sem); */
-
return ret;
-
}
static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, u32 param_len)
@@ -4200,28 +4191,19 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p)
* so, we just check hw_init_completed
*/
- if (!padapter->hw_init_completed) {
- ret = -EPERM;
- goto out;
- }
+ if (!padapter->hw_init_completed)
+ return -EPERM;
-
- /* if (p->length < sizeof(struct ieee_param) || !p->pointer) { */
- if (!p->pointer) {
- ret = -EINVAL;
- goto out;
- }
+ if (!p->pointer || p->length != sizeof(*param))
+ return -EINVAL;
param = rtw_malloc(p->length);
- if (param == NULL) {
- ret = -ENOMEM;
- goto out;
- }
+ if (param == NULL)
+ return -ENOMEM;
if (copy_from_user(param, p->pointer, p->length)) {
kfree(param);
- ret = -EFAULT;
- goto out;
+ return -EFAULT;
}
/* DBG_871X("%s, cmd =%d\n", __func__, param->cmd); */
@@ -4321,13 +4303,8 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p)
if (ret == 0 && copy_to_user(p->pointer, param, p->length))
ret = -EFAULT;
-
kfree(param);
-
-out:
-
return ret;
-
}
static int rtw_wx_set_priv(struct net_device *dev,
diff --git a/drivers/staging/speakup/main.c b/drivers/staging/speakup/main.c
index 488f253..81ecfd1 100644
--- a/drivers/staging/speakup/main.c
+++ b/drivers/staging/speakup/main.c
@@ -561,7 +561,7 @@ static u_long get_word(struct vc_data *vc)
return 0;
} else if (tmpx < vc->vc_cols - 2 &&
(ch == SPACE || ch == 0 || (ch < 0x100 && IS_WDLM(ch))) &&
- get_char(vc, (u_short *)&tmp_pos + 1, &temp) > SPACE) {
+ get_char(vc, (u_short *)tmp_pos + 1, &temp) > SPACE) {
tmp_pos += 2;
tmpx++;
} else {
diff --git a/drivers/staging/speakup/selection.c b/drivers/staging/speakup/selection.c
index a8b4d0c..032f326 100644
--- a/drivers/staging/speakup/selection.c
+++ b/drivers/staging/speakup/selection.c
@@ -51,9 +51,7 @@ static void __speakup_set_selection(struct work_struct *work)
goto unref;
}
- console_lock();
set_selection_kernel(&sel, tty);
- console_unlock();
unref:
tty_kref_put(tty);
diff --git a/drivers/staging/vt6656/dpc.c b/drivers/staging/vt6656/dpc.c
index 821aae8..a0b60e7 100644
--- a/drivers/staging/vt6656/dpc.c
+++ b/drivers/staging/vt6656/dpc.c
@@ -98,7 +98,7 @@ int vnt_rx_data(struct vnt_private *priv, struct vnt_rcb *ptr_rcb,
vnt_rf_rssi_to_dbm(priv, tail->rssi, &rx_dbm);
- priv->bb_pre_ed_rssi = (u8)rx_dbm + 1;
+ priv->bb_pre_ed_rssi = (u8)-rx_dbm + 1;
priv->current_rssi = priv->bb_pre_ed_rssi;
skb_pull(skb, sizeof(*head));
diff --git a/drivers/staging/wfx/Documentation/devicetree/bindings/net/wireless/siliabs,wfx.txt b/drivers/staging/wfx/Documentation/devicetree/bindings/net/wireless/siliabs,wfx.txt
index 26de676..081d58a 100644
--- a/drivers/staging/wfx/Documentation/devicetree/bindings/net/wireless/siliabs,wfx.txt
+++ b/drivers/staging/wfx/Documentation/devicetree/bindings/net/wireless/siliabs,wfx.txt
@@ -93,5 +93,5 @@
Must contains 64 hexadecimal digits. Not supported in current version.
WFx driver also supports `mac-address` and `local-mac-address` as described in
-Documentation/devicetree/binding/net/ethernet.txt
+Documentation/devicetree/bindings/net/ethernet.txt
diff --git a/drivers/staging/wfx/hif_tx.c b/drivers/staging/wfx/hif_tx.c
index 2428363..77bca43 100644
--- a/drivers/staging/wfx/hif_tx.c
+++ b/drivers/staging/wfx/hif_tx.c
@@ -140,6 +140,7 @@ int hif_shutdown(struct wfx_dev *wdev)
else
control_reg_write(wdev, 0);
mutex_unlock(&wdev->hif_cmd.lock);
+ mutex_unlock(&wdev->hif_cmd.key_renew_lock);
kfree(hif);
return ret;
}
@@ -289,7 +290,7 @@ int hif_stop_scan(struct wfx_vif *wvif)
}
int hif_join(struct wfx_vif *wvif, const struct ieee80211_bss_conf *conf,
- const struct ieee80211_channel *channel, const u8 *ssidie)
+ struct ieee80211_channel *channel, const u8 *ssid, int ssidlen)
{
int ret;
struct hif_msg *hif;
@@ -307,9 +308,9 @@ int hif_join(struct wfx_vif *wvif, const struct ieee80211_bss_conf *conf,
body->basic_rate_set =
cpu_to_le32(wfx_rate_mask_to_hw(wvif->wdev, conf->basic_rates));
memcpy(body->bssid, conf->bssid, sizeof(body->bssid));
- if (!conf->ibss_joined && ssidie) {
- body->ssid_length = cpu_to_le32(ssidie[1]);
- memcpy(body->ssid, &ssidie[2], ssidie[1]);
+ if (!conf->ibss_joined && ssid) {
+ body->ssid_length = cpu_to_le32(ssidlen);
+ memcpy(body->ssid, ssid, ssidlen);
}
wfx_fill_header(hif, wvif->id, HIF_REQ_ID_JOIN, sizeof(*body));
ret = wfx_cmd_send(wvif->wdev, hif, NULL, 0, false);
@@ -427,9 +428,9 @@ int hif_start(struct wfx_vif *wvif, const struct ieee80211_bss_conf *conf,
struct hif_msg *hif;
struct hif_req_start *body = wfx_alloc_hif(sizeof(*body), &hif);
- body->dtim_period = conf->dtim_period,
- body->short_preamble = conf->use_short_preamble,
- body->channel_number = cpu_to_le16(channel->hw_value),
+ body->dtim_period = conf->dtim_period;
+ body->short_preamble = conf->use_short_preamble;
+ body->channel_number = cpu_to_le16(channel->hw_value);
body->beacon_interval = cpu_to_le32(conf->beacon_int);
body->basic_rate_set =
cpu_to_le32(wfx_rate_mask_to_hw(wvif->wdev, conf->basic_rates));
diff --git a/drivers/staging/wfx/hif_tx.h b/drivers/staging/wfx/hif_tx.h
index 20977e4..f8520a1 100644
--- a/drivers/staging/wfx/hif_tx.h
+++ b/drivers/staging/wfx/hif_tx.h
@@ -46,7 +46,7 @@ int hif_scan(struct wfx_vif *wvif, struct cfg80211_scan_request *req80211,
int chan_start, int chan_num);
int hif_stop_scan(struct wfx_vif *wvif);
int hif_join(struct wfx_vif *wvif, const struct ieee80211_bss_conf *conf,
- const struct ieee80211_channel *channel, const u8 *ssidie);
+ struct ieee80211_channel *channel, const u8 *ssid, int ssidlen);
int hif_set_pm(struct wfx_vif *wvif, bool ps, int dynamic_ps_timeout);
int hif_set_bss_params(struct wfx_vif *wvif,
const struct hif_req_set_bss_params *arg);
diff --git a/drivers/staging/wfx/hif_tx_mib.h b/drivers/staging/wfx/hif_tx_mib.h
index bf3769c..26b1406 100644
--- a/drivers/staging/wfx/hif_tx_mib.h
+++ b/drivers/staging/wfx/hif_tx_mib.h
@@ -191,10 +191,10 @@ static inline int hif_set_block_ack_policy(struct wfx_vif *wvif,
}
static inline int hif_set_association_mode(struct wfx_vif *wvif,
- struct ieee80211_bss_conf *info,
- struct ieee80211_sta_ht_cap *ht_cap)
+ struct ieee80211_bss_conf *info)
{
int basic_rates = wfx_rate_mask_to_hw(wvif->wdev, info->basic_rates);
+ struct ieee80211_sta *sta = NULL;
struct hif_mib_set_association_mode val = {
.preambtype_use = 1,
.mode = 1,
@@ -204,12 +204,17 @@ static inline int hif_set_association_mode(struct wfx_vif *wvif,
.basic_rate_set = cpu_to_le32(basic_rates)
};
+ rcu_read_lock(); // protect sta
+ if (info->bssid && !info->ibss_joined)
+ sta = ieee80211_find_sta(wvif->vif, info->bssid);
+
// FIXME: it is strange to not retrieve all information from bss_info
- if (ht_cap && ht_cap->ht_supported) {
- val.mpdu_start_spacing = ht_cap->ampdu_density;
+ if (sta && sta->ht_cap.ht_supported) {
+ val.mpdu_start_spacing = sta->ht_cap.ampdu_density;
if (!(info->ht_operation_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT))
- val.greenfield = !!(ht_cap->cap & IEEE80211_HT_CAP_GRN_FLD);
+ val.greenfield = !!(sta->ht_cap.cap & IEEE80211_HT_CAP_GRN_FLD);
}
+ rcu_read_unlock();
return hif_write_mib(wvif->wdev, wvif->id,
HIF_MIB_ID_SET_ASSOCIATION_MODE, &val, sizeof(val));
diff --git a/drivers/staging/wfx/sta.c b/drivers/staging/wfx/sta.c
index 03d0f22..af4f4bb 100644
--- a/drivers/staging/wfx/sta.c
+++ b/drivers/staging/wfx/sta.c
@@ -491,9 +491,11 @@ static void wfx_set_mfp(struct wfx_vif *wvif,
static void wfx_do_join(struct wfx_vif *wvif)
{
int ret;
- const u8 *ssidie;
struct ieee80211_bss_conf *conf = &wvif->vif->bss_conf;
struct cfg80211_bss *bss = NULL;
+ u8 ssid[IEEE80211_MAX_SSID_LEN];
+ const u8 *ssidie = NULL;
+ int ssidlen = 0;
wfx_tx_lock_flush(wvif->wdev);
@@ -514,11 +516,14 @@ static void wfx_do_join(struct wfx_vif *wvif)
if (!wvif->beacon_int)
wvif->beacon_int = 1;
- rcu_read_lock();
+ rcu_read_lock(); // protect ssidie
if (!conf->ibss_joined)
ssidie = ieee80211_bss_get_ie(bss, WLAN_EID_SSID);
- else
- ssidie = NULL;
+ if (ssidie) {
+ ssidlen = ssidie[1];
+ memcpy(ssid, &ssidie[2], ssidie[1]);
+ }
+ rcu_read_unlock();
wfx_tx_flush(wvif->wdev);
@@ -527,10 +532,8 @@ static void wfx_do_join(struct wfx_vif *wvif)
wfx_set_mfp(wvif, bss);
- /* Perform actual join */
wvif->wdev->tx_burst_idx = -1;
- ret = hif_join(wvif, conf, wvif->channel, ssidie);
- rcu_read_unlock();
+ ret = hif_join(wvif, conf, wvif->channel, ssid, ssidlen);
if (ret) {
ieee80211_connection_loss(wvif->vif);
wvif->join_complete_status = -1;
@@ -605,7 +608,9 @@ int wfx_sta_remove(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
int i;
for (i = 0; i < ARRAY_SIZE(sta_priv->buffered); i++)
- WARN(sta_priv->buffered[i], "release station while Tx is in progress");
+ if (sta_priv->buffered[i])
+ dev_warn(wvif->wdev->dev, "release station while %d pending frame on queue %d",
+ sta_priv->buffered[i], i);
// FIXME: see note in wfx_sta_add()
if (vif->type == NL80211_IFTYPE_STATION)
return 0;
@@ -689,6 +694,7 @@ static void wfx_join_finalize(struct wfx_vif *wvif,
wfx_rate_mask_to_hw(wvif->wdev, sta->supp_rates[wvif->channel->band]);
else
wvif->bss_params.operational_rate_set = -1;
+ rcu_read_unlock();
if (sta &&
info->ht_operation_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT)
hif_dual_cts_protection(wvif, true);
@@ -701,8 +707,7 @@ static void wfx_join_finalize(struct wfx_vif *wvif,
wvif->bss_params.beacon_lost_count = 20;
wvif->bss_params.aid = info->aid;
- hif_set_association_mode(wvif, info, sta ? &sta->ht_cap : NULL);
- rcu_read_unlock();
+ hif_set_association_mode(wvif, info);
if (!info->ibss_joined) {
hif_keep_alive_period(wvif, 30 /* sec */);
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index b94ed4e..09e55ea 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -1165,9 +1165,7 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
hdr->cmdsn, be32_to_cpu(hdr->data_length), payload_length,
conn->cid);
- if (target_get_sess_cmd(&cmd->se_cmd, true) < 0)
- return iscsit_add_reject_cmd(cmd,
- ISCSI_REASON_WAITING_FOR_LOGOUT, buf);
+ target_get_sess_cmd(&cmd->se_cmd, true);
cmd->sense_reason = transport_lookup_cmd_lun(&cmd->se_cmd,
scsilun_to_int(&hdr->lun));
@@ -2004,9 +2002,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
conn->sess->se_sess, 0, DMA_NONE,
TCM_SIMPLE_TAG, cmd->sense_buffer + 2);
- if (target_get_sess_cmd(&cmd->se_cmd, true) < 0)
- return iscsit_add_reject_cmd(cmd,
- ISCSI_REASON_WAITING_FOR_LOGOUT, buf);
+ target_get_sess_cmd(&cmd->se_cmd, true);
/*
* TASK_REASSIGN for ERL=2 / connection stays inside of
@@ -4149,6 +4145,9 @@ int iscsit_close_connection(
iscsit_stop_nopin_response_timer(conn);
iscsit_stop_nopin_timer(conn);
+ if (conn->conn_transport->iscsit_wait_conn)
+ conn->conn_transport->iscsit_wait_conn(conn);
+
/*
* During Connection recovery drop unacknowledged out of order
* commands for this connection, and prepare the other commands
@@ -4231,11 +4230,6 @@ int iscsit_close_connection(
* must wait until they have completed.
*/
iscsit_check_conn_usage_count(conn);
- target_sess_cmd_list_set_waiting(sess->se_sess);
- target_wait_for_sess_cmds(sess->se_sess);
-
- if (conn->conn_transport->iscsit_wait_conn)
- conn->conn_transport->iscsit_wait_conn(conn);
ahash_request_free(conn->conn_tx_hash);
if (conn->conn_rx_hash) {
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index ea482d4..0ae9e60 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -666,6 +666,11 @@ static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd)
target_remove_from_state_list(cmd);
+ /*
+ * Clear struct se_cmd->se_lun before the handoff to FE.
+ */
+ cmd->se_lun = NULL;
+
spin_lock_irqsave(&cmd->t_state_lock, flags);
/*
* Determine if frontend context caller is requesting the stopping of
@@ -693,6 +698,17 @@ static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd)
return cmd->se_tfo->check_stop_free(cmd);
}
+static void transport_lun_remove_cmd(struct se_cmd *cmd)
+{
+ struct se_lun *lun = cmd->se_lun;
+
+ if (!lun)
+ return;
+
+ if (cmpxchg(&cmd->lun_ref_active, true, false))
+ percpu_ref_put(&lun->lun_ref);
+}
+
static void target_complete_failure_work(struct work_struct *work)
{
struct se_cmd *cmd = container_of(work, struct se_cmd, work);
@@ -783,6 +799,8 @@ static void target_handle_abort(struct se_cmd *cmd)
WARN_ON_ONCE(kref_read(&cmd->cmd_kref) == 0);
+ transport_lun_remove_cmd(cmd);
+
transport_cmd_check_stop_to_fabric(cmd);
}
@@ -1708,6 +1726,7 @@ static void target_complete_tmr_failure(struct work_struct *work)
se_cmd->se_tmr_req->response = TMR_LUN_DOES_NOT_EXIST;
se_cmd->se_tfo->queue_tm_rsp(se_cmd);
+ transport_lun_remove_cmd(se_cmd);
transport_cmd_check_stop_to_fabric(se_cmd);
}
@@ -1898,6 +1917,7 @@ void transport_generic_request_failure(struct se_cmd *cmd,
goto queue_full;
check_stop:
+ transport_lun_remove_cmd(cmd);
transport_cmd_check_stop_to_fabric(cmd);
return;
@@ -2195,6 +2215,7 @@ static void transport_complete_qf(struct se_cmd *cmd)
transport_handle_queue_full(cmd, cmd->se_dev, ret, false);
return;
}
+ transport_lun_remove_cmd(cmd);
transport_cmd_check_stop_to_fabric(cmd);
}
@@ -2289,6 +2310,7 @@ static void target_complete_ok_work(struct work_struct *work)
if (ret)
goto queue_full;
+ transport_lun_remove_cmd(cmd);
transport_cmd_check_stop_to_fabric(cmd);
return;
}
@@ -2314,6 +2336,7 @@ static void target_complete_ok_work(struct work_struct *work)
if (ret)
goto queue_full;
+ transport_lun_remove_cmd(cmd);
transport_cmd_check_stop_to_fabric(cmd);
return;
}
@@ -2349,6 +2372,7 @@ static void target_complete_ok_work(struct work_struct *work)
if (ret)
goto queue_full;
+ transport_lun_remove_cmd(cmd);
transport_cmd_check_stop_to_fabric(cmd);
return;
}
@@ -2384,6 +2408,7 @@ static void target_complete_ok_work(struct work_struct *work)
break;
}
+ transport_lun_remove_cmd(cmd);
transport_cmd_check_stop_to_fabric(cmd);
return;
@@ -2710,6 +2735,9 @@ int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks)
*/
if (cmd->state_active)
target_remove_from_state_list(cmd);
+
+ if (cmd->se_lun)
+ transport_lun_remove_cmd(cmd);
}
if (aborted)
cmd->free_compl = &compl;
@@ -2781,9 +2809,6 @@ static void target_release_cmd_kref(struct kref *kref)
struct completion *abrt_compl = se_cmd->abrt_compl;
unsigned long flags;
- if (se_cmd->lun_ref_active)
- percpu_ref_put(&se_cmd->se_lun->lun_ref);
-
if (se_sess) {
spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
list_del_init(&se_cmd->se_cmd_list);
diff --git a/drivers/tee/amdtee/Kconfig b/drivers/tee/amdtee/Kconfig
index 4e32b64..191f971 100644
--- a/drivers/tee/amdtee/Kconfig
+++ b/drivers/tee/amdtee/Kconfig
@@ -3,6 +3,6 @@
config AMDTEE
tristate "AMD-TEE"
default m
- depends on CRYPTO_DEV_SP_PSP
+ depends on CRYPTO_DEV_SP_PSP && CRYPTO_DEV_CCP_DD
help
This implements AMD's Trusted Execution Environment (TEE) driver.
diff --git a/drivers/tee/amdtee/core.c b/drivers/tee/amdtee/core.c
index 6370bb5..27b4cd7 100644
--- a/drivers/tee/amdtee/core.c
+++ b/drivers/tee/amdtee/core.c
@@ -139,6 +139,9 @@ static struct amdtee_session *find_session(struct amdtee_context_data *ctxdata,
u32 index = get_session_index(session);
struct amdtee_session *sess;
+ if (index >= TEE_NUM_SESSIONS)
+ return NULL;
+
list_for_each_entry(sess, &ctxdata->sess_list, list_node)
if (ta_handle == sess->ta_handle &&
test_bit(index, sess->sess_mask))
@@ -212,6 +215,19 @@ static int copy_ta_binary(struct tee_context *ctx, void *ptr, void **ta,
return rc;
}
+static void destroy_session(struct kref *ref)
+{
+ struct amdtee_session *sess = container_of(ref, struct amdtee_session,
+ refcount);
+
+ /* Unload the TA from TEE */
+ handle_unload_ta(sess->ta_handle);
+ mutex_lock(&session_list_mutex);
+ list_del(&sess->list_node);
+ mutex_unlock(&session_list_mutex);
+ kfree(sess);
+}
+
int amdtee_open_session(struct tee_context *ctx,
struct tee_ioctl_open_session_arg *arg,
struct tee_param *param)
@@ -236,15 +252,13 @@ int amdtee_open_session(struct tee_context *ctx,
/* Load the TA binary into TEE environment */
handle_load_ta(ta, ta_size, arg);
- if (arg->ret == TEEC_SUCCESS) {
- mutex_lock(&session_list_mutex);
- sess = alloc_session(ctxdata, arg->session);
- mutex_unlock(&session_list_mutex);
- }
-
if (arg->ret != TEEC_SUCCESS)
goto out;
+ mutex_lock(&session_list_mutex);
+ sess = alloc_session(ctxdata, arg->session);
+ mutex_unlock(&session_list_mutex);
+
if (!sess) {
rc = -ENOMEM;
goto out;
@@ -259,40 +273,29 @@ int amdtee_open_session(struct tee_context *ctx,
if (i >= TEE_NUM_SESSIONS) {
pr_err("reached maximum session count %d\n", TEE_NUM_SESSIONS);
+ kref_put(&sess->refcount, destroy_session);
rc = -ENOMEM;
goto out;
}
/* Open session with loaded TA */
handle_open_session(arg, &session_info, param);
-
- if (arg->ret == TEEC_SUCCESS) {
- sess->session_info[i] = session_info;
- set_session_id(sess->ta_handle, i, &arg->session);
- } else {
+ if (arg->ret != TEEC_SUCCESS) {
pr_err("open_session failed %d\n", arg->ret);
spin_lock(&sess->lock);
clear_bit(i, sess->sess_mask);
spin_unlock(&sess->lock);
+ kref_put(&sess->refcount, destroy_session);
+ goto out;
}
+
+ sess->session_info[i] = session_info;
+ set_session_id(sess->ta_handle, i, &arg->session);
out:
free_pages((u64)ta, get_order(ta_size));
return rc;
}
-static void destroy_session(struct kref *ref)
-{
- struct amdtee_session *sess = container_of(ref, struct amdtee_session,
- refcount);
-
- /* Unload the TA from TEE */
- handle_unload_ta(sess->ta_handle);
- mutex_lock(&session_list_mutex);
- list_del(&sess->list_node);
- mutex_unlock(&session_list_mutex);
- kfree(sess);
-}
-
int amdtee_close_session(struct tee_context *ctx, u32 session)
{
struct amdtee_context_data *ctxdata = ctx->data;
diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c
index ad5479f..a2ce990 100644
--- a/drivers/thunderbolt/switch.c
+++ b/drivers/thunderbolt/switch.c
@@ -348,6 +348,12 @@ static int tb_switch_nvm_read(void *priv, unsigned int offset, void *val,
return ret;
}
+static int tb_switch_nvm_no_read(void *priv, unsigned int offset, void *val,
+ size_t bytes)
+{
+ return -EPERM;
+}
+
static int tb_switch_nvm_write(void *priv, unsigned int offset, void *val,
size_t bytes)
{
@@ -393,6 +399,7 @@ static struct nvmem_device *register_nvmem(struct tb_switch *sw, int id,
config.read_only = true;
} else {
config.name = "nvm_non_active";
+ config.reg_read = tb_switch_nvm_no_read;
config.reg_write = tb_switch_nvm_write;
config.root_only = true;
}
@@ -947,7 +954,7 @@ static bool tb_port_is_width_supported(struct tb_port *port, int width)
ret = tb_port_read(port, &phy, TB_CFG_PORT,
port->cap_phy + LANE_ADP_CS_0, 1);
if (ret)
- return ret;
+ return false;
widths = (phy & LANE_ADP_CS_0_SUPPORTED_WIDTH_MASK) >>
LANE_ADP_CS_0_SUPPORTED_WIDTH_SHIFT;
diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c
index 42345e7..c5f0d93 100644
--- a/drivers/tty/serdev/core.c
+++ b/drivers/tty/serdev/core.c
@@ -18,6 +18,7 @@
#include <linux/sched.h>
#include <linux/serdev.h>
#include <linux/slab.h>
+#include <linux/platform_data/x86/apple.h>
static bool is_registered;
static DEFINE_IDA(ctrl_ida);
@@ -631,6 +632,15 @@ static int acpi_serdev_check_resources(struct serdev_controller *ctrl,
if (ret)
return ret;
+ /*
+ * Apple machines provide an empty resource template, so on those
+ * machines just look for immediate children with a "baud" property
+ * (from the _DSM method) instead.
+ */
+ if (!lookup.controller_handle && x86_apple_machine &&
+ !acpi_dev_get_property(adev, "baud", ACPI_TYPE_BUFFER, NULL))
+ acpi_get_parent(adev->handle, &lookup.controller_handle);
+
/* Make sure controller and ResourceSource handle match */
if (ACPI_HANDLE(ctrl->dev.parent) != lookup.controller_handle)
return -ENODEV;
diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c
index d1cdd2a..d367803e 100644
--- a/drivers/tty/serdev/serdev-ttyport.c
+++ b/drivers/tty/serdev/serdev-ttyport.c
@@ -265,7 +265,6 @@ struct device *serdev_tty_port_register(struct tty_port *port,
struct device *parent,
struct tty_driver *drv, int idx)
{
- const struct tty_port_client_operations *old_ops;
struct serdev_controller *ctrl;
struct serport *serport;
int ret;
@@ -284,7 +283,6 @@ struct device *serdev_tty_port_register(struct tty_port *port,
ctrl->ops = &ctrl_ops;
- old_ops = port->client_ops;
port->client_ops = &client_ops;
port->client_data = ctrl;
@@ -297,7 +295,7 @@ struct device *serdev_tty_port_register(struct tty_port *port,
err_reset_data:
port->client_data = NULL;
- port->client_ops = old_ops;
+ port->client_ops = &tty_port_default_client_ops;
serdev_controller_put(ctrl);
return ERR_PTR(ret);
@@ -312,8 +310,8 @@ int serdev_tty_port_unregister(struct tty_port *port)
return -ENODEV;
serdev_controller_remove(ctrl);
- port->client_ops = NULL;
port->client_data = NULL;
+ port->client_ops = &tty_port_default_client_ops;
serdev_controller_put(ctrl);
return 0;
diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c
index d657aa1..c33e02c 100644
--- a/drivers/tty/serial/8250/8250_aspeed_vuart.c
+++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c
@@ -446,7 +446,6 @@ static int aspeed_vuart_probe(struct platform_device *pdev)
port.port.line = rc;
port.port.irq = irq_of_parse_and_map(np, 0);
- port.port.irqflags = IRQF_SHARED;
port.port.handle_irq = aspeed_vuart_handle_irq;
port.port.iotype = UPIO_MEM;
port.port.type = PORT_16550A;
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 0894a22..f2a33c9 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -174,7 +174,7 @@ static int serial_link_irq_chain(struct uart_8250_port *up)
struct hlist_head *h;
struct hlist_node *n;
struct irq_info *i;
- int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? IRQF_SHARED : 0;
+ int ret;
mutex_lock(&hash_mutex);
@@ -209,9 +209,8 @@ static int serial_link_irq_chain(struct uart_8250_port *up)
INIT_LIST_HEAD(&up->list);
i->head = &up->list;
spin_unlock_irq(&i->lock);
- irq_flags |= up->port.irqflags;
ret = request_irq(up->port.irq, serial8250_interrupt,
- irq_flags, up->port.name, i);
+ up->port.irqflags, up->port.name, i);
if (ret < 0)
serial_do_unlink(i, up);
}
diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c
index 91e9b07..d330da7 100644
--- a/drivers/tty/serial/8250/8250_exar.c
+++ b/drivers/tty/serial/8250/8250_exar.c
@@ -25,6 +25,14 @@
#include "8250.h"
+#define PCI_DEVICE_ID_ACCES_COM_2S 0x1052
+#define PCI_DEVICE_ID_ACCES_COM_4S 0x105d
+#define PCI_DEVICE_ID_ACCES_COM_8S 0x106c
+#define PCI_DEVICE_ID_ACCES_COM232_8 0x10a8
+#define PCI_DEVICE_ID_ACCES_COM_2SM 0x10d2
+#define PCI_DEVICE_ID_ACCES_COM_4SM 0x10db
+#define PCI_DEVICE_ID_ACCES_COM_8SM 0x10ea
+
#define PCI_DEVICE_ID_COMMTECH_4224PCI335 0x0002
#define PCI_DEVICE_ID_COMMTECH_4222PCI335 0x0004
#define PCI_DEVICE_ID_COMMTECH_2324PCI335 0x000a
@@ -677,6 +685,22 @@ static int __maybe_unused exar_resume(struct device *dev)
static SIMPLE_DEV_PM_OPS(exar_pci_pm, exar_suspend, exar_resume);
+static const struct exar8250_board acces_com_2x = {
+ .num_ports = 2,
+ .setup = pci_xr17c154_setup,
+};
+
+static const struct exar8250_board acces_com_4x = {
+ .num_ports = 4,
+ .setup = pci_xr17c154_setup,
+};
+
+static const struct exar8250_board acces_com_8x = {
+ .num_ports = 8,
+ .setup = pci_xr17c154_setup,
+};
+
+
static const struct exar8250_board pbn_fastcom335_2 = {
.num_ports = 2,
.setup = pci_fastcom335_setup,
@@ -745,6 +769,15 @@ static const struct exar8250_board pbn_exar_XR17V8358 = {
}
static const struct pci_device_id exar_pci_tbl[] = {
+ EXAR_DEVICE(ACCESSIO, ACCES_COM_2S, acces_com_2x),
+ EXAR_DEVICE(ACCESSIO, ACCES_COM_4S, acces_com_4x),
+ EXAR_DEVICE(ACCESSIO, ACCES_COM_8S, acces_com_8x),
+ EXAR_DEVICE(ACCESSIO, ACCES_COM232_8, acces_com_8x),
+ EXAR_DEVICE(ACCESSIO, ACCES_COM_2SM, acces_com_2x),
+ EXAR_DEVICE(ACCESSIO, ACCES_COM_4SM, acces_com_4x),
+ EXAR_DEVICE(ACCESSIO, ACCES_COM_8SM, acces_com_8x),
+
+
CONNECT_DEVICE(XR17C152, UART_2_232, pbn_connect),
CONNECT_DEVICE(XR17C154, UART_4_232, pbn_connect),
CONNECT_DEVICE(XR17C158, UART_8_232, pbn_connect),
diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c
index 531ad67..f668775 100644
--- a/drivers/tty/serial/8250/8250_of.c
+++ b/drivers/tty/serial/8250/8250_of.c
@@ -202,7 +202,6 @@ static int of_platform_serial_setup(struct platform_device *ofdev,
port->type = type;
port->uartclk = clk;
- port->irqflags |= IRQF_SHARED;
if (of_property_read_bool(np, "no-loopback-test"))
port->flags |= UPF_SKIP_TEST;
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 430e346..0325f2e 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2177,6 +2177,10 @@ int serial8250_do_startup(struct uart_port *port)
}
}
+ /* Check if we need to have shared IRQs */
+ if (port->irq && (up->port.flags & UPF_SHARE_IRQ))
+ up->port.irqflags |= IRQF_SHARED;
+
if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) {
unsigned char iir1;
/*
diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c
index 3bdd56a..ea12f10 100644
--- a/drivers/tty/serial/ar933x_uart.c
+++ b/drivers/tty/serial/ar933x_uart.c
@@ -286,6 +286,10 @@ static void ar933x_uart_set_termios(struct uart_port *port,
ar933x_uart_rmw_set(up, AR933X_UART_CS_REG,
AR933X_UART_CS_HOST_INT_EN);
+ /* enable RX and TX ready overide */
+ ar933x_uart_rmw_set(up, AR933X_UART_CS_REG,
+ AR933X_UART_CS_TX_READY_ORIDE | AR933X_UART_CS_RX_READY_ORIDE);
+
/* reenable the UART */
ar933x_uart_rmw(up, AR933X_UART_CS_REG,
AR933X_UART_CS_IF_MODE_M << AR933X_UART_CS_IF_MODE_S,
@@ -418,6 +422,10 @@ static int ar933x_uart_startup(struct uart_port *port)
ar933x_uart_rmw_set(up, AR933X_UART_CS_REG,
AR933X_UART_CS_HOST_INT_EN);
+ /* enable RX and TX ready overide */
+ ar933x_uart_rmw_set(up, AR933X_UART_CS_REG,
+ AR933X_UART_CS_TX_READY_ORIDE | AR933X_UART_CS_RX_READY_ORIDE);
+
/* Enable RX interrupts */
up->ier = AR933X_UART_INT_RX_VALID;
ar933x_uart_write(up, AR933X_UART_INT_EN_REG, up->ier);
diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index c15c398..a39c87a 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -570,7 +570,8 @@ static void atmel_stop_tx(struct uart_port *port)
atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask);
if (atmel_uart_is_half_duplex(port))
- atmel_start_rx(port);
+ if (!atomic_read(&atmel_port->tasklet_shutdown))
+ atmel_start_rx(port);
}
diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
index 19d5a4c..d4b81b0 100644
--- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
@@ -1373,6 +1373,7 @@ static struct console cpm_scc_uart_console = {
static int __init cpm_uart_console_init(void)
{
+ cpm_muram_init();
register_console(&cpm_scc_uart_console);
return 0;
}
diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index 91e2805..c31b8f3 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -264,6 +264,7 @@ struct lpuart_port {
int rx_dma_rng_buf_len;
unsigned int dma_tx_nents;
wait_queue_head_t dma_wait;
+ bool id_allocated;
};
struct lpuart_soc_data {
@@ -2390,6 +2391,8 @@ static int __init lpuart32_imx_early_console_setup(struct earlycon_device *devic
OF_EARLYCON_DECLARE(lpuart, "fsl,vf610-lpuart", lpuart_early_console_setup);
OF_EARLYCON_DECLARE(lpuart32, "fsl,ls1021a-lpuart", lpuart32_early_console_setup);
OF_EARLYCON_DECLARE(lpuart32, "fsl,imx7ulp-lpuart", lpuart32_imx_early_console_setup);
+EARLYCON_DECLARE(lpuart, lpuart_early_console_setup);
+EARLYCON_DECLARE(lpuart32, lpuart32_early_console_setup);
#define LPUART_CONSOLE (&lpuart_console)
#define LPUART32_CONSOLE (&lpuart32_console)
@@ -2420,19 +2423,6 @@ static int lpuart_probe(struct platform_device *pdev)
if (!sport)
return -ENOMEM;
- ret = of_alias_get_id(np, "serial");
- if (ret < 0) {
- ret = ida_simple_get(&fsl_lpuart_ida, 0, UART_NR, GFP_KERNEL);
- if (ret < 0) {
- dev_err(&pdev->dev, "port line is full, add device failed\n");
- return ret;
- }
- }
- if (ret >= ARRAY_SIZE(lpuart_ports)) {
- dev_err(&pdev->dev, "serial%d out of range\n", ret);
- return -EINVAL;
- }
- sport->port.line = ret;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
sport->port.membase = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(sport->port.membase))
@@ -2477,9 +2467,25 @@ static int lpuart_probe(struct platform_device *pdev)
}
}
+ ret = of_alias_get_id(np, "serial");
+ if (ret < 0) {
+ ret = ida_simple_get(&fsl_lpuart_ida, 0, UART_NR, GFP_KERNEL);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "port line is full, add device failed\n");
+ return ret;
+ }
+ sport->id_allocated = true;
+ }
+ if (ret >= ARRAY_SIZE(lpuart_ports)) {
+ dev_err(&pdev->dev, "serial%d out of range\n", ret);
+ ret = -EINVAL;
+ goto failed_out_of_range;
+ }
+ sport->port.line = ret;
+
ret = lpuart_enable_clks(sport);
if (ret)
- return ret;
+ goto failed_clock_enable;
sport->port.uartclk = lpuart_get_baud_clk_rate(sport);
lpuart_ports[sport->port.line] = sport;
@@ -2529,6 +2535,10 @@ static int lpuart_probe(struct platform_device *pdev)
failed_attach_port:
failed_irq_request:
lpuart_disable_clks(sport);
+failed_clock_enable:
+failed_out_of_range:
+ if (sport->id_allocated)
+ ida_simple_remove(&fsl_lpuart_ida, sport->port.line);
return ret;
}
@@ -2538,7 +2548,8 @@ static int lpuart_remove(struct platform_device *pdev)
uart_remove_one_port(&lpuart_reg, &sport->port);
- ida_simple_remove(&fsl_lpuart_ida, sport->port.line);
+ if (sport->id_allocated)
+ ida_simple_remove(&fsl_lpuart_ida, sport->port.line);
lpuart_disable_clks(sport);
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index 0c6c631..d337782 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -599,7 +599,7 @@ static void imx_uart_dma_tx(struct imx_port *sport)
sport->tx_bytes = uart_circ_chars_pending(xmit);
- if (xmit->tail < xmit->head) {
+ if (xmit->tail < xmit->head || xmit->head == 0) {
sport->dma_tx_nents = 1;
sg_init_one(sgl, xmit->buf + xmit->tail, sport->tx_bytes);
} else {
diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c
index c12a125..4e9a590 100644
--- a/drivers/tty/serial/mvebu-uart.c
+++ b/drivers/tty/serial/mvebu-uart.c
@@ -851,7 +851,7 @@ static int mvebu_uart_probe(struct platform_device *pdev)
port->membase = devm_ioremap_resource(&pdev->dev, reg);
if (IS_ERR(port->membase))
- return -PTR_ERR(port->membase);
+ return PTR_ERR(port->membase);
mvuart = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_uart),
GFP_KERNEL);
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 191abb1..0bd1684 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -129,6 +129,7 @@ static int handle_rx_console(struct uart_port *uport, u32 bytes, bool drop);
static int handle_rx_uart(struct uart_port *uport, u32 bytes, bool drop);
static unsigned int qcom_geni_serial_tx_empty(struct uart_port *port);
static void qcom_geni_serial_stop_rx(struct uart_port *uport);
+static void qcom_geni_serial_handle_rx(struct uart_port *uport, bool drop);
static const unsigned long root_freq[] = {7372800, 14745600, 19200000, 29491200,
32000000, 48000000, 64000000, 80000000,
@@ -599,7 +600,7 @@ static void qcom_geni_serial_stop_rx(struct uart_port *uport)
u32 irq_en;
u32 status;
struct qcom_geni_serial_port *port = to_dev_port(uport, uport);
- u32 irq_clear = S_CMD_DONE_EN;
+ u32 s_irq_status;
irq_en = readl(uport->membase + SE_GENI_S_IRQ_EN);
irq_en &= ~(S_RX_FIFO_WATERMARK_EN | S_RX_FIFO_LAST_EN);
@@ -615,10 +616,19 @@ static void qcom_geni_serial_stop_rx(struct uart_port *uport)
return;
geni_se_cancel_s_cmd(&port->se);
- qcom_geni_serial_poll_bit(uport, SE_GENI_S_CMD_CTRL_REG,
- S_GENI_CMD_CANCEL, false);
+ qcom_geni_serial_poll_bit(uport, SE_GENI_S_IRQ_STATUS,
+ S_CMD_CANCEL_EN, true);
+ /*
+ * If timeout occurs secondary engine remains active
+ * and Abort sequence is executed.
+ */
+ s_irq_status = readl(uport->membase + SE_GENI_S_IRQ_STATUS);
+ /* Flush the Rx buffer */
+ if (s_irq_status & S_RX_FIFO_LAST_EN)
+ qcom_geni_serial_handle_rx(uport, true);
+ writel(s_irq_status, uport->membase + SE_GENI_S_IRQ_CLEAR);
+
status = readl(uport->membase + SE_GENI_STATUS);
- writel(irq_clear, uport->membase + SE_GENI_S_IRQ_CLEAR);
if (status & S_GENI_CMD_ACTIVE)
qcom_geni_serial_abort_rx(uport);
}
diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c
index 33034b8..8de8bac 100644
--- a/drivers/tty/serial/serial-tegra.c
+++ b/drivers/tty/serial/serial-tegra.c
@@ -692,11 +692,22 @@ static void tegra_uart_copy_rx_to_tty(struct tegra_uart_port *tup,
count, DMA_TO_DEVICE);
}
+static void do_handle_rx_pio(struct tegra_uart_port *tup)
+{
+ struct tty_struct *tty = tty_port_tty_get(&tup->uport.state->port);
+ struct tty_port *port = &tup->uport.state->port;
+
+ tegra_uart_handle_rx_pio(tup, port);
+ if (tty) {
+ tty_flip_buffer_push(port);
+ tty_kref_put(tty);
+ }
+}
+
static void tegra_uart_rx_buffer_push(struct tegra_uart_port *tup,
unsigned int residue)
{
struct tty_port *port = &tup->uport.state->port;
- struct tty_struct *tty = tty_port_tty_get(port);
unsigned int count;
async_tx_ack(tup->rx_dma_desc);
@@ -705,11 +716,7 @@ static void tegra_uart_rx_buffer_push(struct tegra_uart_port *tup,
/* If we are here, DMA is stopped */
tegra_uart_copy_rx_to_tty(tup, port, count);
- tegra_uart_handle_rx_pio(tup, port);
- if (tty) {
- tty_flip_buffer_push(port);
- tty_kref_put(tty);
- }
+ do_handle_rx_pio(tup);
}
static void tegra_uart_rx_dma_complete(void *args)
@@ -749,8 +756,10 @@ static void tegra_uart_terminate_rx_dma(struct tegra_uart_port *tup)
{
struct dma_tx_state state;
- if (!tup->rx_dma_active)
+ if (!tup->rx_dma_active) {
+ do_handle_rx_pio(tup);
return;
+ }
dmaengine_terminate_all(tup->rx_dma_chan);
dmaengine_tx_status(tup->rx_dma_chan, tup->rx_cookie, &state);
@@ -816,18 +825,6 @@ static void tegra_uart_handle_modem_signal_change(struct uart_port *u)
uart_handle_cts_change(&tup->uport, msr & UART_MSR_CTS);
}
-static void do_handle_rx_pio(struct tegra_uart_port *tup)
-{
- struct tty_struct *tty = tty_port_tty_get(&tup->uport.state->port);
- struct tty_port *port = &tup->uport.state->port;
-
- tegra_uart_handle_rx_pio(tup, port);
- if (tty) {
- tty_flip_buffer_push(port);
- tty_kref_put(tty);
- }
-}
-
static irqreturn_t tegra_uart_isr(int irq, void *data)
{
struct tegra_uart_port *tup = data;
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index a1453fe..5a6f36b 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -1589,9 +1589,7 @@ void tty_kclose(struct tty_struct *tty)
tty_debug_hangup(tty, "freeing structure\n");
/*
* The release_tty function takes care of the details of clearing
- * the slots and preserving the termios structure. The tty_unlock_pair
- * should be safe as we keep a kref while the tty is locked (so the
- * unlock never unlocks a freed tty).
+ * the slots and preserving the termios structure.
*/
mutex_lock(&tty_mutex);
tty_port_set_kopened(tty->port, 0);
@@ -1621,9 +1619,7 @@ void tty_release_struct(struct tty_struct *tty, int idx)
tty_debug_hangup(tty, "freeing structure\n");
/*
* The release_tty function takes care of the details of clearing
- * the slots and preserving the termios structure. The tty_unlock_pair
- * should be safe as we keep a kref while the tty is locked (so the
- * unlock never unlocks a freed tty).
+ * the slots and preserving the termios structure.
*/
mutex_lock(&tty_mutex);
release_tty(tty, idx);
@@ -2734,9 +2730,11 @@ static int compat_tty_tiocgserial(struct tty_struct *tty,
struct serial_struct32 v32;
struct serial_struct v;
int err;
- memset(&v, 0, sizeof(struct serial_struct));
- if (!tty->ops->set_serial)
+ memset(&v, 0, sizeof(v));
+ memset(&v32, 0, sizeof(v32));
+
+ if (!tty->ops->get_serial)
return -ENOTTY;
err = tty->ops->get_serial(tty, &v);
if (!err) {
diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c
index 044c3cb..ea80bf8 100644
--- a/drivers/tty/tty_port.c
+++ b/drivers/tty/tty_port.c
@@ -52,10 +52,11 @@ static void tty_port_default_wakeup(struct tty_port *port)
}
}
-static const struct tty_port_client_operations default_client_ops = {
+const struct tty_port_client_operations tty_port_default_client_ops = {
.receive_buf = tty_port_default_receive_buf,
.write_wakeup = tty_port_default_wakeup,
};
+EXPORT_SYMBOL_GPL(tty_port_default_client_ops);
void tty_port_init(struct tty_port *port)
{
@@ -68,7 +69,7 @@ void tty_port_init(struct tty_port *port)
spin_lock_init(&port->lock);
port->close_delay = (50 * HZ) / 100;
port->closing_wait = (3000 * HZ) / 100;
- port->client_ops = &default_client_ops;
+ port->client_ops = &tty_port_default_client_ops;
kref_init(&port->kref);
}
EXPORT_SYMBOL(tty_port_init);
diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c
index 78732fe..d7d2e4b8 100644
--- a/drivers/tty/vt/selection.c
+++ b/drivers/tty/vt/selection.c
@@ -16,6 +16,7 @@
#include <linux/tty.h>
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/types.h>
@@ -29,6 +30,8 @@
#include <linux/console.h>
#include <linux/tty_flip.h>
+#include <linux/sched/signal.h>
+
/* Don't take this from <ctype.h>: 011-015 on the screen aren't spaces */
#define isspace(c) ((c) == ' ')
@@ -43,6 +46,7 @@ static volatile int sel_start = -1; /* cleared by clear_selection */
static int sel_end;
static int sel_buffer_lth;
static char *sel_buffer;
+static DEFINE_MUTEX(sel_lock);
/* clear_selection, highlight and highlight_pointer can be called
from interrupt (via scrollback/front) */
@@ -177,14 +181,14 @@ int set_selection_user(const struct tiocl_selection __user *sel,
return set_selection_kernel(&v, tty);
}
-int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty)
+static int __set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty)
{
struct vc_data *vc = vc_cons[fg_console].d;
int new_sel_start, new_sel_end, spc;
char *bp, *obp;
int i, ps, pe, multiplier;
u32 c;
- int mode;
+ int mode, ret = 0;
poke_blanked_console();
@@ -332,7 +336,21 @@ int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty)
}
}
sel_buffer_lth = bp - sel_buffer;
- return 0;
+
+ return ret;
+}
+
+int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty)
+{
+ int ret;
+
+ mutex_lock(&sel_lock);
+ console_lock();
+ ret = __set_selection_kernel(v, tty);
+ console_unlock();
+ mutex_unlock(&sel_lock);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(set_selection_kernel);
@@ -350,6 +368,7 @@ int paste_selection(struct tty_struct *tty)
unsigned int count;
struct tty_ldisc *ld;
DECLARE_WAITQUEUE(wait, current);
+ int ret = 0;
console_lock();
poke_blanked_console();
@@ -361,10 +380,17 @@ int paste_selection(struct tty_struct *tty)
tty_buffer_lock_exclusive(&vc->port);
add_wait_queue(&vc->paste_wait, &wait);
+ mutex_lock(&sel_lock);
while (sel_buffer && sel_buffer_lth > pasted) {
set_current_state(TASK_INTERRUPTIBLE);
+ if (signal_pending(current)) {
+ ret = -EINTR;
+ break;
+ }
if (tty_throttled(tty)) {
+ mutex_unlock(&sel_lock);
schedule();
+ mutex_lock(&sel_lock);
continue;
}
__set_current_state(TASK_RUNNING);
@@ -373,11 +399,12 @@ int paste_selection(struct tty_struct *tty)
count);
pasted += count;
}
+ mutex_unlock(&sel_lock);
remove_wait_queue(&vc->paste_wait, &wait);
__set_current_state(TASK_RUNNING);
tty_buffer_unlock_exclusive(&vc->port);
tty_ldisc_deref(ld);
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(paste_selection);
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 35d21cd..15d2769 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -936,10 +936,21 @@ static void flush_scrollback(struct vc_data *vc)
WARN_CONSOLE_UNLOCKED();
set_origin(vc);
- if (vc->vc_sw->con_flush_scrollback)
+ if (vc->vc_sw->con_flush_scrollback) {
vc->vc_sw->con_flush_scrollback(vc);
- else
+ } else if (con_is_visible(vc)) {
+ /*
+ * When no con_flush_scrollback method is provided then the
+ * legacy way for flushing the scrollback buffer is to use
+ * a side effect of the con_switch method. We do it only on
+ * the foreground console as background consoles have no
+ * scrollback buffers in that case and we obviously don't
+ * want to switch to them.
+ */
+ hide_cursor(vc);
vc->vc_sw->con_switch(vc);
+ set_cursor(vc);
+ }
}
/*
@@ -3035,10 +3046,8 @@ int tioclinux(struct tty_struct *tty, unsigned long arg)
switch (type)
{
case TIOCL_SETSEL:
- console_lock();
ret = set_selection_user((struct tiocl_selection
__user *)(p+1), tty);
- console_unlock();
break;
case TIOCL_PASTESEL:
ret = paste_selection(tty);
diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c
index 8b0ed13..ee6c91e 100644
--- a/drivers/tty/vt/vt_ioctl.c
+++ b/drivers/tty/vt/vt_ioctl.c
@@ -876,15 +876,20 @@ int vt_ioctl(struct tty_struct *tty,
return -EINVAL;
for (i = 0; i < MAX_NR_CONSOLES; i++) {
+ struct vc_data *vcp;
+
if (!vc_cons[i].d)
continue;
console_lock();
- if (v.v_vlin)
- vc_cons[i].d->vc_scan_lines = v.v_vlin;
- if (v.v_clin)
- vc_cons[i].d->vc_font.height = v.v_clin;
- vc_cons[i].d->vc_resize_user = 1;
- vc_resize(vc_cons[i].d, v.v_cols, v.v_rows);
+ vcp = vc_cons[i].d;
+ if (vcp) {
+ if (v.v_vlin)
+ vcp->vc_scan_lines = v.v_vlin;
+ if (v.v_clin)
+ vcp->vc_font.height = v.v_clin;
+ vcp->vc_resize_user = 1;
+ vc_resize(vcp, v.v_cols, v.v_rows);
+ }
console_unlock();
}
break;
diff --git a/drivers/usb/cdns3/gadget.c b/drivers/usb/cdns3/gadget.c
index 736b0c6..3574dbb 100644
--- a/drivers/usb/cdns3/gadget.c
+++ b/drivers/usb/cdns3/gadget.c
@@ -2550,7 +2550,7 @@ int cdns3_gadget_ep_dequeue(struct usb_ep *ep,
/* Update ring only if removed request is on pending_req_list list */
if (req_on_hw_ring) {
link_trb->buffer = TRB_BUFFER(priv_ep->trb_pool_dma +
- (priv_req->start_trb * TRB_SIZE));
+ ((priv_req->end_trb + 1) * TRB_SIZE));
link_trb->control = (link_trb->control & TRB_CYCLE) |
TRB_TYPE(TRB_LINK) | TRB_CHAIN;
@@ -2595,11 +2595,21 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep)
{
struct cdns3_device *priv_dev = priv_ep->cdns3_dev;
struct usb_request *request;
+ struct cdns3_request *priv_req;
+ struct cdns3_trb *trb = NULL;
int ret;
int val;
trace_cdns3_halt(priv_ep, 0, 0);
+ request = cdns3_next_request(&priv_ep->pending_req_list);
+ if (request) {
+ priv_req = to_cdns3_request(request);
+ trb = priv_req->trb;
+ if (trb)
+ trb->control = trb->control ^ TRB_CYCLE;
+ }
+
writel(EP_CMD_CSTALL | EP_CMD_EPRST, &priv_dev->regs->ep_cmd);
/* wait for EPRST cleared */
@@ -2610,10 +2620,11 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep)
priv_ep->flags &= ~(EP_STALLED | EP_STALL_PENDING);
- request = cdns3_next_request(&priv_ep->pending_req_list);
-
- if (request)
+ if (request) {
+ if (trb)
+ trb->control = trb->control ^ TRB_CYCLE;
cdns3_rearm_transfer(priv_ep, 1);
+ }
cdns3_start_all_request(priv_dev, priv_ep);
return ret;
diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
index ffaf46f..4c4ac30 100644
--- a/drivers/usb/chipidea/udc.c
+++ b/drivers/usb/chipidea/udc.c
@@ -1530,18 +1530,19 @@ static const struct usb_ep_ops usb_ep_ops = {
static void ci_hdrc_gadget_connect(struct usb_gadget *_gadget, int is_active)
{
struct ci_hdrc *ci = container_of(_gadget, struct ci_hdrc, gadget);
- unsigned long flags;
if (is_active) {
pm_runtime_get_sync(&_gadget->dev);
hw_device_reset(ci);
- spin_lock_irqsave(&ci->lock, flags);
+ spin_lock_irq(&ci->lock);
if (ci->driver) {
hw_device_state(ci, ci->ep0out->qh.dma);
usb_gadget_set_state(_gadget, USB_STATE_POWERED);
+ spin_unlock_irq(&ci->lock);
usb_udc_vbus_handler(_gadget, true);
+ } else {
+ spin_unlock_irq(&ci->lock);
}
- spin_unlock_irqrestore(&ci->lock, flags);
} else {
usb_udc_vbus_handler(_gadget, false);
if (ci->driver)
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 62f4fb9..47f09a6 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -896,10 +896,10 @@ static int get_serial_info(struct tty_struct *tty, struct serial_struct *ss)
ss->xmit_fifo_size = acm->writesize;
ss->baud_base = le32_to_cpu(acm->line.dwDTERate);
- ss->close_delay = acm->port.close_delay / 10;
+ ss->close_delay = jiffies_to_msecs(acm->port.close_delay) / 10;
ss->closing_wait = acm->port.closing_wait == ASYNC_CLOSING_WAIT_NONE ?
ASYNC_CLOSING_WAIT_NONE :
- acm->port.closing_wait / 10;
+ jiffies_to_msecs(acm->port.closing_wait) / 10;
return 0;
}
@@ -907,24 +907,32 @@ static int set_serial_info(struct tty_struct *tty, struct serial_struct *ss)
{
struct acm *acm = tty->driver_data;
unsigned int closing_wait, close_delay;
+ unsigned int old_closing_wait, old_close_delay;
int retval = 0;
- close_delay = ss->close_delay * 10;
+ close_delay = msecs_to_jiffies(ss->close_delay * 10);
closing_wait = ss->closing_wait == ASYNC_CLOSING_WAIT_NONE ?
- ASYNC_CLOSING_WAIT_NONE : ss->closing_wait * 10;
+ ASYNC_CLOSING_WAIT_NONE :
+ msecs_to_jiffies(ss->closing_wait * 10);
+
+ /* we must redo the rounding here, so that the values match */
+ old_close_delay = jiffies_to_msecs(acm->port.close_delay) / 10;
+ old_closing_wait = acm->port.closing_wait == ASYNC_CLOSING_WAIT_NONE ?
+ ASYNC_CLOSING_WAIT_NONE :
+ jiffies_to_msecs(acm->port.closing_wait) / 10;
mutex_lock(&acm->port.mutex);
- if (!capable(CAP_SYS_ADMIN)) {
- if ((close_delay != acm->port.close_delay) ||
- (closing_wait != acm->port.closing_wait))
+ if ((ss->close_delay != old_close_delay) ||
+ (ss->closing_wait != old_closing_wait)) {
+ if (!capable(CAP_SYS_ADMIN))
retval = -EPERM;
- else
- retval = -EOPNOTSUPP;
- } else {
- acm->port.close_delay = close_delay;
- acm->port.closing_wait = closing_wait;
- }
+ else {
+ acm->port.close_delay = close_delay;
+ acm->port.closing_wait = closing_wait;
+ }
+ } else
+ retval = -EOPNOTSUPP;
mutex_unlock(&acm->port.mutex);
return retval;
diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index 26bc05e..b7918f6 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -256,6 +256,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
struct usb_host_interface *ifp, int num_ep,
unsigned char *buffer, int size)
{
+ struct usb_device *udev = to_usb_device(ddev);
unsigned char *buffer0 = buffer;
struct usb_endpoint_descriptor *d;
struct usb_host_endpoint *endpoint;
@@ -297,6 +298,16 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
goto skip_to_next_endpoint_or_interface_descriptor;
}
+ /* Ignore blacklisted endpoints */
+ if (udev->quirks & USB_QUIRK_ENDPOINT_BLACKLIST) {
+ if (usb_endpoint_is_blacklisted(udev, ifp, d)) {
+ dev_warn(ddev, "config %d interface %d altsetting %d has a blacklisted endpoint with address 0x%X, skipping\n",
+ cfgno, inum, asnum,
+ d->bEndpointAddress);
+ goto skip_to_next_endpoint_or_interface_descriptor;
+ }
+ }
+
endpoint = &ifp->endpoint[ifp->desc.bNumEndpoints];
++ifp->desc.bNumEndpoints;
@@ -311,7 +322,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
j = 255;
if (usb_endpoint_xfer_int(d)) {
i = 1;
- switch (to_usb_device(ddev)->speed) {
+ switch (udev->speed) {
case USB_SPEED_SUPER_PLUS:
case USB_SPEED_SUPER:
case USB_SPEED_HIGH:
@@ -332,8 +343,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
/*
* This quirk fixes bIntervals reported in ms.
*/
- if (to_usb_device(ddev)->quirks &
- USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL) {
+ if (udev->quirks & USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL) {
n = clamp(fls(d->bInterval) + 3, i, j);
i = j = n;
}
@@ -341,8 +351,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
* This quirk fixes bIntervals reported in
* linear microframes.
*/
- if (to_usb_device(ddev)->quirks &
- USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL) {
+ if (udev->quirks & USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL) {
n = clamp(fls(d->bInterval), i, j);
i = j = n;
}
@@ -359,7 +368,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
} else if (usb_endpoint_xfer_isoc(d)) {
i = 1;
j = 16;
- switch (to_usb_device(ddev)->speed) {
+ switch (udev->speed) {
case USB_SPEED_HIGH:
n = 7; /* 8 ms = 2^(7-1) uframes */
break;
@@ -381,8 +390,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
* explicitly forbidden by the USB spec. In an attempt to make
* them usable, we will try treating them as Interrupt endpoints.
*/
- if (to_usb_device(ddev)->speed == USB_SPEED_LOW &&
- usb_endpoint_xfer_bulk(d)) {
+ if (udev->speed == USB_SPEED_LOW && usb_endpoint_xfer_bulk(d)) {
dev_warn(ddev, "config %d interface %d altsetting %d "
"endpoint 0x%X is Bulk; changing to Interrupt\n",
cfgno, inum, asnum, d->bEndpointAddress);
@@ -406,7 +414,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
/* Find the highest legal maxpacket size for this endpoint */
i = 0; /* additional transactions per microframe */
- switch (to_usb_device(ddev)->speed) {
+ switch (udev->speed) {
case USB_SPEED_LOW:
maxpacket_maxes = low_speed_maxpacket_maxes;
break;
@@ -442,8 +450,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
* maxpacket sizes other than 512. High speed HCDs may not
* be able to handle that particular bug, so let's warn...
*/
- if (to_usb_device(ddev)->speed == USB_SPEED_HIGH
- && usb_endpoint_xfer_bulk(d)) {
+ if (udev->speed == USB_SPEED_HIGH && usb_endpoint_xfer_bulk(d)) {
if (maxp != 512)
dev_warn(ddev, "config %d interface %d altsetting %d "
"bulk endpoint 0x%X has invalid maxpacket %d\n",
@@ -452,7 +459,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
}
/* Parse a possible SuperSpeed endpoint companion descriptor */
- if (to_usb_device(ddev)->speed >= USB_SPEED_SUPER)
+ if (udev->speed >= USB_SPEED_SUPER)
usb_parse_ss_endpoint_companion(ddev, cfgno,
inum, asnum, endpoint, buffer, size);
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 3405b14..54cd8ef 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -38,7 +38,9 @@
#include "otg_whitelist.h"
#define USB_VENDOR_GENESYS_LOGIC 0x05e3
+#define USB_VENDOR_SMSC 0x0424
#define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND 0x01
+#define HUB_QUIRK_DISABLE_AUTOSUSPEND 0x02
#define USB_TP_TRANSMISSION_DELAY 40 /* ns */
#define USB_TP_TRANSMISSION_DELAY_MAX 65535 /* ns */
@@ -986,13 +988,17 @@ int usb_remove_device(struct usb_device *udev)
{
struct usb_hub *hub;
struct usb_interface *intf;
+ int ret;
if (!udev->parent) /* Can't remove a root hub */
return -EINVAL;
hub = usb_hub_to_struct_hub(udev->parent);
intf = to_usb_interface(hub->intfdev);
- usb_autopm_get_interface(intf);
+ ret = usb_autopm_get_interface(intf);
+ if (ret < 0)
+ return ret;
+
set_bit(udev->portnum, hub->removed_bits);
hub_port_logical_disconnect(hub, udev->portnum);
usb_autopm_put_interface(intf);
@@ -1217,11 +1223,6 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
#ifdef CONFIG_PM
udev->reset_resume = 1;
#endif
- /* Don't set the change_bits when the device
- * was powered off.
- */
- if (test_bit(port1, hub->power_bits))
- set_bit(port1, hub->change_bits);
} else {
/* The power session is gone; tell hub_wq */
@@ -1731,6 +1732,10 @@ static void hub_disconnect(struct usb_interface *intf)
kfree(hub->buffer);
pm_suspend_ignore_children(&intf->dev, false);
+
+ if (hub->quirk_disable_autosuspend)
+ usb_autopm_put_interface(intf);
+
kref_put(&hub->kref, hub_release);
}
@@ -1863,6 +1868,11 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id)
if (id->driver_info & HUB_QUIRK_CHECK_PORT_AUTOSUSPEND)
hub->quirk_check_port_auto_suspend = 1;
+ if (id->driver_info & HUB_QUIRK_DISABLE_AUTOSUSPEND) {
+ hub->quirk_disable_autosuspend = 1;
+ usb_autopm_get_interface_no_resume(intf);
+ }
+
if (hub_configure(hub, &desc->endpoint[0].desc) >= 0)
return 0;
@@ -5599,6 +5609,10 @@ static void hub_event(struct work_struct *work)
}
static const struct usb_device_id hub_id_table[] = {
+ { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_CLASS,
+ .idVendor = USB_VENDOR_SMSC,
+ .bInterfaceClass = USB_CLASS_HUB,
+ .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND},
{ .match_flags = USB_DEVICE_ID_MATCH_VENDOR
| USB_DEVICE_ID_MATCH_INT_CLASS,
.idVendor = USB_VENDOR_GENESYS_LOGIC,
diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h
index a9e24e4..a97dd1b 100644
--- a/drivers/usb/core/hub.h
+++ b/drivers/usb/core/hub.h
@@ -61,6 +61,7 @@ struct usb_hub {
unsigned quiescing:1;
unsigned disconnected:1;
unsigned in_reset:1;
+ unsigned quirk_disable_autosuspend:1;
unsigned quirk_check_port_auto_suspend:1;
diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c
index bbbb35f..235a7c6 100644
--- a/drivers/usb/core/port.c
+++ b/drivers/usb/core/port.c
@@ -213,7 +213,10 @@ static int usb_port_runtime_resume(struct device *dev)
if (!port_dev->is_superspeed && peer)
pm_runtime_get_sync(&peer->dev);
- usb_autopm_get_interface(intf);
+ retval = usb_autopm_get_interface(intf);
+ if (retval < 0)
+ return retval;
+
retval = usb_hub_set_port_power(hdev, hub, port1, true);
msleep(hub_power_on_good_delay(hub));
if (udev && !retval) {
@@ -266,7 +269,10 @@ static int usb_port_runtime_suspend(struct device *dev)
if (usb_port_block_power_off)
return -EBUSY;
- usb_autopm_get_interface(intf);
+ retval = usb_autopm_get_interface(intf);
+ if (retval < 0)
+ return retval;
+
retval = usb_hub_set_port_power(hdev, hub, port1, false);
usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_CONNECTION);
if (!port_dev->is_superspeed)
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 6b64130..da30b56 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -231,6 +231,9 @@ static const struct usb_device_id usb_quirk_list[] = {
/* Logitech PTZ Pro Camera */
{ USB_DEVICE(0x046d, 0x0853), .driver_info = USB_QUIRK_DELAY_INIT },
+ /* Logitech Screen Share */
+ { USB_DEVICE(0x046d, 0x086c), .driver_info = USB_QUIRK_NO_LPM },
+
/* Logitech Quickcam Fusion */
{ USB_DEVICE(0x046d, 0x08c1), .driver_info = USB_QUIRK_RESET_RESUME },
@@ -354,6 +357,10 @@ static const struct usb_device_id usb_quirk_list[] = {
{ USB_DEVICE(0x0904, 0x6103), .driver_info =
USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL },
+ /* Sound Devices USBPre2 */
+ { USB_DEVICE(0x0926, 0x0202), .driver_info =
+ USB_QUIRK_ENDPOINT_BLACKLIST },
+
/* Keytouch QWERTY Panel keyboard */
{ USB_DEVICE(0x0926, 0x3333), .driver_info =
USB_QUIRK_CONFIG_INTF_STRINGS },
@@ -371,6 +378,12 @@ static const struct usb_device_id usb_quirk_list[] = {
{ USB_DEVICE(0x0b05, 0x17e0), .driver_info =
USB_QUIRK_IGNORE_REMOTE_WAKEUP },
+ /* Realtek hub in Dell WD19 (Type-C) */
+ { USB_DEVICE(0x0bda, 0x0487), .driver_info = USB_QUIRK_NO_LPM },
+
+ /* Generic RTL8153 based ethernet adapters */
+ { USB_DEVICE(0x0bda, 0x8153), .driver_info = USB_QUIRK_NO_LPM },
+
/* Action Semiconductor flash disk */
{ USB_DEVICE(0x10d6, 0x2200), .driver_info =
USB_QUIRK_STRING_FETCH_255 },
@@ -445,6 +458,9 @@ static const struct usb_device_id usb_quirk_list[] = {
/* INTEL VALUE SSD */
{ USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
+ /* novation SoundControl XL */
+ { USB_DEVICE(0x1235, 0x0061), .driver_info = USB_QUIRK_RESET_RESUME },
+
{ } /* terminating entry must be last */
};
@@ -472,6 +488,39 @@ static const struct usb_device_id usb_amd_resume_quirk_list[] = {
{ } /* terminating entry must be last */
};
+/*
+ * Entries for blacklisted endpoints that should be ignored when parsing
+ * configuration descriptors.
+ *
+ * Matched for devices with USB_QUIRK_ENDPOINT_BLACKLIST.
+ */
+static const struct usb_device_id usb_endpoint_blacklist[] = {
+ { USB_DEVICE_INTERFACE_NUMBER(0x0926, 0x0202, 1), .driver_info = 0x85 },
+ { }
+};
+
+bool usb_endpoint_is_blacklisted(struct usb_device *udev,
+ struct usb_host_interface *intf,
+ struct usb_endpoint_descriptor *epd)
+{
+ const struct usb_device_id *id;
+ unsigned int address;
+
+ for (id = usb_endpoint_blacklist; id->match_flags; ++id) {
+ if (!usb_match_device(udev, id))
+ continue;
+
+ if (!usb_match_one_id_intf(udev, intf, id))
+ continue;
+
+ address = id->driver_info;
+ if (address == epd->bEndpointAddress)
+ return true;
+ }
+
+ return false;
+}
+
static bool usb_match_any_interface(struct usb_device *udev,
const struct usb_device_id *id)
{
diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h
index cf4783c..3ad0ee5 100644
--- a/drivers/usb/core/usb.h
+++ b/drivers/usb/core/usb.h
@@ -37,6 +37,9 @@ extern void usb_authorize_interface(struct usb_interface *);
extern void usb_detect_quirks(struct usb_device *udev);
extern void usb_detect_interface_quirks(struct usb_device *udev);
extern void usb_release_quirk_list(void);
+extern bool usb_endpoint_is_blacklisted(struct usb_device *udev,
+ struct usb_host_interface *intf,
+ struct usb_endpoint_descriptor *epd);
extern int usb_remove_device(struct usb_device *udev);
extern int usb_get_device_descriptor(struct usb_device *dev,
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index 88f7d6d..92ed32e 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -1083,11 +1083,6 @@ static void dwc2_hsotg_start_req(struct dwc2_hsotg *hsotg,
else
packets = 1; /* send one packet if length is zero. */
- if (hs_ep->isochronous && length > (hs_ep->mc * hs_ep->ep.maxpacket)) {
- dev_err(hsotg->dev, "req length > maxpacket*mc\n");
- return;
- }
-
if (dir_in && index != 0)
if (hs_ep->isochronous)
epsize = DXEPTSIZ_MC(packets);
@@ -1391,6 +1386,13 @@ static int dwc2_hsotg_ep_queue(struct usb_ep *ep, struct usb_request *req,
req->actual = 0;
req->status = -EINPROGRESS;
+ /* Don't queue ISOC request if length greater than mps*mc */
+ if (hs_ep->isochronous &&
+ req->length > (hs_ep->mc * hs_ep->ep.maxpacket)) {
+ dev_err(hs->dev, "req length > maxpacket*mc\n");
+ return -EINVAL;
+ }
+
/* In DDMA mode for ISOC's don't queue request if length greater
* than descriptor limits.
*/
@@ -1632,6 +1634,7 @@ static int dwc2_hsotg_process_req_status(struct dwc2_hsotg *hsotg,
struct dwc2_hsotg_ep *ep0 = hsotg->eps_out[0];
struct dwc2_hsotg_ep *ep;
__le16 reply;
+ u16 status;
int ret;
dev_dbg(hsotg->dev, "%s: USB_REQ_GET_STATUS\n", __func__);
@@ -1643,11 +1646,10 @@ static int dwc2_hsotg_process_req_status(struct dwc2_hsotg *hsotg,
switch (ctrl->bRequestType & USB_RECIP_MASK) {
case USB_RECIP_DEVICE:
- /*
- * bit 0 => self powered
- * bit 1 => remote wakeup
- */
- reply = cpu_to_le16(0);
+ status = 1 << USB_DEVICE_SELF_POWERED;
+ status |= hsotg->remote_wakeup_allowed <<
+ USB_DEVICE_REMOTE_WAKEUP;
+ reply = cpu_to_le16(status);
break;
case USB_RECIP_INTERFACE:
@@ -1758,7 +1760,10 @@ static int dwc2_hsotg_process_req_feature(struct dwc2_hsotg *hsotg,
case USB_RECIP_DEVICE:
switch (wValue) {
case USB_DEVICE_REMOTE_WAKEUP:
- hsotg->remote_wakeup_allowed = 1;
+ if (set)
+ hsotg->remote_wakeup_allowed = 1;
+ else
+ hsotg->remote_wakeup_allowed = 0;
break;
case USB_DEVICE_TEST_MODE:
@@ -1768,16 +1773,17 @@ static int dwc2_hsotg_process_req_feature(struct dwc2_hsotg *hsotg,
return -EINVAL;
hsotg->test_mode = wIndex >> 8;
- ret = dwc2_hsotg_send_reply(hsotg, ep0, NULL, 0);
- if (ret) {
- dev_err(hsotg->dev,
- "%s: failed to send reply\n", __func__);
- return ret;
- }
break;
default:
return -ENOENT;
}
+
+ ret = dwc2_hsotg_send_reply(hsotg, ep0, NULL, 0);
+ if (ret) {
+ dev_err(hsotg->dev,
+ "%s: failed to send reply\n", __func__);
+ return ret;
+ }
break;
case USB_RECIP_ENDPOINT:
diff --git a/drivers/usb/dwc3/debug.h b/drivers/usb/dwc3/debug.h
index e56beb9..4a13cea 100644
--- a/drivers/usb/dwc3/debug.h
+++ b/drivers/usb/dwc3/debug.h
@@ -256,86 +256,77 @@ static inline const char *dwc3_ep_event_string(char *str, size_t size,
u8 epnum = event->endpoint_number;
size_t len;
int status;
- int ret;
- ret = snprintf(str, size, "ep%d%s: ", epnum >> 1,
+ len = scnprintf(str, size, "ep%d%s: ", epnum >> 1,
(epnum & 1) ? "in" : "out");
- if (ret < 0)
- return "UNKNOWN";
status = event->status;
switch (event->endpoint_event) {
case DWC3_DEPEVT_XFERCOMPLETE:
- len = strlen(str);
- snprintf(str + len, size - len, "Transfer Complete (%c%c%c)",
+ len += scnprintf(str + len, size - len,
+ "Transfer Complete (%c%c%c)",
status & DEPEVT_STATUS_SHORT ? 'S' : 's',
status & DEPEVT_STATUS_IOC ? 'I' : 'i',
status & DEPEVT_STATUS_LST ? 'L' : 'l');
- len = strlen(str);
-
if (epnum <= 1)
- snprintf(str + len, size - len, " [%s]",
+ scnprintf(str + len, size - len, " [%s]",
dwc3_ep0_state_string(ep0state));
break;
case DWC3_DEPEVT_XFERINPROGRESS:
- len = strlen(str);
-
- snprintf(str + len, size - len, "Transfer In Progress [%d] (%c%c%c)",
+ scnprintf(str + len, size - len,
+ "Transfer In Progress [%d] (%c%c%c)",
event->parameters,
status & DEPEVT_STATUS_SHORT ? 'S' : 's',
status & DEPEVT_STATUS_IOC ? 'I' : 'i',
status & DEPEVT_STATUS_LST ? 'M' : 'm');
break;
case DWC3_DEPEVT_XFERNOTREADY:
- len = strlen(str);
-
- snprintf(str + len, size - len, "Transfer Not Ready [%d]%s",
+ len += scnprintf(str + len, size - len,
+ "Transfer Not Ready [%d]%s",
event->parameters,
status & DEPEVT_STATUS_TRANSFER_ACTIVE ?
" (Active)" : " (Not Active)");
- len = strlen(str);
-
/* Control Endpoints */
if (epnum <= 1) {
int phase = DEPEVT_STATUS_CONTROL_PHASE(event->status);
switch (phase) {
case DEPEVT_STATUS_CONTROL_DATA:
- snprintf(str + ret, size - ret,
+ scnprintf(str + len, size - len,
" [Data Phase]");
break;
case DEPEVT_STATUS_CONTROL_STATUS:
- snprintf(str + ret, size - ret,
+ scnprintf(str + len, size - len,
" [Status Phase]");
}
}
break;
case DWC3_DEPEVT_RXTXFIFOEVT:
- snprintf(str + ret, size - ret, "FIFO");
+ scnprintf(str + len, size - len, "FIFO");
break;
case DWC3_DEPEVT_STREAMEVT:
status = event->status;
switch (status) {
case DEPEVT_STREAMEVT_FOUND:
- snprintf(str + ret, size - ret, " Stream %d Found",
+ scnprintf(str + len, size - len, " Stream %d Found",
event->parameters);
break;
case DEPEVT_STREAMEVT_NOTFOUND:
default:
- snprintf(str + ret, size - ret, " Stream Not Found");
+ scnprintf(str + len, size - len, " Stream Not Found");
break;
}
break;
case DWC3_DEPEVT_EPCMDCMPLT:
- snprintf(str + ret, size - ret, "Endpoint Command Complete");
+ scnprintf(str + len, size - len, "Endpoint Command Complete");
break;
default:
- snprintf(str, size, "UNKNOWN");
+ scnprintf(str + len, size - len, "UNKNOWN");
}
return str;
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 1b8014a..1e00bf2 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1071,7 +1071,14 @@ static void dwc3_prepare_one_trb_sg(struct dwc3_ep *dep,
unsigned int rem = length % maxp;
unsigned chain = true;
- if (sg_is_last(s))
+ /*
+ * IOMMU driver is coalescing the list of sgs which shares a
+ * page boundary into one and giving it to USB driver. With
+ * this the number of sgs mapped is not equal to the number of
+ * sgs passed. So mark the chain bit to false if it isthe last
+ * mapped sg.
+ */
+ if (i == remaining - 1)
chain = false;
if (rem && usb_endpoint_dir_out(dep->endpoint.desc) && !chain) {
@@ -2429,7 +2436,8 @@ static int dwc3_gadget_ep_reclaim_completed_trb(struct dwc3_ep *dep,
if (event->status & DEPEVT_STATUS_SHORT && !chain)
return 1;
- if (event->status & DEPEVT_STATUS_IOC)
+ if ((trb->ctrl & DWC3_TRB_CTRL_IOC) ||
+ (trb->ctrl & DWC3_TRB_CTRL_LST))
return 1;
return 0;
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 3b4f670..223f72d 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -437,12 +437,14 @@ static u8 encode_bMaxPower(enum usb_device_speed speed,
val = CONFIG_USB_GADGET_VBUS_DRAW;
if (!val)
return 0;
- switch (speed) {
- case USB_SPEED_SUPER:
- return DIV_ROUND_UP(val, 8);
- default:
- return DIV_ROUND_UP(val, 2);
- }
+ if (speed < USB_SPEED_SUPER)
+ return min(val, 500U) / 2;
+ else
+ /*
+ * USB 3.x supports up to 900mA, but since 900 isn't divisible
+ * by 8 the integral division will effectively cap to 896mA.
+ */
+ return min(val, 900U) / 8;
}
static int config_buf(struct usb_configuration *config,
@@ -854,6 +856,10 @@ static int set_config(struct usb_composite_dev *cdev,
/* when we return, be sure our power usage is valid */
power = c->MaxPower ? c->MaxPower : CONFIG_USB_GADGET_VBUS_DRAW;
+ if (gadget->speed < USB_SPEED_SUPER)
+ power = min(power, 500U);
+ else
+ power = min(power, 900U);
done:
usb_gadget_vbus_draw(gadget, power);
if (result >= 0 && cdev->delayed_status)
@@ -2280,7 +2286,7 @@ void composite_resume(struct usb_gadget *gadget)
{
struct usb_composite_dev *cdev = get_gadget_data(gadget);
struct usb_function *f;
- u16 maxpower;
+ unsigned maxpower;
/* REVISIT: should we have config level
* suspend/resume callbacks?
@@ -2294,10 +2300,14 @@ void composite_resume(struct usb_gadget *gadget)
f->resume(f);
}
- maxpower = cdev->config->MaxPower;
+ maxpower = cdev->config->MaxPower ?
+ cdev->config->MaxPower : CONFIG_USB_GADGET_VBUS_DRAW;
+ if (gadget->speed < USB_SPEED_SUPER)
+ maxpower = min(maxpower, 500U);
+ else
+ maxpower = min(maxpower, 900U);
- usb_gadget_vbus_draw(gadget, maxpower ?
- maxpower : CONFIG_USB_GADGET_VBUS_DRAW);
+ usb_gadget_vbus_draw(gadget, maxpower);
}
cdev->suspended = 0;
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 6171d28..5719176 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1162,18 +1162,19 @@ static int ffs_aio_cancel(struct kiocb *kiocb)
{
struct ffs_io_data *io_data = kiocb->private;
struct ffs_epfile *epfile = kiocb->ki_filp->private_data;
+ unsigned long flags;
int value;
ENTER();
- spin_lock_irq(&epfile->ffs->eps_lock);
+ spin_lock_irqsave(&epfile->ffs->eps_lock, flags);
if (likely(io_data && io_data->ep && io_data->req))
value = usb_ep_dequeue(io_data->ep, io_data->req);
else
value = -EINVAL;
- spin_unlock_irq(&epfile->ffs->eps_lock);
+ spin_unlock_irqrestore(&epfile->ffs->eps_lock, flags);
return value;
}
diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c
index 6d956f1..e6d32c5 100644
--- a/drivers/usb/gadget/function/u_audio.c
+++ b/drivers/usb/gadget/function/u_audio.c
@@ -361,7 +361,7 @@ int u_audio_start_capture(struct g_audio *audio_dev)
ep = audio_dev->out_ep;
prm = &uac->c_prm;
config_ep_by_speed(gadget, &audio_dev->func, ep);
- req_len = prm->max_psize;
+ req_len = ep->maxpacket;
prm->ep_enabled = true;
usb_ep_enable(ep);
@@ -379,7 +379,7 @@ int u_audio_start_capture(struct g_audio *audio_dev)
req->context = &prm->ureq[i];
req->length = req_len;
req->complete = u_audio_iso_complete;
- req->buf = prm->rbuf + i * prm->max_psize;
+ req->buf = prm->rbuf + i * ep->maxpacket;
}
if (usb_ep_queue(ep, prm->ureq[i].req, GFP_ATOMIC))
@@ -430,9 +430,9 @@ int u_audio_start_playback(struct g_audio *audio_dev)
uac->p_pktsize = min_t(unsigned int,
uac->p_framesize *
(params->p_srate / uac->p_interval),
- prm->max_psize);
+ ep->maxpacket);
- if (uac->p_pktsize < prm->max_psize)
+ if (uac->p_pktsize < ep->maxpacket)
uac->p_pktsize_residue = uac->p_framesize *
(params->p_srate % uac->p_interval);
else
@@ -457,7 +457,7 @@ int u_audio_start_playback(struct g_audio *audio_dev)
req->context = &prm->ureq[i];
req->length = req_len;
req->complete = u_audio_iso_complete;
- req->buf = prm->rbuf + i * prm->max_psize;
+ req->buf = prm->rbuf + i * ep->maxpacket;
}
if (usb_ep_queue(ep, prm->ureq[i].req, GFP_ATOMIC))
diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c
index f986e5c..8167d37 100644
--- a/drivers/usb/gadget/function/u_serial.c
+++ b/drivers/usb/gadget/function/u_serial.c
@@ -561,8 +561,10 @@ static int gs_start_io(struct gs_port *port)
port->n_read = 0;
started = gs_start_rx(port);
- /* unblock any pending writes into our circular buffer */
if (started) {
+ gs_start_tx(port);
+ /* Unblock any pending writes into our circular buffer, in case
+ * we didn't in gs_start_tx() */
tty_wakeup(port->port.tty);
} else {
gs_free_requests(ep, head, &port->read_allocated);
diff --git a/drivers/usb/gadget/udc/udc-xilinx.c b/drivers/usb/gadget/udc/udc-xilinx.c
index 29d8e5f..b1cfc82 100644
--- a/drivers/usb/gadget/udc/udc-xilinx.c
+++ b/drivers/usb/gadget/udc/udc-xilinx.c
@@ -1399,7 +1399,6 @@ static int xudc_start(struct usb_gadget *gadget,
/**
* xudc_stop - stops the device.
* @gadget: pointer to the usb gadget structure
- * @driver: pointer to usb gadget driver structure
*
* Return: zero always
*/
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 7a3a29e..af92b25 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -55,6 +55,7 @@ static u8 usb_bos_descriptor [] = {
static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf,
u16 wLength)
{
+ struct xhci_port_cap *port_cap = NULL;
int i, ssa_count;
u32 temp;
u16 desc_size, ssp_cap_size, ssa_size = 0;
@@ -64,16 +65,24 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf,
ssp_cap_size = sizeof(usb_bos_descriptor) - desc_size;
/* does xhci support USB 3.1 Enhanced SuperSpeed */
- if (xhci->usb3_rhub.min_rev >= 0x01) {
+ for (i = 0; i < xhci->num_port_caps; i++) {
+ if (xhci->port_caps[i].maj_rev == 0x03 &&
+ xhci->port_caps[i].min_rev >= 0x01) {
+ usb3_1 = true;
+ port_cap = &xhci->port_caps[i];
+ break;
+ }
+ }
+
+ if (usb3_1) {
/* does xhci provide a PSI table for SSA speed attributes? */
- if (xhci->usb3_rhub.psi_count) {
+ if (port_cap->psi_count) {
/* two SSA entries for each unique PSI ID, RX and TX */
- ssa_count = xhci->usb3_rhub.psi_uid_count * 2;
+ ssa_count = port_cap->psi_uid_count * 2;
ssa_size = ssa_count * sizeof(u32);
ssp_cap_size -= 16; /* skip copying the default SSA */
}
desc_size += ssp_cap_size;
- usb3_1 = true;
}
memcpy(buf, &usb_bos_descriptor, min(desc_size, wLength));
@@ -99,7 +108,7 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf,
}
/* If PSI table exists, add the custom speed attributes from it */
- if (usb3_1 && xhci->usb3_rhub.psi_count) {
+ if (usb3_1 && port_cap->psi_count) {
u32 ssp_cap_base, bm_attrib, psi, psi_mant, psi_exp;
int offset;
@@ -111,7 +120,7 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf,
/* attribute count SSAC bits 4:0 and ID count SSIC bits 8:5 */
bm_attrib = (ssa_count - 1) & 0x1f;
- bm_attrib |= (xhci->usb3_rhub.psi_uid_count - 1) << 5;
+ bm_attrib |= (port_cap->psi_uid_count - 1) << 5;
put_unaligned_le32(bm_attrib, &buf[ssp_cap_base + 4]);
if (wLength < desc_size + ssa_size)
@@ -124,8 +133,8 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf,
* USB 3.1 requires two SSA entries (RX and TX) for every link
*/
offset = desc_size;
- for (i = 0; i < xhci->usb3_rhub.psi_count; i++) {
- psi = xhci->usb3_rhub.psi[i];
+ for (i = 0; i < port_cap->psi_count; i++) {
+ psi = port_cap->psi[i];
psi &= ~USB_SSP_SUBLINK_SPEED_RSVD;
psi_exp = XHCI_EXT_PORT_PSIE(psi);
psi_mant = XHCI_EXT_PORT_PSIM(psi);
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 3b1388f..884c601 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1475,9 +1475,15 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
/* Allow 3 retries for everything but isoc, set CErr = 3 */
if (!usb_endpoint_xfer_isoc(&ep->desc))
err_count = 3;
- /* Some devices get this wrong */
- if (usb_endpoint_xfer_bulk(&ep->desc) && udev->speed == USB_SPEED_HIGH)
- max_packet = 512;
+ /* HS bulk max packet should be 512, FS bulk supports 8, 16, 32 or 64 */
+ if (usb_endpoint_xfer_bulk(&ep->desc)) {
+ if (udev->speed == USB_SPEED_HIGH)
+ max_packet = 512;
+ if (udev->speed == USB_SPEED_FULL) {
+ max_packet = rounddown_pow_of_two(max_packet);
+ max_packet = clamp_val(max_packet, 8, 64);
+ }
+ }
/* xHCI 1.0 and 1.1 indicates that ctrl ep avg TRB Length should be 8 */
if (usb_endpoint_xfer_control(&ep->desc) && xhci->hci_version >= 0x100)
avg_trb_len = 8;
@@ -1909,17 +1915,17 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci)
xhci->usb3_rhub.num_ports = 0;
xhci->num_active_eps = 0;
kfree(xhci->usb2_rhub.ports);
- kfree(xhci->usb2_rhub.psi);
kfree(xhci->usb3_rhub.ports);
- kfree(xhci->usb3_rhub.psi);
kfree(xhci->hw_ports);
kfree(xhci->rh_bw);
kfree(xhci->ext_caps);
+ for (i = 0; i < xhci->num_port_caps; i++)
+ kfree(xhci->port_caps[i].psi);
+ kfree(xhci->port_caps);
+ xhci->num_port_caps = 0;
xhci->usb2_rhub.ports = NULL;
- xhci->usb2_rhub.psi = NULL;
xhci->usb3_rhub.ports = NULL;
- xhci->usb3_rhub.psi = NULL;
xhci->hw_ports = NULL;
xhci->rh_bw = NULL;
xhci->ext_caps = NULL;
@@ -2120,6 +2126,7 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports,
u8 major_revision, minor_revision;
struct xhci_hub *rhub;
struct device *dev = xhci_to_hcd(xhci)->self.sysdev;
+ struct xhci_port_cap *port_cap;
temp = readl(addr);
major_revision = XHCI_EXT_PORT_MAJOR(temp);
@@ -2154,31 +2161,39 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports,
/* WTF? "Valid values are ‘1’ to MaxPorts" */
return;
- rhub->psi_count = XHCI_EXT_PORT_PSIC(temp);
- if (rhub->psi_count) {
- rhub->psi = kcalloc_node(rhub->psi_count, sizeof(*rhub->psi),
- GFP_KERNEL, dev_to_node(dev));
- if (!rhub->psi)
- rhub->psi_count = 0;
+ port_cap = &xhci->port_caps[xhci->num_port_caps++];
+ if (xhci->num_port_caps > max_caps)
+ return;
- rhub->psi_uid_count++;
- for (i = 0; i < rhub->psi_count; i++) {
- rhub->psi[i] = readl(addr + 4 + i);
+ port_cap->maj_rev = major_revision;
+ port_cap->min_rev = minor_revision;
+ port_cap->psi_count = XHCI_EXT_PORT_PSIC(temp);
+
+ if (port_cap->psi_count) {
+ port_cap->psi = kcalloc_node(port_cap->psi_count,
+ sizeof(*port_cap->psi),
+ GFP_KERNEL, dev_to_node(dev));
+ if (!port_cap->psi)
+ port_cap->psi_count = 0;
+
+ port_cap->psi_uid_count++;
+ for (i = 0; i < port_cap->psi_count; i++) {
+ port_cap->psi[i] = readl(addr + 4 + i);
/* count unique ID values, two consecutive entries can
* have the same ID if link is assymetric
*/
- if (i && (XHCI_EXT_PORT_PSIV(rhub->psi[i]) !=
- XHCI_EXT_PORT_PSIV(rhub->psi[i - 1])))
- rhub->psi_uid_count++;
+ if (i && (XHCI_EXT_PORT_PSIV(port_cap->psi[i]) !=
+ XHCI_EXT_PORT_PSIV(port_cap->psi[i - 1])))
+ port_cap->psi_uid_count++;
xhci_dbg(xhci, "PSIV:%d PSIE:%d PLT:%d PFD:%d LP:%d PSIM:%d\n",
- XHCI_EXT_PORT_PSIV(rhub->psi[i]),
- XHCI_EXT_PORT_PSIE(rhub->psi[i]),
- XHCI_EXT_PORT_PLT(rhub->psi[i]),
- XHCI_EXT_PORT_PFD(rhub->psi[i]),
- XHCI_EXT_PORT_LP(rhub->psi[i]),
- XHCI_EXT_PORT_PSIM(rhub->psi[i]));
+ XHCI_EXT_PORT_PSIV(port_cap->psi[i]),
+ XHCI_EXT_PORT_PSIE(port_cap->psi[i]),
+ XHCI_EXT_PORT_PLT(port_cap->psi[i]),
+ XHCI_EXT_PORT_PFD(port_cap->psi[i]),
+ XHCI_EXT_PORT_LP(port_cap->psi[i]),
+ XHCI_EXT_PORT_PSIM(port_cap->psi[i]));
}
}
/* cache usb2 port capabilities */
@@ -2213,6 +2228,7 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports,
continue;
}
hw_port->rhub = rhub;
+ hw_port->port_cap = port_cap;
rhub->num_ports++;
}
/* FIXME: Should we disable ports not in the Extended Capabilities? */
@@ -2303,6 +2319,11 @@ static int xhci_setup_port_arrays(struct xhci_hcd *xhci, gfp_t flags)
if (!xhci->ext_caps)
return -ENOMEM;
+ xhci->port_caps = kcalloc_node(cap_count, sizeof(*xhci->port_caps),
+ flags, dev_to_node(dev));
+ if (!xhci->port_caps)
+ return -ENOMEM;
+
offset = cap_start;
while (offset) {
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 4917c5b..1fddc41 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -49,6 +49,7 @@
#define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_4C_XHCI 0x15ec
#define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_XHCI 0x15f0
#define PCI_DEVICE_ID_INTEL_ICE_LAKE_XHCI 0x8a13
+#define PCI_DEVICE_ID_INTEL_CML_XHCI 0xa3af
#define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9
#define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba
@@ -135,7 +136,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
xhci->quirks |= XHCI_AMD_PLL_FIX;
if (pdev->vendor == PCI_VENDOR_ID_AMD &&
- (pdev->device == 0x15e0 ||
+ (pdev->device == 0x145c ||
+ pdev->device == 0x15e0 ||
pdev->device == 0x15e1 ||
pdev->device == 0x43bb))
xhci->quirks |= XHCI_SUSPEND_DELAY;
@@ -187,7 +189,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI ||
pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI ||
pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI ||
- pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) {
+ pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI ||
+ pdev->device == PCI_DEVICE_ID_INTEL_CML_XHCI)) {
xhci->quirks |= XHCI_PME_STUCK_QUIRK;
}
if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
@@ -302,6 +305,9 @@ static int xhci_pci_setup(struct usb_hcd *hcd)
if (!usb_hcd_is_primary_hcd(hcd))
return 0;
+ if (xhci->quirks & XHCI_PME_STUCK_QUIRK)
+ xhci_pme_acpi_rtd3_enable(pdev);
+
xhci_dbg(xhci, "Got SBRN %u\n", (unsigned int) xhci->sbrn);
/* Find any debug ports */
@@ -359,9 +365,6 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
HCC_MAX_PSA(xhci->hcc_params) >= 4)
xhci->shared_hcd->can_do_streams = 1;
- if (xhci->quirks & XHCI_PME_STUCK_QUIRK)
- xhci_pme_acpi_rtd3_enable(dev);
-
/* USB-2 and USB-3 roothubs initialized, allow runtime pm suspend */
pm_runtime_put_noidle(&dev->dev);
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index d90cd5e..315b455 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -445,6 +445,7 @@ MODULE_DEVICE_TABLE(acpi, usb_xhci_acpi_match);
static struct platform_driver usb_xhci_driver = {
.probe = xhci_plat_probe,
.remove = xhci_plat_remove,
+ .shutdown = usb_hcd_platform_shutdown,
.driver = {
.name = "xhci-hcd",
.pm = &xhci_plat_pm_ops,
diff --git a/drivers/usb/host/xhci-trace.h b/drivers/usb/host/xhci-trace.h
index 56eb867..b19582b 100644
--- a/drivers/usb/host/xhci-trace.h
+++ b/drivers/usb/host/xhci-trace.h
@@ -289,23 +289,12 @@ DECLARE_EVENT_CLASS(xhci_log_urb,
),
TP_printk("ep%d%s-%s: urb %p pipe %u slot %d length %d/%d sgs %d/%d stream %d flags %08x",
__entry->epnum, __entry->dir_in ? "in" : "out",
- ({ char *s;
- switch (__entry->type) {
- case USB_ENDPOINT_XFER_INT:
- s = "intr";
- break;
- case USB_ENDPOINT_XFER_CONTROL:
- s = "control";
- break;
- case USB_ENDPOINT_XFER_BULK:
- s = "bulk";
- break;
- case USB_ENDPOINT_XFER_ISOC:
- s = "isoc";
- break;
- default:
- s = "UNKNOWN";
- } s; }), __entry->urb, __entry->pipe, __entry->slot_id,
+ __print_symbolic(__entry->type,
+ { USB_ENDPOINT_XFER_INT, "intr" },
+ { USB_ENDPOINT_XFER_CONTROL, "control" },
+ { USB_ENDPOINT_XFER_BULK, "bulk" },
+ { USB_ENDPOINT_XFER_ISOC, "isoc" }),
+ __entry->urb, __entry->pipe, __entry->slot_id,
__entry->actual, __entry->length, __entry->num_mapped_sgs,
__entry->num_sgs, __entry->stream, __entry->flags
)
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 13d8838..3ecee10 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1702,12 +1702,20 @@ struct xhci_bus_state {
* Intel Lynx Point LP xHCI host.
*/
#define XHCI_MAX_REXIT_TIMEOUT_MS 20
+struct xhci_port_cap {
+ u32 *psi; /* array of protocol speed ID entries */
+ u8 psi_count;
+ u8 psi_uid_count;
+ u8 maj_rev;
+ u8 min_rev;
+};
struct xhci_port {
__le32 __iomem *addr;
int hw_portnum;
int hcd_portnum;
struct xhci_hub *rhub;
+ struct xhci_port_cap *port_cap;
};
struct xhci_hub {
@@ -1719,9 +1727,6 @@ struct xhci_hub {
/* supported prococol extended capabiliy values */
u8 maj_rev;
u8 min_rev;
- u32 *psi; /* array of protocol speed ID entries */
- u8 psi_count;
- u8 psi_uid_count;
};
/* There is one xhci_hcd structure per controller */
@@ -1880,6 +1885,9 @@ struct xhci_hcd {
/* cached usb2 extened protocol capabilites */
u32 *ext_caps;
unsigned int num_ext_caps;
+ /* cached extended protocol port capabilities */
+ struct xhci_port_cap *port_caps;
+ unsigned int num_port_caps;
/* Compliance Mode Recovery Data */
struct timer_list comp_mode_recovery_timer;
u32 port_status_u0;
diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
index dce44fb..dce2030 100644
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -33,6 +33,14 @@
#define USB_DEVICE_ID_CODEMERCS_IOWPV2 0x1512
/* full speed iowarrior */
#define USB_DEVICE_ID_CODEMERCS_IOW56 0x1503
+/* fuller speed iowarrior */
+#define USB_DEVICE_ID_CODEMERCS_IOW28 0x1504
+#define USB_DEVICE_ID_CODEMERCS_IOW28L 0x1505
+#define USB_DEVICE_ID_CODEMERCS_IOW100 0x1506
+
+/* OEMed devices */
+#define USB_DEVICE_ID_CODEMERCS_IOW24SAG 0x158a
+#define USB_DEVICE_ID_CODEMERCS_IOW56AM 0x158b
/* Get a minor range for your devices from the usb maintainer */
#ifdef CONFIG_USB_DYNAMIC_MINORS
@@ -133,6 +141,11 @@ static const struct usb_device_id iowarrior_ids[] = {
{USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV1)},
{USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV2)},
{USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56)},
+ {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW24SAG)},
+ {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56AM)},
+ {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28)},
+ {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28L)},
+ {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW100)},
{} /* Terminating entry */
};
MODULE_DEVICE_TABLE(usb, iowarrior_ids);
@@ -357,6 +370,7 @@ static ssize_t iowarrior_write(struct file *file,
}
switch (dev->product_id) {
case USB_DEVICE_ID_CODEMERCS_IOW24:
+ case USB_DEVICE_ID_CODEMERCS_IOW24SAG:
case USB_DEVICE_ID_CODEMERCS_IOWPV1:
case USB_DEVICE_ID_CODEMERCS_IOWPV2:
case USB_DEVICE_ID_CODEMERCS_IOW40:
@@ -371,6 +385,10 @@ static ssize_t iowarrior_write(struct file *file,
goto exit;
break;
case USB_DEVICE_ID_CODEMERCS_IOW56:
+ case USB_DEVICE_ID_CODEMERCS_IOW56AM:
+ case USB_DEVICE_ID_CODEMERCS_IOW28:
+ case USB_DEVICE_ID_CODEMERCS_IOW28L:
+ case USB_DEVICE_ID_CODEMERCS_IOW100:
/* The IOW56 uses asynchronous IO and more urbs */
if (atomic_read(&dev->write_busy) == MAX_WRITES_IN_FLIGHT) {
/* Wait until we are below the limit for submitted urbs */
@@ -493,6 +511,7 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd,
switch (cmd) {
case IOW_WRITE:
if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24 ||
+ dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24SAG ||
dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV1 ||
dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV2 ||
dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW40) {
@@ -767,7 +786,11 @@ static int iowarrior_probe(struct usb_interface *interface,
goto error;
}
- if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) {
+ if ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) ||
+ (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) ||
+ (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) ||
+ (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L) ||
+ (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW100)) {
res = usb_find_last_int_out_endpoint(iface_desc,
&dev->int_out_endpoint);
if (res) {
@@ -780,7 +803,11 @@ static int iowarrior_probe(struct usb_interface *interface,
/* we have to check the report_size often, so remember it in the endianness suitable for our machine */
dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint);
if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) &&
- (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56))
+ ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) ||
+ (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) ||
+ (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) ||
+ (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L) ||
+ (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW100)))
/* IOWarrior56 has wMaxPacketSize different from report size */
dev->report_size = 7;
diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c
index 10c9e7f..29fe577 100644
--- a/drivers/usb/misc/usb251xb.c
+++ b/drivers/usb/misc/usb251xb.c
@@ -424,10 +424,6 @@ static int usb251xb_get_ofdata(struct usb251xb *hub,
return err;
}
- hub->vdd = devm_regulator_get(dev, "vdd");
- if (IS_ERR(hub->vdd))
- return PTR_ERR(hub->vdd);
-
if (of_property_read_u16_array(np, "vendor-id", &hub->vendor_id, 1))
hub->vendor_id = USB251XB_DEF_VENDOR_ID;
@@ -640,6 +636,13 @@ static int usb251xb_get_ofdata(struct usb251xb *hub,
}
#endif /* CONFIG_OF */
+static void usb251xb_regulator_disable_action(void *data)
+{
+ struct usb251xb *hub = data;
+
+ regulator_disable(hub->vdd);
+}
+
static int usb251xb_probe(struct usb251xb *hub)
{
struct device *dev = hub->dev;
@@ -676,10 +679,19 @@ static int usb251xb_probe(struct usb251xb *hub)
if (err)
return err;
+ hub->vdd = devm_regulator_get(dev, "vdd");
+ if (IS_ERR(hub->vdd))
+ return PTR_ERR(hub->vdd);
+
err = regulator_enable(hub->vdd);
if (err)
return err;
+ err = devm_add_action_or_reset(dev,
+ usb251xb_regulator_disable_action, hub);
+ if (err)
+ return err;
+
err = usb251xb_connect(hub);
if (err) {
dev_err(dev, "Failed to connect hub (%d)\n", err);
diff --git a/drivers/usb/phy/phy-tegra-usb.c b/drivers/usb/phy/phy-tegra-usb.c
index 037e8ee..6153cc3 100644
--- a/drivers/usb/phy/phy-tegra-usb.c
+++ b/drivers/usb/phy/phy-tegra-usb.c
@@ -969,6 +969,10 @@ static int utmi_phy_probe(struct tegra_usb_phy *tegra_phy,
return -ENXIO;
}
+ /*
+ * Note that UTMI pad registers are shared by all PHYs, therefore
+ * devm_platform_ioremap_resource() can't be used here.
+ */
tegra_phy->pad_regs = devm_ioremap(&pdev->dev, res->start,
resource_size(res));
if (!tegra_phy->pad_regs) {
@@ -1087,6 +1091,10 @@ static int tegra_usb_phy_probe(struct platform_device *pdev)
return -ENXIO;
}
+ /*
+ * Note that PHY and USB controller are using shared registers,
+ * therefore devm_platform_ioremap_resource() can't be used here.
+ */
tegra_phy->regs = devm_ioremap(&pdev->dev, res->start,
resource_size(res));
if (!tegra_phy->regs) {
diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index d3f420f..c5ecdcd 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -205,6 +205,16 @@ static int ch341_get_divisor(speed_t speed)
16 * speed - 16 * CH341_CLKRATE / (clk_div * (div + 1)))
div++;
+ /*
+ * Prefer lower base clock (fact = 0) if even divisor.
+ *
+ * Note that this makes the receiver more tolerant to errors.
+ */
+ if (fact == 1 && div % 2 == 0) {
+ div /= 2;
+ fact = 0;
+ }
+
return (0x100 - div) << 8 | fact << 2 | ps;
}
diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c
index 79d0586..172261a 100644
--- a/drivers/usb/serial/ir-usb.c
+++ b/drivers/usb/serial/ir-usb.c
@@ -448,7 +448,7 @@ static void ir_set_termios(struct tty_struct *tty,
usb_sndbulkpipe(udev, port->bulk_out_endpointAddress),
transfer_buffer, 1, &actual_length, 5000);
if (ret || actual_length != 1) {
- if (actual_length != 1)
+ if (!ret)
ret = -EIO;
dev_err(&port->dev, "failed to change line speed: %d\n", ret);
}
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 084cc2f..0b5dcf9 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1183,6 +1183,8 @@ static const struct usb_device_id option_ids[] = {
.driver_info = NCTRL(0) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x110a, 0xff), /* Telit ME910G1 */
.driver_info = NCTRL(0) | RSVD(3) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x110b, 0xff), /* Telit ME910G1 (ECM) */
+ .driver_info = NCTRL(0) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
.driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4),
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index aab737e..c5a2995 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -99,6 +99,7 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(SUPERIAL_VENDOR_ID, SUPERIAL_PRODUCT_ID) },
{ USB_DEVICE(HP_VENDOR_ID, HP_LD220_PRODUCT_ID) },
{ USB_DEVICE(HP_VENDOR_ID, HP_LD220TA_PRODUCT_ID) },
+ { USB_DEVICE(HP_VENDOR_ID, HP_LD381_PRODUCT_ID) },
{ USB_DEVICE(HP_VENDOR_ID, HP_LD960_PRODUCT_ID) },
{ USB_DEVICE(HP_VENDOR_ID, HP_LD960TA_PRODUCT_ID) },
{ USB_DEVICE(HP_VENDOR_ID, HP_LCM220_PRODUCT_ID) },
diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h
index a019ea7..52db551 100644
--- a/drivers/usb/serial/pl2303.h
+++ b/drivers/usb/serial/pl2303.h
@@ -130,6 +130,7 @@
#define HP_LM920_PRODUCT_ID 0x026b
#define HP_TD620_PRODUCT_ID 0x0956
#define HP_LD960_PRODUCT_ID 0x0b39
+#define HP_LD381_PRODUCT_ID 0x0f7f
#define HP_LCM220_PRODUCT_ID 0x3139
#define HP_LCM960_PRODUCT_ID 0x3239
#define HP_LD220_PRODUCT_ID 0x3524
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 95bba3b..3670fda 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -45,6 +45,7 @@ struct uas_dev_info {
struct scsi_cmnd *cmnd[MAX_CMNDS];
spinlock_t lock;
struct work_struct work;
+ struct work_struct scan_work; /* for async scanning */
};
enum {
@@ -114,6 +115,17 @@ static void uas_do_work(struct work_struct *work)
spin_unlock_irqrestore(&devinfo->lock, flags);
}
+static void uas_scan_work(struct work_struct *work)
+{
+ struct uas_dev_info *devinfo =
+ container_of(work, struct uas_dev_info, scan_work);
+ struct Scsi_Host *shost = usb_get_intfdata(devinfo->intf);
+
+ dev_dbg(&devinfo->intf->dev, "starting scan\n");
+ scsi_scan_host(shost);
+ dev_dbg(&devinfo->intf->dev, "scan complete\n");
+}
+
static void uas_add_work(struct uas_cmd_info *cmdinfo)
{
struct scsi_pointer *scp = (void *)cmdinfo;
@@ -982,6 +994,7 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id)
init_usb_anchor(&devinfo->data_urbs);
spin_lock_init(&devinfo->lock);
INIT_WORK(&devinfo->work, uas_do_work);
+ INIT_WORK(&devinfo->scan_work, uas_scan_work);
result = uas_configure_endpoints(devinfo);
if (result)
@@ -998,7 +1011,9 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id)
if (result)
goto free_streams;
- scsi_scan_host(shost);
+ /* Submit the delayed_work for SCSI-device scanning */
+ schedule_work(&devinfo->scan_work);
+
return result;
free_streams:
@@ -1166,6 +1181,12 @@ static void uas_disconnect(struct usb_interface *intf)
usb_kill_anchored_urbs(&devinfo->data_urbs);
uas_zap_pending(devinfo, DID_NO_CONNECT);
+ /*
+ * Prevent SCSI scanning (if it hasn't started yet)
+ * or wait for the SCSI-scanning routine to stop.
+ */
+ cancel_work_sync(&devinfo->scan_work);
+
scsi_remove_host(shost);
uas_free_streams(devinfo);
scsi_host_put(shost);
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 1cd9b63..1880f3e 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -1258,6 +1258,12 @@ UNUSUAL_DEV( 0x090a, 0x1200, 0x0000, 0x9999,
USB_SC_RBC, USB_PR_BULK, NULL,
0 ),
+UNUSUAL_DEV(0x090c, 0x1000, 0x1100, 0x1100,
+ "Samsung",
+ "Flash Drive FIT",
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_MAX_SECTORS_64),
+
/* aeb */
UNUSUAL_DEV( 0x090c, 0x1132, 0x0000, 0xffff,
"Feiya",
diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c
index 0f1273ae..048381c 100644
--- a/drivers/usb/typec/ucsi/displayport.c
+++ b/drivers/usb/typec/ucsi/displayport.c
@@ -271,6 +271,9 @@ void ucsi_displayport_remove_partner(struct typec_altmode *alt)
return;
dp = typec_altmode_get_drvdata(alt);
+ if (!dp)
+ return;
+
dp->data.conf = 0;
dp->data.status = 0;
dp->initialized = false;
@@ -285,6 +288,8 @@ struct typec_altmode *ucsi_register_displayport(struct ucsi_connector *con,
struct typec_altmode *alt;
struct ucsi_dp *dp;
+ mutex_lock(&con->lock);
+
/* We can't rely on the firmware with the capabilities. */
desc->vdo |= DP_CAP_DP_SIGNALING | DP_CAP_RECEPTACLE;
@@ -293,12 +298,15 @@ struct typec_altmode *ucsi_register_displayport(struct ucsi_connector *con,
desc->vdo |= all_assignments << 16;
alt = typec_port_register_altmode(con->port, desc);
- if (IS_ERR(alt))
+ if (IS_ERR(alt)) {
+ mutex_unlock(&con->lock);
return alt;
+ }
dp = devm_kzalloc(&alt->dev, sizeof(*dp), GFP_KERNEL);
if (!dp) {
typec_unregister_altmode(alt);
+ mutex_unlock(&con->lock);
return ERR_PTR(-ENOMEM);
}
@@ -311,5 +319,7 @@ struct typec_altmode *ucsi_register_displayport(struct ucsi_connector *con,
alt->ops = &ucsi_displayport_ops;
typec_altmode_set_drvdata(alt, dp);
+ mutex_unlock(&con->lock);
+
return alt;
}
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index e158159..18e205e 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1414,10 +1414,6 @@ static int vhost_net_release(struct inode *inode, struct file *f)
static struct socket *get_raw_socket(int fd)
{
- struct {
- struct sockaddr_ll sa;
- char buf[MAX_ADDR_LEN];
- } uaddr;
int r;
struct socket *sock = sockfd_lookup(fd, &r);
@@ -1430,11 +1426,7 @@ static struct socket *get_raw_socket(int fd)
goto err;
}
- r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, 0);
- if (r < 0)
- goto err;
-
- if (uaddr.sa.sll_family != AF_PACKET) {
+ if (sock->sk->sk_family != AF_PACKET) {
r = -EPFNOSUPPORT;
goto err;
}
diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index 403707a..0093bbd 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -456,6 +456,13 @@
help
Support for backlight control on RAVE SP device.
+config BACKLIGHT_LED
+ tristate "Generic LED based Backlight Driver"
+ depends on LEDS_CLASS && OF
+ help
+ If you have a LCD backlight adjustable by LED class driver, say Y
+ to enable this driver.
+
endif # BACKLIGHT_CLASS_DEVICE
endmenu
diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile
index 6f87770..0c1a15246 100644
--- a/drivers/video/backlight/Makefile
+++ b/drivers/video/backlight/Makefile
@@ -57,3 +57,4 @@
obj-$(CONFIG_BACKLIGHT_WM831X) += wm831x_bl.o
obj-$(CONFIG_BACKLIGHT_ARCXCNN) += arcxcnn_bl.o
obj-$(CONFIG_BACKLIGHT_RAVE_SP) += rave-sp-backlight.o
+obj-$(CONFIG_BACKLIGHT_LED) += led_bl.o
diff --git a/drivers/video/backlight/led_bl.c b/drivers/video/backlight/led_bl.c
new file mode 100644
index 0000000..3f66549
--- /dev/null
+++ b/drivers/video/backlight/led_bl.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Texas Instruments Incorporated - http://www.ti.com/
+ * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
+ *
+ * Based on pwm_bl.c
+ */
+
+#include <linux/backlight.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+struct led_bl_data {
+ struct device *dev;
+ struct backlight_device *bl_dev;
+ struct led_classdev **leds;
+ bool enabled;
+ int nb_leds;
+ unsigned int *levels;
+ unsigned int default_brightness;
+ unsigned int max_brightness;
+};
+
+static void led_bl_set_brightness(struct led_bl_data *priv, int level)
+{
+ int i;
+ int bkl_brightness;
+
+ if (priv->levels)
+ bkl_brightness = priv->levels[level];
+ else
+ bkl_brightness = level;
+
+ for (i = 0; i < priv->nb_leds; i++)
+ led_set_brightness(priv->leds[i], bkl_brightness);
+
+ priv->enabled = true;
+}
+
+static void led_bl_power_off(struct led_bl_data *priv)
+{
+ int i;
+
+ if (!priv->enabled)
+ return;
+
+ for (i = 0; i < priv->nb_leds; i++)
+ led_set_brightness(priv->leds[i], LED_OFF);
+
+ priv->enabled = false;
+}
+
+static int led_bl_update_status(struct backlight_device *bl)
+{
+ struct led_bl_data *priv = bl_get_data(bl);
+ int brightness = bl->props.brightness;
+
+ if (bl->props.power != FB_BLANK_UNBLANK ||
+ bl->props.fb_blank != FB_BLANK_UNBLANK ||
+ bl->props.state & BL_CORE_FBBLANK)
+ brightness = 0;
+
+ if (brightness > 0)
+ led_bl_set_brightness(priv, brightness);
+ else
+ led_bl_power_off(priv);
+
+ return 0;
+}
+
+static const struct backlight_ops led_bl_ops = {
+ .update_status = led_bl_update_status,
+};
+
+static int led_bl_get_leds(struct device *dev,
+ struct led_bl_data *priv)
+{
+ int i, nb_leds, ret;
+ struct device_node *node = dev->of_node;
+ struct led_classdev **leds;
+ unsigned int max_brightness;
+ unsigned int default_brightness;
+
+ ret = of_count_phandle_with_args(node, "leds", NULL);
+ if (ret < 0) {
+ dev_err(dev, "Unable to get led count\n");
+ return -EINVAL;
+ }
+
+ nb_leds = ret;
+ if (nb_leds < 1) {
+ dev_err(dev, "At least one LED must be specified!\n");
+ return -EINVAL;
+ }
+
+ leds = devm_kzalloc(dev, sizeof(struct led_classdev *) * nb_leds,
+ GFP_KERNEL);
+ if (!leds)
+ return -ENOMEM;
+
+ for (i = 0; i < nb_leds; i++) {
+ leds[i] = devm_of_led_get(dev, i);
+ if (IS_ERR(leds[i]))
+ return PTR_ERR(leds[i]);
+ }
+
+ /* check that the LEDs all have the same brightness range */
+ max_brightness = leds[0]->max_brightness;
+ for (i = 1; i < nb_leds; i++) {
+ if (max_brightness != leds[i]->max_brightness) {
+ dev_err(dev, "LEDs must have identical ranges\n");
+ return -EINVAL;
+ }
+ }
+
+ /* get the default brightness from the first LED from the list */
+ default_brightness = leds[0]->brightness;
+
+ priv->nb_leds = nb_leds;
+ priv->leds = leds;
+ priv->max_brightness = max_brightness;
+ priv->default_brightness = default_brightness;
+
+ return 0;
+}
+
+static int led_bl_parse_levels(struct device *dev,
+ struct led_bl_data *priv)
+{
+ struct device_node *node = dev->of_node;
+ int num_levels;
+ u32 value;
+ int ret;
+
+ if (!node)
+ return -ENODEV;
+
+ num_levels = of_property_count_u32_elems(node, "brightness-levels");
+ if (num_levels > 1) {
+ int i;
+ unsigned int db;
+ u32 *levels = NULL;
+
+ levels = devm_kzalloc(dev, sizeof(u32) * num_levels,
+ GFP_KERNEL);
+ if (!levels)
+ return -ENOMEM;
+
+ ret = of_property_read_u32_array(node, "brightness-levels",
+ levels,
+ num_levels);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Try to map actual LED brightness to backlight brightness
+ * level
+ */
+ db = priv->default_brightness;
+ for (i = 0 ; i < num_levels; i++) {
+ if ((i && db > levels[i-1]) && db <= levels[i])
+ break;
+ }
+ priv->default_brightness = i;
+ priv->max_brightness = num_levels - 1;
+ priv->levels = levels;
+ } else if (num_levels >= 0)
+ dev_warn(dev, "Not enough levels defined\n");
+
+ ret = of_property_read_u32(node, "default-brightness-level", &value);
+ if (!ret && value <= priv->max_brightness)
+ priv->default_brightness = value;
+ else if (!ret && value > priv->max_brightness)
+ dev_warn(dev, "Invalid default brightness. Ignoring it\n");
+
+ return 0;
+}
+
+static int led_bl_probe(struct platform_device *pdev)
+{
+ struct backlight_properties props;
+ struct led_bl_data *priv;
+ int ret, i;
+
+ priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, priv);
+
+ priv->dev = &pdev->dev;
+
+ ret = led_bl_get_leds(&pdev->dev, priv);
+ if (ret)
+ return ret;
+
+ ret = led_bl_parse_levels(&pdev->dev, priv);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Failed to parse DT data\n");
+ return ret;
+ }
+
+ memset(&props, 0, sizeof(struct backlight_properties));
+ props.type = BACKLIGHT_RAW;
+ props.max_brightness = priv->max_brightness;
+ props.brightness = priv->default_brightness;
+ props.power = (priv->default_brightness > 0) ? FB_BLANK_POWERDOWN :
+ FB_BLANK_UNBLANK;
+ priv->bl_dev = backlight_device_register(dev_name(&pdev->dev),
+ &pdev->dev, priv, &led_bl_ops, &props);
+ if (IS_ERR(priv->bl_dev)) {
+ dev_err(&pdev->dev, "Failed to register backlight\n");
+ return PTR_ERR(priv->bl_dev);
+ }
+
+ for (i = 0; i < priv->nb_leds; i++)
+ led_sysfs_disable(priv->leds[i]);
+
+ backlight_update_status(priv->bl_dev);
+
+ return 0;
+}
+
+static int led_bl_remove(struct platform_device *pdev)
+{
+ struct led_bl_data *priv = platform_get_drvdata(pdev);
+ struct backlight_device *bl = priv->bl_dev;
+ int i;
+
+ backlight_device_unregister(bl);
+
+ led_bl_power_off(priv);
+ for (i = 0; i < priv->nb_leds; i++)
+ led_sysfs_enable(priv->leds[i]);
+
+ return 0;
+}
+
+static const struct of_device_id led_bl_of_match[] = {
+ { .compatible = "led-backlight" },
+ { }
+};
+
+MODULE_DEVICE_TABLE(of, led_bl_of_match);
+
+static struct platform_driver led_bl_driver = {
+ .driver = {
+ .name = "led-backlight",
+ .of_match_table = of_match_ptr(led_bl_of_match),
+ },
+ .probe = led_bl_probe,
+ .remove = led_bl_remove,
+};
+
+module_platform_driver(led_bl_driver);
+
+MODULE_DESCRIPTION("LED based Backlight Driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:led-backlight");
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index de7b838..998b0de 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -1316,6 +1316,9 @@ static int vgacon_font_get(struct vc_data *c, struct console_font *font)
static int vgacon_resize(struct vc_data *c, unsigned int width,
unsigned int height, unsigned int user)
{
+ if ((width << 1) * height > vga_vram_size)
+ return -EINVAL;
+
if (width % 2 || width > screen_info.orig_video_cols ||
height > (screen_info.orig_video_lines * vga_default_font_height)/
c->vc_font.height)
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 7bfe365..341458f 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -959,8 +959,8 @@ static int virtballoon_probe(struct virtio_device *vdev)
iput(vb->vb_dev_info.inode);
out_kern_unmount:
kern_unmount(balloon_mnt);
-#endif
out_del_vqs:
+#endif
vdev->config->del_vqs(vdev);
out_free_vb:
kfree(vb);
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 867c7eb..58b96ba 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2203,10 +2203,10 @@ void vring_del_virtqueue(struct virtqueue *_vq)
vq->split.queue_size_in_bytes,
vq->split.vring.desc,
vq->split.queue_dma_addr);
-
- kfree(vq->split.desc_state);
}
}
+ if (!vq->packed_ring)
+ kfree(vq->split.desc_state);
list_del(&_vq->list);
kfree(vq);
}
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index cec868f..9ea2b43 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -207,6 +207,7 @@
config DA9062_WATCHDOG
tristate "Dialog DA9062/61 Watchdog"
depends on MFD_DA9062 || COMPILE_TEST
+ depends on I2C
select WATCHDOG_CORE
help
Support for the watchdog in the DA9062 and DA9061 PMICs.
@@ -841,6 +842,7 @@
tristate "Mediatek SoCs watchdog support"
depends on ARCH_MEDIATEK || COMPILE_TEST
select WATCHDOG_CORE
+ select RESET_CONTROLLER
help
Say Y here to include support for the watchdog timer
in Mediatek SoCs.
diff --git a/drivers/watchdog/da9062_wdt.c b/drivers/watchdog/da9062_wdt.c
index 47eefe0..0ad15d5 100644
--- a/drivers/watchdog/da9062_wdt.c
+++ b/drivers/watchdog/da9062_wdt.c
@@ -16,6 +16,7 @@
#include <linux/jiffies.h>
#include <linux/mfd/da9062/registers.h>
#include <linux/mfd/da9062/core.h>
+#include <linux/property.h>
#include <linux/regmap.h>
#include <linux/of.h>
@@ -31,6 +32,7 @@ static const unsigned int wdt_timeout[] = { 0, 2, 4, 8, 16, 32, 65, 131 };
struct da9062_watchdog {
struct da9062 *hw;
struct watchdog_device wdtdev;
+ bool use_sw_pm;
};
static unsigned int da9062_wdt_timeout_to_sel(unsigned int secs)
@@ -95,13 +97,6 @@ static int da9062_wdt_stop(struct watchdog_device *wdd)
struct da9062_watchdog *wdt = watchdog_get_drvdata(wdd);
int ret;
- ret = da9062_reset_watchdog_timer(wdt);
- if (ret) {
- dev_err(wdt->hw->dev, "Failed to ping the watchdog (err = %d)\n",
- ret);
- return ret;
- }
-
ret = regmap_update_bits(wdt->hw->regmap,
DA9062AA_CONTROL_D,
DA9062AA_TWDSCALE_MASK,
@@ -200,6 +195,8 @@ static int da9062_wdt_probe(struct platform_device *pdev)
if (!wdt)
return -ENOMEM;
+ wdt->use_sw_pm = device_property_present(dev, "dlg,use-sw-pm");
+
wdt->hw = chip;
wdt->wdtdev.info = &da9062_watchdog_info;
@@ -226,6 +223,10 @@ static int da9062_wdt_probe(struct platform_device *pdev)
static int __maybe_unused da9062_wdt_suspend(struct device *dev)
{
struct watchdog_device *wdd = dev_get_drvdata(dev);
+ struct da9062_watchdog *wdt = watchdog_get_drvdata(wdd);
+
+ if (!wdt->use_sw_pm)
+ return 0;
if (watchdog_active(wdd))
return da9062_wdt_stop(wdd);
@@ -236,6 +237,10 @@ static int __maybe_unused da9062_wdt_suspend(struct device *dev)
static int __maybe_unused da9062_wdt_resume(struct device *dev)
{
struct watchdog_device *wdd = dev_get_drvdata(dev);
+ struct da9062_watchdog *wdt = watchdog_get_drvdata(wdd);
+
+ if (!wdt->use_sw_pm)
+ return 0;
if (watchdog_active(wdd))
return da9062_wdt_start(wdd);
diff --git a/drivers/watchdog/iTCO_vendor.h b/drivers/watchdog/iTCO_vendor.h
index 0f7373b..69e92e6 100644
--- a/drivers/watchdog/iTCO_vendor.h
+++ b/drivers/watchdog/iTCO_vendor.h
@@ -1,10 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* iTCO Vendor Specific Support hooks */
#ifdef CONFIG_ITCO_VENDOR_SUPPORT
+extern int iTCO_vendorsupport;
extern void iTCO_vendor_pre_start(struct resource *, unsigned int);
extern void iTCO_vendor_pre_stop(struct resource *);
extern int iTCO_vendor_check_noreboot_on(void);
#else
+#define iTCO_vendorsupport 0
#define iTCO_vendor_pre_start(acpibase, heartbeat) {}
#define iTCO_vendor_pre_stop(acpibase) {}
#define iTCO_vendor_check_noreboot_on() 1
diff --git a/drivers/watchdog/iTCO_vendor_support.c b/drivers/watchdog/iTCO_vendor_support.c
index 4f1b96f..cf0eaa0 100644
--- a/drivers/watchdog/iTCO_vendor_support.c
+++ b/drivers/watchdog/iTCO_vendor_support.c
@@ -39,8 +39,10 @@
/* Broken BIOS */
#define BROKEN_BIOS 911
-static int vendorsupport;
-module_param(vendorsupport, int, 0);
+int iTCO_vendorsupport;
+EXPORT_SYMBOL(iTCO_vendorsupport);
+
+module_param_named(vendorsupport, iTCO_vendorsupport, int, 0);
MODULE_PARM_DESC(vendorsupport, "iTCO vendor specific support mode, default="
"0 (none), 1=SuperMicro Pent3, 911=Broken SMI BIOS");
@@ -152,7 +154,7 @@ static void broken_bios_stop(struct resource *smires)
void iTCO_vendor_pre_start(struct resource *smires,
unsigned int heartbeat)
{
- switch (vendorsupport) {
+ switch (iTCO_vendorsupport) {
case SUPERMICRO_OLD_BOARD:
supermicro_old_pre_start(smires);
break;
@@ -165,7 +167,7 @@ EXPORT_SYMBOL(iTCO_vendor_pre_start);
void iTCO_vendor_pre_stop(struct resource *smires)
{
- switch (vendorsupport) {
+ switch (iTCO_vendorsupport) {
case SUPERMICRO_OLD_BOARD:
supermicro_old_pre_stop(smires);
break;
@@ -178,7 +180,7 @@ EXPORT_SYMBOL(iTCO_vendor_pre_stop);
int iTCO_vendor_check_noreboot_on(void)
{
- switch (vendorsupport) {
+ switch (iTCO_vendorsupport) {
case SUPERMICRO_OLD_BOARD:
return 0;
default:
@@ -189,13 +191,13 @@ EXPORT_SYMBOL(iTCO_vendor_check_noreboot_on);
static int __init iTCO_vendor_init_module(void)
{
- if (vendorsupport == SUPERMICRO_NEW_BOARD) {
+ if (iTCO_vendorsupport == SUPERMICRO_NEW_BOARD) {
pr_warn("Option vendorsupport=%d is no longer supported, "
"please use the w83627hf_wdt driver instead\n",
SUPERMICRO_NEW_BOARD);
return -EINVAL;
}
- pr_info("vendor-support=%d\n", vendorsupport);
+ pr_info("vendor-support=%d\n", iTCO_vendorsupport);
return 0;
}
diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
index 156360e..e707c47 100644
--- a/drivers/watchdog/iTCO_wdt.c
+++ b/drivers/watchdog/iTCO_wdt.c
@@ -459,13 +459,25 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
if (!p->tco_res)
return -ENODEV;
- p->smi_res = platform_get_resource(pdev, IORESOURCE_IO, ICH_RES_IO_SMI);
- if (!p->smi_res)
- return -ENODEV;
-
p->iTCO_version = pdata->version;
p->pci_dev = to_pci_dev(dev->parent);
+ p->smi_res = platform_get_resource(pdev, IORESOURCE_IO, ICH_RES_IO_SMI);
+ if (p->smi_res) {
+ /* The TCO logic uses the TCO_EN bit in the SMI_EN register */
+ if (!devm_request_region(dev, p->smi_res->start,
+ resource_size(p->smi_res),
+ pdev->name)) {
+ pr_err("I/O address 0x%04llx already in use, device disabled\n",
+ (u64)SMI_EN(p));
+ return -EBUSY;
+ }
+ } else if (iTCO_vendorsupport ||
+ turn_SMI_watchdog_clear_off >= p->iTCO_version) {
+ pr_err("SMI I/O resource is missing\n");
+ return -ENODEV;
+ }
+
iTCO_wdt_no_reboot_bit_setup(p, pdata);
/*
@@ -492,14 +504,6 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
/* Set the NO_REBOOT bit to prevent later reboots, just for sure */
p->update_no_reboot_bit(p->no_reboot_priv, true);
- /* The TCO logic uses the TCO_EN bit in the SMI_EN register */
- if (!devm_request_region(dev, p->smi_res->start,
- resource_size(p->smi_res),
- pdev->name)) {
- pr_err("I/O address 0x%04llx already in use, device disabled\n",
- (u64)SMI_EN(p));
- return -EBUSY;
- }
if (turn_SMI_watchdog_clear_off >= p->iTCO_version) {
/*
* Bit 13: TCO_EN -> 0
diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c
index b069349..3065dd6 100644
--- a/drivers/watchdog/wdat_wdt.c
+++ b/drivers/watchdog/wdat_wdt.c
@@ -54,6 +54,13 @@ module_param(nowayout, bool, 0);
MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+#define WDAT_DEFAULT_TIMEOUT 30
+
+static int timeout = WDAT_DEFAULT_TIMEOUT;
+module_param(timeout, int, 0);
+MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds (default="
+ __MODULE_STRING(WDAT_DEFAULT_TIMEOUT) ")");
+
static int wdat_wdt_read(struct wdat_wdt *wdat,
const struct wdat_instruction *instr, u32 *value)
{
@@ -389,7 +396,7 @@ static int wdat_wdt_probe(struct platform_device *pdev)
memset(&r, 0, sizeof(r));
r.start = gas->address;
- r.end = r.start + gas->access_width - 1;
+ r.end = r.start + ACPI_ACCESS_BYTE_WIDTH(gas->access_width) - 1;
if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
r.flags = IORESOURCE_MEM;
} else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
@@ -438,6 +445,22 @@ static int wdat_wdt_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, wdat);
+ /*
+ * Set initial timeout so that userspace has time to configure the
+ * watchdog properly after it has opened the device. In some cases
+ * the BIOS default is too short and causes immediate reboot.
+ */
+ if (timeout * 1000 < wdat->wdd.min_hw_heartbeat_ms ||
+ timeout * 1000 > wdat->wdd.max_hw_heartbeat_ms) {
+ dev_warn(dev, "Invalid timeout %d given, using %d\n",
+ timeout, WDAT_DEFAULT_TIMEOUT);
+ timeout = WDAT_DEFAULT_TIMEOUT;
+ }
+
+ ret = wdat_wdt_set_timeout(&wdat->wdd, timeout);
+ if (ret)
+ return ret;
+
watchdog_set_nowayout(&wdat->wdd, nowayout);
return devm_watchdog_register_device(dev, &wdat->wdd);
}
diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c
index 70650b2..17240c5 100644
--- a/drivers/xen/preempt.c
+++ b/drivers/xen/preempt.c
@@ -33,7 +33,9 @@ asmlinkage __visible void xen_maybe_preempt_hcall(void)
* cpu.
*/
__this_cpu_write(xen_in_preemptible_hcall, false);
- _cond_resched();
+ local_irq_enable();
+ cond_resched();
+ local_irq_disable();
__this_cpu_write(xen_in_preemptible_hcall, true);
}
}
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
index ce1077e..7c95516 100644
--- a/drivers/xen/xen-pciback/pciback.h
+++ b/drivers/xen/xen-pciback/pciback.h
@@ -52,7 +52,7 @@ struct xen_pcibk_dev_data {
unsigned int ack_intr:1; /* .. and ACK-ing */
unsigned long handled;
unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
- char irq_name[0]; /* xen-pcibk[000:04:00.0] */
+ char irq_name[]; /* xen-pcibk[000:04:00.0] */
};
/* Used by XenBus and xen_pcibk_ops.c */
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index d239fc3..eb5151f 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -313,6 +313,8 @@ static int process_msg(void)
req->msg.type = state.msg.type;
req->msg.len = state.msg.len;
req->body = state.body;
+ /* write body, then update state */
+ virt_wmb();
req->state = xb_req_state_got_reply;
req->cb(req);
} else
@@ -395,6 +397,8 @@ static int process_writes(void)
if (state.req->state == xb_req_state_aborted)
kfree(state.req);
else {
+ /* write err, then update state */
+ virt_wmb();
state.req->state = xb_req_state_got_reply;
wake_up(&state.req->wq);
}
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 66975da..8c4d05b 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -239,9 +239,9 @@ int xenbus_dev_probe(struct device *_dev)
goto fail;
}
- spin_lock(&dev->reclaim_lock);
+ down(&dev->reclaim_sem);
err = drv->probe(dev, id);
- spin_unlock(&dev->reclaim_lock);
+ up(&dev->reclaim_sem);
if (err)
goto fail_put;
@@ -271,9 +271,9 @@ int xenbus_dev_remove(struct device *_dev)
free_otherend_watch(dev);
if (drv->remove) {
- spin_lock(&dev->reclaim_lock);
+ down(&dev->reclaim_sem);
drv->remove(dev);
- spin_unlock(&dev->reclaim_lock);
+ up(&dev->reclaim_sem);
}
module_put(drv->driver.owner);
@@ -473,7 +473,7 @@ int xenbus_probe_node(struct xen_bus_type *bus,
goto fail;
dev_set_name(&xendev->dev, "%s", devname);
- spin_lock_init(&xendev->reclaim_lock);
+ sema_init(&xendev->reclaim_sem, 1);
/* Register with generic device framework. */
err = device_register(&xendev->dev);
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index 791f6fe..9b2fbe6 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -45,6 +45,7 @@
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/export.h>
+#include <linux/semaphore.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -257,10 +258,10 @@ static int backend_reclaim_memory(struct device *dev, void *data)
drv = to_xenbus_driver(dev->driver);
if (drv && drv->reclaim_memory) {
xdev = to_xenbus_device(dev);
- if (!spin_trylock(&xdev->reclaim_lock))
+ if (down_trylock(&xdev->reclaim_sem))
return 0;
drv->reclaim_memory(xdev);
- spin_unlock(&xdev->reclaim_lock);
+ up(&xdev->reclaim_sem);
}
return 0;
}
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index ddc18da..3a06eb6 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -191,8 +191,11 @@ static bool xenbus_ok(void)
static bool test_reply(struct xb_req_data *req)
{
- if (req->state == xb_req_state_got_reply || !xenbus_ok())
+ if (req->state == xb_req_state_got_reply || !xenbus_ok()) {
+ /* read req->state before all other fields */
+ virt_rmb();
return true;
+ }
/* Make sure to reread req->state each time. */
barrier();
@@ -202,7 +205,7 @@ static bool test_reply(struct xb_req_data *req)
static void *read_reply(struct xb_req_data *req)
{
- while (req->state != xb_req_state_got_reply) {
+ do {
wait_event(req->wq, test_reply(req));
if (!xenbus_ok())
@@ -216,7 +219,7 @@ static void *read_reply(struct xb_req_data *req)
if (req->err)
return ERR_PTR(req->err);
- }
+ } while (req->state != xb_req_state_got_reply);
return req->body;
}
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index df415c0..de1ae0b 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -19,7 +19,7 @@
void afs_put_addrlist(struct afs_addr_list *alist)
{
if (alist && refcount_dec_and_test(&alist->usage))
- call_rcu(&alist->rcu, (rcu_callback_t)kfree);
+ kfree_rcu(alist, rcu);
}
/*
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index ff3994a6..6765949 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -244,6 +244,17 @@ static void afs_cm_destructor(struct afs_call *call)
}
/*
+ * Abort a service call from within an action function.
+ */
+static void afs_abort_service_call(struct afs_call *call, u32 abort_code, int error,
+ const char *why)
+{
+ rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+ abort_code, error, why);
+ afs_set_call_complete(call, error, 0);
+}
+
+/*
* The server supplied a list of callbacks that it wanted to break.
*/
static void SRXAFSCB_CallBack(struct work_struct *work)
@@ -510,8 +521,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0)
afs_send_empty_reply(call);
else
- rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
- 1, 1, "K-1");
+ afs_abort_service_call(call, 1, 1, "K-1");
afs_put_call(call);
_leave("");
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index cfe62b1..e1b9ed6 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -145,6 +145,7 @@ static int afs_do_probe_fileserver(struct afs_net *net,
read_lock(&server->fs_lock);
ac.alist = rcu_dereference_protected(server->addresses,
lockdep_is_held(&server->fs_lock));
+ afs_get_addrlist(ac.alist);
read_unlock(&server->fs_lock);
atomic_set(&server->probe_outstanding, ac.alist->nr_addrs);
@@ -163,6 +164,7 @@ static int afs_do_probe_fileserver(struct afs_net *net,
if (!in_progress)
afs_fs_probe_done(server);
+ afs_put_addrlist(ac.alist);
return in_progress;
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 1d81fc4..ef732dd 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -81,7 +81,7 @@ enum afs_call_state {
* List of server addresses.
*/
struct afs_addr_list {
- struct rcu_head rcu; /* Must be first */
+ struct rcu_head rcu;
refcount_t usage;
u32 version; /* Version */
unsigned char max_addrs;
@@ -154,7 +154,7 @@ struct afs_call {
};
unsigned char unmarshall; /* unmarshalling phase */
unsigned char addr_ix; /* Address in ->alist */
- bool incoming; /* T if incoming call */
+ bool drop_ref; /* T if need to drop ref for incoming call */
bool send_pages; /* T if data from mapping should be sent */
bool need_attention; /* T if RxRPC poked us */
bool async; /* T if asynchronous */
@@ -1209,8 +1209,16 @@ static inline void afs_set_call_complete(struct afs_call *call,
ok = true;
}
spin_unlock_bh(&call->state_lock);
- if (ok)
+ if (ok) {
trace_afs_call_done(call);
+
+ /* Asynchronous calls have two refs to release - one from the alloc and
+ * one queued with the work item - and we can't just deallocate the
+ * call because the work item may be queued again.
+ */
+ if (call->drop_ref)
+ afs_put_call(call);
+ }
}
/*
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 58d3965..1ecc67d 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -18,7 +18,6 @@ struct workqueue_struct *afs_async_calls;
static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
-static void afs_delete_async_call(struct work_struct *);
static void afs_process_async_call(struct work_struct *);
static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long);
@@ -169,7 +168,7 @@ void afs_put_call(struct afs_call *call)
int n = atomic_dec_return(&call->usage);
int o = atomic_read(&net->nr_outstanding_calls);
- trace_afs_call(call, afs_call_trace_put, n + 1, o,
+ trace_afs_call(call, afs_call_trace_put, n, o,
__builtin_return_address(0));
ASSERTCMP(n, >=, 0);
@@ -402,8 +401,10 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
/* If the call is going to be asynchronous, we need an extra ref for
* the call to hold itself so the caller need not hang on to its ref.
*/
- if (call->async)
+ if (call->async) {
afs_get_call(call, afs_call_trace_get);
+ call->drop_ref = true;
+ }
/* create a call */
rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key,
@@ -413,7 +414,8 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
afs_wake_up_async_call :
afs_wake_up_call_waiter),
call->upgrade,
- call->intr,
+ (call->intr ? RXRPC_PREINTERRUPTIBLE :
+ RXRPC_UNINTERRUPTIBLE),
call->debug_id);
if (IS_ERR(rxcall)) {
ret = PTR_ERR(rxcall);
@@ -584,8 +586,6 @@ static void afs_deliver_to_call(struct afs_call *call)
done:
if (call->type->done)
call->type->done(call);
- if (state == AFS_CALL_COMPLETE && call->incoming)
- afs_put_call(call);
out:
_leave("");
return;
@@ -604,11 +604,7 @@ static void afs_deliver_to_call(struct afs_call *call)
long afs_wait_for_call_to_complete(struct afs_call *call,
struct afs_addr_cursor *ac)
{
- signed long rtt2, timeout;
long ret;
- bool stalled = false;
- u64 rtt;
- u32 life, last_life;
bool rxrpc_complete = false;
DECLARE_WAITQUEUE(myself, current);
@@ -619,14 +615,6 @@ long afs_wait_for_call_to_complete(struct afs_call *call,
if (ret < 0)
goto out;
- rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
- rtt2 = nsecs_to_jiffies64(rtt) * 2;
- if (rtt2 < 2)
- rtt2 = 2;
-
- timeout = rtt2;
- rxrpc_kernel_check_life(call->net->socket, call->rxcall, &last_life);
-
add_wait_queue(&call->waitq, &myself);
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
@@ -637,37 +625,19 @@ long afs_wait_for_call_to_complete(struct afs_call *call,
call->need_attention = false;
__set_current_state(TASK_RUNNING);
afs_deliver_to_call(call);
- timeout = rtt2;
continue;
}
if (afs_check_call_state(call, AFS_CALL_COMPLETE))
break;
- if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall, &life)) {
+ if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall)) {
/* rxrpc terminated the call. */
rxrpc_complete = true;
break;
}
- if (call->intr && timeout == 0 &&
- life == last_life && signal_pending(current)) {
- if (stalled)
- break;
- __set_current_state(TASK_RUNNING);
- rxrpc_kernel_probe_life(call->net->socket, call->rxcall);
- timeout = rtt2;
- stalled = true;
- continue;
- }
-
- if (life != last_life) {
- timeout = rtt2;
- last_life = life;
- stalled = false;
- }
-
- timeout = schedule_timeout(timeout);
+ schedule();
}
remove_wait_queue(&call->waitq, &myself);
@@ -735,7 +705,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
u = atomic_fetch_add_unless(&call->usage, 1, 0);
if (u != 0) {
- trace_afs_call(call, afs_call_trace_wake, u,
+ trace_afs_call(call, afs_call_trace_wake, u + 1,
atomic_read(&call->net->nr_outstanding_calls),
__builtin_return_address(0));
@@ -745,21 +715,6 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
}
/*
- * Delete an asynchronous call. The work item carries a ref to the call struct
- * that we need to release.
- */
-static void afs_delete_async_call(struct work_struct *work)
-{
- struct afs_call *call = container_of(work, struct afs_call, async_work);
-
- _enter("");
-
- afs_put_call(call);
-
- _leave("");
-}
-
-/*
* Perform I/O processing on an asynchronous call. The work item carries a ref
* to the call struct that we either need to release or to pass on.
*/
@@ -774,16 +729,6 @@ static void afs_process_async_call(struct work_struct *work)
afs_deliver_to_call(call);
}
- if (call->state == AFS_CALL_COMPLETE) {
- /* We have two refs to release - one from the alloc and one
- * queued with the work item - and we can't just deallocate the
- * call because the work item may be queued again.
- */
- call->async_work.func = afs_delete_async_call;
- if (!queue_work(afs_async_calls, &call->async_work))
- afs_put_call(call);
- }
-
afs_put_call(call);
_leave("");
}
@@ -810,6 +755,7 @@ void afs_charge_preallocation(struct work_struct *work)
if (!call)
break;
+ call->drop_ref = true;
call->async = true;
call->state = AFS_CALL_SV_AWAIT_OP_ID;
init_waitqueue_head(&call->waitq);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 69bf2fb..9501880 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1520,10 +1520,22 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
if (ret)
return ret;
- if (invalidate)
- set_capacity(disk, 0);
- else if (disk->fops->revalidate_disk)
- disk->fops->revalidate_disk(disk);
+ /*
+ * Historically we only set the capacity to zero for devices that
+ * support partitions (independ of actually having partitions created).
+ * Doing that is rather inconsistent, but changing it broke legacy
+ * udisks polling for legacy ide-cdrom devices. Use the crude check
+ * below to get the sane behavior for most device while not breaking
+ * userspace for this particular setup.
+ */
+ if (invalidate) {
+ if (disk_part_scan_enabled(disk) ||
+ !(disk->flags & GENHD_FL_REMOVABLE))
+ set_capacity(disk, 0);
+ } else {
+ if (disk->fops->revalidate_disk)
+ disk->fops->revalidate_disk(disk);
+ }
check_disk_size_change(disk, bdev, !invalidate);
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 404e050..7f09147 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -856,9 +856,9 @@ static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
found_raid1c34 = true;
up_read(&sinfo->groups_sem);
}
- if (found_raid56)
+ if (!found_raid56)
btrfs_clear_fs_incompat(fs_info, RAID56);
- if (found_raid1c34)
+ if (!found_raid1c34)
btrfs_clear_fs_incompat(fs_info, RAID1C34);
}
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7fa9bb7..c6c9a6a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3164,6 +3164,7 @@ int __cold open_ctree(struct super_block *sb,
/* do not make disk changes in broken FS or nologreplay is given */
if (btrfs_super_log_root(disk_super) != 0 &&
!btrfs_test_opt(fs_info, NOLOGREPLAY)) {
+ btrfs_info(fs_info, "start tree-log replay");
ret = btrfs_replay_log(fs_info, fs_devices);
if (ret) {
err = ret;
@@ -3199,6 +3200,7 @@ int __cold open_ctree(struct super_block *sb,
if (IS_ERR(fs_info->fs_root)) {
err = PTR_ERR(fs_info->fs_root);
btrfs_warn(fs_info, "failed to read fs tree: %d", err);
+ fs_info->fs_root = NULL;
goto fail_qgroup;
}
@@ -4275,6 +4277,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
cond_resched();
spin_lock(&delayed_refs->lock);
}
+ btrfs_qgroup_destroy_extent_records(trans);
spin_unlock(&delayed_refs->lock);
@@ -4500,7 +4503,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
wake_up(&fs_info->transaction_wait);
btrfs_destroy_delayed_inodes(fs_info);
- btrfs_assert_delayed_root_empty(fs_info);
btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages,
EXTENT_DIRTY);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0163fdd..a7bc661 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4430,6 +4430,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
offset, ins, 1);
+ if (ret)
+ btrfs_pin_extent(fs_info, ins->objectid, ins->offset, 1);
btrfs_put_block_group(block_group);
return ret;
}
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 6f417ff..bd6229f 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -237,6 +237,17 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
struct extent_map *merge = NULL;
struct rb_node *rb;
+ /*
+ * We can't modify an extent map that is in the tree and that is being
+ * used by another task, as it can cause that other task to see it in
+ * inconsistent state during the merging. We always have 1 reference for
+ * the tree and 1 for this task (which is unpinning the extent map or
+ * clearing the logging flag), so anything > 2 means it's being used by
+ * other tasks too.
+ */
+ if (refcount_read(&em->refs) > 2)
+ return;
+
if (em->start != 0) {
rb = rb_prev(&em->rb_node);
if (rb)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5b3ec93..d267eb5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4085,6 +4085,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u64 bytes_deleted = 0;
bool be_nice = false;
bool should_throttle = false;
+ const u64 lock_start = ALIGN_DOWN(new_size, fs_info->sectorsize);
+ struct extent_state *cached_state = NULL;
BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
@@ -4101,6 +4103,10 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
return -ENOMEM;
path->reada = READA_BACK;
+ if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1,
+ &cached_state);
+
/*
* We want to drop from the next block forward in case this new size is
* not block aligned since we will be keeping the last block of the
@@ -4137,7 +4143,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
goto out;
}
- path->leave_spinning = 1;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret < 0)
goto out;
@@ -4289,7 +4294,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
root == fs_info->tree_root)) {
struct btrfs_ref ref = { 0 };
- btrfs_set_path_blocking(path);
bytes_deleted += extent_num_bytes;
btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF,
@@ -4365,6 +4369,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
if (!ret && last_size > new_size)
last_size = new_size;
btrfs_ordered_update_i_size(inode, last_size, NULL);
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start,
+ (u64)-1, &cached_state);
}
btrfs_free_path(path);
@@ -7777,6 +7783,7 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
{
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
+ u16 csum_size;
blk_status_t ret;
/*
@@ -7796,7 +7803,8 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
file_offset -= dip->logical_offset;
file_offset >>= inode->i_sb->s_blocksize_bits;
- io_bio->csum = (u8 *)(((u32 *)orig_io_bio->csum) + file_offset);
+ csum_size = btrfs_super_csum_size(btrfs_sb(inode->i_sb)->super_copy);
+ io_bio->csum = orig_io_bio->csum + csum_size * file_offset;
return 0;
}
@@ -9488,6 +9496,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx);
if (ret)
commit_transaction = true;
+ } else if (sync_log) {
+ mutex_lock(&root->log_mutex);
+ list_del(&ctx.list);
+ mutex_unlock(&root->log_mutex);
}
if (commit_transaction) {
ret = btrfs_commit_transaction(trans);
@@ -9818,6 +9830,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key ins;
u64 cur_offset = start;
+ u64 clear_offset = start;
u64 i_size;
u64 cur_bytes;
u64 last_alloc = (u64)-1;
@@ -9852,6 +9865,15 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
btrfs_end_transaction(trans);
break;
}
+
+ /*
+ * We've reserved this space, and thus converted it from
+ * ->bytes_may_use to ->bytes_reserved. Any error that happens
+ * from here on out we will only need to clear our reservation
+ * for the remaining unreserved area, so advance our
+ * clear_offset by our extent size.
+ */
+ clear_offset += ins.offset;
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
last_alloc = ins.offset;
@@ -9931,9 +9953,9 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
if (own_trans)
btrfs_end_transaction(trans);
}
- if (cur_offset < end)
- btrfs_free_reserved_data_space(inode, NULL, cur_offset,
- end - cur_offset + 1);
+ if (clear_offset < end)
+ btrfs_free_reserved_data_space(inode, NULL, clear_offset,
+ end - clear_offset + 1);
return ret;
}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index ecb9fb6..a65f189 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -679,10 +679,15 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
}
btrfs_start_ordered_extent(inode, ordered, 1);
end = ordered->file_offset;
+ /*
+ * If the ordered extent had an error save the error but don't
+ * exit without waiting first for all other ordered extents in
+ * the range to complete.
+ */
if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
ret = -EIO;
btrfs_put_ordered_extent(ordered);
- if (ret || end == 0 || end == start)
+ if (end == 0 || end == start)
break;
end--;
}
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 98d9a50..ff1870f 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -4002,3 +4002,16 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
}
return ret;
}
+
+void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans)
+{
+ struct btrfs_qgroup_extent_record *entry;
+ struct btrfs_qgroup_extent_record *next;
+ struct rb_root *root;
+
+ root = &trans->delayed_refs.dirty_extent_root;
+ rbtree_postorder_for_each_entry_safe(entry, next, root, node) {
+ ulist_free(entry->old_roots);
+ kfree(entry);
+ }
+}
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 236f122..1bc6544 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -414,5 +414,6 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
u64 last_snapshot);
int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *eb);
+void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans);
#endif
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index b57f361..454a101 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -744,6 +744,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
*/
be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
if (IS_ERR(be)) {
+ kfree(ref);
kfree(ra);
ret = PTR_ERR(be);
goto out;
@@ -757,6 +758,8 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
"re-allocated a block that still has references to it!");
dump_block_entry(fs_info, be);
dump_ref_action(fs_info, ra);
+ kfree(ref);
+ kfree(ra);
goto out_unlock;
}
@@ -819,6 +822,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
"dropping a ref for a existing root that doesn't have a ref on the block");
dump_block_entry(fs_info, be);
dump_ref_action(fs_info, ra);
+ kfree(ref);
kfree(ra);
goto out_unlock;
}
@@ -834,6 +838,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
"attempting to add another ref for an existing ref on a tree block");
dump_block_entry(fs_info, be);
dump_ref_action(fs_info, ra);
+ kfree(ref);
kfree(ra);
goto out_unlock;
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0616a54..67c6385 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1834,6 +1834,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
}
if (btrfs_super_log_root(fs_info->super_copy) != 0) {
+ btrfs_warn(fs_info,
+ "mount required to replay tree-log, cannot remount read-write");
ret = -EINVAL;
goto restore;
}
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 7436422..3c10e78 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -901,6 +901,12 @@ static int addrm_unknown_feature_attrs(struct btrfs_fs_info *fs_info, bool add)
static void __btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs)
{
+ if (fs_devs->devinfo_kobj) {
+ kobject_del(fs_devs->devinfo_kobj);
+ kobject_put(fs_devs->devinfo_kobj);
+ fs_devs->devinfo_kobj = NULL;
+ }
+
if (fs_devs->devices_kobj) {
kobject_del(fs_devs->devices_kobj);
kobject_put(fs_devs->devices_kobj);
@@ -1289,7 +1295,7 @@ int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices,
init_completion(&dev->kobj_unregister);
error = kobject_init_and_add(&dev->devid_kobj, &devid_ktype,
- fs_devices->devices_kobj, "%llu",
+ fs_devices->devinfo_kobj, "%llu",
dev->devid);
if (error) {
kobject_put(&dev->devid_kobj);
@@ -1369,6 +1375,15 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs)
return -ENOMEM;
}
+ fs_devs->devinfo_kobj = kobject_create_and_add("devinfo",
+ &fs_devs->fsid_kobj);
+ if (!fs_devs->devinfo_kobj) {
+ btrfs_err(fs_devs->fs_info,
+ "failed to init sysfs devinfo kobject");
+ btrfs_sysfs_remove_fsid(fs_devs);
+ return -ENOMEM;
+ }
+
return 0;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 33dcc88..beb6c69 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -121,6 +121,8 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
BUG_ON(!list_empty(&transaction->list));
WARN_ON(!RB_EMPTY_ROOT(
&transaction->delayed_refs.href_root.rb_root));
+ WARN_ON(!RB_EMPTY_ROOT(
+ &transaction->delayed_refs.dirty_extent_root));
if (transaction->delayed_refs.pending_csums)
btrfs_err(transaction->fs_info,
"pending csums is %llu",
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 409f481..f01552a 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -258,6 +258,7 @@ struct btrfs_fs_devices {
/* sysfs kobjects */
struct kobject fsid_kobj;
struct kobject *devices_kobj;
+ struct kobject *devinfo_kobj;
struct completion kobj_unregister;
};
diff --git a/fs/buffer.c b/fs/buffer.c
index b8d2837..3f5758e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3019,49 +3019,6 @@ static void end_bio_bh_io_sync(struct bio *bio)
bio_put(bio);
}
-/*
- * This allows us to do IO even on the odd last sectors
- * of a device, even if the block size is some multiple
- * of the physical sector size.
- *
- * We'll just truncate the bio to the size of the device,
- * and clear the end of the buffer head manually.
- *
- * Truly out-of-range accesses will turn into actual IO
- * errors, this only handles the "we need to be able to
- * do IO at the final sector" case.
- */
-void guard_bio_eod(struct bio *bio)
-{
- sector_t maxsector;
- struct hd_struct *part;
-
- rcu_read_lock();
- part = __disk_get_part(bio->bi_disk, bio->bi_partno);
- if (part)
- maxsector = part_nr_sects_read(part);
- else
- maxsector = get_capacity(bio->bi_disk);
- rcu_read_unlock();
-
- if (!maxsector)
- return;
-
- /*
- * If the *whole* IO is past the end of the device,
- * let it through, and the IO layer will turn it into
- * an EIO.
- */
- if (unlikely(bio->bi_iter.bi_sector >= maxsector))
- return;
-
- maxsector -= bio->bi_iter.bi_sector;
- if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
- return;
-
- bio_truncate(bio, maxsector << 9);
-}
-
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
enum rw_hint write_hint, struct writeback_control *wbc)
{
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index c3b8e8e..5a478cd 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1415,9 +1415,13 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_cap_flush *prealloc_cf;
ssize_t count, written = 0;
int err, want, got;
+ bool direct_lock = false;
+ u32 map_flags;
+ u64 pool_flags;
loff_t pos;
loff_t limit = max(i_size_read(inode), fsc->max_file_size);
@@ -1428,8 +1432,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (!prealloc_cf)
return -ENOMEM;
+ if ((iocb->ki_flags & (IOCB_DIRECT | IOCB_APPEND)) == IOCB_DIRECT)
+ direct_lock = true;
+
retry_snap:
- if (iocb->ki_flags & IOCB_DIRECT)
+ if (direct_lock)
ceph_start_io_direct(inode);
else
ceph_start_io_write(inode);
@@ -1477,8 +1484,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto out;
}
- /* FIXME: not complete since it doesn't account for being at quota */
- if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL)) {
+ down_read(&osdc->lock);
+ map_flags = osdc->osdmap->flags;
+ pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id);
+ up_read(&osdc->lock);
+ if ((map_flags & CEPH_OSDMAP_FULL) ||
+ (pool_flags & CEPH_POOL_FLAG_FULL)) {
err = -ENOSPC;
goto out;
}
@@ -1519,14 +1530,15 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
/* we might need to revert back to that point */
data = *from;
- if (iocb->ki_flags & IOCB_DIRECT) {
+ if (iocb->ki_flags & IOCB_DIRECT)
written = ceph_direct_read_write(iocb, &data, snapc,
&prealloc_cf);
- ceph_end_io_direct(inode);
- } else {
+ else
written = ceph_sync_write(iocb, &data, pos, snapc);
+ if (direct_lock)
+ ceph_end_io_direct(inode);
+ else
ceph_end_io_write(inode);
- }
if (written > 0)
iov_iter_advance(from, written);
ceph_put_snap_context(snapc);
@@ -1570,14 +1582,15 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
}
if (written >= 0) {
- if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_NEARFULL))
+ if ((map_flags & CEPH_OSDMAP_NEARFULL) ||
+ (pool_flags & CEPH_POOL_FLAG_NEARFULL))
iocb->ki_flags |= IOCB_DSYNC;
written = generic_write_sync(iocb, written);
}
goto out_unlocked;
out:
- if (iocb->ki_flags & IOCB_DIRECT)
+ if (direct_lock)
ceph_end_io_direct(inode);
else
ceph_end_io_write(inode);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index ccfcc66..923be93 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -1155,5 +1155,6 @@ void ceph_cleanup_snapid_map(struct ceph_mds_client *mdsc)
pr_err("snapid map %llx -> %x still in use\n",
sm->snap, sm->dev);
}
+ kfree(sm);
}
}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 1d9f083..c7f1506 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -203,6 +203,26 @@ struct ceph_parse_opts_ctx {
};
/*
+ * Remove adjacent slashes and then the trailing slash, unless it is
+ * the only remaining character.
+ *
+ * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/".
+ */
+static void canonicalize_path(char *path)
+{
+ int i, j = 0;
+
+ for (i = 0; path[i] != '\0'; i++) {
+ if (path[i] != '/' || j < 1 || path[j - 1] != '/')
+ path[j++] = path[i];
+ }
+
+ if (j > 1 && path[j - 1] == '/')
+ j--;
+ path[j] = '\0';
+}
+
+/*
* Parse the source parameter. Distinguish the server list from the path.
*
* The source will look like:
@@ -224,15 +244,16 @@ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc)
dev_name_end = strchr(dev_name, '/');
if (dev_name_end) {
- kfree(fsopt->server_path);
-
/*
* The server_path will include the whole chars from userland
* including the leading '/'.
*/
+ kfree(fsopt->server_path);
fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL);
if (!fsopt->server_path)
return -ENOMEM;
+
+ canonicalize_path(fsopt->server_path);
} else {
dev_name_end = dev_name + strlen(dev_name);
}
@@ -456,73 +477,6 @@ static int strcmp_null(const char *s1, const char *s2)
return strcmp(s1, s2);
}
-/**
- * path_remove_extra_slash - Remove the extra slashes in the server path
- * @server_path: the server path and could be NULL
- *
- * Return NULL if the path is NULL or only consists of "/", or a string
- * without any extra slashes including the leading slash(es) and the
- * slash(es) at the end of the server path, such as:
- * "//dir1////dir2///" --> "dir1/dir2"
- */
-static char *path_remove_extra_slash(const char *server_path)
-{
- const char *path = server_path;
- const char *cur, *end;
- char *buf, *p;
- int len;
-
- /* if the server path is omitted */
- if (!path)
- return NULL;
-
- /* remove all the leading slashes */
- while (*path == '/')
- path++;
-
- /* if the server path only consists of slashes */
- if (*path == '\0')
- return NULL;
-
- len = strlen(path);
-
- buf = kmalloc(len + 1, GFP_KERNEL);
- if (!buf)
- return ERR_PTR(-ENOMEM);
-
- end = path + len;
- p = buf;
- do {
- cur = strchr(path, '/');
- if (!cur)
- cur = end;
-
- len = cur - path;
-
- /* including one '/' */
- if (cur != end)
- len += 1;
-
- memcpy(p, path, len);
- p += len;
-
- while (cur <= end && *cur == '/')
- cur++;
- path = cur;
- } while (path < end);
-
- *p = '\0';
-
- /*
- * remove the last slash if there has and just to make sure that
- * we will get something like "dir1/dir2"
- */
- if (*(--p) == '/')
- *p = '\0';
-
- return buf;
-}
-
static int compare_mount_options(struct ceph_mount_options *new_fsopt,
struct ceph_options *new_opt,
struct ceph_fs_client *fsc)
@@ -530,7 +484,6 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt,
struct ceph_mount_options *fsopt1 = new_fsopt;
struct ceph_mount_options *fsopt2 = fsc->mount_options;
int ofs = offsetof(struct ceph_mount_options, snapdir_name);
- char *p1, *p2;
int ret;
ret = memcmp(fsopt1, fsopt2, ofs);
@@ -540,21 +493,12 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt,
ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
if (ret)
return ret;
+
ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace);
if (ret)
return ret;
- p1 = path_remove_extra_slash(fsopt1->server_path);
- if (IS_ERR(p1))
- return PTR_ERR(p1);
- p2 = path_remove_extra_slash(fsopt2->server_path);
- if (IS_ERR(p2)) {
- kfree(p1);
- return PTR_ERR(p2);
- }
- ret = strcmp_null(p1, p2);
- kfree(p1);
- kfree(p2);
+ ret = strcmp_null(fsopt1->server_path, fsopt2->server_path);
if (ret)
return ret;
@@ -957,7 +901,9 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
mutex_lock(&fsc->client->mount_mutex);
if (!fsc->sb->s_root) {
- const char *path, *p;
+ const char *path = fsc->mount_options->server_path ?
+ fsc->mount_options->server_path + 1 : "";
+
err = __ceph_open_session(fsc->client, started);
if (err < 0)
goto out;
@@ -969,22 +915,11 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
goto out;
}
- p = path_remove_extra_slash(fsc->mount_options->server_path);
- if (IS_ERR(p)) {
- err = PTR_ERR(p);
- goto out;
- }
- /* if the server path is omitted or just consists of '/' */
- if (!p)
- path = "";
- else
- path = p;
dout("mount opening path '%s'\n", path);
ceph_fs_debugfs_init(fsc);
root = open_root_dentry(fsc, path, started);
- kfree(p);
if (IS_ERR(root)) {
err = PTR_ERR(root);
goto out;
@@ -1097,10 +1032,6 @@ static int ceph_get_tree(struct fs_context *fc)
if (!fc->source)
return invalfc(fc, "No source");
-#ifdef CONFIG_CEPH_FS_POSIX_ACL
- fc->sb_flags |= SB_POSIXACL;
-#endif
-
/* create client (which we may/may not use) */
fsc = create_fs_client(pctx->opts, pctx->copts);
pctx->opts = NULL;
@@ -1223,6 +1154,10 @@ static int ceph_init_fs_context(struct fs_context *fc)
fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
fsopt->congestion_kb = default_congestion_kb();
+#ifdef CONFIG_CEPH_FS_POSIX_ACL
+ fc->sb_flags |= SB_POSIXACL;
+#endif
+
fc->fs_private = pctx;
fc->ops = &ceph_context_ops;
return 0;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 1e456a9..037cdfb 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -91,7 +91,7 @@ struct ceph_mount_options {
char *snapdir_name; /* default ".snap" */
char *mds_namespace; /* default NULL */
- char *server_path; /* default "/" */
+ char *server_path; /* default NULL (means "/") */
char *fscache_uniq; /* default NULL */
};
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 606f26d..cc3ada1 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -324,6 +324,8 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
if (full_path == NULL)
goto cdda_exit;
+ convert_delimiter(full_path, '\\');
+
cifs_dbg(FYI, "%s: full_path: %s\n", __func__, full_path);
if (!cifs_sb_master_tlink(cifs_sb)) {
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 440828a..716574a 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -601,7 +601,7 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode,
((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS))
*pmode |= (S_IXUGO & (*pbits_to_set));
- cifs_dbg(NOISY, "access flags 0x%x mode now 0x%x\n", flags, *pmode);
+ cifs_dbg(NOISY, "access flags 0x%x mode now %04o\n", flags, *pmode);
return;
}
@@ -630,7 +630,7 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use,
if (mode & S_IXUGO)
*pace_flags |= SET_FILE_EXEC_RIGHTS;
- cifs_dbg(NOISY, "mode: 0x%x, access flags now 0x%x\n",
+ cifs_dbg(NOISY, "mode: %04o, access flags now 0x%x\n",
mode, *pace_flags);
return;
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index febab27..fa77fe5 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -414,7 +414,7 @@ cifs_show_security(struct seq_file *s, struct cifs_ses *ses)
seq_puts(s, "ntlm");
break;
case Kerberos:
- seq_printf(s, "krb5,cruid=%u", from_kuid_munged(&init_user_ns,ses->cred_uid));
+ seq_puts(s, "krb5");
break;
case RawNTLMSSP:
seq_puts(s, "ntlmssp");
@@ -427,6 +427,10 @@ cifs_show_security(struct seq_file *s, struct cifs_ses *ses)
if (ses->sign)
seq_puts(s, "i");
+
+ if (ses->sectype == Kerberos)
+ seq_printf(s, ",cruid=%u",
+ from_kuid_munged(&init_user_ns, ses->cred_uid));
}
static void
@@ -526,6 +530,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
if (tcon->seal)
seq_puts(s, ",seal");
+ else if (tcon->ses->server->ignore_signature)
+ seq_puts(s, ",signloosely");
if (tcon->nocase)
seq_puts(s, ",nocase");
if (tcon->local_lease)
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index de82cfa..0d95636 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1281,6 +1281,7 @@ struct cifs_fid {
__u64 volatile_fid; /* volatile file id for smb2 */
__u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for smb2 */
__u8 create_guid[16];
+ __u32 access;
struct cifs_pending_open *pending_open;
unsigned int epoch;
#ifdef CONFIG_CIFS_DEBUG2
@@ -1741,6 +1742,12 @@ static inline bool is_retryable_error(int error)
return false;
}
+
+/* cifs_get_writable_file() flags */
+#define FIND_WR_ANY 0
+#define FIND_WR_FSUID_ONLY 1
+#define FIND_WR_WITH_DELETE 2
+
#define MID_FREE 0
#define MID_REQUEST_ALLOCATED 1
#define MID_REQUEST_SUBMITTED 2
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 89eaaf4..e5cb681 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -134,11 +134,12 @@ extern bool backup_cred(struct cifs_sb_info *);
extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
unsigned int bytes_written);
-extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
+extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, int);
extern int cifs_get_writable_file(struct cifsInodeInfo *cifs_inode,
- bool fsuid_only,
+ int flags,
struct cifsFileInfo **ret_file);
extern int cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
+ int flags,
struct cifsFileInfo **ret_file);
extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
extern int cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 3c89569..6f6fb36 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1492,6 +1492,7 @@ CIFS_open(const unsigned int xid, struct cifs_open_parms *oparms, int *oplock,
*oplock = rsp->OplockLevel;
/* cifs fid stays in le */
oparms->fid->netfid = rsp->Fid;
+ oparms->fid->access = desired_access;
/* Let caller know file was created so we can set the mode. */
/* Do we care about the CreateAction in any other cases? */
@@ -2115,7 +2116,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
wdata2->tailsz = tailsz;
wdata2->bytes = cur_len;
- rc = cifs_get_writable_file(CIFS_I(inode), false,
+ rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
&wdata2->cfile);
if (!wdata2->cfile) {
cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a941ac7..4804d1d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -4151,7 +4151,7 @@ int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
cifs_sb->mnt_gid = pvolume_info->linux_gid;
cifs_sb->mnt_file_mode = pvolume_info->file_mode;
cifs_sb->mnt_dir_mode = pvolume_info->dir_mode;
- cifs_dbg(FYI, "file mode: 0x%hx dir mode: 0x%hx\n",
+ cifs_dbg(FYI, "file mode: %04ho dir mode: %04ho\n",
cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode);
cifs_sb->actimeo = pvolume_info->actimeo;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 0ef0994..36e7b2f 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -555,7 +555,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
if (server->ops->close)
server->ops->close(xid, tcon, &fid);
cifs_del_pending_open(&open);
- fput(file);
rc = -ENOMEM;
}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index bc9516a..8f9d849 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1169,7 +1169,8 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock)
rc = posix_lock_file(file, flock, NULL);
up_write(&cinode->lock_sem);
if (rc == FILE_LOCK_DEFERRED) {
- rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
+ rc = wait_event_interruptible(flock->fl_wait,
+ list_empty(&flock->fl_blocked_member));
if (!rc)
goto try_again;
locks_delete_block(flock);
@@ -1958,7 +1959,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
/* Return -EBADF if no handle is found and general rc otherwise */
int
-cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
+cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
struct cifsFileInfo **ret_file)
{
struct cifsFileInfo *open_file, *inv_file = NULL;
@@ -1966,7 +1967,8 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
bool any_available = false;
int rc = -EBADF;
unsigned int refind = 0;
-
+ bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
+ bool with_delete = flags & FIND_WR_WITH_DELETE;
*ret_file = NULL;
/*
@@ -1998,6 +2000,8 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
continue;
if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
continue;
+ if (with_delete && !(open_file->fid.access & DELETE))
+ continue;
if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
if (!open_file->invalidHandle) {
/* found a good writable file */
@@ -2045,12 +2049,12 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
}
struct cifsFileInfo *
-find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only)
+find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
{
struct cifsFileInfo *cfile;
int rc;
- rc = cifs_get_writable_file(cifs_inode, fsuid_only, &cfile);
+ rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
if (rc)
cifs_dbg(FYI, "couldn't find writable handle rc=%d", rc);
@@ -2059,6 +2063,7 @@ find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only)
int
cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
+ int flags,
struct cifsFileInfo **ret_file)
{
struct list_head *tmp;
@@ -2085,7 +2090,7 @@ cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
kfree(full_path);
cinode = CIFS_I(d_inode(cfile->dentry));
spin_unlock(&tcon->open_file_lock);
- return cifs_get_writable_file(cinode, 0, ret_file);
+ return cifs_get_writable_file(cinode, flags, ret_file);
}
spin_unlock(&tcon->open_file_lock);
@@ -2162,7 +2167,8 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
if (mapping->host->i_size - offset < (loff_t)to)
to = (unsigned)(mapping->host->i_size - offset);
- rc = cifs_get_writable_file(CIFS_I(mapping->host), false, &open_file);
+ rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
+ &open_file);
if (!rc) {
bytes_written = cifs_write(open_file, open_file->pid,
write_data, to - from, &offset);
@@ -2355,7 +2361,7 @@ static int cifs_writepages(struct address_space *mapping,
if (cfile)
cifsFileInfo_put(cfile);
- rc = cifs_get_writable_file(CIFS_I(inode), false, &cfile);
+ rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
/* in case of an error store it to return later */
if (rc)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 9ba623b..b16f8d2 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -653,8 +653,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
*/
if ((fattr->cf_nlink < 1) && !tcon->unix_ext &&
!info->DeletePending) {
- cifs_dbg(1, "bogus file nlink value %u\n",
- fattr->cf_nlink);
+ cifs_dbg(VFS, "bogus file nlink value %u\n",
+ fattr->cf_nlink);
fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK;
}
}
@@ -1648,7 +1648,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
struct TCP_Server_Info *server;
char *full_path;
- cifs_dbg(FYI, "In cifs_mkdir, mode = 0x%hx inode = 0x%p\n",
+ cifs_dbg(FYI, "In cifs_mkdir, mode = %04ho inode = 0x%p\n",
mode, inode);
cifs_sb = CIFS_SB(inode->i_sb);
@@ -2073,6 +2073,7 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry)
struct inode *inode = d_inode(dentry);
struct super_block *sb = dentry->d_sb;
char *full_path = NULL;
+ int count = 0;
if (inode == NULL)
return -ENOENT;
@@ -2094,15 +2095,18 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry)
full_path, inode, inode->i_count.counter,
dentry, cifs_get_time(dentry), jiffies);
+again:
if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext)
rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid);
else
rc = cifs_get_inode_info(&inode, full_path, NULL, sb,
xid, NULL);
-
+ if (rc == -EAGAIN && count++ < 10)
+ goto again;
out:
kfree(full_path);
free_xid(xid);
+
return rc;
}
@@ -2187,7 +2191,7 @@ int cifs_getattr(const struct path *path, struct kstat *stat,
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID))
stat->gid = current_fsgid();
}
- return rc;
+ return 0;
}
int cifs_fiemap(struct inode *inode, struct fiemap_extent_info *fei, u64 start,
@@ -2278,7 +2282,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
* writebehind data than the SMB timeout for the SetPathInfo
* request would allow
*/
- open_file = find_writable_file(cifsInode, true);
+ open_file = find_writable_file(cifsInode, FIND_WR_FSUID_ONLY);
if (open_file) {
tcon = tlink_tcon(open_file->tlink);
server = tcon->ses->server;
@@ -2428,7 +2432,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
args->ctime = NO_CHANGE_64;
args->device = 0;
- open_file = find_writable_file(cifsInode, true);
+ open_file = find_writable_file(cifsInode, FIND_WR_FSUID_ONLY);
if (open_file) {
u16 nfid = open_file->fid.netfid;
u32 npid = open_file->pid;
@@ -2531,7 +2535,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
rc = 0;
if (attrs->ia_valid & ATTR_MTIME) {
- rc = cifs_get_writable_file(cifsInode, false, &wfile);
+ rc = cifs_get_writable_file(cifsInode, FIND_WR_ANY, &wfile);
if (!rc) {
tcon = tlink_tcon(wfile->tlink);
rc = tcon->ses->server->ops->flush(xid, tcon, &wfile->fid);
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index eb994e3..b130efa 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -766,7 +766,7 @@ smb_set_file_info(struct inode *inode, const char *full_path,
struct cifs_tcon *tcon;
/* if the file is already open for write, just use that fileid */
- open_file = find_writable_file(cinode, true);
+ open_file = find_writable_file(cinode, FIND_WR_FSUID_ONLY);
if (open_file) {
fid.netfid = open_file->fid.netfid;
netpid = open_file->pid;
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 1cf2075..a8c301a 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -521,7 +521,7 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name,
cifs_i = CIFS_I(inode);
dosattrs = cifs_i->cifsAttrs | ATTR_READONLY;
data.Attributes = cpu_to_le32(dosattrs);
- cifs_get_writable_path(tcon, name, &cfile);
+ cifs_get_writable_path(tcon, name, FIND_WR_ANY, &cfile);
tmprc = smb2_compound_op(xid, tcon, cifs_sb, name,
FILE_WRITE_ATTRIBUTES, FILE_CREATE,
CREATE_NOT_FILE, ACL_NO_MODE,
@@ -577,7 +577,7 @@ smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon,
{
struct cifsFileInfo *cfile;
- cifs_get_writable_path(tcon, from_name, &cfile);
+ cifs_get_writable_path(tcon, from_name, FIND_WR_WITH_DELETE, &cfile);
return smb2_set_path_attr(xid, tcon, from_name, to_name,
cifs_sb, DELETE, SMB2_OP_RENAME, cfile);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index baa825f..cfe9b80 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1116,7 +1116,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
void *data[1];
struct smb2_file_full_ea_info *ea = NULL;
struct kvec close_iov[1];
- int rc;
+ struct smb2_query_info_rsp *rsp;
+ int rc, used_len = 0;
if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
@@ -1139,6 +1140,38 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
cifs_sb);
if (rc == -ENODATA)
goto sea_exit;
+ } else {
+ /* If we are adding a attribute we should first check
+ * if there will be enough space available to store
+ * the new EA. If not we should not add it since we
+ * would not be able to even read the EAs back.
+ */
+ rc = smb2_query_info_compound(xid, tcon, utf16_path,
+ FILE_READ_EA,
+ FILE_FULL_EA_INFORMATION,
+ SMB2_O_INFO_FILE,
+ CIFSMaxBufSize -
+ MAX_SMB2_CREATE_RESPONSE_SIZE -
+ MAX_SMB2_CLOSE_RESPONSE_SIZE,
+ &rsp_iov[1], &resp_buftype[1], cifs_sb);
+ if (rc == 0) {
+ rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base;
+ used_len = le32_to_cpu(rsp->OutputBufferLength);
+ }
+ free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
+ resp_buftype[1] = CIFS_NO_BUFFER;
+ memset(&rsp_iov[1], 0, sizeof(rsp_iov[1]));
+ rc = 0;
+
+ /* Use a fudge factor of 256 bytes in case we collide
+ * with a different set_EAs command.
+ */
+ if(CIFSMaxBufSize - MAX_SMB2_CREATE_RESPONSE_SIZE -
+ MAX_SMB2_CLOSE_RESPONSE_SIZE - 256 <
+ used_len + ea_name_len + ea_value_len + 1) {
+ rc = -ENOSPC;
+ goto sea_exit;
+ }
}
}
@@ -1331,6 +1364,7 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
cfile->fid.persistent_fid = fid->persistent_fid;
cfile->fid.volatile_fid = fid->volatile_fid;
+ cfile->fid.access = fid->access;
#ifdef CONFIG_CIFS_DEBUG2
cfile->fid.mid = fid->mid;
#endif /* CIFS_DEBUG2 */
@@ -2188,6 +2222,8 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
goto qdf_free;
}
+ atomic_inc(&tcon->num_remote_opens);
+
qd_rsp = (struct smb2_query_directory_rsp *)rsp_iov[1].iov_base;
if (qd_rsp->sync_hdr.Status == STATUS_NO_MORE_FILES) {
trace_smb3_query_dir_done(xid, fid->persistent_fid,
@@ -3294,7 +3330,7 @@ static loff_t smb3_llseek(struct file *file, struct cifs_tcon *tcon, loff_t offs
* some servers (Windows2016) will not reflect recent writes in
* QUERY_ALLOCATED_RANGES until SMB2_flush is called.
*/
- wrcfile = find_writable_file(cifsi, false);
+ wrcfile = find_writable_file(cifsi, FIND_WR_ANY);
if (wrcfile) {
filemap_write_and_wait(inode->i_mapping);
smb2_flush_file(xid, tcon, &wrcfile->fid);
@@ -3383,7 +3419,7 @@ static int smb3_fiemap(struct cifs_tcon *tcon,
if (rc)
goto out;
- if (out_data_len < sizeof(struct file_allocated_range_buffer)) {
+ if (out_data_len && out_data_len < sizeof(struct file_allocated_range_buffer)) {
rc = -EINVAL;
goto out;
}
@@ -4795,6 +4831,7 @@ struct smb_version_operations smb21_operations = {
.wp_retry_size = smb2_wp_retry_size,
.dir_needs_close = smb2_dir_needs_close,
.enum_snapshots = smb3_enum_snapshots,
+ .notify = smb3_notify,
.get_dfs_refer = smb2_get_dfs_refer,
.select_sectype = smb2_select_sectype,
#ifdef CONFIG_CIFS_XATTR
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 1234f9c..28c0be5e 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2771,6 +2771,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
atomic_inc(&tcon->num_remote_opens);
oparms->fid->persistent_fid = rsp->PersistentFileId;
oparms->fid->volatile_fid = rsp->VolatileFileId;
+ oparms->fid->access = oparms->desired_access;
#ifdef CONFIG_CIFS_DEBUG2
oparms->fid->mid = le64_to_cpu(rsp->sync_hdr.MessageId);
#endif /* CIFS_DEBUG2 */
diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
index 65cb09f..08c9f21 100644
--- a/fs/crypto/keysetup.c
+++ b/fs/crypto/keysetup.c
@@ -539,6 +539,15 @@ int fscrypt_drop_inode(struct inode *inode)
mk = ci->ci_master_key->payload.data[0];
/*
+ * With proper, non-racy use of FS_IOC_REMOVE_ENCRYPTION_KEY, all inodes
+ * protected by the key were cleaned by sync_filesystem(). But if
+ * userspace is still using the files, inodes can be dirtied between
+ * then and now. We mustn't lose any writes, so skip dirty inodes here.
+ */
+ if (inode->i_state & I_DIRTY_ALL)
+ return 0;
+
+ /*
* Note: since we aren't holding ->mk_secret_sem, the result here can
* immediately become outdated. But there's no correctness problem with
* unnecessarily evicting. Nor is there a correctness problem with not
diff --git a/fs/dax.c b/fs/dax.c
index 1f1f020..35da144 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -937,12 +937,11 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
* on persistent storage prior to completion of the operation.
*/
int dax_writeback_mapping_range(struct address_space *mapping,
- struct block_device *bdev, struct writeback_control *wbc)
+ struct dax_device *dax_dev, struct writeback_control *wbc)
{
XA_STATE(xas, &mapping->i_pages, wbc->range_start >> PAGE_SHIFT);
struct inode *inode = mapping->host;
pgoff_t end_index = wbc->range_end >> PAGE_SHIFT;
- struct dax_device *dax_dev;
void *entry;
int ret = 0;
unsigned int scanned = 0;
@@ -953,10 +952,6 @@ int dax_writeback_mapping_range(struct address_space *mapping,
if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
return 0;
- dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
- if (!dax_dev)
- return -EIO;
-
trace_dax_writeback_range(inode, xas.xa_index, end_index);
tag_pages_for_writeback(mapping, xas.xa_index, end_index);
@@ -977,7 +972,6 @@ int dax_writeback_mapping_range(struct address_space *mapping,
xas_lock_irq(&xas);
}
xas_unlock_irq(&xas);
- put_dax(dax_dev);
trace_dax_writeback_range_done(inode, xas.xa_index, end_index);
return ret;
}
@@ -1207,6 +1201,9 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
lockdep_assert_held(&inode->i_rwsem);
}
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ flags |= IOMAP_NOWAIT;
+
while (iov_iter_count(iter)) {
ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
iter, dax_iomap_actor);
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 634b09d..db987b5 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -1090,21 +1090,12 @@ static const struct file_operations fops_regset32 = {
* This function creates a file in debugfs with the given name that reports
* the names and values of a set of 32-bit registers. If the @mode variable
* is so set it can be read from. Writing is not supported.
- *
- * This function will return a pointer to a dentry if it succeeds. This
- * pointer must be passed to the debugfs_remove() function when the file is
- * to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be
- * returned.
- *
- * If debugfs is not enabled in the kernel, the value ERR_PTR(-ENODEV) will
- * be returned.
*/
-struct dentry *debugfs_create_regset32(const char *name, umode_t mode,
- struct dentry *parent,
- struct debugfs_regset32 *regset)
+void debugfs_create_regset32(const char *name, umode_t mode,
+ struct dentry *parent,
+ struct debugfs_regset32 *regset)
{
- return debugfs_create_file(name, mode, parent, regset, &fops_regset32);
+ debugfs_create_file(name, mode, parent, regset, &fops_regset32);
}
EXPORT_SYMBOL_GPL(debugfs_create_regset32);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index db1ef14..2c449ae 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -311,8 +311,10 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
struct extent_crypt_result ecr;
int rc = 0;
- BUG_ON(!crypt_stat || !crypt_stat->tfm
- || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED));
+ if (!crypt_stat || !crypt_stat->tfm
+ || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED))
+ return -EINVAL;
+
if (unlikely(ecryptfs_verbosity > 0)) {
ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n",
crypt_stat->key_size);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 1c1a56b..e6ac78c 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -8,7 +8,7 @@
* Copyright (C) 2004-2008 International Business Machines Corp.
* Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
* Trevor S. Highland <trevor.highland@gmail.com>
- * Tyler Hicks <tyhicks@ou.edu>
+ * Tyler Hicks <code@tyhicks.com>
*/
#ifndef ECRYPTFS_KERNEL_H
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 7d326aa..af3eb02 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1304,7 +1304,7 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat,
printk(KERN_WARNING "Tag 1 packet contains key larger "
"than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES\n");
rc = -EINVAL;
- goto out;
+ goto out_free;
}
memcpy((*new_auth_tok)->session_key.encrypted_key,
&data[(*packet_size)], (body_size - (ECRYPTFS_SIG_SIZE + 2)));
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index b8a7ce3..e63259f 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -7,7 +7,7 @@
* Copyright (C) 2004-2007 International Business Machines Corp.
* Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
* Michael C. Thompson <mcthomps@us.ibm.com>
- * Tyler Hicks <tyhicks@ou.edu>
+ * Tyler Hicks <code@tyhicks.com>
*/
#include <linux/dcache.h>
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index d668e60..8646ba7 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -4,7 +4,7 @@
*
* Copyright (C) 2004-2008 International Business Machines Corp.
* Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
- * Tyler Hicks <tyhicks@ou.edu>
+ * Tyler Hicks <code@tyhicks.com>
*/
#include <linux/sched.h>
#include <linux/slab.h>
@@ -379,6 +379,7 @@ int __init ecryptfs_init_messaging(void)
* ecryptfs_message_buf_len),
GFP_KERNEL);
if (!ecryptfs_msg_ctx_arr) {
+ kfree(ecryptfs_daemon_hash);
rc = -ENOMEM;
goto out;
}
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 5779a15..5d2d819 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -157,17 +157,27 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
}
}
- ret = LZ4_decompress_safe_partial(src + inputmargin, out,
- inlen, rq->outputsize,
- rq->outputsize);
- if (ret < 0) {
- erofs_err(rq->sb, "failed to decompress, in[%u, %u] out[%u]",
- inlen, inputmargin, rq->outputsize);
+ /* legacy format could compress extra data in a pcluster. */
+ if (rq->partial_decoding || !support_0padding)
+ ret = LZ4_decompress_safe_partial(src + inputmargin, out,
+ inlen, rq->outputsize,
+ rq->outputsize);
+ else
+ ret = LZ4_decompress_safe(src + inputmargin, out,
+ inlen, rq->outputsize);
+
+ if (ret != rq->outputsize) {
+ erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]",
+ ret, inlen, inputmargin, rq->outputsize);
+
WARN_ON(1);
print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET,
16, 1, src + inputmargin, inlen, true);
print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET,
16, 1, out, rq->outputsize, true);
+
+ if (ret >= 0)
+ memset(out + ret, 0, rq->outputsize - ret);
ret = -EIO;
}
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index c4c6dcd..5eead7f 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -52,8 +52,8 @@ struct erofs_sb_info {
struct list_head list;
struct mutex umount_mutex;
- /* the dedicated workstation for compression */
- struct radix_tree_root workstn_tree;
+ /* managed XArray arranged in physical block number */
+ struct xarray managed_pslots;
/* threshold for decompression synchronously */
unsigned int max_sync_decompress_pages;
@@ -402,8 +402,8 @@ static inline void *erofs_get_pcpubuf(unsigned int pagenr)
int erofs_workgroup_put(struct erofs_workgroup *grp);
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
pgoff_t index);
-int erofs_register_workgroup(struct super_block *sb,
- struct erofs_workgroup *grp);
+struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
+ struct erofs_workgroup *grp);
void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
void erofs_shrinker_register(struct super_block *sb);
void erofs_shrinker_unregister(struct super_block *sb);
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 057e6d7..b514c67e 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -425,7 +425,7 @@ static int erofs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_flags &= ~SB_POSIXACL;
#ifdef CONFIG_EROFS_FS_ZIP
- INIT_RADIX_TREE(&sbi->workstn_tree, GFP_ATOMIC);
+ xa_init(&sbi->managed_pslots);
#endif
/* get the root inode */
diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c
index fddc505..52d0be1 100644
--- a/fs/erofs/utils.c
+++ b/fs/erofs/utils.c
@@ -37,9 +37,6 @@ void *erofs_get_pcpubuf(unsigned int pagenr)
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;
-#define __erofs_workgroup_get(grp) atomic_inc(&(grp)->refcount)
-#define __erofs_workgroup_put(grp) atomic_dec(&(grp)->refcount)
-
static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
int o;
@@ -66,7 +63,7 @@ struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
repeat:
rcu_read_lock();
- grp = radix_tree_lookup(&sbi->workstn_tree, index);
+ grp = xa_load(&sbi->managed_pslots, index);
if (grp) {
if (erofs_workgroup_get(grp)) {
/* prefer to relax rcu read side */
@@ -80,43 +77,37 @@ struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
return grp;
}
-int erofs_register_workgroup(struct super_block *sb,
- struct erofs_workgroup *grp)
+struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
+ struct erofs_workgroup *grp)
{
- struct erofs_sb_info *sbi;
- int err;
-
- /* grp shouldn't be broken or used before */
- if (atomic_read(&grp->refcount) != 1) {
- DBG_BUGON(1);
- return -EINVAL;
- }
-
- err = radix_tree_preload(GFP_NOFS);
- if (err)
- return err;
-
- sbi = EROFS_SB(sb);
- xa_lock(&sbi->workstn_tree);
+ struct erofs_sb_info *const sbi = EROFS_SB(sb);
+ struct erofs_workgroup *pre;
/*
- * Bump up reference count before making this workgroup
- * visible to other users in order to avoid potential UAF
- * without serialized by workstn_lock.
+ * Bump up a reference count before making this visible
+ * to others for the XArray in order to avoid potential
+ * UAF without serialized by xa_lock.
*/
- __erofs_workgroup_get(grp);
+ atomic_inc(&grp->refcount);
- err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp);
- if (err)
- /*
- * it's safe to decrease since the workgroup isn't visible
- * and refcount >= 2 (cannot be freezed).
- */
- __erofs_workgroup_put(grp);
-
- xa_unlock(&sbi->workstn_tree);
- radix_tree_preload_end();
- return err;
+repeat:
+ xa_lock(&sbi->managed_pslots);
+ pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
+ NULL, grp, GFP_NOFS);
+ if (pre) {
+ if (xa_is_err(pre)) {
+ pre = ERR_PTR(xa_err(pre));
+ } else if (erofs_workgroup_get(pre)) {
+ /* try to legitimize the current in-tree one */
+ xa_unlock(&sbi->managed_pslots);
+ cond_resched();
+ goto repeat;
+ }
+ atomic_dec(&grp->refcount);
+ grp = pre;
+ }
+ xa_unlock(&sbi->managed_pslots);
+ return grp;
}
static void __erofs_workgroup_free(struct erofs_workgroup *grp)
@@ -155,7 +146,7 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
/*
* Note that all cached pages should be unattached
- * before deleted from the radix tree. Otherwise some
+ * before deleted from the XArray. Otherwise some
* cached pages could be still attached to the orphan
* old workgroup when the new one is available in the tree.
*/
@@ -169,7 +160,7 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
* however in order to avoid some race conditions, add a
* DBG_BUGON to observe this in advance.
*/
- DBG_BUGON(radix_tree_delete(&sbi->workstn_tree, grp->index) != grp);
+ DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp);
/*
* If managed cache is on, last refcount should indicate
@@ -182,22 +173,11 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
unsigned long nr_shrink)
{
- pgoff_t first_index = 0;
- void *batch[PAGEVEC_SIZE];
+ struct erofs_workgroup *grp;
unsigned int freed = 0;
+ unsigned long index;
- int i, found;
-repeat:
- xa_lock(&sbi->workstn_tree);
-
- found = radix_tree_gang_lookup(&sbi->workstn_tree,
- batch, first_index, PAGEVEC_SIZE);
-
- for (i = 0; i < found; ++i) {
- struct erofs_workgroup *grp = batch[i];
-
- first_index = grp->index + 1;
-
+ xa_for_each(&sbi->managed_pslots, index, grp) {
/* try to shrink each valid workgroup */
if (!erofs_try_to_release_workgroup(sbi, grp))
continue;
@@ -206,10 +186,6 @@ static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
if (!--nr_shrink)
break;
}
- xa_unlock(&sbi->workstn_tree);
-
- if (i && nr_shrink)
- goto repeat;
return freed;
}
@@ -286,7 +262,7 @@ static unsigned long erofs_shrink_scan(struct shrinker *shrink,
spin_unlock(&erofs_sb_list_lock);
sbi->shrinker_run_no = run_no;
- freed += erofs_shrink_workstation(sbi, nr);
+ freed += erofs_shrink_workstation(sbi, nr - freed);
spin_lock(&erofs_sb_list_lock);
/* Get the next list element before we move this one */
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 80e47f0..c4b6c9a 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -67,16 +67,6 @@ static void z_erofs_pcluster_init_once(void *ptr)
pcl->compressed_pages[i] = NULL;
}
-static void z_erofs_pcluster_init_always(struct z_erofs_pcluster *pcl)
-{
- struct z_erofs_collection *cl = z_erofs_primarycollection(pcl);
-
- atomic_set(&pcl->obj.refcount, 1);
-
- DBG_BUGON(cl->nr_pages);
- DBG_BUGON(cl->vcnt);
-}
-
int __init z_erofs_init_zip_subsystem(void)
{
pcluster_cachep = kmem_cache_create("erofs_compress",
@@ -341,26 +331,19 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
struct inode *inode,
struct erofs_map_blocks *map)
{
- struct erofs_workgroup *grp;
- struct z_erofs_pcluster *pcl;
+ struct z_erofs_pcluster *pcl = clt->pcl;
struct z_erofs_collection *cl;
unsigned int length;
- grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
- if (!grp)
- return -ENOENT;
-
- pcl = container_of(grp, struct z_erofs_pcluster, obj);
+ /* to avoid unexpected loop formed by corrupted images */
if (clt->owned_head == &pcl->next || pcl == clt->tailpcl) {
DBG_BUGON(1);
- erofs_workgroup_put(grp);
return -EFSCORRUPTED;
}
cl = z_erofs_primarycollection(pcl);
if (cl->pageofs != (map->m_la & ~PAGE_MASK)) {
DBG_BUGON(1);
- erofs_workgroup_put(grp);
return -EFSCORRUPTED;
}
@@ -368,7 +351,6 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
if (length & Z_EROFS_PCLUSTER_FULL_LENGTH) {
if ((map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) > length) {
DBG_BUGON(1);
- erofs_workgroup_put(grp);
return -EFSCORRUPTED;
}
} else {
@@ -391,7 +373,6 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
/* clean tailpcl if the current owned_head is Z_EROFS_PCLUSTER_TAIL */
if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
clt->tailpcl = NULL;
- clt->pcl = pcl;
clt->cl = cl;
return 0;
}
@@ -402,6 +383,7 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
{
struct z_erofs_pcluster *pcl;
struct z_erofs_collection *cl;
+ struct erofs_workgroup *grp;
int err;
/* no available workgroup, let's allocate one */
@@ -409,7 +391,7 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
if (!pcl)
return -ENOMEM;
- z_erofs_pcluster_init_always(pcl);
+ atomic_set(&pcl->obj.refcount, 1);
pcl->obj.index = map->m_pa >> PAGE_SHIFT;
pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
@@ -429,19 +411,29 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
clt->mode = COLLECT_PRIMARY_FOLLOWED;
cl = z_erofs_primarycollection(pcl);
+
+ /* must be cleaned before freeing to slab */
+ DBG_BUGON(cl->nr_pages);
+ DBG_BUGON(cl->vcnt);
+
cl->pageofs = map->m_la & ~PAGE_MASK;
/*
* lock all primary followed works before visible to others
* and mutex_trylock *never* fails for a new pcluster.
*/
- mutex_trylock(&cl->lock);
+ DBG_BUGON(!mutex_trylock(&cl->lock));
- err = erofs_register_workgroup(inode->i_sb, &pcl->obj);
- if (err) {
- mutex_unlock(&cl->lock);
- kmem_cache_free(pcluster_cachep, pcl);
- return -EAGAIN;
+ grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
+ if (IS_ERR(grp)) {
+ err = PTR_ERR(grp);
+ goto err_out;
+ }
+
+ if (grp != &pcl->obj) {
+ clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
+ err = -EEXIST;
+ goto err_out;
}
/* used to check tail merging loop due to corrupted images */
if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
@@ -450,12 +442,18 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
clt->pcl = pcl;
clt->cl = cl;
return 0;
+
+err_out:
+ mutex_unlock(&cl->lock);
+ kmem_cache_free(pcluster_cachep, pcl);
+ return err;
}
static int z_erofs_collector_begin(struct z_erofs_collector *clt,
struct inode *inode,
struct erofs_map_blocks *map)
{
+ struct erofs_workgroup *grp;
int ret;
DBG_BUGON(clt->cl);
@@ -469,21 +467,25 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,
return -EINVAL;
}
-repeat:
- ret = z_erofs_lookup_collection(clt, inode, map);
- if (ret == -ENOENT) {
+ grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
+ if (grp) {
+ clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
+ } else {
ret = z_erofs_register_collection(clt, inode, map);
- /* someone registered at the same time, give another try */
- if (ret == -EAGAIN) {
- cond_resched();
- goto repeat;
- }
+ if (!ret)
+ goto out;
+ if (ret != -EEXIST)
+ return ret;
}
- if (ret)
+ ret = z_erofs_lookup_collection(clt, inode, map);
+ if (ret) {
+ erofs_workgroup_put(&clt->pcl->obj);
return ret;
+ }
+out:
z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS,
clt->cl->pagevec, clt->cl->vcnt);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index b041b66..eee3c92a 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1854,9 +1854,9 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
waiter = true;
init_waitqueue_entry(&wait, current);
- spin_lock_irq(&ep->wq.lock);
+ write_lock_irq(&ep->lock);
__add_wait_queue_exclusive(&ep->wq, &wait);
- spin_unlock_irq(&ep->wq.lock);
+ write_unlock_irq(&ep->lock);
}
for (;;) {
@@ -1904,9 +1904,9 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
goto fetch_events;
if (waiter) {
- spin_lock_irq(&ep->wq.lock);
+ write_lock_irq(&ep->lock);
__remove_wait_queue(&ep->wq, &wait);
- spin_unlock_irq(&ep->wq.lock);
+ write_unlock_irq(&ep->lock);
}
return res;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 119667e..c885cf7 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -960,8 +960,9 @@ ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
static int
ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
- return dax_writeback_mapping_range(mapping,
- mapping->host->i_sb->s_bdev, wbc);
+ struct ext2_sb_info *sbi = EXT2_SB(mapping->host->i_sb);
+
+ return dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc);
}
const struct address_space_operations ext2_aops = {
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 5f993a4..8fd0b3c 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -270,6 +270,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
ext4_group_t ngroups = ext4_get_groups_count(sb);
struct ext4_group_desc *desc;
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct buffer_head *bh_p;
if (block_group >= ngroups) {
ext4_error(sb, "block_group >= groups_count - block_group = %u,"
@@ -280,7 +281,14 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
- if (!sbi->s_group_desc[group_desc]) {
+ bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc);
+ /*
+ * sbi_array_rcu_deref returns with rcu unlocked, this is ok since
+ * the pointer being dereferenced won't be dereferenced again. By
+ * looking at the usage in add_new_gdb() the value isn't modified,
+ * just the pointer, and so it remains valid.
+ */
+ if (!bh_p) {
ext4_error(sb, "Group descriptor not loaded - "
"block_group = %u, group_desc = %u, desc = %u",
block_group, group_desc, offset);
@@ -288,10 +296,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
}
desc = (struct ext4_group_desc *)(
- (__u8 *)sbi->s_group_desc[group_desc]->b_data +
+ (__u8 *)bh_p->b_data +
offset * EXT4_DESC_SIZE(sb));
if (bh)
- *bh = sbi->s_group_desc[group_desc];
+ *bh = bh_p;
return desc;
}
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 1ee04e7..0a734ff 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -207,6 +207,7 @@ static int ext4_protect_reserved_inode(struct super_block *sb,
return PTR_ERR(inode);
num = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
while (i < num) {
+ cond_resched();
map.m_lblk = i;
map.m_len = num - i;
n = ext4_map_blocks(NULL, inode, &map, 0);
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 1f34074..9aa1f75 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -129,12 +129,14 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
if (err != ERR_BAD_DX_DIR) {
return err;
}
- /*
- * We don't set the inode dirty flag since it's not
- * critical that it get flushed back to the disk.
- */
- ext4_clear_inode_flag(file_inode(file),
- EXT4_INODE_INDEX);
+ /* Can we just clear INDEX flag to ignore htree information? */
+ if (!ext4_has_metadata_csum(sb)) {
+ /*
+ * We don't set the inode dirty flag since it's not
+ * critical that it gets flushed back to the disk.
+ */
+ ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
+ }
}
if (ext4_has_inline_data(inode)) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9a2ee24..61b37a0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1400,7 +1400,7 @@ struct ext4_sb_info {
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
struct buffer_head * s_sbh; /* Buffer containing the super block */
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
- struct buffer_head **s_group_desc;
+ struct buffer_head * __rcu *s_group_desc;
unsigned int s_mount_opt;
unsigned int s_mount_opt2;
unsigned int s_mount_flags;
@@ -1462,7 +1462,7 @@ struct ext4_sb_info {
#endif
/* for buddy allocator */
- struct ext4_group_info ***s_group_info;
+ struct ext4_group_info ** __rcu *s_group_info;
struct inode *s_buddy_cache;
spinlock_t s_md_lock;
unsigned short *s_mb_offsets;
@@ -1512,7 +1512,7 @@ struct ext4_sb_info {
unsigned int s_extent_max_zeroout_kb;
unsigned int s_log_groups_per_flex;
- struct flex_groups *s_flex_groups;
+ struct flex_groups * __rcu *s_flex_groups;
ext4_group_t s_flex_groups_allocated;
/* workqueue for reserved extent conversions (buffered io) */
@@ -1552,8 +1552,11 @@ struct ext4_sb_info {
struct ratelimit_state s_warning_ratelimit_state;
struct ratelimit_state s_msg_ratelimit_state;
- /* Barrier between changing inodes' journal flags and writepages ops. */
- struct percpu_rw_semaphore s_journal_flag_rwsem;
+ /*
+ * Barrier between writepages ops and changing any inode's JOURNAL_DATA
+ * or EXTENTS flag.
+ */
+ struct percpu_rw_semaphore s_writepages_rwsem;
struct dax_device *s_daxdev;
#ifdef CONFIG_EXT4_DEBUG
unsigned long s_simulate_fail;
@@ -1577,6 +1580,23 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
}
/*
+ * Returns: sbi->field[index]
+ * Used to access an array element from the following sbi fields which require
+ * rcu protection to avoid dereferencing an invalid pointer due to reassignment
+ * - s_group_desc
+ * - s_group_info
+ * - s_flex_group
+ */
+#define sbi_array_rcu_deref(sbi, field, index) \
+({ \
+ typeof(*((sbi)->field)) _v; \
+ rcu_read_lock(); \
+ _v = ((typeof(_v)*)rcu_dereference((sbi)->field))[index]; \
+ rcu_read_unlock(); \
+ _v; \
+})
+
+/*
* Simulate_fail codes
*/
#define EXT4_SIM_BBITMAP_EIO 1
@@ -2544,8 +2564,11 @@ void ext4_insert_dentry(struct inode *inode,
struct ext4_filename *fname);
static inline void ext4_update_dx_flag(struct inode *inode)
{
- if (!ext4_has_feature_dir_index(inode->i_sb))
+ if (!ext4_has_feature_dir_index(inode->i_sb)) {
+ /* ext4_iget() should have caught this... */
+ WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb));
ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
+ }
}
static const unsigned char ext4_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
@@ -2727,6 +2750,7 @@ extern int ext4_generic_delete_entry(handle_t *handle,
extern bool ext4_empty_dir(struct inode *inode);
/* resize.c */
+extern void ext4_kvfree_array_rcu(void *to_free);
extern int ext4_group_add(struct super_block *sb,
struct ext4_new_group_data *input);
extern int ext4_group_extend(struct super_block *sb,
@@ -2973,13 +2997,13 @@ static inline
struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
ext4_group_t group)
{
- struct ext4_group_info ***grp_info;
+ struct ext4_group_info **grp_info;
long indexv, indexh;
BUG_ON(group >= EXT4_SB(sb)->s_groups_count);
- grp_info = EXT4_SB(sb)->s_group_info;
indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
- return grp_info[indexv][indexh];
+ grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv);
+ return grp_info[indexh];
}
/*
@@ -3029,7 +3053,7 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
!inode_is_locked(inode));
down_write(&EXT4_I(inode)->i_data_sem);
if (newsize > EXT4_I(inode)->i_disksize)
- EXT4_I(inode)->i_disksize = newsize;
+ WRITE_ONCE(EXT4_I(inode)->i_disksize, newsize);
up_write(&EXT4_I(inode)->i_data_sem);
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index c66e8f9..f95ee99 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -328,11 +328,13 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
percpu_counter_inc(&sbi->s_freeinodes_counter);
if (sbi->s_log_groups_per_flex) {
- ext4_group_t f = ext4_flex_group(sbi, block_group);
+ struct flex_groups *fg;
- atomic_inc(&sbi->s_flex_groups[f].free_inodes);
+ fg = sbi_array_rcu_deref(sbi, s_flex_groups,
+ ext4_flex_group(sbi, block_group));
+ atomic_inc(&fg->free_inodes);
if (is_directory)
- atomic_dec(&sbi->s_flex_groups[f].used_dirs);
+ atomic_dec(&fg->used_dirs);
}
BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
fatal = ext4_handle_dirty_metadata(handle, NULL, bh2);
@@ -368,12 +370,13 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
int flex_size, struct orlov_stats *stats)
{
struct ext4_group_desc *desc;
- struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
if (flex_size > 1) {
- stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
- stats->free_clusters = atomic64_read(&flex_group[g].free_clusters);
- stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
+ struct flex_groups *fg = sbi_array_rcu_deref(EXT4_SB(sb),
+ s_flex_groups, g);
+ stats->free_inodes = atomic_read(&fg->free_inodes);
+ stats->free_clusters = atomic64_read(&fg->free_clusters);
+ stats->used_dirs = atomic_read(&fg->used_dirs);
return;
}
@@ -1054,7 +1057,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
if (sbi->s_log_groups_per_flex) {
ext4_group_t f = ext4_flex_group(sbi, group);
- atomic_inc(&sbi->s_flex_groups[f].used_dirs);
+ atomic_inc(&sbi_array_rcu_deref(sbi, s_flex_groups,
+ f)->used_dirs);
}
}
if (ext4_has_group_desc_csum(sb)) {
@@ -1077,7 +1081,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
if (sbi->s_log_groups_per_flex) {
flex_group = ext4_flex_group(sbi, group);
- atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
+ atomic_dec(&sbi_array_rcu_deref(sbi, s_flex_groups,
+ flex_group)->free_inodes);
}
inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3313168..fa0ff78 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2465,7 +2465,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
* truncate are avoided by checking i_size under i_data_sem.
*/
disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT;
- if (disksize > EXT4_I(inode)->i_disksize) {
+ if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) {
int err2;
loff_t i_size;
@@ -2628,7 +2628,7 @@ static int ext4_writepages(struct address_space *mapping,
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
- percpu_down_read(&sbi->s_journal_flag_rwsem);
+ percpu_down_read(&sbi->s_writepages_rwsem);
trace_ext4_writepages(inode, wbc);
/*
@@ -2849,7 +2849,7 @@ static int ext4_writepages(struct address_space *mapping,
out_writepages:
trace_ext4_writepages_result(inode, wbc, ret,
nr_to_write - wbc->nr_to_write);
- percpu_up_read(&sbi->s_journal_flag_rwsem);
+ percpu_up_read(&sbi->s_writepages_rwsem);
return ret;
}
@@ -2864,13 +2864,13 @@ static int ext4_dax_writepages(struct address_space *mapping,
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
- percpu_down_read(&sbi->s_journal_flag_rwsem);
+ percpu_down_read(&sbi->s_writepages_rwsem);
trace_ext4_writepages(inode, wbc);
- ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, wbc);
+ ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc);
trace_ext4_writepages_result(inode, wbc, ret,
nr_to_write - wbc->nr_to_write);
- percpu_up_read(&sbi->s_journal_flag_rwsem);
+ percpu_up_read(&sbi->s_writepages_rwsem);
return ret;
}
@@ -4644,6 +4644,18 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
ret = -EFSCORRUPTED;
goto bad_inode;
}
+ /*
+ * If dir_index is not enabled but there's dir with INDEX flag set,
+ * we'd normally treat htree data as empty space. But with metadata
+ * checksumming that corrupts checksums so forbid that.
+ */
+ if (!ext4_has_feature_dir_index(sb) && ext4_has_metadata_csum(sb) &&
+ ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) {
+ ext4_error_inode(inode, function, line, 0,
+ "iget: Dir with htree data on filesystem without dir_index feature.");
+ ret = -EFSCORRUPTED;
+ goto bad_inode;
+ }
ei->i_disksize = inode->i_size;
#ifdef CONFIG_QUOTA
ei->i_reserved_quota = 0;
@@ -5849,7 +5861,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
}
}
- percpu_down_write(&sbi->s_journal_flag_rwsem);
+ percpu_down_write(&sbi->s_writepages_rwsem);
jbd2_journal_lock_updates(journal);
/*
@@ -5866,7 +5878,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
err = jbd2_journal_flush(journal);
if (err < 0) {
jbd2_journal_unlock_updates(journal);
- percpu_up_write(&sbi->s_journal_flag_rwsem);
+ percpu_up_write(&sbi->s_writepages_rwsem);
return err;
}
ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
@@ -5874,7 +5886,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
ext4_set_aops(inode);
jbd2_journal_unlock_updates(journal);
- percpu_up_write(&sbi->s_journal_flag_rwsem);
+ percpu_up_write(&sbi->s_writepages_rwsem);
if (val)
up_write(&EXT4_I(inode)->i_mmap_sem);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f648381..51a78eb 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2356,7 +2356,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned size;
- struct ext4_group_info ***new_groupinfo;
+ struct ext4_group_info ***old_groupinfo, ***new_groupinfo;
size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
EXT4_DESC_PER_BLOCK_BITS(sb);
@@ -2369,13 +2369,16 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
return -ENOMEM;
}
- if (sbi->s_group_info) {
- memcpy(new_groupinfo, sbi->s_group_info,
+ rcu_read_lock();
+ old_groupinfo = rcu_dereference(sbi->s_group_info);
+ if (old_groupinfo)
+ memcpy(new_groupinfo, old_groupinfo,
sbi->s_group_info_size * sizeof(*sbi->s_group_info));
- kvfree(sbi->s_group_info);
- }
- sbi->s_group_info = new_groupinfo;
+ rcu_read_unlock();
+ rcu_assign_pointer(sbi->s_group_info, new_groupinfo);
sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
+ if (old_groupinfo)
+ ext4_kvfree_array_rcu(old_groupinfo);
ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
sbi->s_group_info_size);
return 0;
@@ -2387,6 +2390,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
{
int i;
int metalen = 0;
+ int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb);
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_info **meta_group_info;
struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
@@ -2405,12 +2409,12 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
"for a buddy group");
goto exit_meta_group_info;
}
- sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
- meta_group_info;
+ rcu_read_lock();
+ rcu_dereference(sbi->s_group_info)[idx] = meta_group_info;
+ rcu_read_unlock();
}
- meta_group_info =
- sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
+ meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx);
i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
@@ -2458,8 +2462,13 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
exit_group_info:
/* If a meta_group_info table has been allocated, release it now */
if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
- kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
- sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL;
+ struct ext4_group_info ***group_info;
+
+ rcu_read_lock();
+ group_info = rcu_dereference(sbi->s_group_info);
+ kfree(group_info[idx]);
+ group_info[idx] = NULL;
+ rcu_read_unlock();
}
exit_meta_group_info:
return -ENOMEM;
@@ -2472,6 +2481,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
struct ext4_sb_info *sbi = EXT4_SB(sb);
int err;
struct ext4_group_desc *desc;
+ struct ext4_group_info ***group_info;
struct kmem_cache *cachep;
err = ext4_mb_alloc_groupinfo(sb, ngroups);
@@ -2507,11 +2517,16 @@ static int ext4_mb_init_backend(struct super_block *sb)
while (i-- > 0)
kmem_cache_free(cachep, ext4_get_group_info(sb, i));
i = sbi->s_group_info_size;
+ rcu_read_lock();
+ group_info = rcu_dereference(sbi->s_group_info);
while (i-- > 0)
- kfree(sbi->s_group_info[i]);
+ kfree(group_info[i]);
+ rcu_read_unlock();
iput(sbi->s_buddy_cache);
err_freesgi:
- kvfree(sbi->s_group_info);
+ rcu_read_lock();
+ kvfree(rcu_dereference(sbi->s_group_info));
+ rcu_read_unlock();
return -ENOMEM;
}
@@ -2700,7 +2715,7 @@ int ext4_mb_release(struct super_block *sb)
ext4_group_t ngroups = ext4_get_groups_count(sb);
ext4_group_t i;
int num_meta_group_infos;
- struct ext4_group_info *grinfo;
+ struct ext4_group_info *grinfo, ***group_info;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
@@ -2719,9 +2734,12 @@ int ext4_mb_release(struct super_block *sb)
num_meta_group_infos = (ngroups +
EXT4_DESC_PER_BLOCK(sb) - 1) >>
EXT4_DESC_PER_BLOCK_BITS(sb);
+ rcu_read_lock();
+ group_info = rcu_dereference(sbi->s_group_info);
for (i = 0; i < num_meta_group_infos; i++)
- kfree(sbi->s_group_info[i]);
- kvfree(sbi->s_group_info);
+ kfree(group_info[i]);
+ kvfree(group_info);
+ rcu_read_unlock();
}
kfree(sbi->s_mb_offsets);
kfree(sbi->s_mb_maxs);
@@ -3020,7 +3038,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
ext4_group_t flex_group = ext4_flex_group(sbi,
ac->ac_b_ex.fe_group);
atomic64_sub(ac->ac_b_ex.fe_len,
- &sbi->s_flex_groups[flex_group].free_clusters);
+ &sbi_array_rcu_deref(sbi, s_flex_groups,
+ flex_group)->free_clusters);
}
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -4918,7 +4937,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
atomic64_add(count_clusters,
- &sbi->s_flex_groups[flex_group].free_clusters);
+ &sbi_array_rcu_deref(sbi, s_flex_groups,
+ flex_group)->free_clusters);
}
/*
@@ -5075,7 +5095,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
atomic64_add(clusters_freed,
- &sbi->s_flex_groups[flex_group].free_clusters);
+ &sbi_array_rcu_deref(sbi, s_flex_groups,
+ flex_group)->free_clusters);
}
ext4_mb_unload_buddy(&e4b);
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 89725fa..fb6520f 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -407,6 +407,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode)
int ext4_ext_migrate(struct inode *inode)
{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
handle_t *handle;
int retval = 0, i;
__le32 *i_data;
@@ -431,6 +432,8 @@ int ext4_ext_migrate(struct inode *inode)
*/
return retval;
+ percpu_down_write(&sbi->s_writepages_rwsem);
+
/*
* Worst case we can touch the allocation bitmaps, a bgd
* block, and a block to link in the orphan list. We do need
@@ -441,7 +444,7 @@ int ext4_ext_migrate(struct inode *inode)
if (IS_ERR(handle)) {
retval = PTR_ERR(handle);
- return retval;
+ goto out_unlock;
}
goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
@@ -452,7 +455,7 @@ int ext4_ext_migrate(struct inode *inode)
if (IS_ERR(tmp_inode)) {
retval = PTR_ERR(tmp_inode);
ext4_journal_stop(handle);
- return retval;
+ goto out_unlock;
}
i_size_write(tmp_inode, i_size_read(inode));
/*
@@ -494,7 +497,7 @@ int ext4_ext_migrate(struct inode *inode)
*/
ext4_orphan_del(NULL, tmp_inode);
retval = PTR_ERR(handle);
- goto out;
+ goto out_tmp_inode;
}
ei = EXT4_I(inode);
@@ -576,10 +579,11 @@ int ext4_ext_migrate(struct inode *inode)
ext4_ext_tree_init(handle, tmp_inode);
out_stop:
ext4_journal_stop(handle);
-out:
+out_tmp_inode:
unlock_new_inode(tmp_inode);
iput(tmp_inode);
-
+out_unlock:
+ percpu_up_write(&sbi->s_writepages_rwsem);
return retval;
}
@@ -589,7 +593,8 @@ int ext4_ext_migrate(struct inode *inode)
int ext4_ind_migrate(struct inode *inode)
{
struct ext4_extent_header *eh;
- struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct ext4_super_block *es = sbi->s_es;
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_extent *ex;
unsigned int i, len;
@@ -613,9 +618,13 @@ int ext4_ind_migrate(struct inode *inode)
if (test_opt(inode->i_sb, DELALLOC))
ext4_alloc_da_blocks(inode);
+ percpu_down_write(&sbi->s_writepages_rwsem);
+
handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto out_unlock;
+ }
down_write(&EXT4_I(inode)->i_data_sem);
ret = ext4_ext_check_inode(inode);
@@ -650,5 +659,7 @@ int ext4_ind_migrate(struct inode *inode)
errout:
ext4_journal_stop(handle);
up_write(&EXT4_I(inode)->i_data_sem);
+out_unlock:
+ percpu_up_write(&sbi->s_writepages_rwsem);
return ret;
}
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 1c44b1a..87f7551 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -120,10 +120,10 @@ void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
{
__ext4_warning(sb, function, line, "%s", msg);
__ext4_warning(sb, function, line,
- "MMP failure info: last update time: %llu, last update "
- "node: %s, last update device: %s",
- (long long unsigned int) le64_to_cpu(mmp->mmp_time),
- mmp->mmp_nodename, mmp->mmp_bdevname);
+ "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s",
+ (unsigned long long)le64_to_cpu(mmp->mmp_time),
+ (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename,
+ (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname);
}
/*
@@ -154,6 +154,7 @@ static int kmmpd(void *data)
mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
EXT4_MMP_MIN_CHECK_INTERVAL);
mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
+ BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE);
bdevname(bh->b_bdev, mmp->mmp_bdevname);
memcpy(mmp->mmp_nodename, init_utsname()->nodename,
@@ -379,7 +380,8 @@ int ext4_multi_mount_protect(struct super_block *sb,
/*
* Start a kernel thread to update the MMP block periodically.
*/
- EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
+ EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%.*s",
+ (int)sizeof(mmp->mmp_bdevname),
bdevname(bh->b_bdev,
mmp->mmp_bdevname));
if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 129d2eb..b05ea72 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1511,6 +1511,7 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir,
/*
* We deal with the read-ahead logic here.
*/
+ cond_resched();
if (ra_ptr >= ra_max) {
/* Refill the readahead buffer */
ra_ptr = 0;
@@ -2213,6 +2214,13 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
retval = ext4_dx_add_entry(handle, &fname, dir, inode);
if (!retval || (retval != ERR_BAD_DX_DIR))
goto out;
+ /* Can we just ignore htree data? */
+ if (ext4_has_metadata_csum(sb)) {
+ EXT4_ERROR_INODE(dir,
+ "Directory has corrupted htree index.");
+ retval = -EFSCORRUPTED;
+ goto out;
+ }
ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
dx_fallback++;
ext4_mark_inode_dirty(handle, dir);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 86a2500..a50b512 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -17,6 +17,33 @@
#include "ext4_jbd2.h"
+struct ext4_rcu_ptr {
+ struct rcu_head rcu;
+ void *ptr;
+};
+
+static void ext4_rcu_ptr_callback(struct rcu_head *head)
+{
+ struct ext4_rcu_ptr *ptr;
+
+ ptr = container_of(head, struct ext4_rcu_ptr, rcu);
+ kvfree(ptr->ptr);
+ kfree(ptr);
+}
+
+void ext4_kvfree_array_rcu(void *to_free)
+{
+ struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
+
+ if (ptr) {
+ ptr->ptr = to_free;
+ call_rcu(&ptr->rcu, ext4_rcu_ptr_callback);
+ return;
+ }
+ synchronize_rcu();
+ kvfree(to_free);
+}
+
int ext4_resize_begin(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -542,8 +569,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
brelse(gdb);
goto out;
}
- memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data,
- gdb->b_size);
+ memcpy(gdb->b_data, sbi_array_rcu_deref(sbi,
+ s_group_desc, j)->b_data, gdb->b_size);
set_buffer_uptodate(gdb);
err = ext4_handle_dirty_metadata(handle, NULL, gdb);
@@ -860,13 +887,15 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
}
brelse(dind);
- o_group_desc = EXT4_SB(sb)->s_group_desc;
+ rcu_read_lock();
+ o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
memcpy(n_group_desc, o_group_desc,
EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
+ rcu_read_unlock();
n_group_desc[gdb_num] = gdb_bh;
- EXT4_SB(sb)->s_group_desc = n_group_desc;
+ rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
EXT4_SB(sb)->s_gdb_count++;
- kvfree(o_group_desc);
+ ext4_kvfree_array_rcu(o_group_desc);
le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
err = ext4_handle_dirty_super(handle, sb);
@@ -909,9 +938,11 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
return err;
}
- o_group_desc = EXT4_SB(sb)->s_group_desc;
+ rcu_read_lock();
+ o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
memcpy(n_group_desc, o_group_desc,
EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
+ rcu_read_unlock();
n_group_desc[gdb_num] = gdb_bh;
BUFFER_TRACE(gdb_bh, "get_write_access");
@@ -922,9 +953,9 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
return err;
}
- EXT4_SB(sb)->s_group_desc = n_group_desc;
+ rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
EXT4_SB(sb)->s_gdb_count++;
- kvfree(o_group_desc);
+ ext4_kvfree_array_rcu(o_group_desc);
return err;
}
@@ -1188,7 +1219,8 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
* use non-sparse filesystems anymore. This is already checked above.
*/
if (gdb_off) {
- gdb_bh = sbi->s_group_desc[gdb_num];
+ gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
+ gdb_num);
BUFFER_TRACE(gdb_bh, "get_write_access");
err = ext4_journal_get_write_access(handle, gdb_bh);
@@ -1270,7 +1302,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
/*
* get_write_access() has been called on gdb_bh by ext4_add_new_desc().
*/
- gdb_bh = sbi->s_group_desc[gdb_num];
+ gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num);
/* Update group descriptor block for new group */
gdp = (struct ext4_group_desc *)(gdb_bh->b_data +
gdb_off * EXT4_DESC_SIZE(sb));
@@ -1398,11 +1430,14 @@ static void ext4_update_super(struct super_block *sb,
percpu_counter_read(&sbi->s_freeclusters_counter));
if (ext4_has_feature_flex_bg(sb) && sbi->s_log_groups_per_flex) {
ext4_group_t flex_group;
+ struct flex_groups *fg;
+
flex_group = ext4_flex_group(sbi, group_data[0].group);
+ fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
atomic64_add(EXT4_NUM_B2C(sbi, free_blocks),
- &sbi->s_flex_groups[flex_group].free_clusters);
+ &fg->free_clusters);
atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
- &sbi->s_flex_groups[flex_group].free_inodes);
+ &fg->free_inodes);
}
/*
@@ -1497,7 +1532,8 @@ static int ext4_flex_group_add(struct super_block *sb,
for (; gdb_num <= gdb_num_end; gdb_num++) {
struct buffer_head *gdb_bh;
- gdb_bh = sbi->s_group_desc[gdb_num];
+ gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
+ gdb_num);
if (old_gdb == gdb_bh->b_blocknr)
continue;
update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8434217..c8dff4c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -43,7 +43,7 @@
#include <linux/uaccess.h>
#include <linux/iversion.h>
#include <linux/unicode.h>
-
+#include <linux/part_stat.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
@@ -927,7 +927,6 @@ void ext4_update_dynamic_rev(struct super_block *sb)
static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
{
struct block_device *bdev;
- char b[BDEVNAME_SIZE];
bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
if (IS_ERR(bdev))
@@ -935,8 +934,9 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
return bdev;
fail:
- ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
- __bdevname(dev, b), PTR_ERR(bdev));
+ ext4_msg(sb, KERN_ERR,
+ "failed to open journal device unknown-block(%u,%u) %ld",
+ MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
return NULL;
}
@@ -1014,6 +1014,8 @@ static void ext4_put_super(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
+ struct buffer_head **group_desc;
+ struct flex_groups **flex_groups;
int aborted = 0;
int i, err;
@@ -1046,15 +1048,23 @@ static void ext4_put_super(struct super_block *sb)
if (!sb_rdonly(sb))
ext4_commit_super(sb, 1);
+ rcu_read_lock();
+ group_desc = rcu_dereference(sbi->s_group_desc);
for (i = 0; i < sbi->s_gdb_count; i++)
- brelse(sbi->s_group_desc[i]);
- kvfree(sbi->s_group_desc);
- kvfree(sbi->s_flex_groups);
+ brelse(group_desc[i]);
+ kvfree(group_desc);
+ flex_groups = rcu_dereference(sbi->s_flex_groups);
+ if (flex_groups) {
+ for (i = 0; i < sbi->s_flex_groups_allocated; i++)
+ kvfree(flex_groups[i]);
+ kvfree(flex_groups);
+ }
+ rcu_read_unlock();
percpu_counter_destroy(&sbi->s_freeclusters_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
- percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
+ percpu_free_rwsem(&sbi->s_writepages_rwsem);
#ifdef CONFIG_QUOTA
for (i = 0; i < EXT4_MAXQUOTAS; i++)
kfree(get_qf_name(sb, sbi, i));
@@ -2380,8 +2390,8 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct flex_groups *new_groups;
- int size;
+ struct flex_groups **old_groups, **new_groups;
+ int size, i, j;
if (!sbi->s_log_groups_per_flex)
return 0;
@@ -2390,22 +2400,37 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
if (size <= sbi->s_flex_groups_allocated)
return 0;
- size = roundup_pow_of_two(size * sizeof(struct flex_groups));
- new_groups = kvzalloc(size, GFP_KERNEL);
+ new_groups = kvzalloc(roundup_pow_of_two(size *
+ sizeof(*sbi->s_flex_groups)), GFP_KERNEL);
if (!new_groups) {
- ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
- size / (int) sizeof(struct flex_groups));
+ ext4_msg(sb, KERN_ERR,
+ "not enough memory for %d flex group pointers", size);
return -ENOMEM;
}
-
- if (sbi->s_flex_groups) {
- memcpy(new_groups, sbi->s_flex_groups,
- (sbi->s_flex_groups_allocated *
- sizeof(struct flex_groups)));
- kvfree(sbi->s_flex_groups);
+ for (i = sbi->s_flex_groups_allocated; i < size; i++) {
+ new_groups[i] = kvzalloc(roundup_pow_of_two(
+ sizeof(struct flex_groups)),
+ GFP_KERNEL);
+ if (!new_groups[i]) {
+ for (j = sbi->s_flex_groups_allocated; j < i; j++)
+ kvfree(new_groups[j]);
+ kvfree(new_groups);
+ ext4_msg(sb, KERN_ERR,
+ "not enough memory for %d flex groups", size);
+ return -ENOMEM;
+ }
}
- sbi->s_flex_groups = new_groups;
- sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
+ rcu_read_lock();
+ old_groups = rcu_dereference(sbi->s_flex_groups);
+ if (old_groups)
+ memcpy(new_groups, old_groups,
+ (sbi->s_flex_groups_allocated *
+ sizeof(struct flex_groups *)));
+ rcu_read_unlock();
+ rcu_assign_pointer(sbi->s_flex_groups, new_groups);
+ sbi->s_flex_groups_allocated = size;
+ if (old_groups)
+ ext4_kvfree_array_rcu(old_groups);
return 0;
}
@@ -2413,6 +2438,7 @@ static int ext4_fill_flex_info(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_desc *gdp = NULL;
+ struct flex_groups *fg;
ext4_group_t flex_group;
int i, err;
@@ -2430,12 +2456,11 @@ static int ext4_fill_flex_info(struct super_block *sb)
gdp = ext4_get_group_desc(sb, i, NULL);
flex_group = ext4_flex_group(sbi, i);
- atomic_add(ext4_free_inodes_count(sb, gdp),
- &sbi->s_flex_groups[flex_group].free_inodes);
+ fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
+ atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes);
atomic64_add(ext4_free_group_clusters(sb, gdp),
- &sbi->s_flex_groups[flex_group].free_clusters);
- atomic_add(ext4_used_dirs_count(sb, gdp),
- &sbi->s_flex_groups[flex_group].used_dirs);
+ &fg->free_clusters);
+ atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs);
}
return 1;
@@ -3009,17 +3034,11 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
return 0;
}
-#ifndef CONFIG_QUOTA
- if (ext4_has_feature_quota(sb) && !readonly) {
+#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
+ if (!readonly && (ext4_has_feature_quota(sb) ||
+ ext4_has_feature_project(sb))) {
ext4_msg(sb, KERN_ERR,
- "Filesystem with quota feature cannot be mounted RDWR "
- "without CONFIG_QUOTA");
- return 0;
- }
- if (ext4_has_feature_project(sb) && !readonly) {
- ext4_msg(sb, KERN_ERR,
- "Filesystem with project quota feature cannot be mounted RDWR "
- "without CONFIG_QUOTA");
+ "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
return 0;
}
#endif /* CONFIG_QUOTA */
@@ -3640,9 +3659,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
char *orig_data = kstrdup(data, GFP_KERNEL);
- struct buffer_head *bh;
+ struct buffer_head *bh, **group_desc;
struct ext4_super_block *es = NULL;
struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+ struct flex_groups **flex_groups;
ext4_fsblk_t block;
ext4_fsblk_t sb_block = get_sb_block(&data);
ext4_fsblk_t logical_sb_block;
@@ -3814,6 +3834,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
*/
sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
+ blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
+ if (blocksize < EXT4_MIN_BLOCK_SIZE ||
+ blocksize > EXT4_MAX_BLOCK_SIZE) {
+ ext4_msg(sb, KERN_ERR,
+ "Unsupported filesystem blocksize %d (%d log_block_size)",
+ blocksize, le32_to_cpu(es->s_log_block_size));
+ goto failed_mount;
+ }
+
if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
@@ -3831,6 +3860,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ext4_msg(sb, KERN_ERR,
"unsupported inode size: %d",
sbi->s_inode_size);
+ ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize);
goto failed_mount;
}
/*
@@ -4033,14 +4063,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
goto failed_mount;
- blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
- if (blocksize < EXT4_MIN_BLOCK_SIZE ||
- blocksize > EXT4_MAX_BLOCK_SIZE) {
- ext4_msg(sb, KERN_ERR,
- "Unsupported filesystem blocksize %d (%d log_block_size)",
- blocksize, le32_to_cpu(es->s_log_block_size));
- goto failed_mount;
- }
if (le32_to_cpu(es->s_log_block_size) >
(EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
ext4_msg(sb, KERN_ERR,
@@ -4294,9 +4316,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
}
- sbi->s_group_desc = kvmalloc_array(db_count,
- sizeof(struct buffer_head *),
- GFP_KERNEL);
+ rcu_assign_pointer(sbi->s_group_desc,
+ kvmalloc_array(db_count,
+ sizeof(struct buffer_head *),
+ GFP_KERNEL));
if (sbi->s_group_desc == NULL) {
ext4_msg(sb, KERN_ERR, "not enough memory");
ret = -ENOMEM;
@@ -4312,14 +4335,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
for (i = 0; i < db_count; i++) {
+ struct buffer_head *bh;
+
block = descriptor_loc(sb, logical_sb_block, i);
- sbi->s_group_desc[i] = sb_bread_unmovable(sb, block);
- if (!sbi->s_group_desc[i]) {
+ bh = sb_bread_unmovable(sb, block);
+ if (!bh) {
ext4_msg(sb, KERN_ERR,
"can't read group descriptor %d", i);
db_count = i;
goto failed_mount2;
}
+ rcu_read_lock();
+ rcu_dereference(sbi->s_group_desc)[i] = bh;
+ rcu_read_unlock();
}
sbi->s_gdb_count = db_count;
if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
@@ -4598,7 +4626,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
GFP_KERNEL);
if (!err)
- err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem);
+ err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
if (err) {
ext4_msg(sb, KERN_ERR, "insufficient memory");
@@ -4686,13 +4714,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ext4_unregister_li_request(sb);
failed_mount6:
ext4_mb_release(sb);
- if (sbi->s_flex_groups)
- kvfree(sbi->s_flex_groups);
+ rcu_read_lock();
+ flex_groups = rcu_dereference(sbi->s_flex_groups);
+ if (flex_groups) {
+ for (i = 0; i < sbi->s_flex_groups_allocated; i++)
+ kvfree(flex_groups[i]);
+ kvfree(flex_groups);
+ }
+ rcu_read_unlock();
percpu_counter_destroy(&sbi->s_freeclusters_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
- percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
+ percpu_free_rwsem(&sbi->s_writepages_rwsem);
failed_mount5:
ext4_ext_release(sb);
ext4_release_system_zone(sb);
@@ -4721,9 +4755,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
failed_mount2:
+ rcu_read_lock();
+ group_desc = rcu_dereference(sbi->s_group_desc);
for (i = 0; i < db_count; i++)
- brelse(sbi->s_group_desc[i]);
- kvfree(sbi->s_group_desc);
+ brelse(group_desc[i]);
+ kvfree(group_desc);
+ rcu_read_unlock();
failed_mount:
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
@@ -5585,10 +5622,7 @@ static int ext4_statfs_project(struct super_block *sb,
return PTR_ERR(dquot);
spin_lock(&dquot->dq_dqb_lock);
- limit = 0;
- if (dquot->dq_dqb.dqb_bsoftlimit &&
- (!limit || dquot->dq_dqb.dqb_bsoftlimit < limit))
- limit = dquot->dq_dqb.dqb_bsoftlimit;
+ limit = dquot->dq_dqb.dqb_bsoftlimit;
if (dquot->dq_dqb.dqb_bhardlimit &&
(!limit || dquot->dq_dqb.dqb_bhardlimit < limit))
limit = dquot->dq_dqb.dqb_bhardlimit;
@@ -5603,10 +5637,7 @@ static int ext4_statfs_project(struct super_block *sb,
(buf->f_blocks - curblock) : 0;
}
- limit = 0;
- if (dquot->dq_dqb.dqb_isoftlimit &&
- (!limit || dquot->dq_dqb.dqb_isoftlimit < limit))
- limit = dquot->dq_dqb.dqb_isoftlimit;
+ limit = dquot->dq_dqb.dqb_isoftlimit;
if (dquot->dq_dqb.dqb_ihardlimit &&
(!limit || dquot->dq_dqb.dqb_ihardlimit < limit))
limit = dquot->dq_dqb.dqb_ihardlimit;
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index d218ebd..04bfaf6 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -13,6 +13,7 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
+#include <linux/part_stat.h>
#include "ext4.h"
#include "ext4_jbd2.h"
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5355be6..088c3e7 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -22,6 +22,7 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/quotaops.h>
+#include <linux/part_stat.h>
#include <crypto/hash.h>
#include <linux/fscrypt.h>
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 65a7a43..d398b2d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -24,6 +24,7 @@
#include <linux/sysfs.h>
#include <linux/quota.h>
#include <linux/unicode.h>
+#include <linux/part_stat.h>
#include "f2fs.h"
#include "node.h"
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 594b05a..71946da 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -750,6 +750,13 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
return NULL;
init_rwsem(&ei->truncate_lock);
+ /* Zeroing to allow iput() even if partial initialized inode. */
+ ei->mmu_private = 0;
+ ei->i_start = 0;
+ ei->i_logstart = 0;
+ ei->i_attrs = 0;
+ ei->i_pos = 0;
+
return &ei->vfs_inode;
}
@@ -1374,16 +1381,6 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
return 0;
}
-static void fat_dummy_inode_init(struct inode *inode)
-{
- /* Initialize this dummy inode to work as no-op. */
- MSDOS_I(inode)->mmu_private = 0;
- MSDOS_I(inode)->i_start = 0;
- MSDOS_I(inode)->i_logstart = 0;
- MSDOS_I(inode)->i_attrs = 0;
- MSDOS_I(inode)->i_pos = 0;
-}
-
static int fat_read_root(struct inode *inode)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
@@ -1844,13 +1841,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
fat_inode = new_inode(sb);
if (!fat_inode)
goto out_fail;
- fat_dummy_inode_init(fat_inode);
sbi->fat_inode = fat_inode;
fsinfo_inode = new_inode(sb);
if (!fsinfo_inode)
goto out_fail;
- fat_dummy_inode_init(fsinfo_inode);
fsinfo_inode->i_ino = MSDOS_FSINFO_INO;
sbi->fsinfo_inode = fsinfo_inode;
insert_inode_hash(fsinfo_inode);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 9bc1675..2e4c0fa 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -735,8 +735,9 @@ static void send_sigio_to_task(struct task_struct *p,
return;
switch (signum) {
- kernel_siginfo_t si;
- default:
+ default: {
+ kernel_siginfo_t si;
+
/* Queue a rt signal with the appropriate fd as its
value. We use SI_SIGIO as the source, not
SI_KERNEL, since kernel signals always get
@@ -769,6 +770,7 @@ static void send_sigio_to_task(struct task_struct *p,
si.si_fd = fd;
if (!do_send_sig_info(signum, &si, p, type))
break;
+ }
/* fall-through - fall back on the old plain SIGIO signal */
case 0:
do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
diff --git a/fs/file.c b/fs/file.c
index a364e1a..c8a4e4c 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -540,9 +540,14 @@ static int alloc_fd(unsigned start, unsigned flags)
return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
}
+int __get_unused_fd_flags(unsigned flags, unsigned long nofile)
+{
+ return __alloc_fd(current->files, 0, nofile, flags);
+}
+
int get_unused_fd_flags(unsigned flags)
{
- return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
+ return __get_unused_fd_flags(flags, rlimit(RLIMIT_NOFILE));
}
EXPORT_SYMBOL(get_unused_fd_flags);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8e02d76..97eec75 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -276,12 +276,10 @@ static void flush_bg_queue(struct fuse_conn *fc)
void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req)
{
struct fuse_iqueue *fiq = &fc->iq;
- bool async;
if (test_and_set_bit(FR_FINISHED, &req->flags))
goto put_request;
- async = req->args->end;
/*
* test_and_set_bit() implies smp_mb() between bit
* changing and below intr_entry check. Pairs with
@@ -324,7 +322,7 @@ void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req)
wake_up(&req->waitq);
}
- if (async)
+ if (test_bit(FR_ASYNC, &req->flags))
req->args->end(fc, req->args, req->out.h.error);
put_request:
fuse_put_request(fc, req);
@@ -471,6 +469,8 @@ static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args)
req->in.h.opcode = args->opcode;
req->in.h.nodeid = args->nodeid;
req->args = args;
+ if (args->end)
+ __set_bit(FR_ASYNC, &req->flags);
}
ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index aa75e23..ca344bf 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -301,6 +301,7 @@ struct fuse_io_priv {
* FR_SENT: request is in userspace, waiting for an answer
* FR_FINISHED: request is finished
* FR_PRIVATE: request is on private list
+ * FR_ASYNC: request is asynchronous
*/
enum fuse_req_flag {
FR_ISREPLY,
@@ -314,6 +315,7 @@ enum fuse_req_flag {
FR_SENT,
FR_FINISHED,
FR_PRIVATE,
+ FR_ASYNC,
};
/**
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2716d56..8294851 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1248,7 +1248,7 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
if (!(file->f_mode & FMODE_OPENED))
return finish_no_open(file, d);
dput(d);
- return 0;
+ return excl && (flags & O_CREAT) ? -EEXIST : 0;
}
BUG_ON(d != NULL);
diff --git a/fs/inode.c b/fs/inode.c
index 7d57068..93d9252 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -138,6 +138,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_sb = sb;
inode->i_blkbits = sb->s_blocksize_bits;
inode->i_flags = 0;
+ atomic64_set(&inode->i_sequence, 0);
atomic_set(&inode->i_count, 1);
inode->i_op = &empty_iops;
inode->i_fop = &no_open_fops;
diff --git a/fs/internal.h b/fs/internal.h
index f3f280b..4d37912 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -38,7 +38,6 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
/*
* buffer.c
*/
-extern void guard_bio_eod(struct bio *bio);
extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block, struct iomap *iomap);
diff --git a/fs/io-wq.c b/fs/io-wq.c
index cb60a42b..cc5cf22 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/rculist_nulls.h>
+#include <linux/fs_struct.h>
#include "io-wq.h"
@@ -59,6 +60,7 @@ struct io_worker {
const struct cred *cur_creds;
const struct cred *saved_creds;
struct files_struct *restore_files;
+ struct fs_struct *restore_fs;
};
#if BITS_PER_LONG == 64
@@ -67,6 +69,8 @@ struct io_worker {
#define IO_WQ_HASH_ORDER 5
#endif
+#define IO_WQ_NR_HASH_BUCKETS (1u << IO_WQ_HASH_ORDER)
+
struct io_wqe_acct {
unsigned nr_workers;
unsigned max_workers;
@@ -96,6 +100,7 @@ struct io_wqe {
struct list_head all_list;
struct io_wq *wq;
+ struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];
};
/*
@@ -105,8 +110,7 @@ struct io_wq {
struct io_wqe **wqes;
unsigned long state;
- get_work_fn *get_work;
- put_work_fn *put_work;
+ free_work_fn *free_work;
struct task_struct *manager;
struct user_struct *user;
@@ -151,6 +155,9 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
task_unlock(current);
}
+ if (current->fs != worker->restore_fs)
+ current->fs = worker->restore_fs;
+
/*
* If we have an active mm, we need to drop the wq lock before unusing
* it. If we do, return true and let the caller retry the idle loop.
@@ -311,6 +318,7 @@ static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker)
worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
worker->restore_files = current->files;
+ worker->restore_fs = current->fs;
io_wqe_inc_running(wqe, worker);
}
@@ -370,26 +378,35 @@ static bool __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
return __io_worker_unuse(wqe, worker);
}
-static struct io_wq_work *io_get_next_work(struct io_wqe *wqe, unsigned *hash)
+static inline unsigned int io_get_work_hash(struct io_wq_work *work)
+{
+ return work->flags >> IO_WQ_HASH_SHIFT;
+}
+
+static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
__must_hold(wqe->lock)
{
struct io_wq_work_node *node, *prev;
- struct io_wq_work *work;
+ struct io_wq_work *work, *tail;
+ unsigned int hash;
wq_list_for_each(node, prev, &wqe->work_list) {
work = container_of(node, struct io_wq_work, list);
/* not hashed, can run anytime */
- if (!(work->flags & IO_WQ_WORK_HASHED)) {
- wq_node_del(&wqe->work_list, node, prev);
+ if (!io_wq_is_hashed(work)) {
+ wq_list_del(&wqe->work_list, node, prev);
return work;
}
/* hashed, can run if not already running */
- *hash = work->flags >> IO_WQ_HASH_SHIFT;
- if (!(wqe->hash_map & BIT_ULL(*hash))) {
- wqe->hash_map |= BIT_ULL(*hash);
- wq_node_del(&wqe->work_list, node, prev);
+ hash = io_get_work_hash(work);
+ if (!(wqe->hash_map & BIT(hash))) {
+ wqe->hash_map |= BIT(hash);
+ /* all items with this hash lie in [work, tail] */
+ tail = wqe->hash_tail[hash];
+ wqe->hash_tail[hash] = NULL;
+ wq_list_cut(&wqe->work_list, &tail->list, prev);
return work;
}
}
@@ -434,16 +451,49 @@ static void io_wq_switch_creds(struct io_worker *worker,
worker->saved_creds = old_creds;
}
+static void io_impersonate_work(struct io_worker *worker,
+ struct io_wq_work *work)
+{
+ if (work->files && current->files != work->files) {
+ task_lock(current);
+ current->files = work->files;
+ task_unlock(current);
+ }
+ if (work->fs && current->fs != work->fs)
+ current->fs = work->fs;
+ if (work->mm != worker->mm)
+ io_wq_switch_mm(worker, work);
+ if (worker->cur_creds != work->creds)
+ io_wq_switch_creds(worker, work);
+}
+
+static void io_assign_current_work(struct io_worker *worker,
+ struct io_wq_work *work)
+{
+ if (work) {
+ /* flush pending signals before assigning new work */
+ if (signal_pending(current))
+ flush_signals(current);
+ cond_resched();
+ }
+
+ spin_lock_irq(&worker->lock);
+ worker->cur_work = work;
+ spin_unlock_irq(&worker->lock);
+}
+
+static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
+
static void io_worker_handle_work(struct io_worker *worker)
__releases(wqe->lock)
{
- struct io_wq_work *work, *old_work = NULL, *put_work = NULL;
struct io_wqe *wqe = worker->wqe;
struct io_wq *wq = wqe->wq;
do {
- unsigned hash = -1U;
-
+ struct io_wq_work *work;
+ unsigned int hash;
+get_next:
/*
* If we got some work, mark us as busy. If we didn't, but
* the list isn't empty, it means we stalled on hashed work.
@@ -451,118 +501,80 @@ static void io_worker_handle_work(struct io_worker *worker)
* can't make progress, any work completion or insertion will
* clear the stalled flag.
*/
- work = io_get_next_work(wqe, &hash);
+ work = io_get_next_work(wqe);
if (work)
__io_worker_busy(wqe, worker, work);
else if (!wq_list_empty(&wqe->work_list))
wqe->flags |= IO_WQE_FLAG_STALLED;
spin_unlock_irq(&wqe->lock);
- if (put_work && wq->put_work)
- wq->put_work(old_work);
if (!work)
break;
-next:
- /* flush any pending signals before assigning new work */
- if (signal_pending(current))
- flush_signals(current);
+ io_assign_current_work(worker, work);
- cond_resched();
+ /* handle a whole dependent link */
+ do {
+ struct io_wq_work *old_work, *next_hashed, *linked;
- spin_lock_irq(&worker->lock);
- worker->cur_work = work;
- spin_unlock_irq(&worker->lock);
+ next_hashed = wq_next_work(work);
+ io_impersonate_work(worker, work);
+ /*
+ * OK to set IO_WQ_WORK_CANCEL even for uncancellable
+ * work, the worker function will do the right thing.
+ */
+ if (test_bit(IO_WQ_BIT_CANCEL, &wq->state))
+ work->flags |= IO_WQ_WORK_CANCEL;
- if (work->flags & IO_WQ_WORK_CB)
- work->func(&work);
+ hash = io_get_work_hash(work);
+ linked = old_work = work;
+ linked->func(&linked);
+ linked = (old_work == linked) ? NULL : linked;
- if (work->files && current->files != work->files) {
- task_lock(current);
- current->files = work->files;
- task_unlock(current);
- }
- if (work->mm != worker->mm)
- io_wq_switch_mm(worker, work);
- if (worker->cur_creds != work->creds)
- io_wq_switch_creds(worker, work);
- /*
- * OK to set IO_WQ_WORK_CANCEL even for uncancellable work,
- * the worker function will do the right thing.
- */
- if (test_bit(IO_WQ_BIT_CANCEL, &wq->state))
- work->flags |= IO_WQ_WORK_CANCEL;
- if (worker->mm)
- work->flags |= IO_WQ_WORK_HAS_MM;
+ work = next_hashed;
+ if (!work && linked && !io_wq_is_hashed(linked)) {
+ work = linked;
+ linked = NULL;
+ }
+ io_assign_current_work(worker, work);
+ wq->free_work(old_work);
- if (wq->get_work && !(work->flags & IO_WQ_WORK_INTERNAL)) {
- put_work = work;
- wq->get_work(work);
- }
+ if (linked)
+ io_wqe_enqueue(wqe, linked);
- old_work = work;
- work->func(&work);
-
- spin_lock_irq(&worker->lock);
- worker->cur_work = NULL;
- spin_unlock_irq(&worker->lock);
+ if (hash != -1U && !next_hashed) {
+ spin_lock_irq(&wqe->lock);
+ wqe->hash_map &= ~BIT_ULL(hash);
+ wqe->flags &= ~IO_WQE_FLAG_STALLED;
+ /* dependent work is not hashed */
+ hash = -1U;
+ /* skip unnecessary unlock-lock wqe->lock */
+ if (!work)
+ goto get_next;
+ spin_unlock_irq(&wqe->lock);
+ }
+ } while (work);
spin_lock_irq(&wqe->lock);
-
- if (hash != -1U) {
- wqe->hash_map &= ~BIT_ULL(hash);
- wqe->flags &= ~IO_WQE_FLAG_STALLED;
- }
- if (work && work != old_work) {
- spin_unlock_irq(&wqe->lock);
-
- if (put_work && wq->put_work) {
- wq->put_work(put_work);
- put_work = NULL;
- }
-
- /* dependent work not hashed */
- hash = -1U;
- goto next;
- }
} while (1);
}
-static inline void io_worker_spin_for_work(struct io_wqe *wqe)
-{
- int i = 0;
-
- while (++i < 1000) {
- if (io_wqe_run_queue(wqe))
- break;
- if (need_resched())
- break;
- cpu_relax();
- }
-}
-
static int io_wqe_worker(void *data)
{
struct io_worker *worker = data;
struct io_wqe *wqe = worker->wqe;
struct io_wq *wq = wqe->wq;
- bool did_work;
io_worker_start(wqe, worker);
- did_work = false;
while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
set_current_state(TASK_INTERRUPTIBLE);
loop:
- if (did_work)
- io_worker_spin_for_work(wqe);
spin_lock_irq(&wqe->lock);
if (io_wqe_run_queue(wqe)) {
__set_current_state(TASK_RUNNING);
io_worker_handle_work(worker);
- did_work = true;
goto loop;
}
- did_work = false;
/* drops the lock on success, retry */
if (__io_worker_idle(wqe, worker)) {
__release(&wqe->lock);
@@ -691,11 +703,16 @@ static int io_wq_manager(void *data)
/* create fixed workers */
refcount_set(&wq->refs, workers_to_create);
for_each_node(node) {
+ if (!node_online(node))
+ continue;
if (!create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND))
goto err;
workers_to_create--;
}
+ while (workers_to_create--)
+ refcount_dec(&wq->refs);
+
complete(&wq->done);
while (!kthread_should_stop()) {
@@ -703,6 +720,9 @@ static int io_wq_manager(void *data)
struct io_wqe *wqe = wq->wqes[node];
bool fork_worker[2] = { false, false };
+ if (!node_online(node))
+ continue;
+
spin_lock_irq(&wqe->lock);
if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND))
fork_worker[IO_WQ_ACCT_BOUND] = true;
@@ -750,6 +770,40 @@ static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct,
return true;
}
+static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
+{
+ struct io_wq *wq = wqe->wq;
+
+ do {
+ struct io_wq_work *old_work = work;
+
+ work->flags |= IO_WQ_WORK_CANCEL;
+ work->func(&work);
+ work = (work == old_work) ? NULL : work;
+ wq->free_work(old_work);
+ } while (work);
+}
+
+static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
+{
+ unsigned int hash;
+ struct io_wq_work *tail;
+
+ if (!io_wq_is_hashed(work)) {
+append:
+ wq_list_add_tail(&work->list, &wqe->work_list);
+ return;
+ }
+
+ hash = io_get_work_hash(work);
+ tail = wqe->hash_tail[hash];
+ wqe->hash_tail[hash] = work;
+ if (!tail)
+ goto append;
+
+ wq_list_add_after(&work->list, &tail->list, &wqe->work_list);
+}
+
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
{
struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
@@ -763,14 +817,13 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
* It's close enough to not be an issue, fork() has the same delay.
*/
if (unlikely(!io_wq_can_queue(wqe, acct, work))) {
- work->flags |= IO_WQ_WORK_CANCEL;
- work->func(&work);
+ io_run_cancel(work, wqe);
return;
}
work_flags = work->flags;
spin_lock_irqsave(&wqe->lock, flags);
- wq_list_add_tail(&work->list, &wqe->work_list);
+ io_wqe_insert_work(wqe, work);
wqe->flags &= ~IO_WQE_FLAG_STALLED;
spin_unlock_irqrestore(&wqe->lock, flags);
@@ -787,19 +840,15 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
}
/*
- * Enqueue work, hashed by some key. Work items that hash to the same value
- * will not be done in parallel. Used to limit concurrent writes, generally
- * hashed by inode.
+ * Work items that hash to the same value will not be done in parallel.
+ * Used to limit concurrent writes, generally hashed by inode.
*/
-void io_wq_enqueue_hashed(struct io_wq *wq, struct io_wq_work *work, void *val)
+void io_wq_hash_work(struct io_wq_work *work, void *val)
{
- struct io_wqe *wqe = wq->wqes[numa_node_id()];
- unsigned bit;
-
+ unsigned int bit;
bit = hash_ptr(val, IO_WQ_HASH_ORDER);
work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
- io_wqe_enqueue(wqe, work);
}
static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data)
@@ -821,7 +870,9 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe,
list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
if (io_worker_get(worker)) {
- ret = func(worker, data);
+ /* no task if node is/was offline */
+ if (worker->task)
+ ret = func(worker, data);
io_worker_release(worker);
if (ret)
break;
@@ -847,14 +898,13 @@ void io_wq_cancel_all(struct io_wq *wq)
}
struct io_cb_cancel_data {
- struct io_wqe *wqe;
- work_cancel_fn *cancel;
- void *caller_data;
+ work_cancel_fn *fn;
+ void *data;
};
-static bool io_work_cancel(struct io_worker *worker, void *cancel_data)
+static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
{
- struct io_cb_cancel_data *data = cancel_data;
+ struct io_cb_cancel_data *match = data;
unsigned long flags;
bool ret = false;
@@ -865,82 +915,7 @@ static bool io_work_cancel(struct io_worker *worker, void *cancel_data)
spin_lock_irqsave(&worker->lock, flags);
if (worker->cur_work &&
!(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL) &&
- data->cancel(worker->cur_work, data->caller_data)) {
- send_sig(SIGINT, worker->task, 1);
- ret = true;
- }
- spin_unlock_irqrestore(&worker->lock, flags);
-
- return ret;
-}
-
-static enum io_wq_cancel io_wqe_cancel_cb_work(struct io_wqe *wqe,
- work_cancel_fn *cancel,
- void *cancel_data)
-{
- struct io_cb_cancel_data data = {
- .wqe = wqe,
- .cancel = cancel,
- .caller_data = cancel_data,
- };
- struct io_wq_work_node *node, *prev;
- struct io_wq_work *work;
- unsigned long flags;
- bool found = false;
-
- spin_lock_irqsave(&wqe->lock, flags);
- wq_list_for_each(node, prev, &wqe->work_list) {
- work = container_of(node, struct io_wq_work, list);
-
- if (cancel(work, cancel_data)) {
- wq_node_del(&wqe->work_list, node, prev);
- found = true;
- break;
- }
- }
- spin_unlock_irqrestore(&wqe->lock, flags);
-
- if (found) {
- work->flags |= IO_WQ_WORK_CANCEL;
- work->func(&work);
- return IO_WQ_CANCEL_OK;
- }
-
- rcu_read_lock();
- found = io_wq_for_each_worker(wqe, io_work_cancel, &data);
- rcu_read_unlock();
- return found ? IO_WQ_CANCEL_RUNNING : IO_WQ_CANCEL_NOTFOUND;
-}
-
-enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
- void *data)
-{
- enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
- int node;
-
- for_each_node(node) {
- struct io_wqe *wqe = wq->wqes[node];
-
- ret = io_wqe_cancel_cb_work(wqe, cancel, data);
- if (ret != IO_WQ_CANCEL_NOTFOUND)
- break;
- }
-
- return ret;
-}
-
-static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
-{
- struct io_wq_work *work = data;
- unsigned long flags;
- bool ret = false;
-
- if (worker->cur_work != work)
- return false;
-
- spin_lock_irqsave(&worker->lock, flags);
- if (worker->cur_work == work &&
- !(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL)) {
+ match->fn(worker->cur_work, match->data)) {
send_sig(SIGINT, worker->task, 1);
ret = true;
}
@@ -950,15 +925,13 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
}
static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
- struct io_wq_work *cwork)
+ struct io_cb_cancel_data *match)
{
struct io_wq_work_node *node, *prev;
struct io_wq_work *work;
unsigned long flags;
bool found = false;
- cwork->flags |= IO_WQ_WORK_CANCEL;
-
/*
* First check pending list, if we're lucky we can just remove it
* from there. CANCEL_OK means that the work is returned as-new,
@@ -968,8 +941,8 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
wq_list_for_each(node, prev, &wqe->work_list) {
work = container_of(node, struct io_wq_work, list);
- if (work == cwork) {
- wq_node_del(&wqe->work_list, node, prev);
+ if (match->fn(work, match->data)) {
+ wq_list_del(&wqe->work_list, node, prev);
found = true;
break;
}
@@ -977,8 +950,7 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
spin_unlock_irqrestore(&wqe->lock, flags);
if (found) {
- work->flags |= IO_WQ_WORK_CANCEL;
- work->func(&work);
+ io_run_cancel(work, wqe);
return IO_WQ_CANCEL_OK;
}
@@ -989,20 +961,25 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
* completion will run normally in this case.
*/
rcu_read_lock();
- found = io_wq_for_each_worker(wqe, io_wq_worker_cancel, cwork);
+ found = io_wq_for_each_worker(wqe, io_wq_worker_cancel, match);
rcu_read_unlock();
return found ? IO_WQ_CANCEL_RUNNING : IO_WQ_CANCEL_NOTFOUND;
}
-enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork)
+enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
+ void *data)
{
+ struct io_cb_cancel_data match = {
+ .fn = cancel,
+ .data = data,
+ };
enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
int node;
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
- ret = io_wqe_cancel_work(wqe, cwork);
+ ret = io_wqe_cancel_work(wqe, &match);
if (ret != IO_WQ_CANCEL_NOTFOUND)
break;
}
@@ -1010,38 +987,28 @@ enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork)
return ret;
}
-struct io_wq_flush_data {
- struct io_wq_work work;
- struct completion done;
-};
-
-static void io_wq_flush_func(struct io_wq_work **workptr)
+static bool io_wq_io_cb_cancel_data(struct io_wq_work *work, void *data)
{
- struct io_wq_work *work = *workptr;
- struct io_wq_flush_data *data;
-
- data = container_of(work, struct io_wq_flush_data, work);
- complete(&data->done);
+ return work == data;
}
-/*
- * Doesn't wait for previously queued work to finish. When this completes,
- * it just means that previously queued work was started.
- */
-void io_wq_flush(struct io_wq *wq)
+enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork)
{
- struct io_wq_flush_data data;
- int node;
+ return io_wq_cancel_cb(wq, io_wq_io_cb_cancel_data, (void *)cwork);
+}
- for_each_node(node) {
- struct io_wqe *wqe = wq->wqes[node];
+static bool io_wq_pid_match(struct io_wq_work *work, void *data)
+{
+ pid_t pid = (pid_t) (unsigned long) data;
- init_completion(&data.done);
- INIT_IO_WORK(&data.work, io_wq_flush_func);
- data.work.flags |= IO_WQ_WORK_INTERNAL;
- io_wqe_enqueue(wqe, &data.work);
- wait_for_completion(&data.done);
- }
+ return work->task_pid == pid;
+}
+
+enum io_wq_cancel io_wq_cancel_pid(struct io_wq *wq, pid_t pid)
+{
+ void *data = (void *) (unsigned long) pid;
+
+ return io_wq_cancel_cb(wq, io_wq_pid_match, data);
}
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
@@ -1049,6 +1016,9 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
int ret = -ENOMEM, node;
struct io_wq *wq;
+ if (WARN_ON_ONCE(!data->free_work))
+ return ERR_PTR(-EINVAL);
+
wq = kzalloc(sizeof(*wq), GFP_KERNEL);
if (!wq)
return ERR_PTR(-ENOMEM);
@@ -1059,20 +1029,22 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
return ERR_PTR(-ENOMEM);
}
- wq->get_work = data->get_work;
- wq->put_work = data->put_work;
+ wq->free_work = data->free_work;
/* caller must already hold a reference to this */
wq->user = data->user;
for_each_node(node) {
struct io_wqe *wqe;
+ int alloc_node = node;
- wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, node);
+ if (!node_online(alloc_node))
+ alloc_node = NUMA_NO_NODE;
+ wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node);
if (!wqe)
goto err;
wq->wqes[node] = wqe;
- wqe->node = node;
+ wqe->node = alloc_node;
wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
if (wq->user) {
@@ -1080,7 +1052,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
task_rlimit(current, RLIMIT_NPROC);
}
atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
- wqe->node = node;
wqe->wq = wq;
spin_lock_init(&wqe->lock);
INIT_WQ_LIST(&wqe->work_list);
@@ -1115,7 +1086,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
bool io_wq_get(struct io_wq *wq, struct io_wq_data *data)
{
- if (data->get_work != wq->get_work || data->put_work != wq->put_work)
+ if (data->free_work != wq->free_work)
return false;
return refcount_inc_not_zero(&wq->use_refs);
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 50b3378..3ee7356 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -5,11 +5,8 @@ struct io_wq;
enum {
IO_WQ_WORK_CANCEL = 1,
- IO_WQ_WORK_HAS_MM = 2,
IO_WQ_WORK_HASHED = 4,
IO_WQ_WORK_UNBOUND = 32,
- IO_WQ_WORK_INTERNAL = 64,
- IO_WQ_WORK_CB = 128,
IO_WQ_WORK_NO_CANCEL = 256,
IO_WQ_WORK_CONCURRENT = 512,
@@ -31,6 +28,18 @@ struct io_wq_work_list {
struct io_wq_work_node *last;
};
+static inline void wq_list_add_after(struct io_wq_work_node *node,
+ struct io_wq_work_node *pos,
+ struct io_wq_work_list *list)
+{
+ struct io_wq_work_node *next = pos->next;
+
+ pos->next = node;
+ node->next = next;
+ if (!next)
+ list->last = node;
+}
+
static inline void wq_list_add_tail(struct io_wq_work_node *node,
struct io_wq_work_list *list)
{
@@ -43,17 +52,26 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node,
}
}
-static inline void wq_node_del(struct io_wq_work_list *list,
+static inline void wq_list_cut(struct io_wq_work_list *list,
+ struct io_wq_work_node *last,
+ struct io_wq_work_node *prev)
+{
+ /* first in the list, if prev==NULL */
+ if (!prev)
+ WRITE_ONCE(list->first, last->next);
+ else
+ prev->next = last->next;
+
+ if (last == list->last)
+ list->last = prev;
+ last->next = NULL;
+}
+
+static inline void wq_list_del(struct io_wq_work_list *list,
struct io_wq_work_node *node,
struct io_wq_work_node *prev)
{
- if (node == list->first)
- WRITE_ONCE(list->first, node->next);
- if (node == list->last)
- list->last = prev;
- if (prev)
- prev->next = node->next;
- node->next = NULL;
+ wq_list_cut(list, node, prev);
}
#define wq_list_for_each(pos, prv, head) \
@@ -66,35 +84,35 @@ static inline void wq_node_del(struct io_wq_work_list *list,
} while (0)
struct io_wq_work {
- union {
- struct io_wq_work_node list;
- void *data;
- };
+ struct io_wq_work_node list;
void (*func)(struct io_wq_work **);
struct files_struct *files;
struct mm_struct *mm;
const struct cred *creds;
+ struct fs_struct *fs;
unsigned flags;
+ pid_t task_pid;
};
-#define INIT_IO_WORK(work, _func) \
- do { \
- (work)->list.next = NULL; \
- (work)->func = _func; \
- (work)->flags = 0; \
- (work)->files = NULL; \
- (work)->mm = NULL; \
- (work)->creds = NULL; \
- } while (0) \
+#define INIT_IO_WORK(work, _func) \
+ do { \
+ *(work) = (struct io_wq_work){ .func = _func }; \
+ } while (0) \
-typedef void (get_work_fn)(struct io_wq_work *);
-typedef void (put_work_fn)(struct io_wq_work *);
+static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
+{
+ if (!work->list.next)
+ return NULL;
+
+ return container_of(work->list.next, struct io_wq_work, list);
+}
+
+typedef void (free_work_fn)(struct io_wq_work *);
struct io_wq_data {
struct user_struct *user;
- get_work_fn *get_work;
- put_work_fn *put_work;
+ free_work_fn *free_work;
};
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
@@ -102,11 +120,16 @@ bool io_wq_get(struct io_wq *wq, struct io_wq_data *data);
void io_wq_destroy(struct io_wq *wq);
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
-void io_wq_enqueue_hashed(struct io_wq *wq, struct io_wq_work *work, void *val);
-void io_wq_flush(struct io_wq *wq);
+void io_wq_hash_work(struct io_wq_work *work, void *val);
+
+static inline bool io_wq_is_hashed(struct io_wq_work *work)
+{
+ return work->flags & IO_WQ_WORK_HASHED;
+}
void io_wq_cancel_all(struct io_wq *wq);
enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork);
+enum io_wq_cancel io_wq_cancel_pid(struct io_wq *wq, pid_t pid);
typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 77f22c3..358f97b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -44,6 +44,7 @@
#include <linux/errno.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
+#include <net/compat.h>
#include <linux/refcount.h>
#include <linux/uio.h>
#include <linux/bits.h>
@@ -75,6 +76,9 @@
#include <linux/fsnotify.h>
#include <linux/fadvise.h>
#include <linux/eventpoll.h>
+#include <linux/fs_struct.h>
+#include <linux/splice.h>
+#include <linux/task_work.h>
#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -182,21 +186,23 @@ struct fixed_file_table {
struct file **files;
};
-enum {
- FFD_F_ATOMIC,
-};
-
struct fixed_file_data {
struct fixed_file_table *table;
struct io_ring_ctx *ctx;
struct percpu_ref refs;
struct llist_head put_llist;
- unsigned long state;
struct work_struct ref_work;
struct completion done;
};
+struct io_buffer {
+ struct list_head list;
+ __u64 addr;
+ __s32 len;
+ __u16 bid;
+};
+
struct io_ring_ctx {
struct {
struct percpu_ref refs;
@@ -204,11 +210,11 @@ struct io_ring_ctx {
struct {
unsigned int flags;
- int compat: 1;
- int account_mem: 1;
- int cq_overflow_flushed: 1;
- int drain_next: 1;
- int eventfd_async: 1;
+ unsigned int compat: 1;
+ unsigned int account_mem: 1;
+ unsigned int cq_overflow_flushed: 1;
+ unsigned int drain_next: 1;
+ unsigned int eventfd_async: 1;
/*
* Ring buffer of indices into array of io_uring_sqe, which is
@@ -274,6 +280,8 @@ struct io_ring_ctx {
struct socket *ring_sock;
#endif
+ struct idr io_buffer_idr;
+
struct idr personality_idr;
struct {
@@ -294,7 +302,6 @@ struct io_ring_ctx {
struct {
spinlock_t completion_lock;
- struct llist_head poll_llist;
/*
* ->poll_list is protected by the ctx->uring_lock for
@@ -347,6 +354,7 @@ struct io_accept {
struct sockaddr __user *addr;
int __user *addr_len;
int flags;
+ unsigned long nofile;
};
struct io_sync {
@@ -389,7 +397,9 @@ struct io_sr_msg {
void __user *buf;
};
int msg_flags;
+ int bgid;
size_t len;
+ struct io_buffer *kbuf;
};
struct io_open {
@@ -401,6 +411,7 @@ struct io_open {
struct filename *filename;
struct statx __user *buffer;
struct open_how how;
+ unsigned long nofile;
};
struct io_files_update {
@@ -432,6 +443,24 @@ struct io_epoll {
struct epoll_event event;
};
+struct io_splice {
+ struct file *file_out;
+ struct file *file_in;
+ loff_t off_out;
+ loff_t off_in;
+ u64 len;
+ unsigned int flags;
+};
+
+struct io_provide_buf {
+ struct file *file;
+ __u64 addr;
+ __s32 len;
+ __u32 bgid;
+ __u16 nbufs;
+ __u16 bid;
+};
+
struct io_async_connect {
struct sockaddr_storage address;
};
@@ -441,6 +470,7 @@ struct io_async_msghdr {
struct iovec *iov;
struct sockaddr __user *uaddr;
struct msghdr msg;
+ struct sockaddr_storage addr;
};
struct io_async_rw {
@@ -450,17 +480,12 @@ struct io_async_rw {
ssize_t size;
};
-struct io_async_open {
- struct filename *filename;
-};
-
struct io_async_ctx {
union {
struct io_async_rw rw;
struct io_async_msghdr msg;
struct io_async_connect connect;
struct io_timeout_data timeout;
- struct io_async_open open;
};
};
@@ -470,6 +495,7 @@ enum {
REQ_F_LINK_BIT = IOSQE_IO_LINK_BIT,
REQ_F_HARDLINK_BIT = IOSQE_IO_HARDLINK_BIT,
REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT,
+ REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT,
REQ_F_LINK_NEXT_BIT,
REQ_F_FAIL_LINK_BIT,
@@ -483,6 +509,13 @@ enum {
REQ_F_MUST_PUNT_BIT,
REQ_F_TIMEOUT_NOSEQ_BIT,
REQ_F_COMP_LOCKED_BIT,
+ REQ_F_NEED_CLEANUP_BIT,
+ REQ_F_OVERFLOW_BIT,
+ REQ_F_POLLED_BIT,
+ REQ_F_BUFFER_SELECTED_BIT,
+
+ /* not a real bit, just to check we're not overflowing the space */
+ __REQ_F_LAST_BIT,
};
enum {
@@ -496,6 +529,8 @@ enum {
REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT),
/* IOSQE_ASYNC */
REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT),
+ /* IOSQE_BUFFER_SELECT */
+ REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT),
/* already grabbed next link */
REQ_F_LINK_NEXT = BIT(REQ_F_LINK_NEXT_BIT),
@@ -521,6 +556,19 @@ enum {
REQ_F_TIMEOUT_NOSEQ = BIT(REQ_F_TIMEOUT_NOSEQ_BIT),
/* completion under lock */
REQ_F_COMP_LOCKED = BIT(REQ_F_COMP_LOCKED_BIT),
+ /* needs cleanup */
+ REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT),
+ /* in overflow list */
+ REQ_F_OVERFLOW = BIT(REQ_F_OVERFLOW_BIT),
+ /* already went through poll handler */
+ REQ_F_POLLED = BIT(REQ_F_POLLED_BIT),
+ /* buffer already selected */
+ REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT),
+};
+
+struct async_poll {
+ struct io_poll_iocb poll;
+ struct io_wq_work work;
};
/*
@@ -546,33 +594,45 @@ struct io_kiocb {
struct io_fadvise fadvise;
struct io_madvise madvise;
struct io_epoll epoll;
+ struct io_splice splice;
+ struct io_provide_buf pbuf;
};
struct io_async_ctx *io;
- /*
- * llist_node is only used for poll deferred completions
- */
- struct llist_node llist_node;
- bool has_user;
- bool in_async;
bool needs_fixed_file;
u8 opcode;
struct io_ring_ctx *ctx;
- union {
- struct list_head list;
- struct hlist_node hash_node;
- };
- struct list_head link_list;
+ struct list_head list;
unsigned int flags;
refcount_t refs;
+ union {
+ struct task_struct *task;
+ unsigned long fsize;
+ };
u64 user_data;
u32 result;
u32 sequence;
+ struct list_head link_list;
+
struct list_head inflight_entry;
- struct io_wq_work work;
+ union {
+ /*
+ * Only commands that never go async can use the below fields,
+ * obviously. Right now only IORING_OP_POLL_ADD uses them, and
+ * async armed poll handlers for regular commands. The latter
+ * restore the work, if needed.
+ */
+ struct {
+ struct callback_head task_work;
+ struct hlist_node hash_node;
+ struct async_poll *apoll;
+ int cflags;
+ };
+ struct io_wq_work work;
+ };
};
#define IO_PLUG_THRESHOLD 2
@@ -614,6 +674,13 @@ struct io_op_def {
unsigned not_supported : 1;
/* needs file table */
unsigned file_table : 1;
+ /* needs ->fs */
+ unsigned needs_fs : 1;
+ /* set if opcode supports polled "wait" */
+ unsigned pollin : 1;
+ unsigned pollout : 1;
+ /* op supports buffer selection */
+ unsigned buffer_select : 1;
};
static const struct io_op_def io_op_defs[] = {
@@ -623,6 +690,8 @@ static const struct io_op_def io_op_defs[] = {
.needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
+ .pollin = 1,
+ .buffer_select = 1,
},
[IORING_OP_WRITEV] = {
.async_ctx = 1,
@@ -630,6 +699,7 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1,
.hash_reg_file = 1,
.unbound_nonreg_file = 1,
+ .pollout = 1,
},
[IORING_OP_FSYNC] = {
.needs_file = 1,
@@ -637,11 +707,13 @@ static const struct io_op_def io_op_defs[] = {
[IORING_OP_READ_FIXED] = {
.needs_file = 1,
.unbound_nonreg_file = 1,
+ .pollin = 1,
},
[IORING_OP_WRITE_FIXED] = {
.needs_file = 1,
.hash_reg_file = 1,
.unbound_nonreg_file = 1,
+ .pollout = 1,
},
[IORING_OP_POLL_ADD] = {
.needs_file = 1,
@@ -656,12 +728,17 @@ static const struct io_op_def io_op_defs[] = {
.needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
+ .needs_fs = 1,
+ .pollout = 1,
},
[IORING_OP_RECVMSG] = {
.async_ctx = 1,
.needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
+ .needs_fs = 1,
+ .pollin = 1,
+ .buffer_select = 1,
},
[IORING_OP_TIMEOUT] = {
.async_ctx = 1,
@@ -673,6 +750,7 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1,
.unbound_nonreg_file = 1,
.file_table = 1,
+ .pollin = 1,
},
[IORING_OP_ASYNC_CANCEL] = {},
[IORING_OP_LINK_TIMEOUT] = {
@@ -684,6 +762,7 @@ static const struct io_op_def io_op_defs[] = {
.needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
+ .pollout = 1,
},
[IORING_OP_FALLOCATE] = {
.needs_file = 1,
@@ -692,6 +771,7 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1,
.fd_non_neg = 1,
.file_table = 1,
+ .needs_fs = 1,
},
[IORING_OP_CLOSE] = {
.needs_file = 1,
@@ -705,16 +785,20 @@ static const struct io_op_def io_op_defs[] = {
.needs_mm = 1,
.needs_file = 1,
.fd_non_neg = 1,
+ .needs_fs = 1,
},
[IORING_OP_READ] = {
.needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
+ .pollin = 1,
+ .buffer_select = 1,
},
[IORING_OP_WRITE] = {
.needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
+ .pollout = 1,
},
[IORING_OP_FADVISE] = {
.needs_file = 1,
@@ -726,21 +810,32 @@ static const struct io_op_def io_op_defs[] = {
.needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
+ .pollout = 1,
},
[IORING_OP_RECV] = {
.needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
+ .pollin = 1,
+ .buffer_select = 1,
},
[IORING_OP_OPENAT2] = {
.needs_file = 1,
.fd_non_neg = 1,
.file_table = 1,
+ .needs_fs = 1,
},
[IORING_OP_EPOLL_CTL] = {
.unbound_nonreg_file = 1,
.file_table = 1,
},
+ [IORING_OP_SPLICE] = {
+ .needs_file = 1,
+ .hash_reg_file = 1,
+ .unbound_nonreg_file = 1,
+ },
+ [IORING_OP_PROVIDE_BUFFERS] = {},
+ [IORING_OP_REMOVE_BUFFERS] = {},
};
static void io_wq_submit_work(struct io_wq_work **workptr);
@@ -754,6 +849,11 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
unsigned nr_args);
static int io_grab_files(struct io_kiocb *req);
static void io_ring_file_ref_flush(struct fixed_file_data *data);
+static void io_cleanup_req(struct io_kiocb *req);
+static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
+ int fd, struct file **out_file, bool fixed);
+static void __io_queue_sqe(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe);
static struct kmem_cache *req_cachep;
@@ -820,11 +920,11 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_LIST_HEAD(&ctx->cq_overflow_list);
init_completion(&ctx->completions[0]);
init_completion(&ctx->completions[1]);
+ idr_init(&ctx->io_buffer_idr);
idr_init(&ctx->personality_idr);
mutex_init(&ctx->uring_lock);
init_waitqueue_head(&ctx->wait);
spin_lock_init(&ctx->completion_lock);
- init_llist_head(&ctx->poll_llist);
INIT_LIST_HEAD(&ctx->poll_list);
INIT_LIST_HEAD(&ctx->defer_list);
INIT_LIST_HEAD(&ctx->timeout_list);
@@ -909,6 +1009,18 @@ static inline void io_req_work_grab_env(struct io_kiocb *req,
}
if (!req->work.creds)
req->work.creds = get_current_cred();
+ if (!req->work.fs && def->needs_fs) {
+ spin_lock(¤t->fs->lock);
+ if (!current->fs->in_exec) {
+ req->work.fs = current->fs;
+ req->work.fs->users++;
+ } else {
+ req->work.flags |= IO_WQ_WORK_CANCEL;
+ }
+ spin_unlock(¤t->fs->lock);
+ }
+ if (!req->work.task_pid)
+ req->work.task_pid = task_pid_vnr(current);
}
static inline void io_req_work_drop_env(struct io_kiocb *req)
@@ -921,17 +1033,26 @@ static inline void io_req_work_drop_env(struct io_kiocb *req)
put_cred(req->work.creds);
req->work.creds = NULL;
}
+ if (req->work.fs) {
+ struct fs_struct *fs = req->work.fs;
+
+ spin_lock(&req->work.fs->lock);
+ if (--fs->users)
+ fs = NULL;
+ spin_unlock(&req->work.fs->lock);
+ if (fs)
+ free_fs_struct(fs);
+ }
}
-static inline bool io_prep_async_work(struct io_kiocb *req,
+static inline void io_prep_async_work(struct io_kiocb *req,
struct io_kiocb **link)
{
const struct io_op_def *def = &io_op_defs[req->opcode];
- bool do_hashed = false;
if (req->flags & REQ_F_ISREG) {
if (def->hash_reg_file)
- do_hashed = true;
+ io_wq_hash_work(&req->work, file_inode(req->file));
} else {
if (def->unbound_nonreg_file)
req->work.flags |= IO_WQ_WORK_UNBOUND;
@@ -940,25 +1061,18 @@ static inline bool io_prep_async_work(struct io_kiocb *req,
io_req_work_grab_env(req, def);
*link = io_prep_linked_timeout(req);
- return do_hashed;
}
static inline void io_queue_async_work(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *link;
- bool do_hashed;
- do_hashed = io_prep_async_work(req, &link);
+ io_prep_async_work(req, &link);
- trace_io_uring_queue_async_work(ctx, do_hashed, req, &req->work,
- req->flags);
- if (!do_hashed) {
- io_wq_enqueue(ctx->io_wq, &req->work);
- } else {
- io_wq_enqueue_hashed(ctx->io_wq, &req->work,
- file_inode(req->file));
- }
+ trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
+ &req->work, req->flags);
+ io_wq_enqueue(ctx->io_wq, &req->work);
if (link)
io_queue_linked_timeout(link);
@@ -972,6 +1086,7 @@ static void io_kill_timeout(struct io_kiocb *req)
if (ret != -1) {
atomic_inc(&req->ctx->cq_timeouts);
list_del_init(&req->list);
+ req->flags |= REQ_F_COMP_LOCKED;
io_cqring_fill_event(req, 0);
io_put_req(req);
}
@@ -1024,24 +1139,19 @@ static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
return false;
if (!ctx->eventfd_async)
return true;
- return io_wq_current_is_worker() || in_interrupt();
+ return io_wq_current_is_worker();
}
-static void __io_cqring_ev_posted(struct io_ring_ctx *ctx, bool trigger_ev)
+static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
{
if (waitqueue_active(&ctx->wait))
wake_up(&ctx->wait);
if (waitqueue_active(&ctx->sqo_wait))
wake_up(&ctx->sqo_wait);
- if (trigger_ev)
+ if (io_should_trigger_evfd(ctx))
eventfd_signal(ctx->cq_ev_fd, 1);
}
-static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
-{
- __io_cqring_ev_posted(ctx, io_should_trigger_evfd(ctx));
-}
-
/* Returns true if there are no backlogged entries after the flush */
static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
{
@@ -1074,10 +1184,11 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
req = list_first_entry(&ctx->cq_overflow_list, struct io_kiocb,
list);
list_move(&req->list, &list);
+ req->flags &= ~REQ_F_OVERFLOW;
if (cqe) {
WRITE_ONCE(cqe->user_data, req->user_data);
WRITE_ONCE(cqe->res, req->result);
- WRITE_ONCE(cqe->flags, 0);
+ WRITE_ONCE(cqe->flags, req->cflags);
} else {
WRITE_ONCE(ctx->rings->cq_overflow,
atomic_inc_return(&ctx->cached_cq_overflow));
@@ -1101,7 +1212,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
return cqe != NULL;
}
-static void io_cqring_fill_event(struct io_kiocb *req, long res)
+static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_uring_cqe *cqe;
@@ -1117,7 +1228,7 @@ static void io_cqring_fill_event(struct io_kiocb *req, long res)
if (likely(cqe)) {
WRITE_ONCE(cqe->user_data, req->user_data);
WRITE_ONCE(cqe->res, res);
- WRITE_ONCE(cqe->flags, 0);
+ WRITE_ONCE(cqe->flags, cflags);
} else if (ctx->cq_overflow_flushed) {
WRITE_ONCE(ctx->rings->cq_overflow,
atomic_inc_return(&ctx->cached_cq_overflow));
@@ -1126,25 +1237,37 @@ static void io_cqring_fill_event(struct io_kiocb *req, long res)
set_bit(0, &ctx->sq_check_overflow);
set_bit(0, &ctx->cq_check_overflow);
}
+ req->flags |= REQ_F_OVERFLOW;
refcount_inc(&req->refs);
req->result = res;
+ req->cflags = cflags;
list_add_tail(&req->list, &ctx->cq_overflow_list);
}
}
-static void io_cqring_add_event(struct io_kiocb *req, long res)
+static void io_cqring_fill_event(struct io_kiocb *req, long res)
+{
+ __io_cqring_fill_event(req, res, 0);
+}
+
+static void __io_cqring_add_event(struct io_kiocb *req, long res, long cflags)
{
struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;
spin_lock_irqsave(&ctx->completion_lock, flags);
- io_cqring_fill_event(req, res);
+ __io_cqring_fill_event(req, res, cflags);
io_commit_cqring(ctx);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
io_cqring_ev_posted(ctx);
}
+static void io_cqring_add_event(struct io_kiocb *req, long res)
+{
+ __io_cqring_add_event(req, res, 0);
+}
+
static inline bool io_is_fallback_req(struct io_kiocb *req)
{
return req == (struct io_kiocb *)
@@ -1214,6 +1337,15 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
return NULL;
}
+static inline void io_put_file(struct io_kiocb *req, struct file *file,
+ bool fixed)
+{
+ if (fixed)
+ percpu_ref_put(&req->ctx->file_data->refs);
+ else
+ fput(file);
+}
+
static void __io_req_do_free(struct io_kiocb *req)
{
if (likely(!io_is_fallback_req(req)))
@@ -1224,15 +1356,12 @@ static void __io_req_do_free(struct io_kiocb *req)
static void __io_req_aux_free(struct io_kiocb *req)
{
- struct io_ring_ctx *ctx = req->ctx;
+ if (req->flags & REQ_F_NEED_CLEANUP)
+ io_cleanup_req(req);
kfree(req->io);
- if (req->file) {
- if (req->flags & REQ_F_FIXED_FILE)
- percpu_ref_put(&ctx->file_data->refs);
- else
- fput(req->file);
- }
+ if (req->file)
+ io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
io_req_work_drop_env(req);
}
@@ -1439,6 +1568,30 @@ static void io_free_req(struct io_kiocb *req)
io_queue_async_work(nxt);
}
+static void io_link_work_cb(struct io_wq_work **workptr)
+{
+ struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+ struct io_kiocb *link;
+
+ link = list_first_entry(&req->link_list, struct io_kiocb, link_list);
+ io_queue_linked_timeout(link);
+ io_wq_submit_work(workptr);
+}
+
+static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt)
+{
+ struct io_kiocb *link;
+ const struct io_op_def *def = &io_op_defs[nxt->opcode];
+
+ if ((nxt->flags & REQ_F_ISREG) && def->hash_reg_file)
+ io_wq_hash_work(&nxt->work, file_inode(nxt->file));
+
+ *workptr = &nxt->work;
+ link = io_prep_linked_timeout(nxt);
+ if (link)
+ nxt->work.func = io_link_work_cb;
+}
+
/*
* Drop reference to request, return next in chain (if there is one) if this
* was the last reference to this request.
@@ -1446,10 +1599,10 @@ static void io_free_req(struct io_kiocb *req)
__attribute__((nonnull))
static void io_put_req_find_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
{
- io_req_find_next(req, nxtptr);
-
- if (refcount_dec_and_test(&req->refs))
+ if (refcount_dec_and_test(&req->refs)) {
+ io_req_find_next(req, nxtptr);
__io_free_req(req);
+ }
}
static void io_put_req(struct io_kiocb *req)
@@ -1458,6 +1611,26 @@ static void io_put_req(struct io_kiocb *req)
io_free_req(req);
}
+static void io_steal_work(struct io_kiocb *req,
+ struct io_wq_work **workptr)
+{
+ /*
+ * It's in an io-wq worker, so there always should be at least
+ * one reference, which will be dropped in io_put_work() just
+ * after the current handler returns.
+ *
+ * It also means, that if the counter dropped to 1, then there is
+ * no asynchronous users left, so it's safe to steal the next work.
+ */
+ if (refcount_read(&req->refs) == 1) {
+ struct io_kiocb *nxt = NULL;
+
+ io_req_find_next(req, &nxt);
+ if (nxt)
+ io_wq_assign_next(workptr, nxt);
+ }
+}
+
/*
* Must only be used if we don't need to care about links, usually from
* within the completion handling itself.
@@ -1519,6 +1692,19 @@ static inline bool io_req_multi_free(struct req_batch *rb, struct io_kiocb *req)
return true;
}
+static int io_put_kbuf(struct io_kiocb *req)
+{
+ struct io_buffer *kbuf;
+ int cflags;
+
+ kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
+ cflags = kbuf->bid << IORING_CQE_BUFFER_SHIFT;
+ cflags |= IORING_CQE_F_BUFFER;
+ req->rw.addr = 0;
+ kfree(kbuf);
+ return cflags;
+}
+
/*
* Find and free completed poll iocbs
*/
@@ -1530,10 +1716,15 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
rb.to_free = rb.need_iter = 0;
while (!list_empty(done)) {
+ int cflags = 0;
+
req = list_first_entry(done, struct io_kiocb, list);
list_del(&req->list);
- io_cqring_fill_event(req, req->result);
+ if (req->flags & REQ_F_BUFFER_SELECTED)
+ cflags = io_put_kbuf(req);
+
+ __io_cqring_fill_event(req, req->result, cflags);
(*nr_events)++;
if (refcount_dec_and_test(&req->refs) &&
@@ -1542,6 +1733,8 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
}
io_commit_cqring(ctx);
+ if (ctx->flags & IORING_SETUP_SQPOLL)
+ io_cqring_ev_posted(ctx);
io_free_req_many(ctx, &rb);
}
@@ -1635,11 +1828,17 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx)
mutex_unlock(&ctx->uring_lock);
}
-static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
- long min)
+static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
+ long min)
{
int iters = 0, ret = 0;
+ /*
+ * We disallow the app entering submit/complete with polling, but we
+ * still need to lock the ring to prevent racing with polled issue
+ * that got punted to a workqueue.
+ */
+ mutex_lock(&ctx->uring_lock);
do {
int tmin = 0;
@@ -1675,21 +1874,6 @@ static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
ret = 0;
} while (min && !*nr_events && !need_resched());
- return ret;
-}
-
-static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
- long min)
-{
- int ret;
-
- /*
- * We disallow the app entering submit/complete with polling, but we
- * still need to lock the ring to prevent racing with polled issue
- * that got punted to a workqueue.
- */
- mutex_lock(&ctx->uring_lock);
- ret = __io_iopoll_check(ctx, nr_events, min);
mutex_unlock(&ctx->uring_lock);
return ret;
}
@@ -1717,13 +1901,16 @@ static inline void req_set_fail_links(struct io_kiocb *req)
static void io_complete_rw_common(struct kiocb *kiocb, long res)
{
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
+ int cflags = 0;
if (kiocb->ki_flags & IOCB_WRITE)
kiocb_end_write(req);
if (res != req->result)
req_set_fail_links(req);
- io_cqring_add_event(req, res);
+ if (req->flags & REQ_F_BUFFER_SELECTED)
+ cflags = io_put_kbuf(req);
+ __io_cqring_add_event(req, res, cflags);
}
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
@@ -1734,17 +1921,6 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
io_put_req(req);
}
-static struct io_kiocb *__io_complete_rw(struct kiocb *kiocb, long res)
-{
- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
- struct io_kiocb *nxt = NULL;
-
- io_complete_rw_common(kiocb, res);
- io_put_req_find_next(req, &nxt);
-
- return nxt;
-}
-
static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
{
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
@@ -1793,6 +1969,10 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
list_add(&req->list, &ctx->poll_list);
else
list_add_tail(&req->list, &ctx->poll_list);
+
+ if ((ctx->flags & IORING_SETUP_SQPOLL) &&
+ wq_has_sleeper(&ctx->sqo_wait))
+ wake_up(&ctx->sqo_wait);
}
static void io_file_put(struct io_submit_state *state)
@@ -1811,7 +1991,7 @@ static void io_file_put(struct io_submit_state *state)
* assuming most submissions are for one file, or at least that each file
* has more than one submission.
*/
-static struct file *io_file_get(struct io_submit_state *state, int fd)
+static struct file *__io_file_get(struct io_submit_state *state, int fd)
{
if (!state)
return fget(fd);
@@ -1908,7 +2088,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
req->rw.addr = READ_ONCE(sqe->addr);
req->rw.len = READ_ONCE(sqe->len);
- /* we own ->private, reuse it for the buffer index */
+ /* we own ->private, reuse it for the buffer index / buffer ID */
req->rw.kiocb.private = (void *) (unsigned long)
READ_ONCE(sqe->buf_index);
return 0;
@@ -1935,15 +2115,14 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
}
}
-static void kiocb_done(struct kiocb *kiocb, ssize_t ret, struct io_kiocb **nxt,
- bool in_async)
+static void kiocb_done(struct kiocb *kiocb, ssize_t ret)
{
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
if (req->flags & REQ_F_CUR_POS)
req->file->f_pos = kiocb->ki_pos;
- if (in_async && ret >= 0 && kiocb->ki_complete == io_complete_rw)
- *nxt = __io_complete_rw(kiocb, ret);
+ if (ret >= 0 && kiocb->ki_complete == io_complete_rw)
+ io_complete_rw(kiocb, ret, 0);
else
io_rw_done(kiocb, ret);
}
@@ -2022,11 +2201,147 @@ static ssize_t io_import_fixed(struct io_kiocb *req, int rw,
return len;
}
+static void io_ring_submit_unlock(struct io_ring_ctx *ctx, bool needs_lock)
+{
+ if (needs_lock)
+ mutex_unlock(&ctx->uring_lock);
+}
+
+static void io_ring_submit_lock(struct io_ring_ctx *ctx, bool needs_lock)
+{
+ /*
+ * "Normal" inline submissions always hold the uring_lock, since we
+ * grab it from the system call. Same is true for the SQPOLL offload.
+ * The only exception is when we've detached the request and issue it
+ * from an async worker thread, grab the lock for that case.
+ */
+ if (needs_lock)
+ mutex_lock(&ctx->uring_lock);
+}
+
+static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
+ int bgid, struct io_buffer *kbuf,
+ bool needs_lock)
+{
+ struct io_buffer *head;
+
+ if (req->flags & REQ_F_BUFFER_SELECTED)
+ return kbuf;
+
+ io_ring_submit_lock(req->ctx, needs_lock);
+
+ lockdep_assert_held(&req->ctx->uring_lock);
+
+ head = idr_find(&req->ctx->io_buffer_idr, bgid);
+ if (head) {
+ if (!list_empty(&head->list)) {
+ kbuf = list_last_entry(&head->list, struct io_buffer,
+ list);
+ list_del(&kbuf->list);
+ } else {
+ kbuf = head;
+ idr_remove(&req->ctx->io_buffer_idr, bgid);
+ }
+ if (*len > kbuf->len)
+ *len = kbuf->len;
+ } else {
+ kbuf = ERR_PTR(-ENOBUFS);
+ }
+
+ io_ring_submit_unlock(req->ctx, needs_lock);
+
+ return kbuf;
+}
+
+static void __user *io_rw_buffer_select(struct io_kiocb *req, size_t *len,
+ bool needs_lock)
+{
+ struct io_buffer *kbuf;
+ int bgid;
+
+ kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
+ bgid = (int) (unsigned long) req->rw.kiocb.private;
+ kbuf = io_buffer_select(req, len, bgid, kbuf, needs_lock);
+ if (IS_ERR(kbuf))
+ return kbuf;
+ req->rw.addr = (u64) (unsigned long) kbuf;
+ req->flags |= REQ_F_BUFFER_SELECTED;
+ return u64_to_user_ptr(kbuf->addr);
+}
+
+#ifdef CONFIG_COMPAT
+static ssize_t io_compat_import(struct io_kiocb *req, struct iovec *iov,
+ bool needs_lock)
+{
+ struct compat_iovec __user *uiov;
+ compat_ssize_t clen;
+ void __user *buf;
+ ssize_t len;
+
+ uiov = u64_to_user_ptr(req->rw.addr);
+ if (!access_ok(uiov, sizeof(*uiov)))
+ return -EFAULT;
+ if (__get_user(clen, &uiov->iov_len))
+ return -EFAULT;
+ if (clen < 0)
+ return -EINVAL;
+
+ len = clen;
+ buf = io_rw_buffer_select(req, &len, needs_lock);
+ if (IS_ERR(buf))
+ return PTR_ERR(buf);
+ iov[0].iov_base = buf;
+ iov[0].iov_len = (compat_size_t) len;
+ return 0;
+}
+#endif
+
+static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
+ bool needs_lock)
+{
+ struct iovec __user *uiov = u64_to_user_ptr(req->rw.addr);
+ void __user *buf;
+ ssize_t len;
+
+ if (copy_from_user(iov, uiov, sizeof(*uiov)))
+ return -EFAULT;
+
+ len = iov[0].iov_len;
+ if (len < 0)
+ return -EINVAL;
+ buf = io_rw_buffer_select(req, &len, needs_lock);
+ if (IS_ERR(buf))
+ return PTR_ERR(buf);
+ iov[0].iov_base = buf;
+ iov[0].iov_len = len;
+ return 0;
+}
+
+static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
+ bool needs_lock)
+{
+ if (req->flags & REQ_F_BUFFER_SELECTED)
+ return 0;
+ if (!req->rw.len)
+ return 0;
+ else if (req->rw.len > 1)
+ return -EINVAL;
+
+#ifdef CONFIG_COMPAT
+ if (req->ctx->compat)
+ return io_compat_import(req, iov, needs_lock);
+#endif
+
+ return __io_iov_buffer_select(req, iov, needs_lock);
+}
+
static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
- struct iovec **iovec, struct iov_iter *iter)
+ struct iovec **iovec, struct iov_iter *iter,
+ bool needs_lock)
{
void __user *buf = u64_to_user_ptr(req->rw.addr);
size_t sqe_len = req->rw.len;
+ ssize_t ret;
u8 opcode;
opcode = req->opcode;
@@ -2035,15 +2350,23 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
return io_import_fixed(req, rw, iter);
}
- /* buffer index only valid with fixed read/write */
- if (req->rw.kiocb.private)
+ /* buffer index only valid with fixed read/write, or buffer select */
+ if (req->rw.kiocb.private && !(req->flags & REQ_F_BUFFER_SELECT))
return -EINVAL;
if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) {
- ssize_t ret;
+ if (req->flags & REQ_F_BUFFER_SELECT) {
+ buf = io_rw_buffer_select(req, &sqe_len, needs_lock);
+ if (IS_ERR(buf)) {
+ *iovec = NULL;
+ return PTR_ERR(buf);
+ }
+ req->rw.len = sqe_len;
+ }
+
ret = import_single_range(rw, buf, sqe_len, *iovec, iter);
*iovec = NULL;
- return ret;
+ return ret < 0 ? ret : sqe_len;
}
if (req->io) {
@@ -2056,8 +2379,15 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
return iorw->size;
}
- if (!req->has_user)
- return -EFAULT;
+ if (req->flags & REQ_F_BUFFER_SELECT) {
+ ret = io_iov_buffer_select(req, *iovec, needs_lock);
+ if (!ret) {
+ ret = (*iovec)->iov_len;
+ iov_iter_init(iter, rw, *iovec, 1, ret);
+ }
+ *iovec = NULL;
+ return ret;
+ }
#ifdef CONFIG_COMPAT
if (req->ctx->compat)
@@ -2137,26 +2467,23 @@ static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size,
req->io->rw.iov = req->io->rw.fast_iov;
memcpy(req->io->rw.iov, fast_iov,
sizeof(struct iovec) * iter->nr_segs);
+ } else {
+ req->flags |= REQ_F_NEED_CLEANUP;
}
}
+static inline int __io_alloc_async_ctx(struct io_kiocb *req)
+{
+ req->io = kmalloc(sizeof(*req->io), GFP_KERNEL);
+ return req->io == NULL;
+}
+
static int io_alloc_async_ctx(struct io_kiocb *req)
{
if (!io_op_defs[req->opcode].async_ctx)
return 0;
- req->io = kmalloc(sizeof(*req->io), GFP_KERNEL);
- return req->io == NULL;
-}
-static void io_rw_async(struct io_wq_work **workptr)
-{
- struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
- struct iovec *iov = NULL;
-
- if (req->io->rw.iov != req->io->rw.fast_iov)
- iov = req->io->rw.iov;
- io_wq_submit_work(workptr);
- kfree(iov);
+ return __io_alloc_async_ctx(req);
}
static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
@@ -2166,12 +2493,11 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
if (!io_op_defs[req->opcode].async_ctx)
return 0;
if (!req->io) {
- if (io_alloc_async_ctx(req))
+ if (__io_alloc_async_ctx(req))
return -ENOMEM;
io_req_map_rw(req, io_size, iovec, fast_iov, iter);
}
- req->work.func = io_rw_async;
return 0;
}
@@ -2189,13 +2515,14 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (unlikely(!(req->file->f_mode & FMODE_READ)))
return -EBADF;
- if (!req->io)
+ /* either don't need iovec imported or already have it */
+ if (!req->io || req->flags & REQ_F_NEED_CLEANUP)
return 0;
io = req->io;
io->rw.iov = io->rw.fast_iov;
req->io = NULL;
- ret = io_import_iovec(READ, req, &io->rw.iov, &iter);
+ ret = io_import_iovec(READ, req, &io->rw.iov, &iter, !force_nonblock);
req->io = io;
if (ret < 0)
return ret;
@@ -2204,8 +2531,7 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
return 0;
}
-static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_read(struct io_kiocb *req, bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw.kiocb;
@@ -2213,13 +2539,13 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
size_t iov_count;
ssize_t io_size, ret;
- ret = io_import_iovec(READ, req, &iovec, &iter);
+ ret = io_import_iovec(READ, req, &iovec, &iter, !force_nonblock);
if (ret < 0)
return ret;
/* Ensure we clear previously set non-block flag */
if (!force_nonblock)
- req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT;
+ kiocb->ki_flags &= ~IOCB_NOWAIT;
req->result = 0;
io_size = ret;
@@ -2230,10 +2556,8 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
* If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
* we know to async punt it even if it was opened O_NONBLOCK
*/
- if (force_nonblock && !io_file_supports_async(req->file)) {
- req->flags |= REQ_F_MUST_PUNT;
+ if (force_nonblock && !io_file_supports_async(req->file))
goto copy_iov;
- }
iov_count = iov_iter_count(&iter);
ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count);
@@ -2247,19 +2571,22 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
/* Catch -EAGAIN return for forced non-blocking submission */
if (!force_nonblock || ret2 != -EAGAIN) {
- kiocb_done(kiocb, ret2, nxt, req->in_async);
+ kiocb_done(kiocb, ret2);
} else {
copy_iov:
ret = io_setup_async_rw(req, io_size, iovec,
inline_vecs, &iter);
if (ret)
goto out_free;
+ /* any defer here is final, must blocking retry */
+ if (!(req->flags & REQ_F_NOWAIT))
+ req->flags |= REQ_F_MUST_PUNT;
return -EAGAIN;
}
}
out_free:
- if (!io_wq_current_is_worker())
- kfree(iovec);
+ kfree(iovec);
+ req->flags &= ~REQ_F_NEED_CLEANUP;
return ret;
}
@@ -2277,13 +2604,16 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
return -EBADF;
- if (!req->io)
+ req->fsize = rlimit(RLIMIT_FSIZE);
+
+ /* either don't need iovec imported or already have it */
+ if (!req->io || req->flags & REQ_F_NEED_CLEANUP)
return 0;
io = req->io;
io->rw.iov = io->rw.fast_iov;
req->io = NULL;
- ret = io_import_iovec(WRITE, req, &io->rw.iov, &iter);
+ ret = io_import_iovec(WRITE, req, &io->rw.iov, &iter, !force_nonblock);
req->io = io;
if (ret < 0)
return ret;
@@ -2292,8 +2622,7 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
return 0;
}
-static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_write(struct io_kiocb *req, bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw.kiocb;
@@ -2301,7 +2630,7 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
size_t iov_count;
ssize_t ret, io_size;
- ret = io_import_iovec(WRITE, req, &iovec, &iter);
+ ret = io_import_iovec(WRITE, req, &iovec, &iter, !force_nonblock);
if (ret < 0)
return ret;
@@ -2318,10 +2647,8 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
* If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
* we know to async punt it even if it was opened O_NONBLOCK
*/
- if (force_nonblock && !io_file_supports_async(req->file)) {
- req->flags |= REQ_F_MUST_PUNT;
+ if (force_nonblock && !io_file_supports_async(req->file))
goto copy_iov;
- }
/* file path doesn't support NOWAIT for non-direct_IO */
if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT) &&
@@ -2348,27 +2675,112 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
}
kiocb->ki_flags |= IOCB_WRITE;
+ if (!force_nonblock)
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize;
+
if (req->file->f_op->write_iter)
ret2 = call_write_iter(req->file, kiocb, &iter);
else
ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter);
+
+ if (!force_nonblock)
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
+
+ /*
+ * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just
+ * retry them without IOCB_NOWAIT.
+ */
+ if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT))
+ ret2 = -EAGAIN;
if (!force_nonblock || ret2 != -EAGAIN) {
- kiocb_done(kiocb, ret2, nxt, req->in_async);
+ kiocb_done(kiocb, ret2);
} else {
copy_iov:
ret = io_setup_async_rw(req, io_size, iovec,
inline_vecs, &iter);
if (ret)
goto out_free;
+ /* any defer here is final, must blocking retry */
+ req->flags |= REQ_F_MUST_PUNT;
return -EAGAIN;
}
}
out_free:
- if (!io_wq_current_is_worker())
- kfree(iovec);
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+ kfree(iovec);
return ret;
}
+static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_splice* sp = &req->splice;
+ unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL;
+ int ret;
+
+ if (req->flags & REQ_F_NEED_CLEANUP)
+ return 0;
+
+ sp->file_in = NULL;
+ sp->off_in = READ_ONCE(sqe->splice_off_in);
+ sp->off_out = READ_ONCE(sqe->off);
+ sp->len = READ_ONCE(sqe->len);
+ sp->flags = READ_ONCE(sqe->splice_flags);
+
+ if (unlikely(sp->flags & ~valid_flags))
+ return -EINVAL;
+
+ ret = io_file_get(NULL, req, READ_ONCE(sqe->splice_fd_in), &sp->file_in,
+ (sp->flags & SPLICE_F_FD_IN_FIXED));
+ if (ret)
+ return ret;
+ req->flags |= REQ_F_NEED_CLEANUP;
+
+ if (!S_ISREG(file_inode(sp->file_in)->i_mode))
+ req->work.flags |= IO_WQ_WORK_UNBOUND;
+
+ return 0;
+}
+
+static bool io_splice_punt(struct file *file)
+{
+ if (get_pipe_info(file))
+ return false;
+ if (!io_file_supports_async(file))
+ return true;
+ return !(file->f_mode & O_NONBLOCK);
+}
+
+static int io_splice(struct io_kiocb *req, bool force_nonblock)
+{
+ struct io_splice *sp = &req->splice;
+ struct file *in = sp->file_in;
+ struct file *out = sp->file_out;
+ unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
+ loff_t *poff_in, *poff_out;
+ long ret;
+
+ if (force_nonblock) {
+ if (io_splice_punt(in) || io_splice_punt(out))
+ return -EAGAIN;
+ flags |= SPLICE_F_NONBLOCK;
+ }
+
+ poff_in = (sp->off_in == -1) ? NULL : &sp->off_in;
+ poff_out = (sp->off_out == -1) ? NULL : &sp->off_out;
+ ret = do_splice(in, poff_in, out, poff_out, sp->len, flags);
+ if (force_nonblock && ret == -EAGAIN)
+ return -EAGAIN;
+
+ io_put_file(req, in, (sp->flags & SPLICE_F_FD_IN_FIXED));
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+
+ io_cqring_add_event(req, ret);
+ if (ret != sp->len)
+ req_set_fail_links(req);
+ io_put_req(req);
+ return 0;
+}
+
/*
* IORING_OP_NOP just posts a completion event, nothing else.
*/
@@ -2417,82 +2829,63 @@ static bool io_req_cancelled(struct io_kiocb *req)
return false;
}
-static void io_link_work_cb(struct io_wq_work **workptr)
+static void __io_fsync(struct io_kiocb *req)
{
- struct io_wq_work *work = *workptr;
- struct io_kiocb *link = work->data;
-
- io_queue_linked_timeout(link);
- work->func = io_wq_submit_work;
-}
-
-static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt)
-{
- struct io_kiocb *link;
-
- io_prep_async_work(nxt, &link);
- *workptr = &nxt->work;
- if (link) {
- nxt->work.flags |= IO_WQ_WORK_CB;
- nxt->work.func = io_link_work_cb;
- nxt->work.data = link;
- }
-}
-
-static void io_fsync_finish(struct io_wq_work **workptr)
-{
- struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
loff_t end = req->sync.off + req->sync.len;
- struct io_kiocb *nxt = NULL;
int ret;
- if (io_req_cancelled(req))
- return;
-
ret = vfs_fsync_range(req->file, req->sync.off,
end > 0 ? end : LLONG_MAX,
req->sync.flags & IORING_FSYNC_DATASYNC);
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
- io_put_req_find_next(req, &nxt);
- if (nxt)
- io_wq_assign_next(workptr, nxt);
+ io_put_req(req);
}
-static int io_fsync(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static void io_fsync_finish(struct io_wq_work **workptr)
{
- struct io_wq_work *work, *old_work;
+ struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+ if (io_req_cancelled(req))
+ return;
+ __io_fsync(req);
+ io_steal_work(req, workptr);
+}
+
+static int io_fsync(struct io_kiocb *req, bool force_nonblock)
+{
/* fsync always requires a blocking context */
if (force_nonblock) {
- io_put_req(req);
req->work.func = io_fsync_finish;
return -EAGAIN;
}
-
- work = old_work = &req->work;
- io_fsync_finish(&work);
- if (work && work != old_work)
- *nxt = container_of(work, struct io_kiocb, work);
+ __io_fsync(req);
return 0;
}
+static void __io_fallocate(struct io_kiocb *req)
+{
+ int ret;
+
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize;
+ ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
+ req->sync.len);
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
+ if (ret < 0)
+ req_set_fail_links(req);
+ io_cqring_add_event(req, ret);
+ io_put_req(req);
+}
+
static void io_fallocate_finish(struct io_wq_work **workptr)
{
struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
- struct io_kiocb *nxt = NULL;
- int ret;
- ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
- req->sync.len);
- if (ret < 0)
- req_set_fail_links(req);
- io_cqring_add_event(req, ret);
- io_put_req_find_next(req, &nxt);
- if (nxt)
- io_wq_assign_next(workptr, nxt);
+ if (io_req_cancelled(req))
+ return;
+ __io_fallocate(req);
+ io_steal_work(req, workptr);
}
static int io_fallocate_prep(struct io_kiocb *req,
@@ -2504,26 +2897,19 @@ static int io_fallocate_prep(struct io_kiocb *req,
req->sync.off = READ_ONCE(sqe->off);
req->sync.len = READ_ONCE(sqe->addr);
req->sync.mode = READ_ONCE(sqe->len);
+ req->fsize = rlimit(RLIMIT_FSIZE);
return 0;
}
-static int io_fallocate(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_fallocate(struct io_kiocb *req, bool force_nonblock)
{
- struct io_wq_work *work, *old_work;
-
/* fallocate always requiring blocking context */
if (force_nonblock) {
- io_put_req(req);
req->work.func = io_fallocate_finish;
return -EAGAIN;
}
- work = old_work = &req->work;
- io_fallocate_finish(&work);
- if (work && work != old_work)
- *nxt = container_of(work, struct io_kiocb, work);
-
+ __io_fallocate(req);
return 0;
}
@@ -2534,6 +2920,10 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (sqe->ioprio || sqe->buf_index)
return -EINVAL;
+ if (sqe->flags & IOSQE_FIXED_FILE)
+ return -EBADF;
+ if (req->flags & REQ_F_NEED_CLEANUP)
+ return 0;
req->open.dfd = READ_ONCE(sqe->fd);
req->open.how.mode = READ_ONCE(sqe->len);
@@ -2547,6 +2937,8 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return ret;
}
+ req->open.nofile = rlimit(RLIMIT_NOFILE);
+ req->flags |= REQ_F_NEED_CLEANUP;
return 0;
}
@@ -2559,6 +2951,10 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (sqe->ioprio || sqe->buf_index)
return -EINVAL;
+ if (sqe->flags & IOSQE_FIXED_FILE)
+ return -EBADF;
+ if (req->flags & REQ_F_NEED_CLEANUP)
+ return 0;
req->open.dfd = READ_ONCE(sqe->fd);
fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -2583,11 +2979,12 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return ret;
}
+ req->open.nofile = rlimit(RLIMIT_NOFILE);
+ req->flags |= REQ_F_NEED_CLEANUP;
return 0;
}
-static int io_openat2(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_openat2(struct io_kiocb *req, bool force_nonblock)
{
struct open_flags op;
struct file *file;
@@ -2600,7 +2997,7 @@ static int io_openat2(struct io_kiocb *req, struct io_kiocb **nxt,
if (ret)
goto err;
- ret = get_unused_fd_flags(req->open.how.flags);
+ ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile);
if (ret < 0)
goto err;
@@ -2614,18 +3011,175 @@ static int io_openat2(struct io_kiocb *req, struct io_kiocb **nxt,
}
err:
putname(req->open.filename);
+ req->flags &= ~REQ_F_NEED_CLEANUP;
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
}
-static int io_openat(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_openat(struct io_kiocb *req, bool force_nonblock)
{
req->open.how = build_open_how(req->open.how.flags, req->open.how.mode);
- return io_openat2(req, nxt, force_nonblock);
+ return io_openat2(req, force_nonblock);
+}
+
+static int io_remove_buffers_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ struct io_provide_buf *p = &req->pbuf;
+ u64 tmp;
+
+ if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off)
+ return -EINVAL;
+
+ tmp = READ_ONCE(sqe->fd);
+ if (!tmp || tmp > USHRT_MAX)
+ return -EINVAL;
+
+ memset(p, 0, sizeof(*p));
+ p->nbufs = tmp;
+ p->bgid = READ_ONCE(sqe->buf_group);
+ return 0;
+}
+
+static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
+ int bgid, unsigned nbufs)
+{
+ unsigned i = 0;
+
+ /* shouldn't happen */
+ if (!nbufs)
+ return 0;
+
+ /* the head kbuf is the list itself */
+ while (!list_empty(&buf->list)) {
+ struct io_buffer *nxt;
+
+ nxt = list_first_entry(&buf->list, struct io_buffer, list);
+ list_del(&nxt->list);
+ kfree(nxt);
+ if (++i == nbufs)
+ return i;
+ }
+ i++;
+ kfree(buf);
+ idr_remove(&ctx->io_buffer_idr, bgid);
+
+ return i;
+}
+
+static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock)
+{
+ struct io_provide_buf *p = &req->pbuf;
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_buffer *head;
+ int ret = 0;
+
+ io_ring_submit_lock(ctx, !force_nonblock);
+
+ lockdep_assert_held(&ctx->uring_lock);
+
+ ret = -ENOENT;
+ head = idr_find(&ctx->io_buffer_idr, p->bgid);
+ if (head)
+ ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
+
+ io_ring_submit_lock(ctx, !force_nonblock);
+ if (ret < 0)
+ req_set_fail_links(req);
+ io_cqring_add_event(req, ret);
+ io_put_req(req);
+ return 0;
+}
+
+static int io_provide_buffers_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ struct io_provide_buf *p = &req->pbuf;
+ u64 tmp;
+
+ if (sqe->ioprio || sqe->rw_flags)
+ return -EINVAL;
+
+ tmp = READ_ONCE(sqe->fd);
+ if (!tmp || tmp > USHRT_MAX)
+ return -E2BIG;
+ p->nbufs = tmp;
+ p->addr = READ_ONCE(sqe->addr);
+ p->len = READ_ONCE(sqe->len);
+
+ if (!access_ok(u64_to_user_ptr(p->addr), p->len))
+ return -EFAULT;
+
+ p->bgid = READ_ONCE(sqe->buf_group);
+ tmp = READ_ONCE(sqe->off);
+ if (tmp > USHRT_MAX)
+ return -E2BIG;
+ p->bid = tmp;
+ return 0;
+}
+
+static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
+{
+ struct io_buffer *buf;
+ u64 addr = pbuf->addr;
+ int i, bid = pbuf->bid;
+
+ for (i = 0; i < pbuf->nbufs; i++) {
+ buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+ if (!buf)
+ break;
+
+ buf->addr = addr;
+ buf->len = pbuf->len;
+ buf->bid = bid;
+ addr += pbuf->len;
+ bid++;
+ if (!*head) {
+ INIT_LIST_HEAD(&buf->list);
+ *head = buf;
+ } else {
+ list_add_tail(&buf->list, &(*head)->list);
+ }
+ }
+
+ return i ? i : -ENOMEM;
+}
+
+static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock)
+{
+ struct io_provide_buf *p = &req->pbuf;
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_buffer *head, *list;
+ int ret = 0;
+
+ io_ring_submit_lock(ctx, !force_nonblock);
+
+ lockdep_assert_held(&ctx->uring_lock);
+
+ list = head = idr_find(&ctx->io_buffer_idr, p->bgid);
+
+ ret = io_add_buffers(p, &head);
+ if (ret < 0)
+ goto out;
+
+ if (!list) {
+ ret = idr_alloc(&ctx->io_buffer_idr, head, p->bgid, p->bgid + 1,
+ GFP_KERNEL);
+ if (ret < 0) {
+ __io_remove_buffers(ctx, head, p->bgid, -1U);
+ goto out;
+ }
+ }
+out:
+ io_ring_submit_unlock(ctx, !force_nonblock);
+ if (ret < 0)
+ req_set_fail_links(req);
+ io_cqring_add_event(req, ret);
+ io_put_req(req);
+ return 0;
}
static int io_epoll_ctl_prep(struct io_kiocb *req,
@@ -2653,8 +3207,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
#endif
}
-static int io_epoll_ctl(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_epoll_ctl(struct io_kiocb *req, bool force_nonblock)
{
#if defined(CONFIG_EPOLL)
struct io_epoll *ie = &req->epoll;
@@ -2667,7 +3220,7 @@ static int io_epoll_ctl(struct io_kiocb *req, struct io_kiocb **nxt,
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
#else
return -EOPNOTSUPP;
@@ -2689,8 +3242,7 @@ static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
#endif
}
-static int io_madvise(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_madvise(struct io_kiocb *req, bool force_nonblock)
{
#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
struct io_madvise *ma = &req->madvise;
@@ -2703,7 +3255,7 @@ static int io_madvise(struct io_kiocb *req, struct io_kiocb **nxt,
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
#else
return -EOPNOTSUPP;
@@ -2721,8 +3273,7 @@ static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
}
-static int io_fadvise(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_fadvise(struct io_kiocb *req, bool force_nonblock)
{
struct io_fadvise *fa = &req->fadvise;
int ret;
@@ -2742,7 +3293,7 @@ static int io_fadvise(struct io_kiocb *req, struct io_kiocb **nxt,
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
}
@@ -2754,6 +3305,10 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (sqe->ioprio || sqe->buf_index)
return -EINVAL;
+ if (sqe->flags & IOSQE_FIXED_FILE)
+ return -EBADF;
+ if (req->flags & REQ_F_NEED_CLEANUP)
+ return 0;
req->open.dfd = READ_ONCE(sqe->fd);
req->open.mask = READ_ONCE(sqe->len);
@@ -2771,11 +3326,11 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return ret;
}
+ req->flags |= REQ_F_NEED_CLEANUP;
return 0;
}
-static int io_statx(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_statx(struct io_kiocb *req, bool force_nonblock)
{
struct io_open *ctx = &req->open;
unsigned lookup_flags;
@@ -2808,10 +3363,11 @@ static int io_statx(struct io_kiocb *req, struct io_kiocb **nxt,
ret = cp_statx(&stat, ctx->buffer);
err:
putname(ctx->filename);
+ req->flags &= ~REQ_F_NEED_CLEANUP;
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
}
@@ -2827,7 +3383,7 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
sqe->rw_flags || sqe->buf_index)
return -EINVAL;
if (sqe->flags & IOSQE_FIXED_FILE)
- return -EINVAL;
+ return -EBADF;
req->close.fd = READ_ONCE(sqe->fd);
if (req->file->f_op == &io_uring_fops ||
@@ -2837,30 +3393,29 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
}
+/* only called when __close_fd_get_file() is done */
+static void __io_close_finish(struct io_kiocb *req)
+{
+ int ret;
+
+ ret = filp_close(req->close.put_file, req->work.files);
+ if (ret < 0)
+ req_set_fail_links(req);
+ io_cqring_add_event(req, ret);
+ fput(req->close.put_file);
+ io_put_req(req);
+}
+
static void io_close_finish(struct io_wq_work **workptr)
{
struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
- struct io_kiocb *nxt = NULL;
- /* Invoked with files, we need to do the close */
- if (req->work.files) {
- int ret;
-
- ret = filp_close(req->close.put_file, req->work.files);
- if (ret < 0)
- req_set_fail_links(req);
- io_cqring_add_event(req, ret);
- }
-
- fput(req->close.put_file);
-
- io_put_req_find_next(req, &nxt);
- if (nxt)
- io_wq_assign_next(workptr, nxt);
+ /* not cancellable, don't do io_req_cancelled() */
+ __io_close_finish(req);
+ io_steal_work(req, workptr);
}
-static int io_close(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_close(struct io_kiocb *req, bool force_nonblock)
{
int ret;
@@ -2870,37 +3425,25 @@ static int io_close(struct io_kiocb *req, struct io_kiocb **nxt,
return ret;
/* if the file has a flush method, be safe and punt to async */
- if (req->close.put_file->f_op->flush && !io_wq_current_is_worker())
- goto eagain;
+ if (req->close.put_file->f_op->flush && force_nonblock) {
+ /* submission ref will be dropped, take it for async */
+ refcount_inc(&req->refs);
+
+ req->work.func = io_close_finish;
+ /*
+ * Do manual async queue here to avoid grabbing files - we don't
+ * need the files, and it'll cause io_close_finish() to close
+ * the file again and cause a double CQE entry for this request
+ */
+ io_queue_async_work(req);
+ return 0;
+ }
/*
* No ->flush(), safely close from here and just punt the
* fput() to async context.
*/
- ret = filp_close(req->close.put_file, current->files);
-
- if (ret < 0)
- req_set_fail_links(req);
- io_cqring_add_event(req, ret);
-
- if (io_wq_current_is_worker()) {
- struct io_wq_work *old_work, *work;
-
- old_work = work = &req->work;
- io_close_finish(&work);
- if (work && work != old_work)
- *nxt = container_of(work, struct io_kiocb, work);
- return 0;
- }
-
-eagain:
- req->work.func = io_close_finish;
- /*
- * Do manual async queue here to avoid grabbing files - we don't
- * need the files, and it'll cause io_close_finish() to close
- * the file again and cause a double CQE entry for this request
- */
- io_queue_async_work(req);
+ __io_close_finish(req);
return 0;
}
@@ -2922,82 +3465,91 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
}
-static void io_sync_file_range_finish(struct io_wq_work **workptr)
+static void __io_sync_file_range(struct io_kiocb *req)
{
- struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
- struct io_kiocb *nxt = NULL;
int ret;
- if (io_req_cancelled(req))
- return;
-
ret = sync_file_range(req->file, req->sync.off, req->sync.len,
req->sync.flags);
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
- io_put_req_find_next(req, &nxt);
+ io_put_req(req);
+}
+
+
+static void io_sync_file_range_finish(struct io_wq_work **workptr)
+{
+ struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+ struct io_kiocb *nxt = NULL;
+
+ if (io_req_cancelled(req))
+ return;
+ __io_sync_file_range(req);
+ io_put_req(req); /* put submission ref */
if (nxt)
io_wq_assign_next(workptr, nxt);
}
-static int io_sync_file_range(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock)
{
- struct io_wq_work *work, *old_work;
-
/* sync_file_range always requires a blocking context */
if (force_nonblock) {
- io_put_req(req);
req->work.func = io_sync_file_range_finish;
return -EAGAIN;
}
- work = old_work = &req->work;
- io_sync_file_range_finish(&work);
- if (work && work != old_work)
- *nxt = container_of(work, struct io_kiocb, work);
+ __io_sync_file_range(req);
return 0;
}
#if defined(CONFIG_NET)
-static void io_sendrecv_async(struct io_wq_work **workptr)
+static int io_setup_async_msg(struct io_kiocb *req,
+ struct io_async_msghdr *kmsg)
{
- struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
- struct iovec *iov = NULL;
-
- if (req->io->rw.iov != req->io->rw.fast_iov)
- iov = req->io->msg.iov;
- io_wq_submit_work(workptr);
- kfree(iov);
+ if (req->io)
+ return -EAGAIN;
+ if (io_alloc_async_ctx(req)) {
+ if (kmsg->iov != kmsg->fast_iov)
+ kfree(kmsg->iov);
+ return -ENOMEM;
+ }
+ req->flags |= REQ_F_NEED_CLEANUP;
+ memcpy(&req->io->msg, kmsg, sizeof(*kmsg));
+ return -EAGAIN;
}
-#endif
static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
-#if defined(CONFIG_NET)
struct io_sr_msg *sr = &req->sr_msg;
struct io_async_ctx *io = req->io;
+ int ret;
sr->msg_flags = READ_ONCE(sqe->msg_flags);
sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
+#ifdef CONFIG_COMPAT
+ if (req->ctx->compat)
+ sr->msg_flags |= MSG_CMSG_COMPAT;
+#endif
+
if (!io || req->opcode == IORING_OP_SEND)
return 0;
+ /* iovec is already imported */
+ if (req->flags & REQ_F_NEED_CLEANUP)
+ return 0;
io->msg.iov = io->msg.fast_iov;
- return sendmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
+ ret = sendmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
&io->msg.iov);
-#else
- return -EOPNOTSUPP;
-#endif
+ if (!ret)
+ req->flags |= REQ_F_NEED_CLEANUP;
+ return ret;
}
-static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_sendmsg(struct io_kiocb *req, bool force_nonblock)
{
-#if defined(CONFIG_NET)
struct io_async_msghdr *kmsg = NULL;
struct socket *sock;
int ret;
@@ -3008,12 +3560,11 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
sock = sock_from_file(req->file, &ret);
if (sock) {
struct io_async_ctx io;
- struct sockaddr_storage addr;
unsigned flags;
if (req->io) {
kmsg = &req->io->msg;
- kmsg->msg.msg_name = &addr;
+ kmsg->msg.msg_name = &req->io->msg.addr;
/* if iov is set, it's allocated already */
if (!kmsg->iov)
kmsg->iov = kmsg->fast_iov;
@@ -3022,7 +3573,7 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
struct io_sr_msg *sr = &req->sr_msg;
kmsg = &io.msg;
- kmsg->msg.msg_name = &addr;
+ kmsg->msg.msg_name = &io.msg.addr;
io.msg.iov = io.msg.fast_iov;
ret = sendmsg_copy_msghdr(&io.msg.msg, sr->msg,
@@ -3038,35 +3589,24 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
flags |= MSG_DONTWAIT;
ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
- if (force_nonblock && ret == -EAGAIN) {
- if (req->io)
- return -EAGAIN;
- if (io_alloc_async_ctx(req))
- return -ENOMEM;
- memcpy(&req->io->msg, &io.msg, sizeof(io.msg));
- req->work.func = io_sendrecv_async;
- return -EAGAIN;
- }
+ if (force_nonblock && ret == -EAGAIN)
+ return io_setup_async_msg(req, kmsg);
if (ret == -ERESTARTSYS)
ret = -EINTR;
}
- if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
+ if (kmsg && kmsg->iov != kmsg->fast_iov)
kfree(kmsg->iov);
+ req->flags &= ~REQ_F_NEED_CLEANUP;
io_cqring_add_event(req, ret);
if (ret < 0)
req_set_fail_links(req);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
-#else
- return -EOPNOTSUPP;
-#endif
}
-static int io_send(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_send(struct io_kiocb *req, bool force_nonblock)
{
-#if defined(CONFIG_NET)
struct socket *sock;
int ret;
@@ -3107,73 +3647,186 @@ static int io_send(struct io_kiocb *req, struct io_kiocb **nxt,
io_cqring_add_event(req, ret);
if (ret < 0)
req_set_fail_links(req);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
-#else
- return -EOPNOTSUPP;
+}
+
+static int __io_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_ctx *io)
+{
+ struct io_sr_msg *sr = &req->sr_msg;
+ struct iovec __user *uiov;
+ size_t iov_len;
+ int ret;
+
+ ret = __copy_msghdr_from_user(&io->msg.msg, sr->msg, &io->msg.uaddr,
+ &uiov, &iov_len);
+ if (ret)
+ return ret;
+
+ if (req->flags & REQ_F_BUFFER_SELECT) {
+ if (iov_len > 1)
+ return -EINVAL;
+ if (copy_from_user(io->msg.iov, uiov, sizeof(*uiov)))
+ return -EFAULT;
+ sr->len = io->msg.iov[0].iov_len;
+ iov_iter_init(&io->msg.msg.msg_iter, READ, io->msg.iov, 1,
+ sr->len);
+ io->msg.iov = NULL;
+ } else {
+ ret = import_iovec(READ, uiov, iov_len, UIO_FASTIOV,
+ &io->msg.iov, &io->msg.msg.msg_iter);
+ if (ret > 0)
+ ret = 0;
+ }
+
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
+ struct io_async_ctx *io)
+{
+ struct compat_msghdr __user *msg_compat;
+ struct io_sr_msg *sr = &req->sr_msg;
+ struct compat_iovec __user *uiov;
+ compat_uptr_t ptr;
+ compat_size_t len;
+ int ret;
+
+ msg_compat = (struct compat_msghdr __user *) sr->msg;
+ ret = __get_compat_msghdr(&io->msg.msg, msg_compat, &io->msg.uaddr,
+ &ptr, &len);
+ if (ret)
+ return ret;
+
+ uiov = compat_ptr(ptr);
+ if (req->flags & REQ_F_BUFFER_SELECT) {
+ compat_ssize_t clen;
+
+ if (len > 1)
+ return -EINVAL;
+ if (!access_ok(uiov, sizeof(*uiov)))
+ return -EFAULT;
+ if (__get_user(clen, &uiov->iov_len))
+ return -EFAULT;
+ if (clen < 0)
+ return -EINVAL;
+ sr->len = io->msg.iov[0].iov_len;
+ io->msg.iov = NULL;
+ } else {
+ ret = compat_import_iovec(READ, uiov, len, UIO_FASTIOV,
+ &io->msg.iov,
+ &io->msg.msg.msg_iter);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
#endif
+
+static int io_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_ctx *io)
+{
+ io->msg.iov = io->msg.fast_iov;
+
+#ifdef CONFIG_COMPAT
+ if (req->ctx->compat)
+ return __io_compat_recvmsg_copy_hdr(req, io);
+#endif
+
+ return __io_recvmsg_copy_hdr(req, io);
+}
+
+static struct io_buffer *io_recv_buffer_select(struct io_kiocb *req,
+ int *cflags, bool needs_lock)
+{
+ struct io_sr_msg *sr = &req->sr_msg;
+ struct io_buffer *kbuf;
+
+ if (!(req->flags & REQ_F_BUFFER_SELECT))
+ return NULL;
+
+ kbuf = io_buffer_select(req, &sr->len, sr->bgid, sr->kbuf, needs_lock);
+ if (IS_ERR(kbuf))
+ return kbuf;
+
+ sr->kbuf = kbuf;
+ req->flags |= REQ_F_BUFFER_SELECTED;
+
+ *cflags = kbuf->bid << IORING_CQE_BUFFER_SHIFT;
+ *cflags |= IORING_CQE_F_BUFFER;
+ return kbuf;
}
static int io_recvmsg_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
-#if defined(CONFIG_NET)
struct io_sr_msg *sr = &req->sr_msg;
struct io_async_ctx *io = req->io;
+ int ret;
sr->msg_flags = READ_ONCE(sqe->msg_flags);
sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
+ sr->bgid = READ_ONCE(sqe->buf_group);
+
+#ifdef CONFIG_COMPAT
+ if (req->ctx->compat)
+ sr->msg_flags |= MSG_CMSG_COMPAT;
+#endif
if (!io || req->opcode == IORING_OP_RECV)
return 0;
+ /* iovec is already imported */
+ if (req->flags & REQ_F_NEED_CLEANUP)
+ return 0;
- io->msg.iov = io->msg.fast_iov;
- return recvmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
- &io->msg.uaddr, &io->msg.iov);
-#else
- return -EOPNOTSUPP;
-#endif
+ ret = io_recvmsg_copy_hdr(req, io);
+ if (!ret)
+ req->flags |= REQ_F_NEED_CLEANUP;
+ return ret;
}
-static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_recvmsg(struct io_kiocb *req, bool force_nonblock)
{
-#if defined(CONFIG_NET)
struct io_async_msghdr *kmsg = NULL;
struct socket *sock;
- int ret;
+ int ret, cflags = 0;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
sock = sock_from_file(req->file, &ret);
if (sock) {
+ struct io_buffer *kbuf;
struct io_async_ctx io;
- struct sockaddr_storage addr;
unsigned flags;
if (req->io) {
kmsg = &req->io->msg;
- kmsg->msg.msg_name = &addr;
+ kmsg->msg.msg_name = &req->io->msg.addr;
/* if iov is set, it's allocated already */
if (!kmsg->iov)
kmsg->iov = kmsg->fast_iov;
kmsg->msg.msg_iter.iov = kmsg->iov;
} else {
- struct io_sr_msg *sr = &req->sr_msg;
-
kmsg = &io.msg;
- kmsg->msg.msg_name = &addr;
+ kmsg->msg.msg_name = &io.msg.addr;
- io.msg.iov = io.msg.fast_iov;
- ret = recvmsg_copy_msghdr(&io.msg.msg, sr->msg,
- sr->msg_flags, &io.msg.uaddr,
- &io.msg.iov);
+ ret = io_recvmsg_copy_hdr(req, &io);
if (ret)
return ret;
}
+ kbuf = io_recv_buffer_select(req, &cflags, !force_nonblock);
+ if (IS_ERR(kbuf)) {
+ return PTR_ERR(kbuf);
+ } else if (kbuf) {
+ kmsg->fast_iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
+ iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->iov,
+ 1, req->sr_msg.len);
+ }
+
flags = req->sr_msg.msg_flags;
if (flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
@@ -3182,37 +3835,27 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.msg,
kmsg->uaddr, flags);
- if (force_nonblock && ret == -EAGAIN) {
- if (req->io)
- return -EAGAIN;
- if (io_alloc_async_ctx(req))
- return -ENOMEM;
- memcpy(&req->io->msg, &io.msg, sizeof(io.msg));
- req->work.func = io_sendrecv_async;
- return -EAGAIN;
- }
+ if (force_nonblock && ret == -EAGAIN)
+ return io_setup_async_msg(req, kmsg);
if (ret == -ERESTARTSYS)
ret = -EINTR;
}
- if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
+ if (kmsg && kmsg->iov != kmsg->fast_iov)
kfree(kmsg->iov);
- io_cqring_add_event(req, ret);
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+ __io_cqring_add_event(req, ret, cflags);
if (ret < 0)
req_set_fail_links(req);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
-#else
- return -EOPNOTSUPP;
-#endif
}
-static int io_recv(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_recv(struct io_kiocb *req, bool force_nonblock)
{
-#if defined(CONFIG_NET)
+ struct io_buffer *kbuf = NULL;
struct socket *sock;
- int ret;
+ int ret, cflags = 0;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -3220,15 +3863,25 @@ static int io_recv(struct io_kiocb *req, struct io_kiocb **nxt,
sock = sock_from_file(req->file, &ret);
if (sock) {
struct io_sr_msg *sr = &req->sr_msg;
+ void __user *buf = sr->buf;
struct msghdr msg;
struct iovec iov;
unsigned flags;
- ret = import_single_range(READ, sr->buf, sr->len, &iov,
- &msg.msg_iter);
- if (ret)
- return ret;
+ kbuf = io_recv_buffer_select(req, &cflags, !force_nonblock);
+ if (IS_ERR(kbuf))
+ return PTR_ERR(kbuf);
+ else if (kbuf)
+ buf = u64_to_user_ptr(kbuf->addr);
+ ret = import_single_range(READ, buf, sr->len, &iov,
+ &msg.msg_iter);
+ if (ret) {
+ kfree(kbuf);
+ return ret;
+ }
+
+ req->flags |= REQ_F_NEED_CLEANUP;
msg.msg_name = NULL;
msg.msg_control = NULL;
msg.msg_controllen = 0;
@@ -3249,20 +3902,17 @@ static int io_recv(struct io_kiocb *req, struct io_kiocb **nxt,
ret = -EINTR;
}
- io_cqring_add_event(req, ret);
+ kfree(kbuf);
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+ __io_cqring_add_event(req, ret, cflags);
if (ret < 0)
req_set_fail_links(req);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
-#else
- return -EOPNOTSUPP;
-#endif
}
-
static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
-#if defined(CONFIG_NET)
struct io_accept *accept = &req->accept;
if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
@@ -3273,15 +3923,11 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
accept->flags = READ_ONCE(sqe->accept_flags);
+ accept->nofile = rlimit(RLIMIT_NOFILE);
return 0;
-#else
- return -EOPNOTSUPP;
-#endif
}
-#if defined(CONFIG_NET)
-static int __io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int __io_accept(struct io_kiocb *req, bool force_nonblock)
{
struct io_accept *accept = &req->accept;
unsigned file_flags;
@@ -3289,7 +3935,8 @@ static int __io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
file_flags = force_nonblock ? O_NONBLOCK : 0;
ret = __sys_accept4_file(req->file, file_flags, accept->addr,
- accept->addr_len, accept->flags);
+ accept->addr_len, accept->flags,
+ accept->nofile);
if (ret == -EAGAIN && force_nonblock)
return -EAGAIN;
if (ret == -ERESTARTSYS)
@@ -3297,44 +3944,34 @@ static int __io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
}
static void io_accept_finish(struct io_wq_work **workptr)
{
struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
- struct io_kiocb *nxt = NULL;
if (io_req_cancelled(req))
return;
- __io_accept(req, &nxt, false);
- if (nxt)
- io_wq_assign_next(workptr, nxt);
+ __io_accept(req, false);
+ io_steal_work(req, workptr);
}
-#endif
-static int io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_accept(struct io_kiocb *req, bool force_nonblock)
{
-#if defined(CONFIG_NET)
int ret;
- ret = __io_accept(req, nxt, force_nonblock);
+ ret = __io_accept(req, force_nonblock);
if (ret == -EAGAIN && force_nonblock) {
req->work.func = io_accept_finish;
- io_put_req(req);
return -EAGAIN;
}
return 0;
-#else
- return -EOPNOTSUPP;
-#endif
}
static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
-#if defined(CONFIG_NET)
struct io_connect *conn = &req->connect;
struct io_async_ctx *io = req->io;
@@ -3351,15 +3988,10 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return move_addr_to_kernel(conn->addr, conn->addr_len,
&io->connect.address);
-#else
- return -EOPNOTSUPP;
-#endif
}
-static int io_connect(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_connect(struct io_kiocb *req, bool force_nonblock)
{
-#if defined(CONFIG_NET)
struct io_async_ctx __io, *io;
unsigned file_flags;
int ret;
@@ -3395,25 +4027,301 @@ static int io_connect(struct io_kiocb *req, struct io_kiocb **nxt,
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
-#else
+}
+#else /* !CONFIG_NET */
+static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
return -EOPNOTSUPP;
-#endif
}
-static void io_poll_remove_one(struct io_kiocb *req)
+static int io_sendmsg(struct io_kiocb *req, bool force_nonblock)
{
- struct io_poll_iocb *poll = &req->poll;
+ return -EOPNOTSUPP;
+}
+
+static int io_send(struct io_kiocb *req, bool force_nonblock)
+{
+ return -EOPNOTSUPP;
+}
+
+static int io_recvmsg_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ return -EOPNOTSUPP;
+}
+
+static int io_recvmsg(struct io_kiocb *req, bool force_nonblock)
+{
+ return -EOPNOTSUPP;
+}
+
+static int io_recv(struct io_kiocb *req, bool force_nonblock)
+{
+ return -EOPNOTSUPP;
+}
+
+static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ return -EOPNOTSUPP;
+}
+
+static int io_accept(struct io_kiocb *req, bool force_nonblock)
+{
+ return -EOPNOTSUPP;
+}
+
+static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ return -EOPNOTSUPP;
+}
+
+static int io_connect(struct io_kiocb *req, bool force_nonblock)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_NET */
+
+struct io_poll_table {
+ struct poll_table_struct pt;
+ struct io_kiocb *req;
+ int error;
+};
+
+static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
+ struct wait_queue_head *head)
+{
+ if (unlikely(poll->head)) {
+ pt->error = -EINVAL;
+ return;
+ }
+
+ pt->error = 0;
+ poll->head = head;
+ add_wait_queue(head, &poll->wait);
+}
+
+static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
+ struct poll_table_struct *p)
+{
+ struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
+
+ __io_queue_proc(&pt->req->apoll->poll, pt, head);
+}
+
+static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
+ __poll_t mask, task_work_func_t func)
+{
+ struct task_struct *tsk;
+
+ /* for instances that support it check for an event match first: */
+ if (mask && !(mask & poll->events))
+ return 0;
+
+ trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask);
+
+ list_del_init(&poll->wait.entry);
+
+ tsk = req->task;
+ req->result = mask;
+ init_task_work(&req->task_work, func);
+ /*
+ * If this fails, then the task is exiting. If that is the case, then
+ * the exit check will ultimately cancel these work items. Hence we
+ * don't need to check here and handle it specifically.
+ */
+ task_work_add(tsk, &req->task_work, true);
+ wake_up_process(tsk);
+ return 1;
+}
+
+static void io_async_task_func(struct callback_head *cb)
+{
+ struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
+ struct async_poll *apoll = req->apoll;
+ struct io_ring_ctx *ctx = req->ctx;
+
+ trace_io_uring_task_run(req->ctx, req->opcode, req->user_data);
+
+ WARN_ON_ONCE(!list_empty(&req->apoll->poll.wait.entry));
+
+ if (hash_hashed(&req->hash_node)) {
+ spin_lock_irq(&ctx->completion_lock);
+ hash_del(&req->hash_node);
+ spin_unlock_irq(&ctx->completion_lock);
+ }
+
+ /* restore ->work in case we need to retry again */
+ memcpy(&req->work, &apoll->work, sizeof(req->work));
+
+ __set_current_state(TASK_RUNNING);
+ mutex_lock(&ctx->uring_lock);
+ __io_queue_sqe(req, NULL);
+ mutex_unlock(&ctx->uring_lock);
+
+ kfree(apoll);
+}
+
+static int io_async_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+ void *key)
+{
+ struct io_kiocb *req = wait->private;
+ struct io_poll_iocb *poll = &req->apoll->poll;
+
+ trace_io_uring_poll_wake(req->ctx, req->opcode, req->user_data,
+ key_to_poll(key));
+
+ return __io_async_wake(req, poll, key_to_poll(key), io_async_task_func);
+}
+
+static void io_poll_req_insert(struct io_kiocb *req)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ struct hlist_head *list;
+
+ list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)];
+ hlist_add_head(&req->hash_node, list);
+}
+
+static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
+ struct io_poll_iocb *poll,
+ struct io_poll_table *ipt, __poll_t mask,
+ wait_queue_func_t wake_func)
+ __acquires(&ctx->completion_lock)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ bool cancel = false;
+
+ poll->file = req->file;
+ poll->head = NULL;
+ poll->done = poll->canceled = false;
+ poll->events = mask;
+
+ ipt->pt._key = mask;
+ ipt->req = req;
+ ipt->error = -EINVAL;
+
+ INIT_LIST_HEAD(&poll->wait.entry);
+ init_waitqueue_func_entry(&poll->wait, wake_func);
+ poll->wait.private = req;
+
+ mask = vfs_poll(req->file, &ipt->pt) & poll->events;
+
+ spin_lock_irq(&ctx->completion_lock);
+ if (likely(poll->head)) {
+ spin_lock(&poll->head->lock);
+ if (unlikely(list_empty(&poll->wait.entry))) {
+ if (ipt->error)
+ cancel = true;
+ ipt->error = 0;
+ mask = 0;
+ }
+ if (mask || ipt->error)
+ list_del_init(&poll->wait.entry);
+ else if (cancel)
+ WRITE_ONCE(poll->canceled, true);
+ else if (!poll->done) /* actually waiting for an event */
+ io_poll_req_insert(req);
+ spin_unlock(&poll->head->lock);
+ }
+
+ return mask;
+}
+
+static bool io_arm_poll_handler(struct io_kiocb *req)
+{
+ const struct io_op_def *def = &io_op_defs[req->opcode];
+ struct io_ring_ctx *ctx = req->ctx;
+ struct async_poll *apoll;
+ struct io_poll_table ipt;
+ __poll_t mask, ret;
+
+ if (!req->file || !file_can_poll(req->file))
+ return false;
+ if (req->flags & (REQ_F_MUST_PUNT | REQ_F_POLLED))
+ return false;
+ if (!def->pollin && !def->pollout)
+ return false;
+
+ apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
+ if (unlikely(!apoll))
+ return false;
+
+ req->flags |= REQ_F_POLLED;
+ memcpy(&apoll->work, &req->work, sizeof(req->work));
+
+ /*
+ * Don't need a reference here, as we're adding it to the task
+ * task_works list. If the task exits, the list is pruned.
+ */
+ req->task = current;
+ req->apoll = apoll;
+ INIT_HLIST_NODE(&req->hash_node);
+
+ mask = 0;
+ if (def->pollin)
+ mask |= POLLIN | POLLRDNORM;
+ if (def->pollout)
+ mask |= POLLOUT | POLLWRNORM;
+ mask |= POLLERR | POLLPRI;
+
+ ipt.pt._qproc = io_async_queue_proc;
+
+ ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask,
+ io_async_wake);
+ if (ret) {
+ ipt.error = 0;
+ apoll->poll.done = true;
+ spin_unlock_irq(&ctx->completion_lock);
+ memcpy(&req->work, &apoll->work, sizeof(req->work));
+ kfree(apoll);
+ return false;
+ }
+ spin_unlock_irq(&ctx->completion_lock);
+ trace_io_uring_poll_arm(ctx, req->opcode, req->user_data, mask,
+ apoll->poll.events);
+ return true;
+}
+
+static bool __io_poll_remove_one(struct io_kiocb *req,
+ struct io_poll_iocb *poll)
+{
+ bool do_complete = false;
spin_lock(&poll->head->lock);
WRITE_ONCE(poll->canceled, true);
if (!list_empty(&poll->wait.entry)) {
list_del_init(&poll->wait.entry);
- io_queue_async_work(req);
+ do_complete = true;
}
spin_unlock(&poll->head->lock);
+ return do_complete;
+}
+
+static bool io_poll_remove_one(struct io_kiocb *req)
+{
+ bool do_complete;
+
+ if (req->opcode == IORING_OP_POLL_ADD) {
+ do_complete = __io_poll_remove_one(req, &req->poll);
+ } else {
+ /* non-poll requests have submit ref still */
+ do_complete = __io_poll_remove_one(req, &req->apoll->poll);
+ if (do_complete)
+ io_put_req(req);
+ }
+
hash_del(&req->hash_node);
+
+ if (do_complete) {
+ io_cqring_fill_event(req, -ECANCELED);
+ io_commit_cqring(req->ctx);
+ req->flags |= REQ_F_COMP_LOCKED;
+ io_put_req(req);
+ }
+
+ return do_complete;
}
static void io_poll_remove_all(struct io_ring_ctx *ctx)
@@ -3431,6 +4339,8 @@ static void io_poll_remove_all(struct io_ring_ctx *ctx)
io_poll_remove_one(req);
}
spin_unlock_irq(&ctx->completion_lock);
+
+ io_cqring_ev_posted(ctx);
}
static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
@@ -3440,10 +4350,11 @@ static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
list = &ctx->cancel_hash[hash_long(sqe_addr, ctx->cancel_hash_bits)];
hlist_for_each_entry(req, list, hash_node) {
- if (sqe_addr == req->user_data) {
- io_poll_remove_one(req);
+ if (sqe_addr != req->user_data)
+ continue;
+ if (io_poll_remove_one(req))
return 0;
- }
+ return -EALREADY;
}
return -ENOENT;
@@ -3489,186 +4400,54 @@ static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error)
struct io_ring_ctx *ctx = req->ctx;
req->poll.done = true;
- if (error)
- io_cqring_fill_event(req, error);
- else
- io_cqring_fill_event(req, mangle_poll(mask));
+ io_cqring_fill_event(req, error ? error : mangle_poll(mask));
io_commit_cqring(ctx);
}
-static void io_poll_complete_work(struct io_wq_work **workptr)
+static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt)
{
- struct io_wq_work *work = *workptr;
- struct io_kiocb *req = container_of(work, struct io_kiocb, work);
- struct io_poll_iocb *poll = &req->poll;
- struct poll_table_struct pt = { ._key = poll->events };
struct io_ring_ctx *ctx = req->ctx;
- struct io_kiocb *nxt = NULL;
- __poll_t mask = 0;
- int ret = 0;
- if (work->flags & IO_WQ_WORK_CANCEL) {
- WRITE_ONCE(poll->canceled, true);
- ret = -ECANCELED;
- } else if (READ_ONCE(poll->canceled)) {
- ret = -ECANCELED;
- }
-
- if (ret != -ECANCELED)
- mask = vfs_poll(poll->file, &pt) & poll->events;
-
- /*
- * Note that ->ki_cancel callers also delete iocb from active_reqs after
- * calling ->ki_cancel. We need the ctx_lock roundtrip here to
- * synchronize with them. In the cancellation case the list_del_init
- * itself is not actually needed, but harmless so we keep it in to
- * avoid further branches in the fast path.
- */
spin_lock_irq(&ctx->completion_lock);
- if (!mask && ret != -ECANCELED) {
- add_wait_queue(poll->head, &poll->wait);
- spin_unlock_irq(&ctx->completion_lock);
- return;
- }
hash_del(&req->hash_node);
- io_poll_complete(req, mask, ret);
+ io_poll_complete(req, req->result, 0);
+ req->flags |= REQ_F_COMP_LOCKED;
+ io_put_req_find_next(req, nxt);
spin_unlock_irq(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
-
- if (ret < 0)
- req_set_fail_links(req);
- io_put_req_find_next(req, &nxt);
- if (nxt)
- io_wq_assign_next(workptr, nxt);
}
-static void __io_poll_flush(struct io_ring_ctx *ctx, struct llist_node *nodes)
+static void io_poll_task_func(struct callback_head *cb)
{
- struct io_kiocb *req, *tmp;
- struct req_batch rb;
+ struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
+ struct io_kiocb *nxt = NULL;
- rb.to_free = rb.need_iter = 0;
- spin_lock_irq(&ctx->completion_lock);
- llist_for_each_entry_safe(req, tmp, nodes, llist_node) {
- hash_del(&req->hash_node);
- io_poll_complete(req, req->result, 0);
+ io_poll_task_handler(req, &nxt);
+ if (nxt) {
+ struct io_ring_ctx *ctx = nxt->ctx;
- if (refcount_dec_and_test(&req->refs) &&
- !io_req_multi_free(&rb, req)) {
- req->flags |= REQ_F_COMP_LOCKED;
- io_free_req(req);
- }
+ mutex_lock(&ctx->uring_lock);
+ __io_queue_sqe(nxt, NULL);
+ mutex_unlock(&ctx->uring_lock);
}
- spin_unlock_irq(&ctx->completion_lock);
-
- io_cqring_ev_posted(ctx);
- io_free_req_many(ctx, &rb);
-}
-
-static void io_poll_flush(struct io_wq_work **workptr)
-{
- struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
- struct llist_node *nodes;
-
- nodes = llist_del_all(&req->ctx->poll_llist);
- if (nodes)
- __io_poll_flush(req->ctx, nodes);
-}
-
-static void io_poll_trigger_evfd(struct io_wq_work **workptr)
-{
- struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-
- eventfd_signal(req->ctx->cq_ev_fd, 1);
- io_put_req(req);
}
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
void *key)
{
- struct io_poll_iocb *poll = wait->private;
- struct io_kiocb *req = container_of(poll, struct io_kiocb, poll);
- struct io_ring_ctx *ctx = req->ctx;
- __poll_t mask = key_to_poll(key);
+ struct io_kiocb *req = wait->private;
+ struct io_poll_iocb *poll = &req->poll;
- /* for instances that support it check for an event match first: */
- if (mask && !(mask & poll->events))
- return 0;
-
- list_del_init(&poll->wait.entry);
-
- /*
- * Run completion inline if we can. We're using trylock here because
- * we are violating the completion_lock -> poll wq lock ordering.
- * If we have a link timeout we're going to need the completion_lock
- * for finalizing the request, mark us as having grabbed that already.
- */
- if (mask) {
- unsigned long flags;
-
- if (llist_empty(&ctx->poll_llist) &&
- spin_trylock_irqsave(&ctx->completion_lock, flags)) {
- bool trigger_ev;
-
- hash_del(&req->hash_node);
- io_poll_complete(req, mask, 0);
-
- trigger_ev = io_should_trigger_evfd(ctx);
- if (trigger_ev && eventfd_signal_count()) {
- trigger_ev = false;
- req->work.func = io_poll_trigger_evfd;
- } else {
- req->flags |= REQ_F_COMP_LOCKED;
- io_put_req(req);
- req = NULL;
- }
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
- __io_cqring_ev_posted(ctx, trigger_ev);
- } else {
- req->result = mask;
- req->llist_node.next = NULL;
- /* if the list wasn't empty, we're done */
- if (!llist_add(&req->llist_node, &ctx->poll_llist))
- req = NULL;
- else
- req->work.func = io_poll_flush;
- }
- }
- if (req)
- io_queue_async_work(req);
-
- return 1;
+ return __io_async_wake(req, poll, key_to_poll(key), io_poll_task_func);
}
-struct io_poll_table {
- struct poll_table_struct pt;
- struct io_kiocb *req;
- int error;
-};
-
static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
struct poll_table_struct *p)
{
struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
- if (unlikely(pt->req->poll.head)) {
- pt->error = -EINVAL;
- return;
- }
-
- pt->error = 0;
- pt->req->poll.head = head;
- add_wait_queue(head, &pt->req->poll.wait);
-}
-
-static void io_poll_req_insert(struct io_kiocb *req)
-{
- struct io_ring_ctx *ctx = req->ctx;
- struct hlist_head *list;
-
- list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)];
- hlist_add_head(&req->hash_node, list);
+ __io_queue_proc(&pt->req->poll, pt, head);
}
static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -3685,55 +4464,29 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
events = READ_ONCE(sqe->poll_events);
poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
+
+ /*
+ * Don't need a reference here, as we're adding it to the task
+ * task_works list. If the task exits, the list is pruned.
+ */
+ req->task = current;
return 0;
}
-static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
+static int io_poll_add(struct io_kiocb *req)
{
struct io_poll_iocb *poll = &req->poll;
struct io_ring_ctx *ctx = req->ctx;
struct io_poll_table ipt;
- bool cancel = false;
__poll_t mask;
- INIT_IO_WORK(&req->work, io_poll_complete_work);
INIT_HLIST_NODE(&req->hash_node);
-
- poll->head = NULL;
- poll->done = false;
- poll->canceled = false;
-
- ipt.pt._qproc = io_poll_queue_proc;
- ipt.pt._key = poll->events;
- ipt.req = req;
- ipt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */
-
- /* initialized the list so that we can do list_empty checks */
- INIT_LIST_HEAD(&poll->wait.entry);
- init_waitqueue_func_entry(&poll->wait, io_poll_wake);
- poll->wait.private = poll;
-
INIT_LIST_HEAD(&req->list);
+ ipt.pt._qproc = io_poll_queue_proc;
- mask = vfs_poll(poll->file, &ipt.pt) & poll->events;
+ mask = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events,
+ io_poll_wake);
- spin_lock_irq(&ctx->completion_lock);
- if (likely(poll->head)) {
- spin_lock(&poll->head->lock);
- if (unlikely(list_empty(&poll->wait.entry))) {
- if (ipt.error)
- cancel = true;
- ipt.error = 0;
- mask = 0;
- }
- if (mask || ipt.error)
- list_del_init(&poll->wait.entry);
- else if (cancel)
- WRITE_ONCE(poll->canceled, true);
- else if (!poll->done) /* actually waiting for an event */
- io_poll_req_insert(req);
- spin_unlock(&poll->head->lock);
- }
if (mask) { /* no async, we'd stolen it */
ipt.error = 0;
io_poll_complete(req, mask, 0);
@@ -3742,7 +4495,7 @@ static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
if (mask) {
io_cqring_ev_posted(ctx);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
}
return ipt.error;
}
@@ -3991,7 +4744,7 @@ static int io_async_cancel_one(struct io_ring_ctx *ctx, void *sqe_addr)
static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
struct io_kiocb *req, __u64 sqe_addr,
- struct io_kiocb **nxt, int success_ret)
+ int success_ret)
{
unsigned long flags;
int ret;
@@ -4017,7 +4770,7 @@ static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
if (ret < 0)
req_set_fail_links(req);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
}
static int io_async_cancel_prep(struct io_kiocb *req,
@@ -4033,11 +4786,11 @@ static int io_async_cancel_prep(struct io_kiocb *req,
return 0;
}
-static int io_async_cancel(struct io_kiocb *req, struct io_kiocb **nxt)
+static int io_async_cancel(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
- io_async_find_and_cancel(ctx, req, req->cancel.addr, nxt, 0);
+ io_async_find_and_cancel(ctx, req, req->cancel.addr, 0);
return 0;
}
@@ -4083,6 +4836,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
{
ssize_t ret = 0;
+ if (!sqe)
+ return 0;
+
if (io_op_defs[req->opcode].file_table) {
ret = io_grab_files(req);
if (unlikely(ret))
@@ -4169,6 +4925,15 @@ static int io_req_defer_prep(struct io_kiocb *req,
case IORING_OP_EPOLL_CTL:
ret = io_epoll_ctl_prep(req, sqe);
break;
+ case IORING_OP_SPLICE:
+ ret = io_splice_prep(req, sqe);
+ break;
+ case IORING_OP_PROVIDE_BUFFERS:
+ ret = io_provide_buffers_prep(req, sqe);
+ break;
+ case IORING_OP_REMOVE_BUFFERS:
+ ret = io_remove_buffers_prep(req, sqe);
+ break;
default:
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
req->opcode);
@@ -4207,8 +4972,51 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -EIOCBQUEUED;
}
+static void io_cleanup_req(struct io_kiocb *req)
+{
+ struct io_async_ctx *io = req->io;
+
+ switch (req->opcode) {
+ case IORING_OP_READV:
+ case IORING_OP_READ_FIXED:
+ case IORING_OP_READ:
+ if (req->flags & REQ_F_BUFFER_SELECTED)
+ kfree((void *)(unsigned long)req->rw.addr);
+ /* fallthrough */
+ case IORING_OP_WRITEV:
+ case IORING_OP_WRITE_FIXED:
+ case IORING_OP_WRITE:
+ if (io->rw.iov != io->rw.fast_iov)
+ kfree(io->rw.iov);
+ break;
+ case IORING_OP_RECVMSG:
+ if (req->flags & REQ_F_BUFFER_SELECTED)
+ kfree(req->sr_msg.kbuf);
+ /* fallthrough */
+ case IORING_OP_SENDMSG:
+ if (io->msg.iov != io->msg.fast_iov)
+ kfree(io->msg.iov);
+ break;
+ case IORING_OP_RECV:
+ if (req->flags & REQ_F_BUFFER_SELECTED)
+ kfree(req->sr_msg.kbuf);
+ break;
+ case IORING_OP_OPENAT:
+ case IORING_OP_OPENAT2:
+ case IORING_OP_STATX:
+ putname(req->open.filename);
+ break;
+ case IORING_OP_SPLICE:
+ io_put_file(req, req->splice.file_in,
+ (req->splice.flags & SPLICE_F_FD_IN_FIXED));
+ break;
+ }
+
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+}
+
static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
- struct io_kiocb **nxt, bool force_nonblock)
+ bool force_nonblock)
{
struct io_ring_ctx *ctx = req->ctx;
int ret;
@@ -4225,7 +5033,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret < 0)
break;
}
- ret = io_read(req, nxt, force_nonblock);
+ ret = io_read(req, force_nonblock);
break;
case IORING_OP_WRITEV:
case IORING_OP_WRITE_FIXED:
@@ -4235,7 +5043,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret < 0)
break;
}
- ret = io_write(req, nxt, force_nonblock);
+ ret = io_write(req, force_nonblock);
break;
case IORING_OP_FSYNC:
if (sqe) {
@@ -4243,7 +5051,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret < 0)
break;
}
- ret = io_fsync(req, nxt, force_nonblock);
+ ret = io_fsync(req, force_nonblock);
break;
case IORING_OP_POLL_ADD:
if (sqe) {
@@ -4251,7 +5059,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_poll_add(req, nxt);
+ ret = io_poll_add(req);
break;
case IORING_OP_POLL_REMOVE:
if (sqe) {
@@ -4267,7 +5075,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret < 0)
break;
}
- ret = io_sync_file_range(req, nxt, force_nonblock);
+ ret = io_sync_file_range(req, force_nonblock);
break;
case IORING_OP_SENDMSG:
case IORING_OP_SEND:
@@ -4277,9 +5085,9 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
break;
}
if (req->opcode == IORING_OP_SENDMSG)
- ret = io_sendmsg(req, nxt, force_nonblock);
+ ret = io_sendmsg(req, force_nonblock);
else
- ret = io_send(req, nxt, force_nonblock);
+ ret = io_send(req, force_nonblock);
break;
case IORING_OP_RECVMSG:
case IORING_OP_RECV:
@@ -4289,9 +5097,9 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
break;
}
if (req->opcode == IORING_OP_RECVMSG)
- ret = io_recvmsg(req, nxt, force_nonblock);
+ ret = io_recvmsg(req, force_nonblock);
else
- ret = io_recv(req, nxt, force_nonblock);
+ ret = io_recv(req, force_nonblock);
break;
case IORING_OP_TIMEOUT:
if (sqe) {
@@ -4315,7 +5123,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_accept(req, nxt, force_nonblock);
+ ret = io_accept(req, force_nonblock);
break;
case IORING_OP_CONNECT:
if (sqe) {
@@ -4323,7 +5131,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_connect(req, nxt, force_nonblock);
+ ret = io_connect(req, force_nonblock);
break;
case IORING_OP_ASYNC_CANCEL:
if (sqe) {
@@ -4331,7 +5139,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_async_cancel(req, nxt);
+ ret = io_async_cancel(req);
break;
case IORING_OP_FALLOCATE:
if (sqe) {
@@ -4339,7 +5147,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_fallocate(req, nxt, force_nonblock);
+ ret = io_fallocate(req, force_nonblock);
break;
case IORING_OP_OPENAT:
if (sqe) {
@@ -4347,7 +5155,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_openat(req, nxt, force_nonblock);
+ ret = io_openat(req, force_nonblock);
break;
case IORING_OP_CLOSE:
if (sqe) {
@@ -4355,7 +5163,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_close(req, nxt, force_nonblock);
+ ret = io_close(req, force_nonblock);
break;
case IORING_OP_FILES_UPDATE:
if (sqe) {
@@ -4371,7 +5179,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_statx(req, nxt, force_nonblock);
+ ret = io_statx(req, force_nonblock);
break;
case IORING_OP_FADVISE:
if (sqe) {
@@ -4379,7 +5187,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_fadvise(req, nxt, force_nonblock);
+ ret = io_fadvise(req, force_nonblock);
break;
case IORING_OP_MADVISE:
if (sqe) {
@@ -4387,7 +5195,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_madvise(req, nxt, force_nonblock);
+ ret = io_madvise(req, force_nonblock);
break;
case IORING_OP_OPENAT2:
if (sqe) {
@@ -4395,7 +5203,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_openat2(req, nxt, force_nonblock);
+ ret = io_openat2(req, force_nonblock);
break;
case IORING_OP_EPOLL_CTL:
if (sqe) {
@@ -4403,7 +5211,31 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_epoll_ctl(req, nxt, force_nonblock);
+ ret = io_epoll_ctl(req, force_nonblock);
+ break;
+ case IORING_OP_SPLICE:
+ if (sqe) {
+ ret = io_splice_prep(req, sqe);
+ if (ret < 0)
+ break;
+ }
+ ret = io_splice(req, force_nonblock);
+ break;
+ case IORING_OP_PROVIDE_BUFFERS:
+ if (sqe) {
+ ret = io_provide_buffers_prep(req, sqe);
+ if (ret)
+ break;
+ }
+ ret = io_provide_buffers(req, force_nonblock);
+ break;
+ case IORING_OP_REMOVE_BUFFERS:
+ if (sqe) {
+ ret = io_remove_buffers_prep(req, sqe);
+ if (ret)
+ break;
+ }
+ ret = io_remove_buffers(req, force_nonblock);
break;
default:
ret = -EINVAL;
@@ -4436,7 +5268,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
{
struct io_wq_work *work = *workptr;
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
- struct io_kiocb *nxt = NULL;
int ret = 0;
/* if NO_CANCEL is set, we must still run the work */
@@ -4446,10 +5277,8 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
}
if (!ret) {
- req->has_user = (work->flags & IO_WQ_WORK_HAS_MM) != 0;
- req->in_async = true;
do {
- ret = io_issue_sqe(req, NULL, &nxt, false);
+ ret = io_issue_sqe(req, NULL, false);
/*
* We can get EAGAIN for polled IO even though we're
* forcing a sync submission from here, since we can't
@@ -4461,25 +5290,20 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
} while (1);
}
- /* drop submission reference */
- io_put_req(req);
-
if (ret) {
req_set_fail_links(req);
io_cqring_add_event(req, ret);
io_put_req(req);
}
- /* if a dependent link is ready, pass it back */
- if (!ret && nxt)
- io_wq_assign_next(workptr, nxt);
+ io_steal_work(req, workptr);
}
static int io_req_needs_file(struct io_kiocb *req, int fd)
{
if (!io_op_defs[req->opcode].needs_file)
return 0;
- if (fd == -1 && io_op_defs[req->opcode].fd_non_neg)
+ if ((fd == -1 || fd == AT_FDCWD) && io_op_defs[req->opcode].fd_non_neg)
return 0;
return 1;
}
@@ -4493,12 +5317,38 @@ static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
return table->files[index & IORING_FILE_TABLE_MASK];;
}
+static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
+ int fd, struct file **out_file, bool fixed)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ struct file *file;
+
+ if (fixed) {
+ if (unlikely(!ctx->file_data ||
+ (unsigned) fd >= ctx->nr_user_files))
+ return -EBADF;
+ fd = array_index_nospec(fd, ctx->nr_user_files);
+ file = io_file_from_index(ctx, fd);
+ if (!file)
+ return -EBADF;
+ percpu_ref_get(&ctx->file_data->refs);
+ } else {
+ trace_io_uring_file_get(ctx, fd);
+ file = __io_file_get(state, fd);
+ if (unlikely(!file))
+ return -EBADF;
+ }
+
+ *out_file = file;
+ return 0;
+}
+
static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
- struct io_ring_ctx *ctx = req->ctx;
unsigned flags;
int fd;
+ bool fixed;
flags = READ_ONCE(sqe->flags);
fd = READ_ONCE(sqe->fd);
@@ -4506,26 +5356,11 @@ static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
if (!io_req_needs_file(req, fd))
return 0;
- if (flags & IOSQE_FIXED_FILE) {
- if (unlikely(!ctx->file_data ||
- (unsigned) fd >= ctx->nr_user_files))
- return -EBADF;
- fd = array_index_nospec(fd, ctx->nr_user_files);
- req->file = io_file_from_index(ctx, fd);
- if (!req->file)
- return -EBADF;
- req->flags |= REQ_F_FIXED_FILE;
- percpu_ref_get(&ctx->file_data->refs);
- } else {
- if (req->needs_fixed_file)
- return -EBADF;
- trace_io_uring_file_get(ctx, fd);
- req->file = io_file_get(state, fd);
- if (unlikely(!req->file))
- return -EBADF;
- }
+ fixed = (flags & IOSQE_FIXED_FILE);
+ if (unlikely(!fixed && req->needs_fixed_file))
+ return -EBADF;
- return 0;
+ return io_file_get(state, req, fd, &req->file, fixed);
}
static int io_grab_files(struct io_kiocb *req)
@@ -4587,8 +5422,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
if (prev) {
req_set_fail_links(prev);
- io_async_find_and_cancel(ctx, req, prev->user_data, NULL,
- -ETIME);
+ io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
io_put_req(prev);
} else {
io_cqring_add_event(req, -ETIME);
@@ -4625,6 +5459,9 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
if (!(req->flags & REQ_F_LINK))
return NULL;
+ /* for polled retry, if flag is set, we already went through here */
+ if (req->flags & REQ_F_POLLED)
+ return NULL;
nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb,
link_list);
@@ -4638,13 +5475,23 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_kiocb *linked_timeout;
- struct io_kiocb *nxt = NULL;
+ struct io_kiocb *nxt;
+ const struct cred *old_creds = NULL;
int ret;
again:
linked_timeout = io_prep_linked_timeout(req);
- ret = io_issue_sqe(req, sqe, &nxt, true);
+ if (req->work.creds && req->work.creds != current_cred()) {
+ if (old_creds)
+ revert_creds(old_creds);
+ if (old_creds == req->work.creds)
+ old_creds = NULL; /* restored original creds */
+ else
+ old_creds = override_creds(req->work.creds);
+ }
+
+ ret = io_issue_sqe(req, sqe, true);
/*
* We async punt it if the file wasn't marked NOWAIT, or if the file
@@ -4652,6 +5499,11 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
*/
if (ret == -EAGAIN && (!(req->flags & REQ_F_NOWAIT) ||
(req->flags & REQ_F_MUST_PUNT))) {
+ if (io_arm_poll_handler(req)) {
+ if (linked_timeout)
+ io_queue_linked_timeout(linked_timeout);
+ goto exit;
+ }
punt:
if (io_op_defs[req->opcode].file_table) {
ret = io_grab_files(req);
@@ -4664,12 +5516,13 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
* submit reference when the iocb is actually submitted.
*/
io_queue_async_work(req);
- goto done_req;
+ goto exit;
}
err:
+ nxt = NULL;
/* drop submission reference */
- io_put_req(req);
+ io_put_req_find_next(req, &nxt);
if (linked_timeout) {
if (!ret)
@@ -4684,15 +5537,16 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
req_set_fail_links(req);
io_put_req(req);
}
-done_req:
if (nxt) {
req = nxt;
- nxt = NULL;
if (req->flags & REQ_F_FORCE_ASYNC)
goto punt;
goto again;
}
+exit:
+ if (old_creds)
+ revert_creds(old_creds);
}
static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -4732,12 +5586,12 @@ static inline void io_queue_link_head(struct io_kiocb *req)
}
#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \
- IOSQE_IO_HARDLINK | IOSQE_ASYNC)
+ IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
+ IOSQE_BUFFER_SELECT)
static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
struct io_submit_state *state, struct io_kiocb **link)
{
- const struct cred *old_creds = NULL;
struct io_ring_ctx *ctx = req->ctx;
unsigned int sqe_flags;
int ret, id;
@@ -4750,29 +5604,32 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
goto err_req;
}
+ if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
+ !io_op_defs[req->opcode].buffer_select) {
+ ret = -EOPNOTSUPP;
+ goto err_req;
+ }
+
id = READ_ONCE(sqe->personality);
if (id) {
- const struct cred *personality_creds;
-
- personality_creds = idr_find(&ctx->personality_idr, id);
- if (unlikely(!personality_creds)) {
+ req->work.creds = idr_find(&ctx->personality_idr, id);
+ if (unlikely(!req->work.creds)) {
ret = -EINVAL;
goto err_req;
}
- old_creds = override_creds(personality_creds);
+ get_cred(req->work.creds);
}
/* same numerical values with corresponding REQ_F_*, safe to copy */
- req->flags |= sqe_flags & (IOSQE_IO_DRAIN|IOSQE_IO_HARDLINK|
- IOSQE_ASYNC);
+ req->flags |= sqe_flags & (IOSQE_IO_DRAIN | IOSQE_IO_HARDLINK |
+ IOSQE_ASYNC | IOSQE_FIXED_FILE |
+ IOSQE_BUFFER_SELECT);
ret = io_req_set_file(state, req, sqe);
if (unlikely(ret)) {
err_req:
io_cqring_add_event(req, ret);
io_double_put_req(req);
- if (old_creds)
- revert_creds(old_creds);
return false;
}
@@ -4824,6 +5681,11 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (sqe_flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) {
req->flags |= REQ_F_LINK;
INIT_LIST_HEAD(&req->link_list);
+
+ if (io_alloc_async_ctx(req)) {
+ ret = -EAGAIN;
+ goto err_req;
+ }
ret = io_req_defer_prep(req, sqe);
if (ret)
req->flags |= REQ_F_FAIL_LINK;
@@ -4833,8 +5695,6 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
}
}
- if (old_creds)
- revert_creds(old_creds);
return true;
}
@@ -4950,6 +5810,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
for (i = 0; i < nr; i++) {
const struct io_uring_sqe *sqe;
struct io_kiocb *req;
+ int err;
req = io_get_req(ctx, statep);
if (unlikely(!req)) {
@@ -4966,21 +5827,23 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
submitted++;
if (unlikely(req->opcode >= IORING_OP_LAST)) {
- io_cqring_add_event(req, -EINVAL);
+ err = -EINVAL;
+fail_req:
+ io_cqring_add_event(req, err);
io_double_put_req(req);
break;
}
if (io_op_defs[req->opcode].needs_mm && !*mm) {
mm_fault = mm_fault || !mmget_not_zero(ctx->sqo_mm);
- if (!mm_fault) {
- use_mm(ctx->sqo_mm);
- *mm = ctx->sqo_mm;
+ if (unlikely(mm_fault)) {
+ err = -EFAULT;
+ goto fail_req;
}
+ use_mm(ctx->sqo_mm);
+ *mm = ctx->sqo_mm;
}
- req->has_user = *mm != NULL;
- req->in_async = async;
req->needs_fixed_file = async;
trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
true, async);
@@ -5011,9 +5874,8 @@ static int io_sq_thread(void *data)
const struct cred *old_cred;
mm_segment_t old_fs;
DEFINE_WAIT(wait);
- unsigned inflight;
unsigned long timeout;
- int ret;
+ int ret = 0;
complete(&ctx->completions[1]);
@@ -5021,39 +5883,19 @@ static int io_sq_thread(void *data)
set_fs(USER_DS);
old_cred = override_creds(ctx->creds);
- ret = timeout = inflight = 0;
+ timeout = jiffies + ctx->sq_thread_idle;
while (!kthread_should_park()) {
unsigned int to_submit;
- if (inflight) {
+ if (!list_empty(&ctx->poll_list)) {
unsigned nr_events = 0;
- if (ctx->flags & IORING_SETUP_IOPOLL) {
- /*
- * inflight is the count of the maximum possible
- * entries we submitted, but it can be smaller
- * if we dropped some of them. If we don't have
- * poll entries available, then we know that we
- * have nothing left to poll for. Reset the
- * inflight count to zero in that case.
- */
- mutex_lock(&ctx->uring_lock);
- if (!list_empty(&ctx->poll_list))
- __io_iopoll_check(ctx, &nr_events, 0);
- else
- inflight = 0;
- mutex_unlock(&ctx->uring_lock);
- } else {
- /*
- * Normal IO, just pretend everything completed.
- * We don't have to poll completions for that.
- */
- nr_events = inflight;
- }
-
- inflight -= nr_events;
- if (!inflight)
+ mutex_lock(&ctx->uring_lock);
+ if (!list_empty(&ctx->poll_list))
+ io_iopoll_getevents(ctx, &nr_events, 0);
+ else
timeout = jiffies + ctx->sq_thread_idle;
+ mutex_unlock(&ctx->uring_lock);
}
to_submit = io_sqring_entries(ctx);
@@ -5064,20 +5906,6 @@ static int io_sq_thread(void *data)
*/
if (!to_submit || ret == -EBUSY) {
/*
- * We're polling. If we're within the defined idle
- * period, then let us spin without work before going
- * to sleep. The exception is if we got EBUSY doing
- * more IO, we should wait for the application to
- * reap events and wake us up.
- */
- if (inflight ||
- (!time_after(jiffies, timeout) && ret != -EBUSY &&
- !percpu_ref_is_dying(&ctx->refs))) {
- cond_resched();
- continue;
- }
-
- /*
* Drop cur_mm before scheduling, we can't hold it for
* long periods (or over schedule()). Do this before
* adding ourselves to the waitqueue, as the unuse/drop
@@ -5089,9 +5917,38 @@ static int io_sq_thread(void *data)
cur_mm = NULL;
}
+ /*
+ * We're polling. If we're within the defined idle
+ * period, then let us spin without work before going
+ * to sleep. The exception is if we got EBUSY doing
+ * more IO, we should wait for the application to
+ * reap events and wake us up.
+ */
+ if (!list_empty(&ctx->poll_list) ||
+ (!time_after(jiffies, timeout) && ret != -EBUSY &&
+ !percpu_ref_is_dying(&ctx->refs))) {
+ if (current->task_works)
+ task_work_run();
+ cond_resched();
+ continue;
+ }
+
prepare_to_wait(&ctx->sqo_wait, &wait,
TASK_INTERRUPTIBLE);
+ /*
+ * While doing polled IO, before going to sleep, we need
+ * to check if there are new reqs added to poll_list, it
+ * is because reqs may have been punted to io worker and
+ * will be added to poll_list later, hence check the
+ * poll_list again.
+ */
+ if ((ctx->flags & IORING_SETUP_IOPOLL) &&
+ !list_empty_careful(&ctx->poll_list)) {
+ finish_wait(&ctx->sqo_wait, &wait);
+ continue;
+ }
+
/* Tell userspace we may need a wakeup call */
ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
/* make sure to read SQ tail after writing flags */
@@ -5103,6 +5960,10 @@ static int io_sq_thread(void *data)
finish_wait(&ctx->sqo_wait, &wait);
break;
}
+ if (current->task_works) {
+ task_work_run();
+ continue;
+ }
if (signal_pending(current))
flush_signals(current);
schedule();
@@ -5119,10 +5980,12 @@ static int io_sq_thread(void *data)
mutex_lock(&ctx->uring_lock);
ret = io_submit_sqes(ctx, to_submit, NULL, -1, &cur_mm, true);
mutex_unlock(&ctx->uring_lock);
- if (ret > 0)
- inflight += ret;
+ timeout = jiffies + ctx->sq_thread_idle;
}
+ if (current->task_works)
+ task_work_run();
+
set_fs(old_fs);
if (cur_mm) {
unuse_mm(cur_mm);
@@ -5187,8 +6050,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
struct io_rings *rings = ctx->rings;
int ret = 0;
- if (io_cqring_events(ctx, false) >= min_events)
- return 0;
+ do {
+ if (io_cqring_events(ctx, false) >= min_events)
+ return 0;
+ if (!current->task_works)
+ break;
+ task_work_run();
+ } while (1);
if (sig) {
#ifdef CONFIG_COMPAT
@@ -5208,6 +6076,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
do {
prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
TASK_INTERRUPTIBLE);
+ if (current->task_works)
+ task_work_run();
if (io_should_wake(&iowq, false))
break;
schedule();
@@ -5254,6 +6124,23 @@ static void io_file_ref_kill(struct percpu_ref *ref)
complete(&data->done);
}
+static void io_file_ref_exit_and_free(struct work_struct *work)
+{
+ struct fixed_file_data *data;
+
+ data = container_of(work, struct fixed_file_data, ref_work);
+
+ /*
+ * Ensure any percpu-ref atomic switch callback has run, it could have
+ * been in progress when the files were being unregistered. Once
+ * that's done, we can safely exit and free the ref and containing
+ * data structure.
+ */
+ rcu_barrier();
+ percpu_ref_exit(&data->refs);
+ kfree(data);
+}
+
static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
{
struct fixed_file_data *data = ctx->file_data;
@@ -5266,14 +6153,14 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
flush_work(&data->ref_work);
wait_for_completion(&data->done);
io_ring_file_ref_flush(data);
- percpu_ref_exit(&data->refs);
__io_sqe_files_unregister(ctx);
nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE);
for (i = 0; i < nr_tables; i++)
kfree(data->table[i].files);
kfree(data->table);
- kfree(data);
+ INIT_WORK(&data->ref_work, io_file_ref_exit_and_free);
+ queue_work(system_wq, &data->ref_work);
ctx->file_data = NULL;
ctx->nr_user_files = 0;
return 0;
@@ -5500,7 +6387,6 @@ static void io_ring_file_put(struct io_ring_ctx *ctx, struct file *file)
struct io_file_put {
struct llist_node llist;
struct file *file;
- struct completion *done;
};
static void io_ring_file_ref_flush(struct fixed_file_data *data)
@@ -5511,10 +6397,7 @@ static void io_ring_file_ref_flush(struct fixed_file_data *data)
while ((node = llist_del_all(&data->put_llist)) != NULL) {
llist_for_each_entry_safe(pfile, tmp, node, llist) {
io_ring_file_put(data->ctx, pfile->file);
- if (pfile->done)
- complete(pfile->done);
- else
- kfree(pfile);
+ kfree(pfile);
}
}
}
@@ -5525,7 +6408,6 @@ static void io_ring_file_ref_switch(struct work_struct *work)
data = container_of(work, struct fixed_file_data, ref_work);
io_ring_file_ref_flush(data);
- percpu_ref_get(&data->refs);
percpu_ref_switch_to_percpu(&data->refs);
}
@@ -5701,41 +6583,27 @@ static void io_atomic_switch(struct percpu_ref *ref)
{
struct fixed_file_data *data;
+ /*
+ * Juggle reference to ensure we hit zero, if needed, so we can
+ * switch back to percpu mode
+ */
data = container_of(ref, struct fixed_file_data, refs);
- clear_bit(FFD_F_ATOMIC, &data->state);
+ percpu_ref_put(&data->refs);
+ percpu_ref_get(&data->refs);
}
-static bool io_queue_file_removal(struct fixed_file_data *data,
+static int io_queue_file_removal(struct fixed_file_data *data,
struct file *file)
{
- struct io_file_put *pfile, pfile_stack;
- DECLARE_COMPLETION_ONSTACK(done);
+ struct io_file_put *pfile;
- /*
- * If we fail allocating the struct we need for doing async reomval
- * of this file, just punt to sync and wait for it.
- */
pfile = kzalloc(sizeof(*pfile), GFP_KERNEL);
- if (!pfile) {
- pfile = &pfile_stack;
- pfile->done = &done;
- }
+ if (!pfile)
+ return -ENOMEM;
pfile->file = file;
llist_add(&pfile->llist, &data->put_llist);
-
- if (pfile == &pfile_stack) {
- if (!test_and_set_bit(FFD_F_ATOMIC, &data->state)) {
- percpu_ref_put(&data->refs);
- percpu_ref_switch_to_atomic(&data->refs,
- io_atomic_switch);
- }
- wait_for_completion(&done);
- flush_work(&data->ref_work);
- return false;
- }
-
- return true;
+ return 0;
}
static int __io_sqe_files_update(struct io_ring_ctx *ctx,
@@ -5770,9 +6638,11 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
index = i & IORING_FILE_TABLE_MASK;
if (table->files[index]) {
file = io_file_from_index(ctx, index);
+ err = io_queue_file_removal(data, file);
+ if (err)
+ break;
table->files[index] = NULL;
- if (io_queue_file_removal(data, file))
- ref_switch = true;
+ ref_switch = true;
}
if (fd != -1) {
file = fget(fd);
@@ -5803,10 +6673,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
up->offset++;
}
- if (ref_switch && !test_and_set_bit(FFD_F_ATOMIC, &data->state)) {
- percpu_ref_put(&data->refs);
+ if (ref_switch)
percpu_ref_switch_to_atomic(&data->refs, io_atomic_switch);
- }
return done ? done : err;
}
@@ -5827,20 +6695,14 @@ static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg,
return __io_sqe_files_update(ctx, &up, nr_args);
}
-static void io_put_work(struct io_wq_work *work)
+static void io_free_work(struct io_wq_work *work)
{
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+ /* Consider that io_steal_work() relies on this ref */
io_put_req(req);
}
-static void io_get_work(struct io_wq_work *work)
-{
- struct io_kiocb *req = container_of(work, struct io_kiocb, work);
-
- refcount_inc(&req->refs);
-}
-
static int io_init_wq_offload(struct io_ring_ctx *ctx,
struct io_uring_params *p)
{
@@ -5851,8 +6713,7 @@ static int io_init_wq_offload(struct io_ring_ctx *ctx,
int ret = 0;
data.user = ctx->user;
- data.get_work = io_get_work;
- data.put_work = io_put_work;
+ data.free_work = io_free_work;
if (!(p->flags & IORING_SETUP_ATTACH_WQ)) {
/* Do QD, or 4 * CPUS, whatever is smallest */
@@ -6254,6 +7115,21 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
return -ENXIO;
}
+static int __io_destroy_buffers(int id, void *p, void *data)
+{
+ struct io_ring_ctx *ctx = data;
+ struct io_buffer *buf = p;
+
+ __io_remove_buffers(ctx, buf, id, -1U);
+ return 0;
+}
+
+static void io_destroy_buffers(struct io_ring_ctx *ctx)
+{
+ idr_for_each(&ctx->io_buffer_idr, __io_destroy_buffers, ctx);
+ idr_destroy(&ctx->io_buffer_idr);
+}
+
static void io_ring_ctx_free(struct io_ring_ctx *ctx)
{
io_finish_async(ctx);
@@ -6264,6 +7140,8 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_sqe_buffer_unregister(ctx);
io_sqe_files_unregister(ctx);
io_eventfd_unregister(ctx);
+ io_destroy_buffers(ctx);
+ idr_destroy(&ctx->personality_idr);
#if defined(CONFIG_UNIX)
if (ctx->ring_sock) {
@@ -6301,7 +7179,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
if (READ_ONCE(ctx->rings->sq.tail) - ctx->cached_sq_head !=
ctx->rings->sq_ring_entries)
mask |= EPOLLOUT | EPOLLWRNORM;
- if (READ_ONCE(ctx->rings->cq.head) != ctx->cached_cq_tail)
+ if (io_cqring_events(ctx, false))
mask |= EPOLLIN | EPOLLRDNORM;
return mask;
@@ -6393,6 +7271,29 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
if (!cancel_req)
break;
+ if (cancel_req->flags & REQ_F_OVERFLOW) {
+ spin_lock_irq(&ctx->completion_lock);
+ list_del(&cancel_req->list);
+ cancel_req->flags &= ~REQ_F_OVERFLOW;
+ if (list_empty(&ctx->cq_overflow_list)) {
+ clear_bit(0, &ctx->sq_check_overflow);
+ clear_bit(0, &ctx->cq_check_overflow);
+ }
+ spin_unlock_irq(&ctx->completion_lock);
+
+ WRITE_ONCE(ctx->rings->cq_overflow,
+ atomic_inc_return(&ctx->cached_cq_overflow));
+
+ /*
+ * Put inflight ref and overflow ref. If that's
+ * all we had, then we're done with this request.
+ */
+ if (refcount_sub_and_test(2, &cancel_req->refs)) {
+ io_put_req(cancel_req);
+ continue;
+ }
+ }
+
io_wq_cancel_work(ctx->io_wq, &cancel_req->work);
io_put_req(cancel_req);
schedule();
@@ -6405,6 +7306,13 @@ static int io_uring_flush(struct file *file, void *data)
struct io_ring_ctx *ctx = file->private_data;
io_uring_cancel_files(ctx, data);
+
+ /*
+ * If the task is going away, cancel work it may have pending
+ */
+ if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
+ io_wq_cancel_pid(ctx->io_wq, task_pid_vnr(current));
+
return 0;
}
@@ -6487,6 +7395,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
int submitted = 0;
struct fd f;
+ if (current->task_works)
+ task_work_run();
+
if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP))
return -EINVAL;
@@ -6533,7 +7444,14 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
min_complete = min(min_complete, ctx->cq_entries);
- if (ctx->flags & IORING_SETUP_IOPOLL) {
+ /*
+ * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
+ * space applications don't need to do io completion events
+ * polling again, they can rely on io_sq_thread to do polling
+ * work, which can reduce cpu usage and uring_lock contention.
+ */
+ if (ctx->flags & IORING_SETUP_IOPOLL &&
+ !(ctx->flags & IORING_SETUP_SQPOLL)) {
ret = io_iopoll_check(ctx, &nr_events, min_complete);
} else {
ret = io_cqring_wait(ctx, min_complete, sig, sigsz);
@@ -6547,6 +7465,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
return submitted ? submitted : ret;
}
+#ifdef CONFIG_PROC_FS
static int io_uring_show_cred(int id, void *p, void *data)
{
const struct cred *cred = p;
@@ -6608,6 +7527,17 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
seq_printf(m, "Personalities:\n");
idr_for_each(&ctx->personality_idr, io_uring_show_cred, m);
}
+ seq_printf(m, "PollList:\n");
+ spin_lock_irq(&ctx->completion_lock);
+ for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
+ struct hlist_head *list = &ctx->cancel_hash[i];
+ struct io_kiocb *req;
+
+ hlist_for_each_entry(req, list, hash_node)
+ seq_printf(m, " op=%d, task_works=%d\n", req->opcode,
+ req->task->task_works != NULL);
+ }
+ spin_unlock_irq(&ctx->completion_lock);
mutex_unlock(&ctx->uring_lock);
}
@@ -6620,6 +7550,7 @@ static void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
percpu_ref_put(&ctx->refs);
}
}
+#endif
static const struct file_operations io_uring_fops = {
.release = io_uring_release,
@@ -6631,7 +7562,9 @@ static const struct file_operations io_uring_fops = {
#endif
.poll = io_uring_poll,
.fasync = io_uring_fasync,
+#ifdef CONFIG_PROC_FS
.show_fdinfo = io_uring_show_fdinfo,
+#endif
};
static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
@@ -6821,7 +7754,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
- IORING_FEAT_CUR_PERSONALITY;
+ IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL;
trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
return ret;
err:
@@ -7099,6 +8032,7 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(8, __u64, off);
BUILD_BUG_SQE_ELEM(8, __u64, addr2);
BUILD_BUG_SQE_ELEM(16, __u64, addr);
+ BUILD_BUG_SQE_ELEM(16, __u64, splice_off_in);
BUILD_BUG_SQE_ELEM(24, __u32, len);
BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags);
BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags);
@@ -7113,11 +8047,14 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(28, __u32, open_flags);
BUILD_BUG_SQE_ELEM(28, __u32, statx_flags);
BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice);
+ BUILD_BUG_SQE_ELEM(28, __u32, splice_flags);
BUILD_BUG_SQE_ELEM(32, __u64, user_data);
BUILD_BUG_SQE_ELEM(40, __u16, buf_index);
BUILD_BUG_SQE_ELEM(42, __u16, personality);
+ BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);
BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
+ BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int));
req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
return 0;
};
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 2494095..27373f5 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -976,29 +976,33 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* it. */
/*
- * A buffer which has been freed while still being journaled by
- * a previous transaction.
- */
- if (buffer_freed(bh)) {
+ * A buffer which has been freed while still being journaled
+ * by a previous transaction, refile the buffer to BJ_Forget of
+ * the running transaction. If the just committed transaction
+ * contains "add to orphan" operation, we can completely
+ * invalidate the buffer now. We are rather through in that
+ * since the buffer may be still accessible when blocksize <
+ * pagesize and it is attached to the last partial page.
+ */
+ if (buffer_freed(bh) && !jh->b_next_transaction) {
+ struct address_space *mapping;
+
+ clear_buffer_freed(bh);
+ clear_buffer_jbddirty(bh);
+
/*
- * If the running transaction is the one containing
- * "add to orphan" operation (b_next_transaction !=
- * NULL), we have to wait for that transaction to
- * commit before we can really get rid of the buffer.
- * So just clear b_modified to not confuse transaction
- * credit accounting and refile the buffer to
- * BJ_Forget of the running transaction. If the just
- * committed transaction contains "add to orphan"
- * operation, we can completely invalidate the buffer
- * now. We are rather through in that since the
- * buffer may be still accessible when blocksize <
- * pagesize and it is attached to the last partial
- * page.
+ * Block device buffers need to stay mapped all the
+ * time, so it is enough to clear buffer_jbddirty and
+ * buffer_freed bits. For the file mapping buffers (i.e.
+ * journalled data) we need to unmap buffer and clear
+ * more bits. We also need to be careful about the check
+ * because the data page mapping can get cleared under
+ * out hands, which alse need not to clear more bits
+ * because the page and buffers will be freed and can
+ * never be reused once we are done with them.
*/
- jh->b_modified = 0;
- if (!jh->b_next_transaction) {
- clear_buffer_freed(bh);
- clear_buffer_jbddirty(bh);
+ mapping = READ_ONCE(bh->b_page->mapping);
+ if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) {
clear_buffer_mapped(bh);
clear_buffer_new(bh);
clear_buffer_req(bh);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e77a5a0..3dccc23 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -936,8 +936,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
char *frozen_buffer = NULL;
unsigned long start_lock, time_lock;
- if (is_handle_aborted(handle))
- return -EROFS;
journal = transaction->t_journal;
jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
@@ -1152,8 +1150,8 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh,
/* For undo access buffer must have data copied */
if (undo && !jh->b_committed_data)
goto out;
- if (jh->b_transaction != handle->h_transaction &&
- jh->b_next_transaction != handle->h_transaction)
+ if (READ_ONCE(jh->b_transaction) != handle->h_transaction &&
+ READ_ONCE(jh->b_next_transaction) != handle->h_transaction)
goto out;
/*
* There are two reasons for the barrier here:
@@ -1189,6 +1187,9 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
struct journal_head *jh;
int rc;
+ if (is_handle_aborted(handle))
+ return -EROFS;
+
if (jbd2_write_access_granted(handle, bh, false))
return 0;
@@ -1326,6 +1327,9 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
struct journal_head *jh;
char *committed_data = NULL;
+ if (is_handle_aborted(handle))
+ return -EROFS;
+
if (jbd2_write_access_granted(handle, bh, true))
return 0;
@@ -2329,14 +2333,16 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
return -EBUSY;
}
/*
- * OK, buffer won't be reachable after truncate. We just set
- * j_next_transaction to the running transaction (if there is
- * one) and mark buffer as freed so that commit code knows it
- * should clear dirty bits when it is done with the buffer.
+ * OK, buffer won't be reachable after truncate. We just clear
+ * b_modified to not confuse transaction credit accounting, and
+ * set j_next_transaction to the running transaction (if there
+ * is one) and mark buffer as freed so that commit code knows
+ * it should clear dirty bits when it is done with the buffer.
*/
set_buffer_freed(bh);
if (journal->j_running_transaction && buffer_jbddirty(bh))
jh->b_next_transaction = journal->j_running_transaction;
+ jh->b_modified = 0;
spin_unlock(&journal->j_list_lock);
spin_unlock(&jh->b_state_lock);
write_unlock(&journal->j_state_lock);
@@ -2563,8 +2569,8 @@ bool __jbd2_journal_refile_buffer(struct journal_head *jh)
* our jh reference and thus __jbd2_journal_file_buffer() must not
* take a new one.
*/
- jh->b_transaction = jh->b_next_transaction;
- jh->b_next_transaction = NULL;
+ WRITE_ONCE(jh->b_transaction, jh->b_next_transaction);
+ WRITE_ONCE(jh->b_next_transaction, NULL);
if (buffer_freed(bh))
jlist = BJ_Forget;
else if (jh->b_modified)
diff --git a/fs/locks.c b/fs/locks.c
index 44b6da0..b8a31c1 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -725,7 +725,6 @@ static void __locks_delete_block(struct file_lock *waiter)
{
locks_delete_global_blocked(waiter);
list_del_init(&waiter->fl_blocked_member);
- waiter->fl_blocker = NULL;
}
static void __locks_wake_up_blocks(struct file_lock *blocker)
@@ -740,6 +739,13 @@ static void __locks_wake_up_blocks(struct file_lock *blocker)
waiter->fl_lmops->lm_notify(waiter);
else
wake_up(&waiter->fl_wait);
+
+ /*
+ * The setting of fl_blocker to NULL marks the "done"
+ * point in deleting a block. Paired with acquire at the top
+ * of locks_delete_block().
+ */
+ smp_store_release(&waiter->fl_blocker, NULL);
}
}
@@ -754,24 +760,41 @@ int locks_delete_block(struct file_lock *waiter)
int status = -ENOENT;
/*
- * If fl_blocker is NULL, it won't be set again as this thread
- * "owns" the lock and is the only one that might try to claim
- * the lock. So it is safe to test fl_blocker locklessly.
- * Also if fl_blocker is NULL, this waiter is not listed on
- * fl_blocked_requests for some lock, so no other request can
- * be added to the list of fl_blocked_requests for this
- * request. So if fl_blocker is NULL, it is safe to
- * locklessly check if fl_blocked_requests is empty. If both
- * of these checks succeed, there is no need to take the lock.
+ * If fl_blocker is NULL, it won't be set again as this thread "owns"
+ * the lock and is the only one that might try to claim the lock.
+ *
+ * We use acquire/release to manage fl_blocker so that we can
+ * optimize away taking the blocked_lock_lock in many cases.
+ *
+ * The smp_load_acquire guarantees two things:
+ *
+ * 1/ that fl_blocked_requests can be tested locklessly. If something
+ * was recently added to that list it must have been in a locked region
+ * *before* the locked region when fl_blocker was set to NULL.
+ *
+ * 2/ that no other thread is accessing 'waiter', so it is safe to free
+ * it. __locks_wake_up_blocks is careful not to touch waiter after
+ * fl_blocker is released.
+ *
+ * If a lockless check of fl_blocker shows it to be NULL, we know that
+ * no new locks can be inserted into its fl_blocked_requests list, and
+ * can avoid doing anything further if the list is empty.
*/
- if (waiter->fl_blocker == NULL &&
+ if (!smp_load_acquire(&waiter->fl_blocker) &&
list_empty(&waiter->fl_blocked_requests))
return status;
+
spin_lock(&blocked_lock_lock);
if (waiter->fl_blocker)
status = 0;
__locks_wake_up_blocks(waiter);
__locks_delete_block(waiter);
+
+ /*
+ * The setting of fl_blocker to NULL marks the "done" point in deleting
+ * a block. Paired with acquire at the top of this function.
+ */
+ smp_store_release(&waiter->fl_blocker, NULL);
spin_unlock(&blocked_lock_lock);
return status;
}
@@ -1364,7 +1387,8 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
error = posix_lock_inode(inode, fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
- error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker);
+ error = wait_event_interruptible(fl->fl_wait,
+ list_empty(&fl->fl_blocked_member));
if (error)
break;
}
@@ -1449,7 +1473,8 @@ int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start,
error = posix_lock_inode(inode, &fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
- error = wait_event_interruptible(fl.fl_wait, !fl.fl_blocker);
+ error = wait_event_interruptible(fl.fl_wait,
+ list_empty(&fl.fl_blocked_member));
if (!error) {
/*
* If we've been sleeping someone might have
@@ -1652,7 +1677,8 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
locks_dispose_list(&dispose);
error = wait_event_interruptible_timeout(new_fl->fl_wait,
- !new_fl->fl_blocker, break_time);
+ list_empty(&new_fl->fl_blocked_member),
+ break_time);
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
@@ -2136,7 +2162,8 @@ static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl)
error = flock_lock_inode(inode, fl);
if (error != FILE_LOCK_DEFERRED)
break;
- error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker);
+ error = wait_event_interruptible(fl->fl_wait,
+ list_empty(&fl->fl_blocked_member));
if (error)
break;
}
@@ -2413,7 +2440,8 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd,
error = vfs_lock_file(filp, cmd, fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
- error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker);
+ error = wait_event_interruptible(fl->fl_wait,
+ list_empty(&fl->fl_blocked_member));
if (error)
break;
}
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 40b6c5a..88e1763 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -164,7 +164,7 @@
If you want your system to mount its root file system via NFS,
choose Y here. This is common practice for managing systems
without local permanent storage. For details, read
- <file:Documentation/filesystems/nfs/nfsroot.txt>.
+ <file:Documentation/admin-guide/nfs/nfsroot.rst>.
Most people say N here.
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 989c30c..f1ff307 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -153,6 +153,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
goto error_0;
+ clp->cl_minorversion = cl_init->minorversion;
clp->cl_nfs_mod = cl_init->nfs_mod;
if (!try_module_get(clp->cl_nfs_mod->owner))
goto error_dealloc;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 4a84107..1865322 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -42,13 +42,27 @@ static void nfs_mark_delegation_revoked(struct nfs_delegation *delegation)
if (!test_and_set_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
delegation->stateid.type = NFS4_INVALID_STATEID_TYPE;
atomic_long_dec(&nfs_active_delegations);
+ if (!test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
+ nfs_clear_verifier_delegated(delegation->inode);
}
}
+static struct nfs_delegation *nfs_get_delegation(struct nfs_delegation *delegation)
+{
+ refcount_inc(&delegation->refcount);
+ return delegation;
+}
+
+static void nfs_put_delegation(struct nfs_delegation *delegation)
+{
+ if (refcount_dec_and_test(&delegation->refcount))
+ __nfs_free_delegation(delegation);
+}
+
static void nfs_free_delegation(struct nfs_delegation *delegation)
{
nfs_mark_delegation_revoked(delegation);
- __nfs_free_delegation(delegation);
+ nfs_put_delegation(delegation);
}
/**
@@ -241,13 +255,18 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
{
+ const struct cred *cred;
int res = 0;
- if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
- res = nfs4_proc_delegreturn(inode,
- delegation->cred,
+ if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
+ spin_lock(&delegation->lock);
+ cred = get_cred(delegation->cred);
+ spin_unlock(&delegation->lock);
+ res = nfs4_proc_delegreturn(inode, cred,
&delegation->stateid,
issync);
+ put_cred(cred);
+ }
return res;
}
@@ -273,9 +292,13 @@ nfs_start_delegation_return_locked(struct nfs_inode *nfsi)
if (delegation == NULL)
goto out;
spin_lock(&delegation->lock);
- if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
- ret = delegation;
+ if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
+ /* Refcount matched in nfs_end_delegation_return() */
+ ret = nfs_get_delegation(delegation);
+ }
spin_unlock(&delegation->lock);
+ if (ret)
+ nfs_clear_verifier_delegated(&nfsi->vfs_inode);
out:
return ret;
}
@@ -393,6 +416,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
if (delegation == NULL)
return -ENOMEM;
nfs4_stateid_copy(&delegation->stateid, stateid);
+ refcount_set(&delegation->refcount, 1);
delegation->type = type;
delegation->pagemod_limit = pagemod_limit;
delegation->change_attr = inode_peek_iversion_raw(inode);
@@ -492,6 +516,8 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation
err = nfs_do_return_delegation(inode, delegation, issync);
out:
+ /* Refcount matched in nfs_start_delegation_return_locked() */
+ nfs_put_delegation(delegation);
return err;
}
@@ -686,9 +712,12 @@ void nfs4_inode_return_delegation_on_close(struct inode *inode)
list_empty(&NFS_I(inode)->open_files) &&
!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
- ret = delegation;
+ /* Refcount matched in nfs_end_delegation_return() */
+ ret = nfs_get_delegation(delegation);
}
spin_unlock(&delegation->lock);
+ if (ret)
+ nfs_clear_verifier_delegated(inode);
}
out:
rcu_read_unlock();
@@ -1088,10 +1117,11 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock();
if (delegation != NULL) {
- delegation = nfs_detach_delegation(NFS_I(inode),
- delegation, server);
- if (delegation != NULL)
+ if (nfs_detach_delegation(NFS_I(inode), delegation,
+ server) != NULL)
nfs_free_delegation(delegation);
+ /* Match nfs_start_delegation_return_locked */
+ nfs_put_delegation(delegation);
}
iput(inode);
nfs_sb_deactive(server->super);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 31b8460..9b00a0b 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -22,6 +22,7 @@ struct nfs_delegation {
unsigned long pagemod_limit;
__u64 change_attr;
unsigned long flags;
+ refcount_t refcount;
spinlock_t lock;
struct rcu_head rcu;
};
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 1320288..193d6fb 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -155,6 +155,7 @@ typedef struct {
loff_t current_index;
decode_dirent_t decode;
+ unsigned long dir_verifier;
unsigned long timestamp;
unsigned long gencount;
unsigned int cache_entry_index;
@@ -353,6 +354,7 @@ int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
again:
timestamp = jiffies;
gencount = nfs_inc_attr_generation_counter();
+ desc->dir_verifier = nfs_save_change_attribute(inode);
error = NFS_PROTO(inode)->readdir(file_dentry(file), cred, entry->cookie, pages,
NFS_SERVER(inode)->dtsize, desc->plus);
if (error < 0) {
@@ -455,13 +457,13 @@ void nfs_force_use_readdirplus(struct inode *dir)
}
static
-void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
+void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
+ unsigned long dir_verifier)
{
struct qstr filename = QSTR_INIT(entry->name, entry->len);
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
struct dentry *dentry;
struct dentry *alias;
- struct inode *dir = d_inode(parent);
struct inode *inode;
int status;
@@ -500,7 +502,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
if (nfs_same_file(dentry, entry)) {
if (!entry->fh->size)
goto out;
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ nfs_set_verifier(dentry, dir_verifier);
status = nfs_refresh_inode(d_inode(dentry), entry->fattr);
if (!status)
nfs_setsecurity(d_inode(dentry), entry->fattr, entry->label);
@@ -526,7 +528,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
dput(dentry);
dentry = alias;
}
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ nfs_set_verifier(dentry, dir_verifier);
out:
dput(dentry);
}
@@ -564,7 +566,8 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
count++;
if (desc->plus)
- nfs_prime_dcache(file_dentry(desc->file), entry);
+ nfs_prime_dcache(file_dentry(desc->file), entry,
+ desc->dir_verifier);
status = nfs_readdir_add_to_array(entry, page);
if (status != 0)
@@ -983,14 +986,113 @@ static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
* full lookup on all child dentries of 'dir' whenever a change occurs
* on the server that might have invalidated our dcache.
*
+ * Note that we reserve bit '0' as a tag to let us know when a dentry
+ * was revalidated while holding a delegation on its inode.
+ *
* The caller should be holding dir->i_lock
*/
void nfs_force_lookup_revalidate(struct inode *dir)
{
- NFS_I(dir)->cache_change_attribute++;
+ NFS_I(dir)->cache_change_attribute += 2;
}
EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
+/**
+ * nfs_verify_change_attribute - Detects NFS remote directory changes
+ * @dir: pointer to parent directory inode
+ * @verf: previously saved change attribute
+ *
+ * Return "false" if the verifiers doesn't match the change attribute.
+ * This would usually indicate that the directory contents have changed on
+ * the server, and that any dentries need revalidating.
+ */
+static bool nfs_verify_change_attribute(struct inode *dir, unsigned long verf)
+{
+ return (verf & ~1UL) == nfs_save_change_attribute(dir);
+}
+
+static void nfs_set_verifier_delegated(unsigned long *verf)
+{
+ *verf |= 1UL;
+}
+
+#if IS_ENABLED(CONFIG_NFS_V4)
+static void nfs_unset_verifier_delegated(unsigned long *verf)
+{
+ *verf &= ~1UL;
+}
+#endif /* IS_ENABLED(CONFIG_NFS_V4) */
+
+static bool nfs_test_verifier_delegated(unsigned long verf)
+{
+ return verf & 1;
+}
+
+static bool nfs_verifier_is_delegated(struct dentry *dentry)
+{
+ return nfs_test_verifier_delegated(dentry->d_time);
+}
+
+static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf)
+{
+ struct inode *inode = d_inode(dentry);
+
+ if (!nfs_verifier_is_delegated(dentry) &&
+ !nfs_verify_change_attribute(d_inode(dentry->d_parent), verf))
+ goto out;
+ if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ nfs_set_verifier_delegated(&verf);
+out:
+ dentry->d_time = verf;
+}
+
+/**
+ * nfs_set_verifier - save a parent directory verifier in the dentry
+ * @dentry: pointer to dentry
+ * @verf: verifier to save
+ *
+ * Saves the parent directory verifier in @dentry. If the inode has
+ * a delegation, we also tag the dentry as having been revalidated
+ * while holding a delegation so that we know we don't have to
+ * look it up again after a directory change.
+ */
+void nfs_set_verifier(struct dentry *dentry, unsigned long verf)
+{
+
+ spin_lock(&dentry->d_lock);
+ nfs_set_verifier_locked(dentry, verf);
+ spin_unlock(&dentry->d_lock);
+}
+EXPORT_SYMBOL_GPL(nfs_set_verifier);
+
+#if IS_ENABLED(CONFIG_NFS_V4)
+/**
+ * nfs_clear_verifier_delegated - clear the dir verifier delegation tag
+ * @inode: pointer to inode
+ *
+ * Iterates through the dentries in the inode alias list and clears
+ * the tag used to indicate that the dentry has been revalidated
+ * while holding a delegation.
+ * This function is intended for use when the delegation is being
+ * returned or revoked.
+ */
+void nfs_clear_verifier_delegated(struct inode *inode)
+{
+ struct dentry *alias;
+
+ if (!inode)
+ return;
+ spin_lock(&inode->i_lock);
+ hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
+ spin_lock(&alias->d_lock);
+ nfs_unset_verifier_delegated(&alias->d_time);
+ spin_unlock(&alias->d_lock);
+ }
+ spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL_GPL(nfs_clear_verifier_delegated);
+#endif /* IS_ENABLED(CONFIG_NFS_V4) */
+
/*
* A check for whether or not the parent directory has changed.
* In the case it has, we assume that the dentries are untrustworthy
@@ -1159,6 +1261,7 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
struct nfs_fh *fhandle;
struct nfs_fattr *fattr;
struct nfs4_label *label;
+ unsigned long dir_verifier;
int ret;
ret = -ENOMEM;
@@ -1168,6 +1271,7 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
if (fhandle == NULL || fattr == NULL || IS_ERR(label))
goto out;
+ dir_verifier = nfs_save_change_attribute(dir);
ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label);
if (ret < 0) {
switch (ret) {
@@ -1188,7 +1292,7 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
goto out;
nfs_setsecurity(inode, fattr, label);
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ nfs_set_verifier(dentry, dir_verifier);
/* set a readdirplus hint that we had a cache miss */
nfs_force_use_readdirplus(dir);
@@ -1230,7 +1334,7 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
goto out_bad;
}
- if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
+ if (nfs_verifier_is_delegated(dentry))
return nfs_lookup_revalidate_delegated(dir, dentry, inode);
/* Force a full look up iff the parent directory has changed */
@@ -1415,6 +1519,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
struct nfs_fh *fhandle = NULL;
struct nfs_fattr *fattr = NULL;
struct nfs4_label *label = NULL;
+ unsigned long dir_verifier;
int error;
dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry);
@@ -1440,6 +1545,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
if (IS_ERR(label))
goto out;
+ dir_verifier = nfs_save_change_attribute(dir);
trace_nfs_lookup_enter(dir, dentry, flags);
error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label);
if (error == -ENOENT)
@@ -1463,7 +1569,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
goto out_label;
dentry = res;
}
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ nfs_set_verifier(dentry, dir_verifier);
out_label:
trace_nfs_lookup_exit(dir, dentry, flags, error);
nfs4_label_free(label);
@@ -1668,7 +1774,7 @@ nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
if (inode == NULL)
goto full_reval;
- if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
+ if (nfs_verifier_is_delegated(dentry))
return nfs_lookup_revalidate_delegated(dir, dentry, inode);
/* NFS only supports OPEN on regular files */
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index e1b9384..e113fcb 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -832,6 +832,8 @@ static int nfs_parse_source(struct fs_context *fc,
if (len > maxnamlen)
goto out_hostname;
+ kfree(ctx->nfs_server.hostname);
+
/* N.B. caller will free nfs_server.hostname in all cases */
ctx->nfs_server.hostname = kmemdup_nul(dev_name, len, GFP_KERNEL);
if (!ctx->nfs_server.hostname)
@@ -1240,6 +1242,13 @@ static int nfs_fs_context_validate(struct fs_context *fc)
}
ctx->nfs_mod = nfs_mod;
}
+
+ /* Ensure the filesystem context has the correct fs_type */
+ if (fc->fs_type != ctx->nfs_mod->nfs_fs) {
+ module_put(fc->fs_type->owner);
+ __module_get(ctx->nfs_mod->nfs_fs->owner);
+ fc->fs_type = ctx->nfs_mod->nfs_fs;
+ }
return 0;
out_no_device_name:
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 52270bf..1abf126 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -31,6 +31,7 @@ static DEFINE_SPINLOCK(nfs_fscache_keys_lock);
struct nfs_server_key {
struct {
uint16_t nfsversion; /* NFS protocol version */
+ uint32_t minorversion; /* NFSv4 minor version */
uint16_t family; /* address family */
__be16 port; /* IP port */
} hdr;
@@ -55,6 +56,7 @@ void nfs_fscache_get_client_cookie(struct nfs_client *clp)
memset(&key, 0, sizeof(key));
key.hdr.nfsversion = clp->rpc_ops->version;
+ key.hdr.minorversion = clp->cl_minorversion;
key.hdr.family = clp->cl_addr.ss_family;
switch (clp->cl_addr.ss_family) {
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1309e6f..11bf158 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2114,6 +2114,7 @@ static void init_once(void *foo)
init_rwsem(&nfsi->rmdir_sem);
mutex_init(&nfsi->commit_mutex);
nfs4_init_once(nfsi);
+ nfsi->cache_change_attribute = 0;
}
static int __init nfs_init_inodecache(void)
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index ad60774..f3ece8e 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -153,7 +153,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
/* Open a new filesystem context, transferring parameters from the
* parent superblock, including the network namespace.
*/
- fc = fs_context_for_submount(&nfs_fs_type, path->dentry);
+ fc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
if (IS_ERR(fc))
return ERR_CAST(fc);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 0cd767e..0bd77cc 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -216,7 +216,6 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
INIT_LIST_HEAD(&clp->cl_ds_clients);
rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
- clp->cl_minorversion = cl_init->minorversion;
clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
clp->cl_mig_gen = 1;
#if IS_ENABLED(CONFIG_NFS_V4_1)
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index be4eb72..1297919 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -87,7 +87,6 @@ nfs4_file_open(struct inode *inode, struct file *filp)
if (inode != d_inode(dentry))
goto out_drop;
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
nfs_file_set_open_context(filp, ctx);
nfs_fscache_open_file(inode, filp);
err = 0;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 95d07a3..69b7ab7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2974,10 +2974,13 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
struct dentry *dentry;
struct nfs4_state *state;
fmode_t acc_mode = _nfs4_ctx_to_accessmode(ctx);
+ struct inode *dir = d_inode(opendata->dir);
+ unsigned long dir_verifier;
unsigned int seq;
int ret;
seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
+ dir_verifier = nfs_save_change_attribute(dir);
ret = _nfs4_proc_open(opendata, ctx);
if (ret != 0)
@@ -3005,8 +3008,19 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
dput(ctx->dentry);
ctx->dentry = dentry = alias;
}
- nfs_set_verifier(dentry,
- nfs_save_change_attribute(d_inode(opendata->dir)));
+ }
+
+ switch(opendata->o_arg.claim) {
+ default:
+ break;
+ case NFS4_OPEN_CLAIM_NULL:
+ case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+ case NFS4_OPEN_CLAIM_DELEGATE_PREV:
+ if (!opendata->rpc_done)
+ break;
+ if (opendata->o_res.delegation_type != 0)
+ dir_verifier = nfs_save_change_attribute(dir);
+ nfs_set_verifier(dentry, dir_verifier);
}
/* Parse layoutget results before we check for access */
@@ -5322,7 +5336,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
hdr->timestamp = jiffies;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
- nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1, 0);
+ nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0);
nfs4_state_protect_write(server->nfs_client, clnt, msg, hdr);
}
diff --git a/fs/open.c b/fs/open.c
index 0788b37..b69d6ee 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -860,9 +860,6 @@ static int do_dentry_open(struct file *f,
* the return value of d_splice_alias(), then the caller needs to perform dput()
* on it after finish_open().
*
- * On successful return @file is a fully instantiated open file. After this, if
- * an error occurs in ->atomic_open(), it needs to clean up with fput().
- *
* Returns zero on success or -errno if the open failed.
*/
int finish_open(struct file *file, struct dentry *dentry,
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 444e2da..714c14c 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -93,6 +93,7 @@
bool "Overlayfs: auto enable inode number mapping"
default n
depends on OVERLAY_FS
+ depends on 64BIT
help
If this config option is enabled then overlay filesystems will use
unused high bits in undelying filesystem inode numbers to map all
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index a531721..87c362f 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -244,6 +244,9 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
if (iocb->ki_flags & IOCB_WRITE) {
struct inode *inode = file_inode(orig_iocb->ki_filp);
+ /* Actually acquired in ovl_write_iter() */
+ __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
+ SB_FREEZE_WRITE);
file_end_write(iocb->ki_filp);
ovl_copyattr(ovl_inode_real(inode), inode);
}
@@ -346,6 +349,9 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
goto out;
file_start_write(real.file);
+ /* Pacify lockdep, same trick as done in aio_write() */
+ __sb_writers_release(file_inode(real.file)->i_sb,
+ SB_FREEZE_WRITE);
aio_req->fd = real;
real.flags = 0;
aio_req->orig_iocb = iocb;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 3623d28..3d3f2b8 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -318,7 +318,12 @@ static inline unsigned int ovl_xino_bits(struct super_block *sb)
return ovl_same_dev(sb) ? OVL_FS(sb)->xino_mode : 0;
}
-static inline int ovl_inode_lock(struct inode *inode)
+static inline void ovl_inode_lock(struct inode *inode)
+{
+ mutex_lock(&OVL_I(inode)->lock);
+}
+
+static inline int ovl_inode_lock_interruptible(struct inode *inode)
{
return mutex_lock_interruptible(&OVL_I(inode)->lock);
}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 319fe0d..ac967f1 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1411,6 +1411,8 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
if (ofs->config.xino == OVL_XINO_ON)
pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n");
ofs->xino_mode = 0;
+ } else if (ofs->config.xino == OVL_XINO_OFF) {
+ ofs->xino_mode = -1;
} else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) {
/*
* This is a roundup of number of bits needed for encoding
@@ -1623,8 +1625,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
sb->s_stack_depth = 0;
sb->s_maxbytes = MAX_LFS_FILESIZE;
/* Assume underlaying fs uses 32bit inodes unless proven otherwise */
- if (ofs->config.xino != OVL_XINO_OFF)
+ if (ofs->config.xino != OVL_XINO_OFF) {
ofs->xino_mode = BITS_PER_LONG - 32;
+ if (!ofs->xino_mode) {
+ pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n");
+ ofs->config.xino = OVL_XINO_OFF;
+ }
+ }
/* alloc/destroy_inode needed for setting up traps in inode cache */
sb->s_op = &ovl_super_operations;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index ea00508..042f7eb 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -509,7 +509,7 @@ int ovl_copy_up_start(struct dentry *dentry, int flags)
struct inode *inode = d_inode(dentry);
int err;
- err = ovl_inode_lock(inode);
+ err = ovl_inode_lock_interruptible(inode);
if (!err && ovl_already_copied_up_locked(dentry, flags)) {
err = 1; /* Already copied up */
ovl_inode_unlock(inode);
@@ -764,7 +764,7 @@ int ovl_nlink_start(struct dentry *dentry)
return err;
}
- err = ovl_inode_lock(inode);
+ err = ovl_inode_lock_interruptible(inode);
if (err)
return err;
diff --git a/fs/pipe.c b/fs/pipe.c
index 5a34d6c..2144507 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -722,9 +722,10 @@ pipe_release(struct inode *inode, struct file *file)
if (file->f_mode & FMODE_WRITE)
pipe->writers--;
- if (pipe->readers || pipe->writers) {
- wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLHUP);
- wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM | EPOLLERR | EPOLLHUP);
+ /* Was that the last reader or writer, but not the other side? */
+ if (!pipe->readers != !pipe->writers) {
+ wake_up_interruptible_all(&pipe->rd_wait);
+ wake_up_interruptible_all(&pipe->wr_wait);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
@@ -1026,8 +1027,8 @@ static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
static void wake_up_partner(struct pipe_inode_info *pipe)
{
- wake_up_interruptible(&pipe->rd_wait);
- wake_up_interruptible(&pipe->wr_wait);
+ wake_up_interruptible_all(&pipe->rd_wait);
+ wake_up_interruptible_all(&pipe->wr_wait);
}
static int fifo_open(struct inode *inode, struct file *filp)
@@ -1144,7 +1145,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
err_wr:
if (!--pipe->writers)
- wake_up_interruptible(&pipe->rd_wait);
+ wake_up_interruptible_all(&pipe->rd_wait);
ret = -ERESTARTSYS;
goto err;
@@ -1271,8 +1272,9 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
pipe->max_usage = nr_slots;
pipe->tail = tail;
pipe->head = head;
- wake_up_interruptible_all(&pipe->rd_wait);
- wake_up_interruptible_all(&pipe->wr_wait);
+
+ /* This might have made more room for writers */
+ wake_up_interruptible(&pipe->wr_wait);
return pipe->max_usage * PAGE_SIZE;
out_revert_acct:
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 7fbe8f0..d99b5d3 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -87,11 +87,11 @@ static void *pstore_ftrace_seq_next(struct seq_file *s, void *v, loff_t *pos)
struct pstore_private *ps = s->private;
struct pstore_ftrace_seq_data *data = v;
+ (*pos)++;
data->off += REC_SIZE;
if (data->off + REC_SIZE > ps->total_size)
return NULL;
- (*pos)++;
return data;
}
@@ -101,6 +101,9 @@ static int pstore_ftrace_seq_show(struct seq_file *s, void *v)
struct pstore_ftrace_seq_data *data = v;
struct pstore_ftrace_record *rec;
+ if (!data)
+ return 0;
+
rec = (struct pstore_ftrace_record *)(ps->record->buf + data->off);
seq_printf(s, "CPU:%d ts:%llu %08lx %08lx %ps <- %pS\n",
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index d896457..408277e 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -823,9 +823,9 @@ static int __init pstore_init(void)
ret = pstore_init_fs();
if (ret)
- return ret;
+ free_buf_for_compression();
- return 0;
+ return ret;
}
late_initcall(pstore_init);
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 013486b..7956221 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -963,7 +963,6 @@ static void __init ramoops_register_dummy(void)
pr_info("could not create platform device: %ld\n",
PTR_ERR(dummy));
dummy = NULL;
- ramoops_unregister_dummy();
}
}
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 1f4d8c0..c917c19 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -34,7 +34,7 @@ struct persistent_ram_buffer {
uint32_t sig;
atomic_t start;
atomic_t size;
- uint8_t data[0];
+ uint8_t data[];
};
#define PERSISTENT_RAM_SIG (0x43474244) /* DBGC */
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 072156c..5c76633 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2599,7 +2599,6 @@ static int journal_init_dev(struct super_block *super,
int result;
dev_t jdev;
fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
- char b[BDEVNAME_SIZE];
result = 0;
@@ -2621,8 +2620,8 @@ static int journal_init_dev(struct super_block *super,
result = PTR_ERR(journal->j_dev_bd);
journal->j_dev_bd = NULL;
reiserfs_warning(super, "sh-458",
- "cannot init journal device '%s': %i",
- __bdevname(jdev, b), result);
+ "cannot init journal device unknown-block(%u,%u): %i",
+ MAJOR(jdev), MINOR(jdev), result);
return result;
} else if (jdev != super->s_dev)
set_blocksize(journal->j_dev_bd, super->s_blocksize);
diff --git a/fs/splice.c b/fs/splice.c
index d671936..4735def 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1109,9 +1109,9 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
/*
* Determine where to splice to/from.
*/
-static long do_splice(struct file *in, loff_t __user *off_in,
- struct file *out, loff_t __user *off_out,
- size_t len, unsigned int flags)
+long do_splice(struct file *in, loff_t __user *off_in,
+ struct file *out, loff_t __user *off_out,
+ size_t len, unsigned int flags)
{
struct pipe_inode_info *ipipe;
struct pipe_inode_info *opipe;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3a688eb..58e937b 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -587,7 +587,7 @@ xfs_dax_writepages(
xfs_iflags_clear(ip, XFS_ITRUNCATED);
return dax_writeback_mapping_range(mapping,
- xfs_inode_buftarg(ip)->bt_bdev, wbc);
+ xfs_inode_buftarg(ip)->bt_daxdev, wbc);
}
STATIC sector_t
diff --git a/fs/zonefs/Kconfig b/fs/zonefs/Kconfig
index fb87ad3..ef2697b 100644
--- a/fs/zonefs/Kconfig
+++ b/fs/zonefs/Kconfig
@@ -2,6 +2,7 @@
tristate "zonefs filesystem support"
depends on BLOCK
depends on BLK_DEV_ZONED
+ select FS_IOMAP
help
zonefs is a simple file system which exposes zones of a zoned block
device (e.g. host-managed or host-aware SMR disk drives) as files.
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 8bc6ef8..3ce9829 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -178,7 +178,8 @@ static void zonefs_update_stats(struct inode *inode, loff_t new_isize)
* amount of readable data in the zone.
*/
static loff_t zonefs_check_zone_condition(struct inode *inode,
- struct blk_zone *zone, bool warn)
+ struct blk_zone *zone, bool warn,
+ bool mount)
{
struct zonefs_inode_info *zi = ZONEFS_I(inode);
@@ -196,13 +197,26 @@ static loff_t zonefs_check_zone_condition(struct inode *inode,
zone->wp = zone->start;
return 0;
case BLK_ZONE_COND_READONLY:
- /* Do not allow writes in read-only zones */
+ /*
+ * The write pointer of read-only zones is invalid. If such a
+ * zone is found during mount, the file size cannot be retrieved
+ * so we treat the zone as offline (mount == true case).
+ * Otherwise, keep the file size as it was when last updated
+ * so that the user can recover data. In both cases, writes are
+ * always disabled for the zone.
+ */
if (warn)
zonefs_warn(inode->i_sb, "inode %lu: read-only zone\n",
inode->i_ino);
inode->i_flags |= S_IMMUTABLE;
+ if (mount) {
+ zone->cond = BLK_ZONE_COND_OFFLINE;
+ inode->i_mode &= ~0777;
+ zone->wp = zone->start;
+ return 0;
+ }
inode->i_mode &= ~0222;
- /* fallthrough */
+ return i_size_read(inode);
default:
if (zi->i_ztype == ZONEFS_ZTYPE_CNV)
return zi->i_max_size;
@@ -231,7 +245,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
* as there is no inconsistency between the inode size and the amount of
* data writen in the zone (data_size).
*/
- data_size = zonefs_check_zone_condition(inode, zone, true);
+ data_size = zonefs_check_zone_condition(inode, zone, true, false);
isize = i_size_read(inode);
if (zone->cond != BLK_ZONE_COND_OFFLINE &&
zone->cond != BLK_ZONE_COND_READONLY &&
@@ -274,7 +288,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
if (zone->cond != BLK_ZONE_COND_OFFLINE) {
zone->cond = BLK_ZONE_COND_OFFLINE;
data_size = zonefs_check_zone_condition(inode, zone,
- false);
+ false, false);
}
} else if (zone->cond == BLK_ZONE_COND_READONLY ||
sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) {
@@ -283,7 +297,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
if (zone->cond != BLK_ZONE_COND_READONLY) {
zone->cond = BLK_ZONE_COND_READONLY;
data_size = zonefs_check_zone_condition(inode, zone,
- false);
+ false, false);
}
}
@@ -601,13 +615,13 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
ssize_t ret;
/*
- * For async direct IOs to sequential zone files, ignore IOCB_NOWAIT
+ * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT
* as this can cause write reordering (e.g. the first aio gets EAGAIN
* on the inode lock but the second goes through but is now unaligned).
*/
- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !is_sync_kiocb(iocb)
- && (iocb->ki_flags & IOCB_NOWAIT))
- iocb->ki_flags &= ~IOCB_NOWAIT;
+ if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !is_sync_kiocb(iocb) &&
+ (iocb->ki_flags & IOCB_NOWAIT))
+ return -EOPNOTSUPP;
if (iocb->ki_flags & IOCB_NOWAIT) {
if (!inode_trylock(inode))
@@ -975,7 +989,7 @@ static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
zi->i_zsector = zone->start;
zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE,
zone->len << SECTOR_SHIFT);
- zi->i_wpoffset = zonefs_check_zone_condition(inode, zone, true);
+ zi->i_wpoffset = zonefs_check_zone_condition(inode, zone, true, true);
inode->i_uid = sbi->s_uid;
inode->i_gid = sbi->s_gid;
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 00994b1..8e8be98 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -752,6 +752,8 @@ ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_dispatch_gpe(acpi_handle gpe_device, u3
ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_disable_all_gpes(void))
ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_runtime_gpes(void))
ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_wakeup_gpes(void))
+ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_any_gpe_status_set(void))
+ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_any_fixed_event_status_set(void))
ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
acpi_get_gpe_device(u32 gpe_index,
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index a2583c2..4defed5 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -532,11 +532,12 @@ typedef u64 acpi_integer;
strnlen (a, ACPI_NAMESEG_SIZE) == ACPI_NAMESEG_SIZE)
/*
- * Algorithm to obtain access bit width.
+ * Algorithm to obtain access bit or byte width.
* Can be used with access_width of struct acpi_generic_address and access_size of
* struct acpi_resource_generic_register.
*/
#define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + 2))
+#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) - 1))
/*******************************************************************************
*
diff --git a/include/crypto/curve25519.h b/include/crypto/curve25519.h
index 4e6dc84..9ecb3c1 100644
--- a/include/crypto/curve25519.h
+++ b/include/crypto/curve25519.h
@@ -33,7 +33,8 @@ bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE],
const u8 basepoint[CURVE25519_KEY_SIZE])
{
- if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519))
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519) &&
+ (!IS_ENABLED(CONFIG_CRYPTO_CURVE25519_X86) || IS_ENABLED(CONFIG_AS_ADX)))
curve25519_arch(mypublic, secret, basepoint);
else
curve25519_generic(mypublic, secret, basepoint);
@@ -49,7 +50,8 @@ __must_check curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE],
CURVE25519_KEY_SIZE)))
return false;
- if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519))
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519) &&
+ (!IS_ENABLED(CONFIG_CRYPTO_CURVE25519_X86) || IS_ENABLED(CONFIG_AS_ADX)))
curve25519_base_arch(pub, secret);
else
curve25519_generic(pub, secret, curve25519_base_point);
diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h
index bcb39da..41725d8 100644
--- a/include/drm/drm_dp_mst_helper.h
+++ b/include/drm/drm_dp_mst_helper.h
@@ -81,7 +81,7 @@ struct drm_dp_vcpi {
* &drm_dp_mst_topology_mgr.base.lock.
* @num_sdp_stream_sinks: Number of stream sinks. Protected by
* &drm_dp_mst_topology_mgr.base.lock.
- * @available_pbn: Available bandwidth for this port. Protected by
+ * @full_pbn: Max possible bandwidth for this port. Protected by
* &drm_dp_mst_topology_mgr.base.lock.
* @next: link to next port on this branch device
* @aux: i2c aux transport to talk to device connected to this port, protected
@@ -126,7 +126,7 @@ struct drm_dp_mst_port {
u8 dpcd_rev;
u8 num_sdp_streams;
u8 num_sdp_stream_sinks;
- uint16_t available_pbn;
+ uint16_t full_pbn;
struct list_head next;
/**
* @mstb: the branch device connected to this port, if there is one.
diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
index e34a7b7..294b293 100644
--- a/include/drm/drm_gem_shmem_helper.h
+++ b/include/drm/drm_gem_shmem_helper.h
@@ -96,6 +96,11 @@ struct drm_gem_shmem_object {
* The address are un-mapped when the count reaches zero.
*/
unsigned int vmap_use_count;
+
+ /**
+ * @map_cached: map object cached (instead of using writecombine).
+ */
+ bool map_cached;
};
#define to_drm_gem_shmem_obj(obj) \
diff --git a/include/dt-bindings/clock/imx8mn-clock.h b/include/dt-bindings/clock/imx8mn-clock.h
index 0f2b842..65ac6eb6 100644
--- a/include/dt-bindings/clock/imx8mn-clock.h
+++ b/include/dt-bindings/clock/imx8mn-clock.h
@@ -122,8 +122,8 @@
#define IMX8MN_CLK_I2C1 105
#define IMX8MN_CLK_I2C2 106
#define IMX8MN_CLK_I2C3 107
-#define IMX8MN_CLK_I2C4 118
-#define IMX8MN_CLK_UART1 119
+#define IMX8MN_CLK_I2C4 108
+#define IMX8MN_CLK_UART1 109
#define IMX8MN_CLK_UART2 110
#define IMX8MN_CLK_UART3 111
#define IMX8MN_CLK_UART4 112
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 853d92c..c1c0f9e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -441,14 +441,6 @@ void __bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off);
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
void bio_release_pages(struct bio *bio, bool mark_dirty);
-struct rq_map_data;
-extern struct bio *bio_map_user_iov(struct request_queue *,
- struct iov_iter *, gfp_t);
-extern void bio_unmap_user(struct bio *);
-extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
- gfp_t);
-extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int,
- gfp_t, int);
extern void bio_set_pages_dirty(struct bio *bio);
extern void bio_check_pages_dirty(struct bio *bio);
@@ -463,14 +455,9 @@ extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
extern void bio_copy_data(struct bio *dst, struct bio *src);
extern void bio_list_copy_data(struct bio *dst, struct bio *src);
extern void bio_free_pages(struct bio *bio);
-
-extern struct bio *bio_copy_user_iov(struct request_queue *,
- struct rq_map_data *,
- struct iov_iter *,
- gfp_t);
-extern int bio_uncopy_user(struct bio *);
void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter);
void bio_truncate(struct bio *bio, unsigned new_size);
+void guard_bio_eod(struct bio *bio);
static inline void zero_fill_bio(struct bio *bio)
{
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 11cfd64..f389d7c 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -162,7 +162,10 @@ struct blk_mq_hw_ctx {
struct dentry *sched_debugfs_dir;
#endif
- /** @hctx_list: List of all hardware queues. */
+ /**
+ * @hctx_list: if this hctx is not in use, this is an entry in
+ * q->unused_hctx_list.
+ */
struct list_head hctx_list;
/**
@@ -409,6 +412,8 @@ enum {
<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
+struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
+ void *queuedata);
struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
struct request_queue *q,
bool elevator_init);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 053ea4b..32868fb 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -524,7 +524,7 @@ struct request_queue {
unsigned int sg_reserved_size;
int node;
#ifdef CONFIG_BLK_DEV_IO_TRACE
- struct blk_trace *blk_trace;
+ struct blk_trace __rcu *blk_trace;
struct mutex blk_trace_mutex;
#endif
/*
@@ -952,6 +952,10 @@ static inline unsigned int blk_rq_stats_sectors(const struct request *rq)
}
#ifdef CONFIG_BLK_DEV_ZONED
+
+/* Helper to convert BLK_ZONE_ZONE_XXX to its string format XXX */
+const char *blk_zone_cond_str(enum blk_zone_cond zone_cond);
+
static inline unsigned int blk_rq_zone_no(struct request *rq)
{
return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
@@ -1063,7 +1067,6 @@ extern void blk_abort_request(struct request *);
* Access functions for manipulating queue properties
*/
extern void blk_cleanup_queue(struct request_queue *);
-extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
extern void blk_queue_bounce_limit(struct request_queue *, u64);
extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
@@ -1140,8 +1143,7 @@ extern void blk_dump_rq_flags(struct request *, char *);
extern long nr_blockdev_pages(void);
bool __must_check blk_get_queue(struct request_queue *);
-struct request_queue *blk_alloc_queue(gfp_t);
-struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id);
+struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id);
extern void blk_put_queue(struct request_queue *);
extern void blk_set_queue_dying(struct request_queue *);
@@ -1484,17 +1486,7 @@ static inline unsigned int block_size(struct block_device *bdev)
return bdev->bd_block_size;
}
-typedef struct {struct page *v;} Sector;
-
-unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *);
-
-static inline void put_dev_sector(Sector p)
-{
- put_page(p.v);
-}
-
int kblockd_schedule_work(struct work_struct *work);
-int kblockd_schedule_work_on(int cpu, struct work_struct *work);
int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
#define MODULE_ALIAS_BLOCKDEV(major,minor) \
@@ -1707,6 +1699,7 @@ struct block_device_operations {
void (*swap_slot_free_notify) (struct block_device *, unsigned long);
int (*report_zones)(struct gendisk *, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
+ char *(*devnode)(struct gendisk *disk, umode_t *mode);
struct module *owner;
const struct pr_ops *pr_ops;
};
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 7bb2d8d..3b6ff59 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -51,9 +51,13 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f
**/
#define blk_add_cgroup_trace_msg(q, cg, fmt, ...) \
do { \
- struct blk_trace *bt = (q)->blk_trace; \
+ struct blk_trace *bt; \
+ \
+ rcu_read_lock(); \
+ bt = rcu_dereference((q)->blk_trace); \
if (unlikely(bt)) \
__trace_note_message(bt, cg, fmt, ##__VA_ARGS__);\
+ rcu_read_unlock(); \
} while (0)
#define blk_add_trace_msg(q, fmt, ...) \
blk_add_cgroup_trace_msg(q, NULL, fmt, ##__VA_ARGS__)
@@ -61,10 +65,14 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f
static inline bool blk_trace_note_message_enabled(struct request_queue *q)
{
- struct blk_trace *bt = q->blk_trace;
- if (likely(!bt))
- return false;
- return bt->act_mask & BLK_TC_NOTIFY;
+ struct blk_trace *bt;
+ bool ret;
+
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
+ ret = bt && (bt->act_mask & BLK_TC_NOTIFY);
+ rcu_read_unlock();
+ return ret;
}
extern void blk_add_driver_data(struct request_queue *q, struct request *rq,
diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index 7e18c93..d11e183 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -10,6 +10,9 @@
#include <linux/kernel.h>
#include <linux/types.h>
+#define BOOTCONFIG_MAGIC "#BOOTCONFIG\n"
+#define BOOTCONFIG_MAGIC_LEN 12
+
/* XBC tree node */
struct xbc_node {
u16 next;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 49b1a70..212991f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -160,6 +160,7 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
}
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
bool lock_src);
+int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);
struct bpf_offload_dev;
struct bpf_offloaded_map;
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index c4458dc..76371aa 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -175,9 +175,10 @@ struct ceph_msg_data {
#endif /* CONFIG_BLOCK */
struct ceph_bvec_iter bvec_pos;
struct {
- struct page **pages; /* NOT OWNER. */
+ struct page **pages;
size_t length; /* total # bytes */
unsigned int alignment; /* first page */
+ bool own_pages;
};
struct ceph_pagelist *pagelist;
};
@@ -356,8 +357,8 @@ extern void ceph_con_keepalive(struct ceph_connection *con);
extern bool ceph_con_keepalive_expired(struct ceph_connection *con,
unsigned long interval);
-extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
- size_t length, size_t alignment);
+void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
+ size_t length, size_t alignment, bool own_pages);
extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
struct ceph_pagelist *pagelist);
#ifdef CONFIG_BLOCK
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index e081b56..5e60197 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -37,6 +37,9 @@ int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs);
#define CEPH_POOL_FLAG_HASHPSPOOL (1ULL << 0) /* hash pg seed and pool id
together */
#define CEPH_POOL_FLAG_FULL (1ULL << 1) /* pool is full */
+#define CEPH_POOL_FLAG_FULL_QUOTA (1ULL << 10) /* pool ran out of quota,
+ will set FULL too */
+#define CEPH_POOL_FLAG_NEARFULL (1ULL << 11) /* pool is nearfull */
struct ceph_pg_pool_info {
struct rb_node node;
@@ -304,5 +307,6 @@ extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map,
extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
+u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id);
#endif
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 59bdfd4..88ed3c5 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -143,8 +143,10 @@ extern const char *ceph_osd_state_name(int s);
/*
* osd map flag bits
*/
-#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */
-#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */
+#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC),
+ not set since ~luminous */
+#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC),
+ not set since ~luminous */
#define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */
#define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */
#define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index d7ddebd..e75d2191 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -62,6 +62,7 @@ struct css_task_iter {
struct list_head *mg_tasks_head;
struct list_head *dying_tasks_head;
+ struct list_head *cur_tasks_head;
struct css_set *cur_cset;
struct css_set *cur_dcset;
struct task_struct *cur_task;
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 952ac03..bd1ee90 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -522,9 +522,9 @@ struct clk *clk_register_gate(struct device *dev, const char *name,
* @clk_gate_flags: gate-specific flags for this clock
* @lock: shared register lock for this clock
*/
-#define clk_hw_register_gate_parent_hw(dev, name, parent_name, flags, reg, \
+#define clk_hw_register_gate_parent_hw(dev, name, parent_hw, flags, reg, \
bit_idx, clk_gate_flags, lock) \
- __clk_hw_register_gate((dev), NULL, (name), (parent_name), NULL, \
+ __clk_hw_register_gate((dev), NULL, (name), NULL, (parent_hw), \
NULL, (flags), (reg), (bit_idx), \
(clk_gate_flags), (lock))
/**
@@ -539,10 +539,10 @@ struct clk *clk_register_gate(struct device *dev, const char *name,
* @clk_gate_flags: gate-specific flags for this clock
* @lock: shared register lock for this clock
*/
-#define clk_hw_register_gate_parent_data(dev, name, parent_name, flags, reg, \
+#define clk_hw_register_gate_parent_data(dev, name, parent_data, flags, reg, \
bit_idx, clk_gate_flags, lock) \
- __clk_hw_register_gate((dev), NULL, (name), (parent_name), NULL, \
- NULL, (flags), (reg), (bit_idx), \
+ __clk_hw_register_gate((dev), NULL, (name), NULL, NULL, (parent_data), \
+ (flags), (reg), (bit_idx), \
(clk_gate_flags), (lock))
void clk_unregister_gate(struct clk *clk);
void clk_hw_unregister_gate(struct clk_hw *hw);
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 11083d8..df2475b 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -248,15 +248,6 @@ typedef struct compat_siginfo {
} _sifields;
} compat_siginfo_t;
-/*
- * These functions operate on 32- or 64-bit specs depending on
- * COMPAT_USE_64BIT_TIME, hence the void user pointer arguments.
- */
-extern int compat_get_timespec(struct timespec *, const void __user *);
-extern int compat_put_timespec(const struct timespec *, void __user *);
-extern int compat_get_timeval(struct timeval *, const void __user *);
-extern int compat_put_timeval(const struct timeval *, void __user *);
-
struct compat_iovec {
compat_uptr_t iov_base;
compat_size_t iov_len;
@@ -416,26 +407,6 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginf
int get_compat_sigevent(struct sigevent *event,
const struct compat_sigevent __user *u_event);
-static inline int old_timeval32_compare(struct old_timeval32 *lhs,
- struct old_timeval32 *rhs)
-{
- if (lhs->tv_sec < rhs->tv_sec)
- return -1;
- if (lhs->tv_sec > rhs->tv_sec)
- return 1;
- return lhs->tv_usec - rhs->tv_usec;
-}
-
-static inline int old_timespec32_compare(struct old_timespec32 *lhs,
- struct old_timespec32 *rhs)
-{
- if (lhs->tv_sec < rhs->tv_sec)
- return -1;
- if (lhs->tv_sec > rhs->tv_sec)
- return 1;
- return lhs->tv_nsec - rhs->tv_nsec;
-}
-
extern int get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat);
/*
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 018dce8..0fb561d 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -201,9 +201,6 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy)
return cpumask_weight(policy->cpus) > 1;
}
-/* /sys/devices/system/cpu/cpufreq: entry point for global variables */
-extern struct kobject *cpufreq_global_kobject;
-
#ifdef CONFIG_CPU_FREQ
unsigned int cpufreq_get(unsigned int cpu);
unsigned int cpufreq_quick_get(unsigned int cpu);
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 9bd8528..328c2db 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -129,11 +129,6 @@ static inline bool generic_fsdax_supported(struct dax_device *dax_dev,
sectors);
}
-static inline struct dax_device *fs_dax_get_by_host(const char *host)
-{
- return dax_get_by_host(host);
-}
-
static inline void fs_put_dax(struct dax_device *dax_dev)
{
put_dax(dax_dev);
@@ -141,7 +136,7 @@ static inline void fs_put_dax(struct dax_device *dax_dev)
struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev);
int dax_writeback_mapping_range(struct address_space *mapping,
- struct block_device *bdev, struct writeback_control *wbc);
+ struct dax_device *dax_dev, struct writeback_control *wbc);
struct page *dax_layout_busy_page(struct address_space *mapping);
dax_entry_t dax_lock_page(struct page *page);
@@ -160,11 +155,6 @@ static inline bool generic_fsdax_supported(struct dax_device *dax_dev,
return false;
}
-static inline struct dax_device *fs_dax_get_by_host(const char *host)
-{
- return NULL;
-}
-
static inline void fs_put_dax(struct dax_device *dax_dev)
{
}
@@ -180,7 +170,7 @@ static inline struct page *dax_layout_busy_page(struct address_space *mapping)
}
static inline int dax_writeback_mapping_range(struct address_space *mapping,
- struct block_device *bdev, struct writeback_control *wbc)
+ struct dax_device *dax_dev, struct writeback_control *wbc)
{
return -EOPNOTSUPP;
}
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 3d013de..43efcc4 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -127,9 +127,9 @@ struct dentry *debugfs_create_blob(const char *name, umode_t mode,
struct dentry *parent,
struct debugfs_blob_wrapper *blob);
-struct dentry *debugfs_create_regset32(const char *name, umode_t mode,
- struct dentry *parent,
- struct debugfs_regset32 *regset);
+void debugfs_create_regset32(const char *name, umode_t mode,
+ struct dentry *parent,
+ struct debugfs_regset32 *regset);
void debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs,
int nregs, void __iomem *base, char *prefix);
@@ -304,11 +304,10 @@ static inline struct dentry *debugfs_create_blob(const char *name, umode_t mode,
return ERR_PTR(-ENODEV);
}
-static inline struct dentry *debugfs_create_regset32(const char *name,
- umode_t mode, struct dentry *parent,
- struct debugfs_regset32 *regset)
+static inline void debugfs_create_regset32(const char *name, umode_t mode,
+ struct dentry *parent,
+ struct debugfs_regset32 *regset)
{
- return ERR_PTR(-ENODEV);
}
static inline void debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs,
diff --git a/include/linux/device.h b/include/linux/device.h
index 0cd7c64..fa04dfd 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -798,6 +798,17 @@ static inline struct device_node *dev_of_node(struct device *dev)
return dev->of_node;
}
+static inline bool dev_has_sync_state(struct device *dev)
+{
+ if (!dev)
+ return false;
+ if (dev->driver && dev->driver->sync_state)
+ return true;
+ if (dev->bus && dev->bus->sync_state)
+ return true;
+ return false;
+}
+
/*
* High level routines for use by the bus drivers
*/
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index f64ca27..d7bf029 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -69,19 +69,23 @@ struct dmar_pci_notify_info {
extern struct rw_semaphore dmar_global_lock;
extern struct list_head dmar_drhd_units;
-#define for_each_drhd_unit(drhd) \
- list_for_each_entry_rcu(drhd, &dmar_drhd_units, list)
+#define for_each_drhd_unit(drhd) \
+ list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \
+ dmar_rcu_check())
#define for_each_active_drhd_unit(drhd) \
- list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) \
+ list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \
+ dmar_rcu_check()) \
if (drhd->ignored) {} else
#define for_each_active_iommu(i, drhd) \
- list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) \
+ list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \
+ dmar_rcu_check()) \
if (i=drhd->iommu, drhd->ignored) {} else
#define for_each_iommu(i, drhd) \
- list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) \
+ list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \
+ dmar_rcu_check()) \
if (i=drhd->iommu, 0) {} else
static inline bool dmar_rcu_check(void)
diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
index 0aa803c..c620d91 100644
--- a/include/linux/dsa/8021q.h
+++ b/include/linux/dsa/8021q.h
@@ -28,8 +28,6 @@ int dsa_8021q_rx_switch_id(u16 vid);
int dsa_8021q_rx_source_port(u16 vid);
-struct sk_buff *dsa_8021q_remove_header(struct sk_buff *skb);
-
#else
int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index,
@@ -64,11 +62,6 @@ int dsa_8021q_rx_source_port(u16 vid)
return 0;
}
-struct sk_buff *dsa_8021q_remove_header(struct sk_buff *skb)
-{
- return NULL;
-}
-
#endif /* IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q) */
#endif /* _NET_DSA_8021Q_H */
diff --git a/include/linux/edac.h b/include/linux/edac.h
index cc31b97..0f20b98 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -383,6 +383,9 @@ struct dimm_info {
unsigned int csrow, cschannel; /* Points to the old API data */
u16 smbios_handle; /* Handle for SMBIOS type 17 */
+
+ u32 ce_count;
+ u32 ue_count;
};
/**
@@ -442,6 +445,7 @@ struct errcount_attribute_data {
* struct edac_raw_error_desc - Raw error report structure
* @grain: minimum granularity for an error report, in bytes
* @error_count: number of errors of the same type
+ * @type: severity of the error (CE/UE/Fatal)
* @top_layer: top layer of the error (layer[0])
* @mid_layer: middle layer of the error (layer[1])
* @low_layer: low layer of the error (layer[2])
@@ -453,8 +457,6 @@ struct errcount_attribute_data {
* @location: location of the error
* @label: label of the affected DIMM(s)
* @other_detail: other driver-specific detail about the error
- * @enable_per_layer_report: if false, the error affects all layers
- * (typically, a memory controller error)
*/
struct edac_raw_error_desc {
char location[LOCATION_SIZE];
@@ -462,6 +464,7 @@ struct edac_raw_error_desc {
long grain;
u16 error_count;
+ enum hw_event_mc_err_type type;
int top_layer;
int mid_layer;
int low_layer;
@@ -470,7 +473,6 @@ struct edac_raw_error_desc {
unsigned long syndrome;
const char *msg;
const char *other_detail;
- bool enable_per_layer_report;
};
/* MEMORY controller information structure
@@ -560,7 +562,6 @@ struct mem_ctl_info {
*/
u32 ce_noinfo_count, ue_noinfo_count;
u32 ue_mc, ce_mc;
- u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
struct completion complete;
diff --git a/include/linux/file.h b/include/linux/file.h
index c6c7b24..142d102 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -85,6 +85,7 @@ extern int f_dupfd(unsigned int from, struct file *file, unsigned flags);
extern int replace_fd(unsigned fd, struct file *file, unsigned flags);
extern void set_close_on_exec(unsigned int fd, int flag);
extern bool get_close_on_exec(unsigned int fd);
+extern int __get_unused_fd_flags(unsigned flags, unsigned long nofile);
extern int get_unused_fd_flags(unsigned flags);
extern void put_unused_fd(unsigned int fd);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3cd4fe6..593e911 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -698,6 +698,7 @@ struct inode {
struct rcu_head i_rcu;
};
atomic64_t i_version;
+ atomic64_t i_sequence; /* see futex */
atomic_t i_count;
atomic_t i_dio_count;
atomic_t i_writecount;
@@ -2699,7 +2700,6 @@ static inline void unregister_chrdev(unsigned int major, const char *name)
#ifdef CONFIG_BLOCK
#define BLKDEV_MAJOR_MAX 512
-extern const char *__bdevname(dev_t, char *buffer);
extern const char *bdevname(struct block_device *bdev, char *buffer);
extern struct block_device *lookup_bdev(const char *);
extern void blkdev_show(struct seq_file *,off_t);
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 5cc3fed..b70df27d 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -31,23 +31,26 @@ struct task_struct;
union futex_key {
struct {
+ u64 i_seq;
unsigned long pgoff;
- struct inode *inode;
- int offset;
+ unsigned int offset;
} shared;
struct {
+ union {
+ struct mm_struct *mm;
+ u64 __tmp;
+ };
unsigned long address;
- struct mm_struct *mm;
- int offset;
+ unsigned int offset;
} private;
struct {
+ u64 ptr;
unsigned long word;
- void *ptr;
- int offset;
+ unsigned int offset;
} both;
};
-#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
+#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
#ifdef CONFIG_FUTEX
enum {
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 6fbe585..9b3fffd 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -27,39 +27,8 @@
#define part_to_dev(part) (&((part)->__dev))
extern struct device_type part_type;
-extern struct kobject *block_depr;
extern struct class block_class;
-enum {
-/* These three have identical behaviour; use the second one if DOS FDISK gets
- confused about extended/logical partitions starting past cylinder 1023. */
- DOS_EXTENDED_PARTITION = 5,
- LINUX_EXTENDED_PARTITION = 0x85,
- WIN98_EXTENDED_PARTITION = 0x0f,
-
- SUN_WHOLE_DISK = DOS_EXTENDED_PARTITION,
-
- LINUX_SWAP_PARTITION = 0x82,
- LINUX_DATA_PARTITION = 0x83,
- LINUX_LVM_PARTITION = 0x8e,
- LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */
-
- SOLARIS_X86_PARTITION = LINUX_SWAP_PARTITION,
- NEW_SOLARIS_X86_PARTITION = 0xbf,
-
- DM6_AUX1PARTITION = 0x51, /* no DDO: use xlated geom */
- DM6_AUX3PARTITION = 0x53, /* no DDO: use xlated geom */
- DM6_PARTITION = 0x54, /* has DDO: use xlated geom & offset */
- EZD_PARTITION = 0x55, /* EZ-DRIVE */
-
- FREEBSD_PARTITION = 0xa5, /* FreeBSD Partition ID */
- OPENBSD_PARTITION = 0xa6, /* OpenBSD Partition ID */
- NETBSD_PARTITION = 0xa9, /* NetBSD Partition ID */
- BSDI_PARTITION = 0xb7, /* BSDI Partition ID */
- MINIX_PARTITION = 0x81, /* Minix Partition ID */
- UNIXWARE_PARTITION = 0x63, /* Same as GNU_HURD and SCO Unix */
-};
-
#define DISK_MAX_PARTS 256
#define DISK_NAME_LEN 32
@@ -70,26 +39,12 @@ enum {
#include <linux/fs.h>
#include <linux/workqueue.h>
-struct partition {
- unsigned char boot_ind; /* 0x80 - active */
- unsigned char head; /* starting head */
- unsigned char sector; /* starting sector */
- unsigned char cyl; /* starting cylinder */
- unsigned char sys_ind; /* What partition type */
- unsigned char end_head; /* end head */
- unsigned char end_sector; /* end sector */
- unsigned char end_cyl; /* end cylinder */
- __le32 start_sect; /* starting sector counting from 0 */
- __le32 nr_sects; /* nr of sectors in partition */
-} __attribute__((packed));
-
struct disk_stats {
u64 nsecs[NR_STAT_GROUPS];
unsigned long sectors[NR_STAT_GROUPS];
unsigned long ios[NR_STAT_GROUPS];
unsigned long merges[NR_STAT_GROUPS];
unsigned long io_ticks;
- unsigned long time_in_queue;
local_t in_flight[2];
};
@@ -133,17 +88,64 @@ struct hd_struct {
struct rcu_work rcu_work;
};
-#define GENHD_FL_REMOVABLE 1
-/* 2 is unused */
-#define GENHD_FL_MEDIA_CHANGE_NOTIFY 4
-#define GENHD_FL_CD 8
-#define GENHD_FL_UP 16
-#define GENHD_FL_SUPPRESS_PARTITION_INFO 32
-#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */
-#define GENHD_FL_NATIVE_CAPACITY 128
-#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 256
-#define GENHD_FL_NO_PART_SCAN 512
-#define GENHD_FL_HIDDEN 1024
+/**
+ * DOC: genhd capability flags
+ *
+ * ``GENHD_FL_REMOVABLE`` (0x0001): indicates that the block device
+ * gives access to removable media.
+ * When set, the device remains present even when media is not
+ * inserted.
+ * Must not be set for devices which are removed entirely when the
+ * media is removed.
+ *
+ * ``GENHD_FL_CD`` (0x0008): the block device is a CD-ROM-style
+ * device.
+ * Affects responses to the ``CDROM_GET_CAPABILITY`` ioctl.
+ *
+ * ``GENHD_FL_UP`` (0x0010): indicates that the block device is "up",
+ * with a similar meaning to network interfaces.
+ *
+ * ``GENHD_FL_SUPPRESS_PARTITION_INFO`` (0x0020): don't include
+ * partition information in ``/proc/partitions`` or in the output of
+ * printk_all_partitions().
+ * Used for the null block device and some MMC devices.
+ *
+ * ``GENHD_FL_EXT_DEVT`` (0x0040): the driver supports extended
+ * dynamic ``dev_t``, i.e. it wants extended device numbers
+ * (``BLOCK_EXT_MAJOR``).
+ * This affects the maximum number of partitions.
+ *
+ * ``GENHD_FL_NATIVE_CAPACITY`` (0x0080): based on information in the
+ * partition table, the device's capacity has been extended to its
+ * native capacity; i.e. the device has hidden capacity used by one
+ * of the partitions (this is a flag used so that native capacity is
+ * only ever unlocked once).
+ *
+ * ``GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE`` (0x0100): event polling is
+ * blocked whenever a writer holds an exclusive lock.
+ *
+ * ``GENHD_FL_NO_PART_SCAN`` (0x0200): partition scanning is disabled.
+ * Used for loop devices in their default settings and some MMC
+ * devices.
+ *
+ * ``GENHD_FL_HIDDEN`` (0x0400): the block device is hidden; it
+ * doesn't produce events, doesn't appear in sysfs, and doesn't have
+ * an associated ``bdev``.
+ * Implies ``GENHD_FL_SUPPRESS_PARTITION_INFO`` and
+ * ``GENHD_FL_NO_PART_SCAN``.
+ * Used for multipath devices.
+ */
+#define GENHD_FL_REMOVABLE 0x0001
+/* 2 is unused (used to be GENHD_FL_DRIVERFS) */
+/* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */
+#define GENHD_FL_CD 0x0008
+#define GENHD_FL_UP 0x0010
+#define GENHD_FL_SUPPRESS_PARTITION_INFO 0x0020
+#define GENHD_FL_EXT_DEVT 0x0040
+#define GENHD_FL_NATIVE_CAPACITY 0x0080
+#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 0x0100
+#define GENHD_FL_NO_PART_SCAN 0x0200
+#define GENHD_FL_HIDDEN 0x0400
enum {
DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */
@@ -189,7 +191,6 @@ struct gendisk {
* disks that can't be partitioned. */
char disk_name[DISK_NAME_LEN]; /* name of major driver */
- char *(*devnode)(struct gendisk *gd, umode_t *mode);
unsigned short events; /* supported events */
unsigned short event_flags; /* flags related to event processing */
@@ -245,18 +246,6 @@ static inline bool disk_part_scan_enabled(struct gendisk *disk)
!(disk->flags & GENHD_FL_NO_PART_SCAN);
}
-static inline bool disk_has_partitions(struct gendisk *disk)
-{
- bool ret = false;
-
- rcu_read_lock();
- if (rcu_dereference(disk->part_tbl)->len > 1)
- ret = true;
- rcu_read_unlock();
-
- return ret;
-}
-
static inline dev_t disk_devt(struct gendisk *disk)
{
return MKDEV(disk->major, disk->first_minor);
@@ -295,143 +284,7 @@ extern void disk_part_iter_init(struct disk_part_iter *piter,
struct gendisk *disk, unsigned int flags);
extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter);
extern void disk_part_iter_exit(struct disk_part_iter *piter);
-
-extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk,
- sector_t sector);
-
-/*
- * Macros to operate on percpu disk statistics:
- *
- * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters
- * and should be called between disk_stat_lock() and
- * disk_stat_unlock().
- *
- * part_stat_read() can be called at any time.
- *
- * part_stat_{add|set_all}() and {init|free}_part_stats are for
- * internal use only.
- */
-#ifdef CONFIG_SMP
-#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); })
-#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0)
-
-#define part_stat_get_cpu(part, field, cpu) \
- (per_cpu_ptr((part)->dkstats, (cpu))->field)
-
-#define part_stat_get(part, field) \
- part_stat_get_cpu(part, field, smp_processor_id())
-
-#define part_stat_read(part, field) \
-({ \
- typeof((part)->dkstats->field) res = 0; \
- unsigned int _cpu; \
- for_each_possible_cpu(_cpu) \
- res += per_cpu_ptr((part)->dkstats, _cpu)->field; \
- res; \
-})
-
-static inline void part_stat_set_all(struct hd_struct *part, int value)
-{
- int i;
-
- for_each_possible_cpu(i)
- memset(per_cpu_ptr(part->dkstats, i), value,
- sizeof(struct disk_stats));
-}
-
-static inline int init_part_stats(struct hd_struct *part)
-{
- part->dkstats = alloc_percpu(struct disk_stats);
- if (!part->dkstats)
- return 0;
- return 1;
-}
-
-static inline void free_part_stats(struct hd_struct *part)
-{
- free_percpu(part->dkstats);
-}
-
-#else /* !CONFIG_SMP */
-#define part_stat_lock() ({ rcu_read_lock(); 0; })
-#define part_stat_unlock() rcu_read_unlock()
-
-#define part_stat_get(part, field) ((part)->dkstats.field)
-#define part_stat_get_cpu(part, field, cpu) part_stat_get(part, field)
-#define part_stat_read(part, field) part_stat_get(part, field)
-
-static inline void part_stat_set_all(struct hd_struct *part, int value)
-{
- memset(&part->dkstats, value, sizeof(struct disk_stats));
-}
-
-static inline int init_part_stats(struct hd_struct *part)
-{
- return 1;
-}
-
-static inline void free_part_stats(struct hd_struct *part)
-{
-}
-
-#endif /* CONFIG_SMP */
-
-#define part_stat_read_msecs(part, which) \
- div_u64(part_stat_read(part, nsecs[which]), NSEC_PER_MSEC)
-
-#define part_stat_read_accum(part, field) \
- (part_stat_read(part, field[STAT_READ]) + \
- part_stat_read(part, field[STAT_WRITE]) + \
- part_stat_read(part, field[STAT_DISCARD]))
-
-#define __part_stat_add(part, field, addnd) \
- (part_stat_get(part, field) += (addnd))
-
-#define part_stat_add(part, field, addnd) do { \
- __part_stat_add((part), field, addnd); \
- if ((part)->partno) \
- __part_stat_add(&part_to_disk((part))->part0, \
- field, addnd); \
-} while (0)
-
-#define part_stat_dec(gendiskp, field) \
- part_stat_add(gendiskp, field, -1)
-#define part_stat_inc(gendiskp, field) \
- part_stat_add(gendiskp, field, 1)
-#define part_stat_sub(gendiskp, field, subnd) \
- part_stat_add(gendiskp, field, -subnd)
-
-#define part_stat_local_dec(gendiskp, field) \
- local_dec(&(part_stat_get(gendiskp, field)))
-#define part_stat_local_inc(gendiskp, field) \
- local_inc(&(part_stat_get(gendiskp, field)))
-#define part_stat_local_read(gendiskp, field) \
- local_read(&(part_stat_get(gendiskp, field)))
-#define part_stat_local_read_cpu(gendiskp, field, cpu) \
- local_read(&(part_stat_get_cpu(gendiskp, field, cpu)))
-
-unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part);
-void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
- unsigned int inflight[2]);
-void part_dec_in_flight(struct request_queue *q, struct hd_struct *part,
- int rw);
-void part_inc_in_flight(struct request_queue *q, struct hd_struct *part,
- int rw);
-
-static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk)
-{
- if (disk)
- return kzalloc_node(sizeof(struct partition_meta_info),
- GFP_KERNEL, disk->node_id);
- return kzalloc(sizeof(struct partition_meta_info), GFP_KERNEL);
-}
-
-static inline void free_part_info(struct hd_struct *part)
-{
- kfree(part->info);
-}
-
-void update_io_ticks(struct hd_struct *part, unsigned long now);
+extern bool disk_has_partitions(struct gendisk *disk);
/* block/genhd.c */
extern void device_add_disk(struct device *parent, struct gendisk *disk,
@@ -461,6 +314,8 @@ static inline int get_disk_ro(struct gendisk *disk)
extern void disk_block_events(struct gendisk *disk);
extern void disk_unblock_events(struct gendisk *disk);
extern void disk_flush_events(struct gendisk *disk, unsigned int mask);
+extern void set_capacity_revalidate_and_notify(struct gendisk *disk,
+ sector_t size, bool revalidate);
extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask);
/* drivers/char/random.c */
@@ -480,170 +335,11 @@ static inline void set_capacity(struct gendisk *disk, sector_t size)
disk->part0.nr_sects = size;
}
-#ifdef CONFIG_SOLARIS_X86_PARTITION
-
-#define SOLARIS_X86_NUMSLICE 16
-#define SOLARIS_X86_VTOC_SANE (0x600DDEEEUL)
-
-struct solaris_x86_slice {
- __le16 s_tag; /* ID tag of partition */
- __le16 s_flag; /* permission flags */
- __le32 s_start; /* start sector no of partition */
- __le32 s_size; /* # of blocks in partition */
-};
-
-struct solaris_x86_vtoc {
- unsigned int v_bootinfo[3]; /* info needed by mboot (unsupported) */
- __le32 v_sanity; /* to verify vtoc sanity */
- __le32 v_version; /* layout version */
- char v_volume[8]; /* volume name */
- __le16 v_sectorsz; /* sector size in bytes */
- __le16 v_nparts; /* number of partitions */
- unsigned int v_reserved[10]; /* free space */
- struct solaris_x86_slice
- v_slice[SOLARIS_X86_NUMSLICE]; /* slice headers */
- unsigned int timestamp[SOLARIS_X86_NUMSLICE]; /* timestamp (unsupported) */
- char v_asciilabel[128]; /* for compatibility */
-};
-
-#endif /* CONFIG_SOLARIS_X86_PARTITION */
-
-#ifdef CONFIG_BSD_DISKLABEL
-/*
- * BSD disklabel support by Yossi Gottlieb <yogo@math.tau.ac.il>
- * updated by Marc Espie <Marc.Espie@openbsd.org>
- */
-
-/* check against BSD src/sys/sys/disklabel.h for consistency */
-
-#define BSD_DISKMAGIC (0x82564557UL) /* The disk magic number */
-#define BSD_MAXPARTITIONS 16
-#define OPENBSD_MAXPARTITIONS 16
-#define BSD_FS_UNUSED 0 /* disklabel unused partition entry ID */
-struct bsd_disklabel {
- __le32 d_magic; /* the magic number */
- __s16 d_type; /* drive type */
- __s16 d_subtype; /* controller/d_type specific */
- char d_typename[16]; /* type name, e.g. "eagle" */
- char d_packname[16]; /* pack identifier */
- __u32 d_secsize; /* # of bytes per sector */
- __u32 d_nsectors; /* # of data sectors per track */
- __u32 d_ntracks; /* # of tracks per cylinder */
- __u32 d_ncylinders; /* # of data cylinders per unit */
- __u32 d_secpercyl; /* # of data sectors per cylinder */
- __u32 d_secperunit; /* # of data sectors per unit */
- __u16 d_sparespertrack; /* # of spare sectors per track */
- __u16 d_sparespercyl; /* # of spare sectors per cylinder */
- __u32 d_acylinders; /* # of alt. cylinders per unit */
- __u16 d_rpm; /* rotational speed */
- __u16 d_interleave; /* hardware sector interleave */
- __u16 d_trackskew; /* sector 0 skew, per track */
- __u16 d_cylskew; /* sector 0 skew, per cylinder */
- __u32 d_headswitch; /* head switch time, usec */
- __u32 d_trkseek; /* track-to-track seek, usec */
- __u32 d_flags; /* generic flags */
-#define NDDATA 5
- __u32 d_drivedata[NDDATA]; /* drive-type specific information */
-#define NSPARE 5
- __u32 d_spare[NSPARE]; /* reserved for future use */
- __le32 d_magic2; /* the magic number (again) */
- __le16 d_checksum; /* xor of data incl. partitions */
-
- /* filesystem and partition information: */
- __le16 d_npartitions; /* number of partitions in following */
- __le32 d_bbsize; /* size of boot area at sn0, bytes */
- __le32 d_sbsize; /* max size of fs superblock, bytes */
- struct bsd_partition { /* the partition table */
- __le32 p_size; /* number of sectors in partition */
- __le32 p_offset; /* starting sector */
- __le32 p_fsize; /* filesystem basic fragment size */
- __u8 p_fstype; /* filesystem type, see below */
- __u8 p_frag; /* filesystem fragments per block */
- __le16 p_cpg; /* filesystem cylinders per group */
- } d_partitions[BSD_MAXPARTITIONS]; /* actually may be more */
-};
-
-#endif /* CONFIG_BSD_DISKLABEL */
-
-#ifdef CONFIG_UNIXWARE_DISKLABEL
-/*
- * Unixware slices support by Andrzej Krzysztofowicz <ankry@mif.pg.gda.pl>
- * and Krzysztof G. Baranowski <kgb@knm.org.pl>
- */
-
-#define UNIXWARE_DISKMAGIC (0xCA5E600DUL) /* The disk magic number */
-#define UNIXWARE_DISKMAGIC2 (0x600DDEEEUL) /* The slice table magic nr */
-#define UNIXWARE_NUMSLICE 16
-#define UNIXWARE_FS_UNUSED 0 /* Unused slice entry ID */
-
-struct unixware_slice {
- __le16 s_label; /* label */
- __le16 s_flags; /* permission flags */
- __le32 start_sect; /* starting sector */
- __le32 nr_sects; /* number of sectors in slice */
-};
-
-struct unixware_disklabel {
- __le32 d_type; /* drive type */
- __le32 d_magic; /* the magic number */
- __le32 d_version; /* version number */
- char d_serial[12]; /* serial number of the device */
- __le32 d_ncylinders; /* # of data cylinders per device */
- __le32 d_ntracks; /* # of tracks per cylinder */
- __le32 d_nsectors; /* # of data sectors per track */
- __le32 d_secsize; /* # of bytes per sector */
- __le32 d_part_start; /* # of first sector of this partition */
- __le32 d_unknown1[12]; /* ? */
- __le32 d_alt_tbl; /* byte offset of alternate table */
- __le32 d_alt_len; /* byte length of alternate table */
- __le32 d_phys_cyl; /* # of physical cylinders per device */
- __le32 d_phys_trk; /* # of physical tracks per cylinder */
- __le32 d_phys_sec; /* # of physical sectors per track */
- __le32 d_phys_bytes; /* # of physical bytes per sector */
- __le32 d_unknown2; /* ? */
- __le32 d_unknown3; /* ? */
- __le32 d_pad[8]; /* pad */
-
- struct unixware_vtoc {
- __le32 v_magic; /* the magic number */
- __le32 v_version; /* version number */
- char v_name[8]; /* volume name */
- __le16 v_nslices; /* # of slices */
- __le16 v_unknown1; /* ? */
- __le32 v_reserved[10]; /* reserved */
- struct unixware_slice
- v_slice[UNIXWARE_NUMSLICE]; /* slice headers */
- } vtoc;
-
-}; /* 408 */
-
-#endif /* CONFIG_UNIXWARE_DISKLABEL */
-
-#ifdef CONFIG_MINIX_SUBPARTITION
-# define MINIX_NR_SUBPARTITIONS 4
-#endif /* CONFIG_MINIX_SUBPARTITION */
-
-#define ADDPART_FLAG_NONE 0
-#define ADDPART_FLAG_RAID 1
-#define ADDPART_FLAG_WHOLEDISK 2
-
-extern int blk_alloc_devt(struct hd_struct *part, dev_t *devt);
-extern void blk_free_devt(dev_t devt);
-extern void blk_invalidate_devt(dev_t devt);
extern dev_t blk_lookup_devt(const char *name, int partno);
-extern char *disk_name (struct gendisk *hd, int partno, char *buf);
int bdev_disk_changed(struct block_device *bdev, bool invalidate);
int blk_add_partitions(struct gendisk *disk, struct block_device *bdev);
int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev);
-extern int disk_expand_part_tbl(struct gendisk *disk, int target);
-extern struct hd_struct * __must_check add_partition(struct gendisk *disk,
- int partno, sector_t start,
- sector_t len, int flags,
- struct partition_meta_info
- *info);
-extern void __delete_partition(struct percpu_ref *);
-extern void delete_partition(struct gendisk *, int);
extern void printk_all_partitions(void);
extern struct gendisk *__alloc_disk_node(int minors, int node_id);
@@ -657,20 +353,6 @@ extern void blk_register_region(dev_t devt, unsigned long range,
void *data);
extern void blk_unregister_region(dev_t devt, unsigned long range);
-extern ssize_t part_size_show(struct device *dev,
- struct device_attribute *attr, char *buf);
-extern ssize_t part_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf);
-extern ssize_t part_inflight_show(struct device *dev,
- struct device_attribute *attr, char *buf);
-#ifdef CONFIG_FAIL_MAKE_REQUEST
-extern ssize_t part_fail_show(struct device *dev,
- struct device_attribute *attr, char *buf);
-extern ssize_t part_fail_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count);
-#endif /* CONFIG_FAIL_MAKE_REQUEST */
-
#define alloc_disk_node(minors, node_id) \
({ \
static struct lock_class_key __key; \
@@ -689,100 +371,6 @@ extern ssize_t part_fail_store(struct device *dev,
#define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE)
-static inline int hd_ref_init(struct hd_struct *part)
-{
- if (percpu_ref_init(&part->ref, __delete_partition, 0,
- GFP_KERNEL))
- return -ENOMEM;
- return 0;
-}
-
-static inline void hd_struct_get(struct hd_struct *part)
-{
- percpu_ref_get(&part->ref);
-}
-
-static inline int hd_struct_try_get(struct hd_struct *part)
-{
- return percpu_ref_tryget_live(&part->ref);
-}
-
-static inline void hd_struct_put(struct hd_struct *part)
-{
- percpu_ref_put(&part->ref);
-}
-
-static inline void hd_struct_kill(struct hd_struct *part)
-{
- percpu_ref_kill(&part->ref);
-}
-
-static inline void hd_free_part(struct hd_struct *part)
-{
- free_part_stats(part);
- free_part_info(part);
- percpu_ref_exit(&part->ref);
-}
-
-/*
- * Any access of part->nr_sects which is not protected by partition
- * bd_mutex or gendisk bdev bd_mutex, should be done using this
- * accessor function.
- *
- * Code written along the lines of i_size_read() and i_size_write().
- * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption
- * on.
- */
-static inline sector_t part_nr_sects_read(struct hd_struct *part)
-{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- sector_t nr_sects;
- unsigned seq;
- do {
- seq = read_seqcount_begin(&part->nr_sects_seq);
- nr_sects = part->nr_sects;
- } while (read_seqcount_retry(&part->nr_sects_seq, seq));
- return nr_sects;
-#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
- sector_t nr_sects;
-
- preempt_disable();
- nr_sects = part->nr_sects;
- preempt_enable();
- return nr_sects;
-#else
- return part->nr_sects;
-#endif
-}
-
-/*
- * Should be called with mutex lock held (typically bd_mutex) of partition
- * to provide mutual exlusion among writers otherwise seqcount might be
- * left in wrong state leaving the readers spinning infinitely.
- */
-static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
-{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- write_seqcount_begin(&part->nr_sects_seq);
- part->nr_sects = size;
- write_seqcount_end(&part->nr_sects_seq);
-#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
- preempt_disable();
- part->nr_sects = size;
- preempt_enable();
-#else
- part->nr_sects = size;
-#endif
-}
-
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-extern void blk_integrity_add(struct gendisk *);
-extern void blk_integrity_del(struct gendisk *);
-#else /* CONFIG_BLK_DEV_INTEGRITY */
-static inline void blk_integrity_add(struct gendisk *disk) { }
-static inline void blk_integrity_del(struct gendisk *disk) { }
-#endif /* CONFIG_BLK_DEV_INTEGRITY */
-
#else /* CONFIG_BLOCK */
static inline void printk_all_partitions(void) { }
diff --git a/include/linux/hid.h b/include/linux/hid.h
index cd41f20..875f711 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -492,7 +492,7 @@ struct hid_report_enum {
};
#define HID_MIN_BUFFER_SIZE 64 /* make sure there is at least a packet size of space */
-#define HID_MAX_BUFFER_SIZE 4096 /* 4kb */
+#define HID_MAX_BUFFER_SIZE 8192 /* 8kb */
#define HID_CONTROL_FIFO_SIZE 256 /* to init devices with >100 reports */
#define HID_OUTPUT_FIFO_SIZE 64
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index f834687..f6b9421 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -506,7 +506,7 @@ i2c_register_board_info(int busnum, struct i2c_board_info const *info,
* @smbus_xfer_atomic: same as @smbus_xfer. Yet, only using atomic context
* so e.g. PMICs can be accessed very late before shutdown. Optional.
* @functionality: Return the flags that this algorithm/adapter pair supports
- * from the I2C_FUNC_* flags.
+ * from the ``I2C_FUNC_*`` flags.
* @reg_slave: Register given client to I2C slave mode of this adapter
* @unreg_slave: Unregister given client from I2C slave mode of this adapter
*
@@ -515,7 +515,7 @@ i2c_register_board_info(int busnum, struct i2c_board_info const *info,
* be addressed using the same bus algorithms - i.e. bit-banging or the PCF8584
* to name two of the most common.
*
- * The return codes from the @master_xfer{_atomic} fields should indicate the
+ * The return codes from the ``master_xfer{_atomic}`` fields should indicate the
* type of error code that occurred during the transfer, as documented in the
* Kernel Documentation file Documentation/i2c/fault-codes.rst.
*/
diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h
index ef1cbb5..33d3796 100644
--- a/include/linux/icmpv6.h
+++ b/include/linux/icmpv6.h
@@ -22,12 +22,22 @@ extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn);
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
unsigned int data_len);
+#if IS_ENABLED(CONFIG_NF_NAT)
+void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info);
+#else
+#define icmpv6_ndo_send icmpv6_send
+#endif
+
#else
static inline void icmpv6_send(struct sk_buff *skb,
u8 type, u8 code, __u32 info)
{
+}
+static inline void icmpv6_ndo_send(struct sk_buff *skb,
+ u8 type, u8 code, __u32 info)
+{
}
#endif
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 7d3f2ce..73c66a3 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2102,14 +2102,14 @@ ieee80211_he_spr_size(const u8 *he_spr_ie)
{
struct ieee80211_he_spr *he_spr = (void *)he_spr_ie;
u8 spr_len = sizeof(struct ieee80211_he_spr);
- u32 he_spr_params;
+ u8 he_spr_params;
/* Make sure the input is not NULL */
if (!he_spr_ie)
return 0;
/* Calc required length */
- he_spr_params = le32_to_cpu(he_spr->he_sr_control);
+ he_spr_params = he_spr->he_sr_control;
if (he_spr_params & IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT)
spr_len++;
if (he_spr_params & IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT)
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 39faaaf..c91cf2d 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -2,15 +2,10 @@
#ifndef _INET_DIAG_H_
#define _INET_DIAG_H_ 1
+#include <net/netlink.h>
#include <uapi/linux/inet_diag.h>
-struct net;
-struct sock;
struct inet_hashinfo;
-struct nlattr;
-struct nlmsghdr;
-struct sk_buff;
-struct netlink_callback;
struct inet_diag_handler {
void (*dump)(struct sk_buff *skb,
@@ -62,6 +57,17 @@ int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk);
+static inline size_t inet_diag_msg_attrs_size(void)
+{
+ return nla_total_size(1) /* INET_DIAG_SHUTDOWN */
+ + nla_total_size(1) /* INET_DIAG_TOS */
+#if IS_ENABLED(CONFIG_IPV6)
+ + nla_total_size(1) /* INET_DIAG_TCLASS */
+ + nla_total_size(1) /* INET_DIAG_SKV6ONLY */
+#endif
+ + nla_total_size(4) /* INET_DIAG_MARK */
+ + nla_total_size(4); /* INET_DIAG_CLASS_ID */
+}
int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
struct inet_diag_msg *r, int ext,
struct user_namespace *user_ns, bool net_admin);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 4a16b39..980234a 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -123,6 +123,8 @@
#define dmar_readq(a) readq(a)
#define dmar_writeq(a,v) writeq(v,a)
+#define dmar_readl(a) readl(a)
+#define dmar_writel(a, v) writel(v, a)
#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4)
#define DMAR_VER_MINOR(v) ((v) & 0x0f)
diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index 94f047a..d7c403d 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h
@@ -122,7 +122,7 @@ static inline int intel_svm_unbind_mm(struct device *dev, int pasid)
BUG();
}
-static int intel_svm_is_pasid_valid(struct device *dev, int pasid)
+static inline int intel_svm_is_pasid_valid(struct device *dev, int pasid)
{
return -EINVAL;
}
diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
index 837058b..b336622 100644
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -16,7 +16,7 @@
* The io_mapping mechanism provides an abstraction for mapping
* individual pages from an io device to the CPU in an efficient fashion.
*
- * See Documentation/io-mapping.txt
+ * See Documentation/driver-api/io-mapping.rst
*/
struct io_mapping {
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index dba15ca..1dcd919 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -8,6 +8,7 @@
enum {
ICQ_EXITED = 1 << 2,
+ ICQ_DESTROYED = 1 << 3,
};
/*
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index b2d47571..8d062e8 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -192,7 +192,7 @@ enum {
IRQ_DOMAIN_FLAG_HIERARCHY = (1 << 0),
/* Irq domain name was allocated in __irq_domain_add() */
- IRQ_DOMAIN_NAME_ALLOCATED = (1 << 6),
+ IRQ_DOMAIN_NAME_ALLOCATED = (1 << 1),
/* Irq domain is an IPI domain with virq per cpu */
IRQ_DOMAIN_FLAG_IPI_PER_CPU = (1 << 2),
diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index b2bb44f..d1fb051 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -66,33 +66,15 @@ static inline ktime_t ktime_set(const s64 secs, const unsigned long nsecs)
*/
#define ktime_sub_ns(kt, nsval) ((kt) - (nsval))
-/* convert a timespec to ktime_t format: */
-static inline ktime_t timespec_to_ktime(struct timespec ts)
-{
- return ktime_set(ts.tv_sec, ts.tv_nsec);
-}
-
/* convert a timespec64 to ktime_t format: */
static inline ktime_t timespec64_to_ktime(struct timespec64 ts)
{
return ktime_set(ts.tv_sec, ts.tv_nsec);
}
-/* convert a timeval to ktime_t format: */
-static inline ktime_t timeval_to_ktime(struct timeval tv)
-{
- return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC);
-}
-
-/* Map the ktime_t to timespec conversion to ns_to_timespec function */
-#define ktime_to_timespec(kt) ns_to_timespec((kt))
-
/* Map the ktime_t to timespec conversion to ns_to_timespec function */
#define ktime_to_timespec64(kt) ns_to_timespec64((kt))
-/* Map the ktime_t to timeval conversion to ns_to_timeval function */
-#define ktime_to_timeval(kt) ns_to_timeval((kt))
-
/* Convert ktime_t to nanoseconds */
static inline s64 ktime_to_ns(const ktime_t kt)
{
@@ -216,25 +198,6 @@ static inline ktime_t ktime_sub_ms(const ktime_t kt, const u64 msec)
extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs);
/**
- * ktime_to_timespec_cond - convert a ktime_t variable to timespec
- * format only if the variable contains data
- * @kt: the ktime_t variable to convert
- * @ts: the timespec variable to store the result in
- *
- * Return: %true if there was a successful conversion, %false if kt was 0.
- */
-static inline __must_check bool ktime_to_timespec_cond(const ktime_t kt,
- struct timespec *ts)
-{
- if (kt) {
- *ts = ktime_to_timespec(kt);
- return true;
- } else {
- return false;
- }
-}
-
-/**
* ktime_to_timespec64_cond - convert a ktime_t variable to timespec64
* format only if the variable contains data
* @kt: the ktime_t variable to convert
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e89eb67..bcb9b2a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -889,6 +889,8 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu);
+int kvm_arch_post_init_vm(struct kvm *kvm);
+void kvm_arch_pre_destroy_vm(struct kvm *kvm);
#ifndef __KVM_HAVE_ARCH_VM_ALLOC
/*
@@ -1342,7 +1344,7 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
struct kvm_vcpu *kvm_get_running_vcpu(void);
-struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
bool kvm_arch_has_irq_bypass(void);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 2ca9b70..cffa471 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -57,8 +57,6 @@
#define VPRINTK(fmt, args...)
#endif /* ATA_DEBUG */
-#define BPRINTK(fmt, args...) if (ap->flags & ATA_FLAG_DEBUGMSG) printk(KERN_ERR "%s: " fmt, __func__, ## args)
-
#define ata_print_version_once(dev, version) \
({ \
static bool __print_once; \
@@ -176,6 +174,7 @@ enum {
ATA_DEV_NONE = 11, /* no device */
/* struct ata_link flags */
+ /* NOTE: struct ata_force_param currently stores lflags in u16 */
ATA_LFLAG_NO_HRST = (1 << 1), /* avoid hardreset */
ATA_LFLAG_NO_SRST = (1 << 2), /* avoid softreset */
ATA_LFLAG_ASSUME_ATA = (1 << 3), /* assume ATA class */
@@ -531,12 +530,14 @@ typedef int (*ata_reset_fn_t)(struct ata_link *link, unsigned int *classes,
unsigned long deadline);
typedef void (*ata_postreset_fn_t)(struct ata_link *link, unsigned int *classes);
-extern struct device_attribute dev_attr_link_power_management_policy;
extern struct device_attribute dev_attr_unload_heads;
+#ifdef CONFIG_SATA_HOST
+extern struct device_attribute dev_attr_link_power_management_policy;
extern struct device_attribute dev_attr_ncq_prio_enable;
extern struct device_attribute dev_attr_em_message_type;
extern struct device_attribute dev_attr_em_message;
extern struct device_attribute dev_attr_sw_activity;
+#endif
enum sw_activity {
OFF,
@@ -1020,10 +1021,6 @@ struct ata_timing {
/*
* Core layer - drivers/ata/libata-core.c
*/
-extern const unsigned long sata_deb_timing_normal[];
-extern const unsigned long sata_deb_timing_hotplug[];
-extern const unsigned long sata_deb_timing_long[];
-
extern struct ata_port_operations ata_dummy_port_ops;
extern const struct ata_port_info ata_dummy_port_info;
@@ -1061,33 +1058,14 @@ static inline int is_multi_taskfile(struct ata_taskfile *tf)
(tf->command == ATA_CMD_WRITE_MULTI_FUA_EXT);
}
-static inline const unsigned long *
-sata_ehc_deb_timing(struct ata_eh_context *ehc)
-{
- if (ehc->i.flags & ATA_EHI_HOTPLUGGED)
- return sata_deb_timing_hotplug;
- else
- return sata_deb_timing_normal;
-}
-
static inline int ata_port_is_dummy(struct ata_port *ap)
{
return ap->ops == &ata_dummy_port_ops;
}
-extern int sata_set_spd(struct ata_link *link);
extern int ata_std_prereset(struct ata_link *link, unsigned long deadline);
extern int ata_wait_after_reset(struct ata_link *link, unsigned long deadline,
int (*check_ready)(struct ata_link *link));
-extern int sata_link_debounce(struct ata_link *link,
- const unsigned long *params, unsigned long deadline);
-extern int sata_link_resume(struct ata_link *link, const unsigned long *params,
- unsigned long deadline);
-extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy,
- bool spm_wakeup);
-extern int sata_link_hardreset(struct ata_link *link,
- const unsigned long *timing, unsigned long deadline,
- bool *online, int (*check_ready)(struct ata_link *));
extern int sata_std_hardreset(struct ata_link *link, unsigned int *class,
unsigned long deadline);
extern void ata_std_postreset(struct ata_link *link, unsigned int *classes);
@@ -1095,7 +1073,6 @@ extern void ata_std_postreset(struct ata_link *link, unsigned int *classes);
extern struct ata_host *ata_host_alloc(struct device *dev, int max_ports);
extern struct ata_host *ata_host_alloc_pinfo(struct device *dev,
const struct ata_port_info * const * ppi, int n_ports);
-extern int ata_slave_link_init(struct ata_port *ap);
extern void ata_host_get(struct ata_host *host);
extern void ata_host_put(struct ata_host *host);
extern int ata_host_start(struct ata_host *host);
@@ -1117,22 +1094,6 @@ extern int ata_scsi_ioctl(struct scsi_device *dev, unsigned int cmd,
extern int ata_scsi_queuecmd(struct Scsi_Host *h, struct scsi_cmnd *cmd);
extern int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *dev,
unsigned int cmd, void __user *arg);
-extern void ata_sas_port_destroy(struct ata_port *);
-extern struct ata_port *ata_sas_port_alloc(struct ata_host *,
- struct ata_port_info *, struct Scsi_Host *);
-extern void ata_sas_async_probe(struct ata_port *ap);
-extern int ata_sas_sync_probe(struct ata_port *ap);
-extern int ata_sas_port_init(struct ata_port *);
-extern int ata_sas_port_start(struct ata_port *ap);
-extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap);
-extern void ata_sas_tport_delete(struct ata_port *ap);
-extern void ata_sas_port_stop(struct ata_port *ap);
-extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *);
-extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap);
-extern int sata_scr_valid(struct ata_link *link);
-extern int sata_scr_read(struct ata_link *link, int reg, u32 *val);
-extern int sata_scr_write(struct ata_link *link, int reg, u32 val);
-extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val);
extern bool ata_link_online(struct ata_link *link);
extern bool ata_link_offline(struct ata_link *link);
#ifdef CONFIG_PM
@@ -1153,9 +1114,6 @@ extern void ata_msleep(struct ata_port *ap, unsigned int msecs);
extern u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask,
u32 val, unsigned long interval, unsigned long timeout);
extern int atapi_cmd_type(u8 opcode);
-extern void ata_tf_to_fis(const struct ata_taskfile *tf,
- u8 pmp, int is_cmd, u8 *fis);
-extern void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf);
extern unsigned long ata_pack_xfermask(unsigned long pio_mask,
unsigned long mwdma_mask, unsigned long udma_mask);
extern void ata_unpack_xfermask(unsigned long xfer_mask,
@@ -1179,7 +1137,6 @@ extern void ata_id_c_string(const u16 *id, unsigned char *s,
extern unsigned int ata_do_dev_read_id(struct ata_device *dev,
struct ata_taskfile *tf, u16 *id);
extern void ata_qc_complete(struct ata_queued_cmd *qc);
-extern int ata_qc_complete_multiple(struct ata_port *ap, u64 qc_active);
extern u64 ata_qc_get_active(struct ata_port *ap);
extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd);
extern int ata_std_bios_param(struct scsi_device *sdev,
@@ -1196,7 +1153,96 @@ extern struct ata_device *ata_dev_pair(struct ata_device *adev);
extern int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev);
extern void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap);
extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, struct list_head *eh_q);
+
+/*
+ * SATA specific code - drivers/ata/libata-sata.c
+ */
+#ifdef CONFIG_SATA_HOST
+extern const unsigned long sata_deb_timing_normal[];
+extern const unsigned long sata_deb_timing_hotplug[];
+extern const unsigned long sata_deb_timing_long[];
+
+static inline const unsigned long *
+sata_ehc_deb_timing(struct ata_eh_context *ehc)
+{
+ if (ehc->i.flags & ATA_EHI_HOTPLUGGED)
+ return sata_deb_timing_hotplug;
+ else
+ return sata_deb_timing_normal;
+}
+
+extern int sata_scr_valid(struct ata_link *link);
+extern int sata_scr_read(struct ata_link *link, int reg, u32 *val);
+extern int sata_scr_write(struct ata_link *link, int reg, u32 val);
+extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val);
+extern int sata_set_spd(struct ata_link *link);
+extern int sata_link_hardreset(struct ata_link *link,
+ const unsigned long *timing, unsigned long deadline,
+ bool *online, int (*check_ready)(struct ata_link *));
+extern int sata_link_resume(struct ata_link *link, const unsigned long *params,
+ unsigned long deadline);
+extern void ata_eh_analyze_ncq_error(struct ata_link *link);
+#else
+static inline const unsigned long *
+sata_ehc_deb_timing(struct ata_eh_context *ehc)
+{
+ return NULL;
+}
+static inline int sata_scr_valid(struct ata_link *link) { return 0; }
+static inline int sata_scr_read(struct ata_link *link, int reg, u32 *val)
+{
+ return -EOPNOTSUPP;
+}
+static inline int sata_scr_write(struct ata_link *link, int reg, u32 val)
+{
+ return -EOPNOTSUPP;
+}
+static inline int sata_scr_write_flush(struct ata_link *link, int reg, u32 val)
+{
+ return -EOPNOTSUPP;
+}
+static inline int sata_set_spd(struct ata_link *link) { return -EOPNOTSUPP; }
+static inline int sata_link_hardreset(struct ata_link *link,
+ const unsigned long *timing,
+ unsigned long deadline,
+ bool *online,
+ int (*check_ready)(struct ata_link *))
+{
+ if (online)
+ *online = false;
+ return -EOPNOTSUPP;
+}
+static inline int sata_link_resume(struct ata_link *link,
+ const unsigned long *params,
+ unsigned long deadline)
+{
+ return -EOPNOTSUPP;
+}
+static inline void ata_eh_analyze_ncq_error(struct ata_link *link) { }
+#endif
+extern int sata_link_debounce(struct ata_link *link,
+ const unsigned long *params, unsigned long deadline);
+extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy,
+ bool spm_wakeup);
+extern int ata_slave_link_init(struct ata_port *ap);
+extern void ata_sas_port_destroy(struct ata_port *);
+extern struct ata_port *ata_sas_port_alloc(struct ata_host *,
+ struct ata_port_info *, struct Scsi_Host *);
+extern void ata_sas_async_probe(struct ata_port *ap);
+extern int ata_sas_sync_probe(struct ata_port *ap);
+extern int ata_sas_port_init(struct ata_port *);
+extern int ata_sas_port_start(struct ata_port *ap);
+extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap);
+extern void ata_sas_tport_delete(struct ata_port *ap);
+extern void ata_sas_port_stop(struct ata_port *ap);
+extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *);
+extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap);
+extern void ata_tf_to_fis(const struct ata_taskfile *tf,
+ u8 pmp, int is_cmd, u8 *fis);
+extern void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf);
+extern int ata_qc_complete_multiple(struct ata_port *ap, u64 qc_active);
extern bool sata_lpm_ignore_phy_events(struct ata_link *link);
+extern int sata_async_notification(struct ata_port *ap);
extern int ata_cable_40wire(struct ata_port *ap);
extern int ata_cable_80wire(struct ata_port *ap);
@@ -1206,12 +1252,6 @@ extern int ata_cable_unknown(struct ata_port *ap);
/* Timing helpers */
extern unsigned int ata_pio_need_iordy(const struct ata_device *);
-extern const struct ata_timing *ata_timing_find_mode(u8 xfer_mode);
-extern int ata_timing_compute(struct ata_device *, unsigned short,
- struct ata_timing *, int, int);
-extern void ata_timing_merge(const struct ata_timing *,
- const struct ata_timing *, struct ata_timing *,
- unsigned int);
extern u8 ata_timing_cycle2mode(unsigned int xfer_shift, int cycle);
/* PCI */
@@ -1295,14 +1335,12 @@ extern void ata_port_wait_eh(struct ata_port *ap);
extern int ata_link_abort(struct ata_link *link);
extern int ata_port_abort(struct ata_port *ap);
extern int ata_port_freeze(struct ata_port *ap);
-extern int sata_async_notification(struct ata_port *ap);
extern void ata_eh_freeze_port(struct ata_port *ap);
extern void ata_eh_thaw_port(struct ata_port *ap);
extern void ata_eh_qc_complete(struct ata_queued_cmd *qc);
extern void ata_eh_qc_retry(struct ata_queued_cmd *qc);
-extern void ata_eh_analyze_ncq_error(struct ata_link *link);
extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
@@ -1343,7 +1381,7 @@ extern struct device_attribute *ata_common_sdev_attrs[];
* edge driver's module reference, otherwise the driver can be unloaded
* even if the scsi_device is being accessed.
*/
-#define ATA_BASE_SHT(drv_name) \
+#define __ATA_BASE_SHT(drv_name) \
.module = THIS_MODULE, \
.name = drv_name, \
.ioctl = ata_scsi_ioctl, \
@@ -1357,12 +1395,20 @@ extern struct device_attribute *ata_common_sdev_attrs[];
.slave_configure = ata_scsi_slave_config, \
.slave_destroy = ata_scsi_slave_destroy, \
.bios_param = ata_std_bios_param, \
- .unlock_native_capacity = ata_scsi_unlock_native_capacity, \
+ .unlock_native_capacity = ata_scsi_unlock_native_capacity
+
+#define ATA_BASE_SHT(drv_name) \
+ __ATA_BASE_SHT(drv_name), \
.sdev_attrs = ata_common_sdev_attrs
+#ifdef CONFIG_SATA_HOST
+extern struct device_attribute *ata_ncq_sdev_attrs[];
+
#define ATA_NCQ_SHT(drv_name) \
- ATA_BASE_SHT(drv_name), \
+ __ATA_BASE_SHT(drv_name), \
+ .sdev_attrs = ata_ncq_sdev_attrs, \
.change_queue_depth = ata_scsi_change_queue_depth
+#endif
/*
* PMP helpers
@@ -1635,6 +1681,8 @@ extern struct ata_device *ata_dev_next(struct ata_device *dev,
*/
static inline int ata_ncq_enabled(struct ata_device *dev)
{
+ if (!IS_ENABLED(CONFIG_SATA_HOST))
+ return 0;
return (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ_OFF |
ATA_DFLAG_NCQ)) == ATA_DFLAG_NCQ;
}
@@ -1804,6 +1852,16 @@ static inline int ata_dma_enabled(struct ata_device *adev)
}
/**************************************************************************
+ * PATA timings - drivers/ata/libata-pata-timings.c
+ */
+extern const struct ata_timing *ata_timing_find_mode(u8 xfer_mode);
+extern int ata_timing_compute(struct ata_device *, unsigned short,
+ struct ata_timing *, int, int);
+extern void ata_timing_merge(const struct ata_timing *,
+ const struct ata_timing *, struct ata_timing *,
+ unsigned int);
+
+/**************************************************************************
* PMP - drivers/ata/libata-pmp.c
*/
#ifdef CONFIG_SATA_PMP
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index a7a0a1a5..e9ba013 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -695,6 +695,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
int val);
void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val);
+void mod_memcg_obj_state(void *p, int idx, int val);
static inline void mod_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx, int val)
@@ -1123,6 +1124,10 @@ static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx,
__mod_node_page_state(page_pgdat(page), idx, val);
}
+static inline void mod_memcg_obj_state(void *p, int idx, int val)
+{
+}
+
static inline
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
@@ -1427,6 +1432,8 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
return memcg ? memcg->kmemcg_id : -1;
}
+struct mem_cgroup *mem_cgroup_from_obj(void *p);
+
#else
static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
@@ -1468,6 +1475,11 @@ static inline void memcg_put_cache_ids(void)
{
}
+static inline struct mem_cgroup *mem_cgroup_from_obj(void *p)
+{
+ return NULL;
+}
+
#endif /* CONFIG_MEMCG_KMEM */
#endif /* _LINUX_MEMCONTROL_H */
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index ff8c9d5..bfdf415 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -688,7 +688,10 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
u8 nic_rx_multi_path_tirs[0x1];
u8 nic_rx_multi_path_tirs_fts[0x1];
u8 allow_sniffer_and_nic_rx_shared_tir[0x1];
- u8 reserved_at_3[0x1d];
+ u8 reserved_at_3[0x4];
+ u8 sw_owner_reformat_supported[0x1];
+ u8 reserved_at_8[0x18];
+
u8 encap_general_header[0x1];
u8 reserved_at_21[0xa];
u8 log_max_packet_reformat_context[0x5];
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 52269e5..c54fb96 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2715,6 +2715,10 @@ static inline bool debug_pagealloc_enabled_static(void)
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP)
extern void __kernel_map_pages(struct page *page, int numpages, int enable);
+/*
+ * When called in DEBUG_PAGEALLOC context, the call should most likely be
+ * guarded by debug_pagealloc_enabled() or debug_pagealloc_enabled_static()
+ */
static inline void
kernel_map_pages(struct page *page, int numpages, int enable)
{
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index ba70338..4c5eb3a 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -333,6 +333,7 @@ struct mmc_host {
MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | \
MMC_CAP_UHS_DDR50)
#define MMC_CAP_SYNC_RUNTIME_PM (1 << 21) /* Synced runtime PM suspends. */
+#define MMC_CAP_NEED_RSP_BUSY (1 << 22) /* Commands with R1B can't use R1. */
#define MMC_CAP_DRIVER_TYPE_A (1 << 23) /* Host supports Driver Type A */
#define MMC_CAP_DRIVER_TYPE_C (1 << 24) /* Host supports Driver Type C */
#define MMC_CAP_DRIVER_TYPE_D (1 << 25) /* Host supports Driver Type D */
diff --git a/include/linux/msdos_partition.h b/include/linux/msdos_partition.h
new file mode 100644
index 0000000..2cb82db
--- /dev/null
+++ b/include/linux/msdos_partition.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_MSDOS_PARTITION_H
+#define _LINUX_MSDOS_PARTITION_H
+
+#define MSDOS_LABEL_MAGIC 0xAA55
+
+struct msdos_partition {
+ u8 boot_ind; /* 0x80 - active */
+ u8 head; /* starting head */
+ u8 sector; /* starting sector */
+ u8 cyl; /* starting cylinder */
+ u8 sys_ind; /* What partition type */
+ u8 end_head; /* end head */
+ u8 end_sector; /* end sector */
+ u8 end_cyl; /* end cylinder */
+ __le32 start_sect; /* starting sector counting from 0 */
+ __le32 nr_sects; /* nr of sectors in partition */
+} __packed;
+
+enum msdos_sys_ind {
+ /*
+ * These three have identical behaviour; use the second one if DOS FDISK
+ * gets confused about extended/logical partitions starting past
+ * cylinder 1023.
+ */
+ DOS_EXTENDED_PARTITION = 5,
+ LINUX_EXTENDED_PARTITION = 0x85,
+ WIN98_EXTENDED_PARTITION = 0x0f,
+
+ LINUX_DATA_PARTITION = 0x83,
+ LINUX_LVM_PARTITION = 0x8e,
+ LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */
+
+ SOLARIS_X86_PARTITION = 0x82, /* also Linux swap partitions */
+ NEW_SOLARIS_X86_PARTITION = 0xbf,
+
+ DM6_AUX1PARTITION = 0x51, /* no DDO: use xlated geom */
+ DM6_AUX3PARTITION = 0x53, /* no DDO: use xlated geom */
+ DM6_PARTITION = 0x54, /* has DDO: use xlated geom & offset */
+ EZD_PARTITION = 0x55, /* EZ-DRIVE */
+
+ FREEBSD_PARTITION = 0xa5, /* FreeBSD Partition ID */
+ OPENBSD_PARTITION = 0xa6, /* OpenBSD Partition ID */
+ NETBSD_PARTITION = 0xa9, /* NetBSD Partition ID */
+ BSDI_PARTITION = 0xb7, /* BSDI Partition ID */
+ MINIX_PARTITION = 0x81, /* Minix Partition ID */
+ UNIXWARE_PARTITION = 0x63, /* Same as GNU_HURD and SCO Unix */
+};
+
+#endif /* LINUX_MSDOS_PARTITION_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a9c6b5c..6c3f703 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -72,6 +72,8 @@ void netdev_set_default_ethtool_ops(struct net_device *dev,
#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */
#define NET_RX_DROP 1 /* packet dropped */
+#define MAX_NEST_DEV 8
+
/*
* Transmit return codes: transmit return codes originate from three different
* namespaces:
@@ -1616,6 +1618,7 @@ enum netdev_priv_flags {
* and drivers will need to set them appropriately.
*
* @mpls_features: Mask of features inheritable by MPLS
+ * @gso_partial_features: value(s) from NETIF_F_GSO\*
*
* @ifindex: interface index
* @group: The group the device belongs to
@@ -1640,8 +1643,11 @@ enum netdev_priv_flags {
* @netdev_ops: Includes several pointers to callbacks,
* if one wants to override the ndo_*() functions
* @ethtool_ops: Management operations
+ * @l3mdev_ops: Layer 3 master device operations
* @ndisc_ops: Includes callbacks for different IPv6 neighbour
* discovery handling. Necessary for e.g. 6LoWPAN.
+ * @xfrmdev_ops: Transformation offload operations
+ * @tlsdev_ops: Transport Layer Security offload operations
* @header_ops: Includes callbacks for creating,parsing,caching,etc
* of Layer 2 headers.
*
@@ -1680,6 +1686,7 @@ enum netdev_priv_flags {
* @dev_port: Used to differentiate devices that share
* the same function
* @addr_list_lock: XXX: need comments on this one
+ * @name_assign_type: network interface name assignment type
* @uc_promisc: Counter that indicates promiscuous mode
* has been enabled due to the need to listen to
* additional unicast addresses in a device that
@@ -1702,6 +1709,9 @@ enum netdev_priv_flags {
* @ip6_ptr: IPv6 specific data
* @ax25_ptr: AX.25 specific data
* @ieee80211_ptr: IEEE 802.11 specific data, assign before registering
+ * @ieee802154_ptr: IEEE 802.15.4 low-rate Wireless Personal Area Network
+ * device struct
+ * @mpls_ptr: mpls_dev struct pointer
*
* @dev_addr: Hw address (before bcast,
* because most packets are unicast)
@@ -1710,6 +1720,8 @@ enum netdev_priv_flags {
* @num_rx_queues: Number of RX queues
* allocated at register_netdev() time
* @real_num_rx_queues: Number of RX queues currently active in device
+ * @xdp_prog: XDP sockets filter program pointer
+ * @gro_flush_timeout: timeout for GRO layer in NAPI
*
* @rx_handler: handler for received packets
* @rx_handler_data: XXX: need comments on this one
@@ -1731,10 +1743,14 @@ enum netdev_priv_flags {
* @qdisc: Root qdisc from userspace point of view
* @tx_queue_len: Max frames per queue allowed
* @tx_global_lock: XXX: need comments on this one
+ * @xdp_bulkq: XDP device bulk queue
+ * @xps_cpus_map: all CPUs map for XPS device
+ * @xps_rxqs_map: all RXQs map for XPS device
*
* @xps_maps: XXX: need comments on this one
* @miniq_egress: clsact qdisc specific data for
* egress processing
+ * @qdisc_hash: qdisc hash table
* @watchdog_timeo: Represents the timeout that is used by
* the watchdog (see dev_watchdog())
* @watchdog_timer: List of timers
@@ -3548,7 +3564,7 @@ static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp,
}
/**
- * netif_attrmask_next_and - get the next CPU/Rx queue in *src1p & *src2p
+ * netif_attrmask_next_and - get the next CPU/Rx queue in \*src1p & \*src2p
* @n: CPU/Rx queue index
* @src1p: the first CPUs/Rx queues mask pointer
* @src2p: the second CPUs/Rx queues mask pointer
@@ -4375,11 +4391,8 @@ void *netdev_lower_get_next(struct net_device *dev,
ldev; \
ldev = netdev_lower_get_next(dev, &(iter)))
-struct net_device *netdev_all_lower_get_next(struct net_device *dev,
+struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
struct list_head **iter);
-struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
- struct list_head **iter);
-
int netdev_walk_all_lower_dev(struct net_device *dev,
int (*fn)(struct net_device *lower_dev,
void *data),
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 908d38d..5448c8b4 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -121,6 +121,7 @@ struct ip_set_ext {
u32 timeout;
u8 packets_op;
u8 bytes_op;
+ bool target;
};
struct ip_set;
@@ -187,6 +188,14 @@ struct ip_set_type_variant {
/* Return true if "b" set is the same as "a"
* according to the create set parameters */
bool (*same_set)(const struct ip_set *a, const struct ip_set *b);
+ /* Region-locking is used */
+ bool region_lock;
+};
+
+struct ip_set_region {
+ spinlock_t lock; /* Region lock */
+ size_t ext_size; /* Size of the dynamic extensions */
+ u32 elements; /* Number of elements vs timeout */
};
/* The core set type structure */
@@ -501,7 +510,7 @@ ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
}
#define IP_SET_INIT_KEXT(skb, opt, set) \
- { .bytes = (skb)->len, .packets = 1, \
+ { .bytes = (skb)->len, .packets = 1, .target = true,\
.timeout = ip_set_adt_opt_timeout(opt, set) }
#define IP_SET_INIT_UEXT(set) \
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 205fa7b..60739d0 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -115,6 +115,19 @@ static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack,
{
u64 __cookie = cookie;
+ if (!extack)
+ return;
+ memcpy(extack->cookie, &__cookie, sizeof(__cookie));
+ extack->cookie_len = sizeof(__cookie);
+}
+
+static inline void nl_set_extack_cookie_u32(struct netlink_ext_ack *extack,
+ u32 cookie)
+{
+ u32 __cookie = cookie;
+
+ if (!extack)
+ return;
memcpy(extack->cookie, &__cookie, sizeof(__cookie));
extack->cookie_len = sizeof(__cookie);
}
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index a5f8f03..5d5b91e 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -337,35 +337,17 @@ static inline int nfs_server_capable(struct inode *inode, int cap)
return NFS_SERVER(inode)->caps & cap;
}
-static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
-{
- dentry->d_time = verf;
-}
-
/**
* nfs_save_change_attribute - Returns the inode attribute change cookie
* @dir - pointer to parent directory inode
- * The "change attribute" is updated every time we finish an operation
- * that will result in a metadata change on the server.
+ * The "cache change attribute" is updated when we need to revalidate
+ * our dentry cache after a directory was seen to change on the server.
*/
static inline unsigned long nfs_save_change_attribute(struct inode *dir)
{
return NFS_I(dir)->cache_change_attribute;
}
-/**
- * nfs_verify_change_attribute - Detects NFS remote directory changes
- * @dir - pointer to parent directory inode
- * @chattr - previously saved change attribute
- * Return "false" if the verifiers doesn't match the change attribute.
- * This would usually indicate that the directory contents have changed on
- * the server, and that any dentries need revalidating.
- */
-static inline int nfs_verify_change_attribute(struct inode *dir, unsigned long chattr)
-{
- return chattr == NFS_I(dir)->cache_change_attribute;
-}
-
/*
* linux/fs/nfs/inode.c
*/
@@ -495,6 +477,10 @@ extern const struct file_operations nfs_dir_operations;
extern const struct dentry_operations nfs_dentry_operations;
extern void nfs_force_lookup_revalidate(struct inode *dir);
+extern void nfs_set_verifier(struct dentry * dentry, unsigned long verf);
+#if IS_ENABLED(CONFIG_NFS_V4)
+extern void nfs_clear_verifier_delegated(struct inode *inode);
+#endif /* IS_ENABLED(CONFIG_NFS_V4) */
extern struct dentry *nfs_add_or_obtain(struct dentry *dentry,
struct nfs_fh *fh, struct nfs_fattr *fattr,
struct nfs4_label *label);
diff --git a/include/linux/of_clk.h b/include/linux/of_clk.h
index c86fcad..31b73a0 100644
--- a/include/linux/of_clk.h
+++ b/include/linux/of_clk.h
@@ -11,17 +11,17 @@ struct of_device_id;
#if defined(CONFIG_COMMON_CLK) && defined(CONFIG_OF)
-unsigned int of_clk_get_parent_count(struct device_node *np);
-const char *of_clk_get_parent_name(struct device_node *np, int index);
+unsigned int of_clk_get_parent_count(const struct device_node *np);
+const char *of_clk_get_parent_name(const struct device_node *np, int index);
void of_clk_init(const struct of_device_id *matches);
#else /* !CONFIG_COMMON_CLK || !CONFIG_OF */
-static inline unsigned int of_clk_get_parent_count(struct device_node *np)
+static inline unsigned int of_clk_get_parent_count(const struct device_node *np)
{
return 0;
}
-static inline const char *of_clk_get_parent_name(struct device_node *np,
+static inline const char *of_clk_get_parent_name(const struct device_node *np,
int index)
{
return NULL;
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 1bf83c8..77de28b 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -311,7 +311,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; }
__PAGEFLAG(Locked, locked, PF_NO_TAIL)
PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
-PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND)
+PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL)
PAGEFLAG(Referenced, referenced, PF_HEAD)
TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
__SETPAGEFLAG(Referenced, referenced, PF_HEAD)
diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
new file mode 100644
index 0000000..ece6076
--- /dev/null
+++ b/include/linux/part_stat.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PART_STAT_H
+#define _LINUX_PART_STAT_H
+
+#include <linux/genhd.h>
+
+/*
+ * Macros to operate on percpu disk statistics:
+ *
+ * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters
+ * and should be called between disk_stat_lock() and
+ * disk_stat_unlock().
+ *
+ * part_stat_read() can be called at any time.
+ *
+ * part_stat_{add|set_all}() and {init|free}_part_stats are for
+ * internal use only.
+ */
+#ifdef CONFIG_SMP
+#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); })
+#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0)
+
+#define part_stat_get_cpu(part, field, cpu) \
+ (per_cpu_ptr((part)->dkstats, (cpu))->field)
+
+#define part_stat_get(part, field) \
+ part_stat_get_cpu(part, field, smp_processor_id())
+
+#define part_stat_read(part, field) \
+({ \
+ typeof((part)->dkstats->field) res = 0; \
+ unsigned int _cpu; \
+ for_each_possible_cpu(_cpu) \
+ res += per_cpu_ptr((part)->dkstats, _cpu)->field; \
+ res; \
+})
+
+static inline void part_stat_set_all(struct hd_struct *part, int value)
+{
+ int i;
+
+ for_each_possible_cpu(i)
+ memset(per_cpu_ptr(part->dkstats, i), value,
+ sizeof(struct disk_stats));
+}
+
+static inline int init_part_stats(struct hd_struct *part)
+{
+ part->dkstats = alloc_percpu(struct disk_stats);
+ if (!part->dkstats)
+ return 0;
+ return 1;
+}
+
+static inline void free_part_stats(struct hd_struct *part)
+{
+ free_percpu(part->dkstats);
+}
+
+#else /* !CONFIG_SMP */
+#define part_stat_lock() ({ rcu_read_lock(); 0; })
+#define part_stat_unlock() rcu_read_unlock()
+
+#define part_stat_get(part, field) ((part)->dkstats.field)
+#define part_stat_get_cpu(part, field, cpu) part_stat_get(part, field)
+#define part_stat_read(part, field) part_stat_get(part, field)
+
+static inline void part_stat_set_all(struct hd_struct *part, int value)
+{
+ memset(&part->dkstats, value, sizeof(struct disk_stats));
+}
+
+static inline int init_part_stats(struct hd_struct *part)
+{
+ return 1;
+}
+
+static inline void free_part_stats(struct hd_struct *part)
+{
+}
+
+#endif /* CONFIG_SMP */
+
+#define part_stat_read_accum(part, field) \
+ (part_stat_read(part, field[STAT_READ]) + \
+ part_stat_read(part, field[STAT_WRITE]) + \
+ part_stat_read(part, field[STAT_DISCARD]))
+
+#define __part_stat_add(part, field, addnd) \
+ (part_stat_get(part, field) += (addnd))
+
+#define part_stat_add(part, field, addnd) do { \
+ __part_stat_add((part), field, addnd); \
+ if ((part)->partno) \
+ __part_stat_add(&part_to_disk((part))->part0, \
+ field, addnd); \
+} while (0)
+
+#define part_stat_dec(gendiskp, field) \
+ part_stat_add(gendiskp, field, -1)
+#define part_stat_inc(gendiskp, field) \
+ part_stat_add(gendiskp, field, 1)
+#define part_stat_sub(gendiskp, field, subnd) \
+ part_stat_add(gendiskp, field, -subnd)
+
+#define part_stat_local_dec(gendiskp, field) \
+ local_dec(&(part_stat_get(gendiskp, field)))
+#define part_stat_local_inc(gendiskp, field) \
+ local_inc(&(part_stat_get(gendiskp, field)))
+#define part_stat_local_read(gendiskp, field) \
+ local_read(&(part_stat_get(gendiskp, field)))
+#define part_stat_local_read_cpu(gendiskp, field, cpu) \
+ local_read(&(part_stat_get_cpu(gendiskp, field, cpu)))
+
+#endif /* _LINUX_PART_STAT_H */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 352c0d7..977e668 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -148,6 +148,8 @@
/* Vendors and devices. Sort key: vendor first, device next. */
+#define PCI_VENDOR_ID_LOONGSON 0x0014
+
#define PCI_VENDOR_ID_TTTECH 0x0357
#define PCI_DEVICE_ID_TTTECH_MC322 0x000a
diff --git a/include/linux/phy.h b/include/linux/phy.h
index c570e16..452e8ba 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -357,6 +357,7 @@ struct macsec_ops;
* is_gigabit_capable: Set to true if PHY supports 1000Mbps
* has_fixups: Set to true if this phy has fixups/quirks.
* suspended: Set to true if this phy has been suspended successfully.
+ * suspended_by_mdio_bus: Set to true if this phy was suspended by MDIO bus.
* sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal.
* loopback_enabled: Set true if this phy has been loopbacked successfully.
* state: state of the PHY for management purposes
@@ -396,6 +397,7 @@ struct phy_device {
unsigned is_gigabit_capable:1;
unsigned has_fixups:1;
unsigned suspended:1;
+ unsigned suspended_by_mdio_bus:1;
unsigned sysfs_links:1;
unsigned loopback_enabled:1;
@@ -557,6 +559,7 @@ struct phy_driver {
/*
* Checks if the PHY generated an interrupt.
* For multi-PHY devices with shared PHY interrupt pin
+ * Set interrupt bits have to be cleared.
*/
int (*did_interrupt)(struct phy_device *phydev);
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index d576503..ae58fad 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -29,7 +29,8 @@ struct pipe_buffer {
/**
* struct pipe_inode_info - a linux kernel pipe
* @mutex: mutex protecting the whole thing
- * @wait: reader/writer wait point in case of empty/full pipe
+ * @rd_wait: reader wait point in case of empty pipe
+ * @wr_wait: writer wait point in case of full pipe
* @head: The point of buffer production
* @tail: The point of buffer consumption
* @max_usage: The maximum number of slots that may be used in the ring
diff --git a/include/linux/platform_data/spi-omap2-mcspi.h b/include/linux/platform_data/spi-omap2-mcspi.h
index 0bf9fdd..3b400b1 100644
--- a/include/linux/platform_data/spi-omap2-mcspi.h
+++ b/include/linux/platform_data/spi-omap2-mcspi.h
@@ -11,6 +11,7 @@ struct omap2_mcspi_platform_config {
unsigned short num_cs;
unsigned int regs_offset;
unsigned int pin_dir:1;
+ size_t max_xfer_len;
};
struct omap2_mcspi_device_config {
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 276a03c..041bfa4 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -24,7 +24,7 @@ struct platform_device {
int id;
bool id_auto;
struct device dev;
- u64 dma_mask;
+ u64 platform_dma_mask;
u32 num_resources;
struct resource *resource;
diff --git a/include/linux/raid/detect.h b/include/linux/raid/detect.h
new file mode 100644
index 0000000..37dd3f4
--- /dev/null
+++ b/include/linux/raid/detect.h
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+void md_autodetect_dev(dev_t dev);
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index e5b7520..9670b54 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -145,6 +145,13 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
}
}
+/* after that hlist_nulls_del will work */
+static inline void hlist_nulls_add_fake(struct hlist_nulls_node *n)
+{
+ n->pprev = &n->next;
+ n->next = (struct hlist_nulls_node *)NULLS_MARKER(NULL);
+}
+
/**
* hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
* @tpos: the type * to use as a loop cursor.
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index beb9a9da..70ebef8 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -972,9 +972,9 @@ static inline int rhashtable_lookup_insert_key(
/**
* rhashtable_lookup_get_insert_key - lookup and insert object into hash table
* @ht: hash table
+ * @key: key
* @obj: pointer to hash head inside object
* @params: hash table parameters
- * @data: pointer to element data already in hashes
*
* Just like rhashtable_lookup_insert_key(), but this function returns the
* object if it exists, NULL if it does not and the insertion was successful,
diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index 1abe91f..6d67e9a 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -15,9 +15,11 @@ static inline void nohz_balance_enter_idle(int cpu) { }
#ifdef CONFIG_NO_HZ_COMMON
void calc_load_nohz_start(void);
+void calc_load_nohz_remote(struct rq *rq);
void calc_load_nohz_stop(void);
#else
static inline void calc_load_nohz_start(void) { }
+static inline void calc_load_nohz_remote(struct rq *rq) { }
static inline void calc_load_nohz_stop(void) { }
#endif /* CONFIG_NO_HZ_COMMON */
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 03583b6..4192369 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -7,7 +7,8 @@
#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \
SECCOMP_FILTER_FLAG_LOG | \
SECCOMP_FILTER_FLAG_SPEC_ALLOW | \
- SECCOMP_FILTER_FLAG_NEW_LISTENER)
+ SECCOMP_FILTER_FLAG_NEW_LISTENER | \
+ SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
#ifdef CONFIG_SECCOMP
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ca8806b..e596202 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -611,9 +611,15 @@ typedef unsigned char *sk_buff_data_t;
* @next: Next buffer in list
* @prev: Previous buffer in list
* @tstamp: Time we arrived/left
+ * @skb_mstamp_ns: (aka @tstamp) earliest departure time; start point
+ * for retransmit timer
* @rbnode: RB tree node, alternative to next/prev for netem/tcp
+ * @list: queue head
* @sk: Socket we are owned by
+ * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in
+ * fragmentation management
* @dev: Device we arrived on/are leaving by
+ * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL
* @cb: Control buffer. Free for use by every layer. Put private vars here
* @_skb_refdst: destination entry (with norefcount bit)
* @sp: the security path, used for xfrm
@@ -632,12 +638,15 @@ typedef unsigned char *sk_buff_data_t;
* @pkt_type: Packet class
* @fclone: skbuff clone status
* @ipvs_property: skbuff is owned by ipvs
+ * @inner_protocol_type: whether the inner protocol is
+ * ENCAP_TYPE_ETHER or ENCAP_TYPE_IPPROTO
+ * @remcsum_offload: remote checksum offload is enabled
* @offload_fwd_mark: Packet was L2-forwarded in hardware
* @offload_l3_fwd_mark: Packet was L3-forwarded in hardware
* @tc_skip_classify: do not classify packet. set by IFB device
* @tc_at_ingress: used within tc_classify to distinguish in/egress
- * @tc_redirected: packet was redirected by a tc action
- * @tc_from_ingress: if tc_redirected, tc_at_ingress at time of redirect
+ * @redirected: packet was redirected by packet classifier
+ * @from_ingress: packet was redirected from the ingress path
* @peeked: this packet has been seen already, so stats have been
* done for it, don't do them again
* @nf_trace: netfilter packet trace flag
@@ -650,6 +659,8 @@ typedef unsigned char *sk_buff_data_t;
* @tc_index: Traffic control index
* @hash: the packet hash
* @queue_mapping: Queue mapping for multiqueue devices
+ * @head_frag: skb was allocated from page fragments,
+ * not allocated by kmalloc() or vmalloc().
* @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
* @active_extensions: active extensions (skb_ext_id types)
* @ndisc_nodetype: router type (from link layer)
@@ -660,15 +671,28 @@ typedef unsigned char *sk_buff_data_t;
* @wifi_acked_valid: wifi_acked was set
* @wifi_acked: whether frame was acked on wifi or not
* @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
+ * @encapsulation: indicates the inner headers in the skbuff are valid
+ * @encap_hdr_csum: software checksum is needed
+ * @csum_valid: checksum is already valid
* @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL
+ * @csum_complete_sw: checksum was completed by software
+ * @csum_level: indicates the number of consecutive checksums found in
+ * the packet minus one that have been verified as
+ * CHECKSUM_UNNECESSARY (max 3)
* @dst_pending_confirm: need to confirm neighbour
* @decrypted: Decrypted SKB
* @napi_id: id of the NAPI struct this skb came from
+ * @sender_cpu: (aka @napi_id) source CPU in XPS
* @secmark: security marking
* @mark: Generic packet mark
+ * @reserved_tailroom: (aka @mark) number of bytes of free space available
+ * at the tail of an sk_buff
+ * @vlan_present: VLAN tag is present
* @vlan_proto: vlan encapsulation protocol
* @vlan_tci: vlan tag control information
* @inner_protocol: Protocol (encapsulation)
+ * @inner_ipproto: (aka @inner_protocol) stores ipproto when
+ * skb->inner_protocol_type == ENCAP_TYPE_IPPROTO;
* @inner_transport_header: Inner transport layer header (encapsulation)
* @inner_network_header: Network layer header (encapsulation)
* @inner_mac_header: Link layer header (encapsulation)
@@ -750,7 +774,9 @@ struct sk_buff {
#endif
#define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset)
+ /* private: */
__u8 __cloned_offset[0];
+ /* public: */
__u8 cloned:1,
nohdr:1,
fclone:2,
@@ -775,7 +801,9 @@ struct sk_buff {
#endif
#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset)
+ /* private: */
__u8 __pkt_type_offset[0];
+ /* public: */
__u8 pkt_type:3;
__u8 ignore_df:1;
__u8 nf_trace:1;
@@ -798,7 +826,9 @@ struct sk_buff {
#define PKT_VLAN_PRESENT_BIT 0
#endif
#define PKT_VLAN_PRESENT_OFFSET() offsetof(struct sk_buff, __pkt_vlan_present_offset)
+ /* private: */
__u8 __pkt_vlan_present_offset[0];
+ /* public: */
__u8 vlan_present:1;
__u8 csum_complete_sw:1;
__u8 csum_level:2;
@@ -818,8 +848,10 @@ struct sk_buff {
#ifdef CONFIG_NET_CLS_ACT
__u8 tc_skip_classify:1;
__u8 tc_at_ingress:1;
- __u8 tc_redirected:1;
- __u8 tc_from_ingress:1;
+#endif
+#ifdef CONFIG_NET_REDIRECT
+ __u8 redirected:1;
+ __u8 from_ingress:1;
#endif
#ifdef CONFIG_TLS_DEVICE
__u8 decrypted:1;
@@ -4549,5 +4581,31 @@ static inline __wsum lco_csum(struct sk_buff *skb)
return csum_partial(l4_hdr, csum_start - l4_hdr, partial);
}
+static inline bool skb_is_redirected(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_REDIRECT
+ return skb->redirected;
+#else
+ return false;
+#endif
+}
+
+static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
+{
+#ifdef CONFIG_NET_REDIRECT
+ skb->redirected = 1;
+ skb->from_ingress = from_ingress;
+ if (skb->from_ingress)
+ skb->tstamp = 0;
+#endif
+}
+
+static inline void skb_reset_redirect(struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_REDIRECT
+ skb->redirected = 0;
+#endif
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_SKBUFF_H */
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 2d23134..54338fa 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -391,6 +391,10 @@ extern int recvmsg_copy_msghdr(struct msghdr *msg,
struct user_msghdr __user *umsg, unsigned flags,
struct sockaddr __user **uaddr,
struct iovec **iov);
+extern int __copy_msghdr_from_user(struct msghdr *kmsg,
+ struct user_msghdr __user *umsg,
+ struct sockaddr __user **save_addr,
+ struct iovec __user **uiov, size_t *nsegs);
/* helpers which do the actual work for syscalls */
extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size,
@@ -401,7 +405,8 @@ extern int __sys_sendto(int fd, void __user *buff, size_t len,
int addr_len);
extern int __sys_accept4_file(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen, int flags);
+ int __user *upeer_addrlen, int flags,
+ unsigned long nofile);
extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags);
extern int __sys_socket(int family, int type, int protocol);
diff --git a/include/linux/splice.h b/include/linux/splice.h
index 74b4911..ebbbfea 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -78,6 +78,9 @@ extern ssize_t add_to_pipe(struct pipe_inode_info *,
struct pipe_buffer *);
extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
splice_direct_actor *);
+extern long do_splice(struct file *in, loff_t __user *off_in,
+ struct file *out, loff_t __user *off_out,
+ size_t len, unsigned int flags);
/*
* for dynamic pipe sizing
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 4a230c2..2b2055b 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -191,7 +191,7 @@ struct platform_s2idle_ops {
int (*begin)(void);
int (*prepare)(void);
int (*prepare_late)(void);
- void (*wake)(void);
+ bool (*wake)(void);
void (*restore_early)(void);
void (*restore)(void);
void (*end)(void);
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index cde3dc1..046bb94 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -64,6 +64,9 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev,
size_t size, enum dma_data_direction dir,
enum dma_sync_target target);
+dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
+ size_t size, enum dma_data_direction dir, unsigned long attrs);
+
#ifdef CONFIG_SWIOTLB
extern enum swiotlb_force swiotlb_force;
extern phys_addr_t io_tlb_start, io_tlb_end;
@@ -73,8 +76,6 @@ static inline bool is_swiotlb_buffer(phys_addr_t paddr)
return paddr >= io_tlb_start && paddr < io_tlb_end;
}
-bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr,
- size_t size, enum dma_data_direction dir, unsigned long attrs);
void __init swiotlb_exit(void);
unsigned int swiotlb_max_segment(void);
size_t swiotlb_max_mapping_size(struct device *dev);
@@ -85,12 +86,6 @@ static inline bool is_swiotlb_buffer(phys_addr_t paddr)
{
return false;
}
-static inline bool swiotlb_map(struct device *dev, phys_addr_t *phys,
- dma_addr_t *dma_addr, size_t size, enum dma_data_direction dir,
- unsigned long attrs)
-{
- return false;
-}
static inline void swiotlb_exit(void)
{
}
diff --git a/include/linux/time32.h b/include/linux/time32.h
index cad4c31..cf9320c 100644
--- a/include/linux/time32.h
+++ b/include/linux/time32.h
@@ -12,8 +12,6 @@
#include <linux/time64.h>
#include <linux/timex.h>
-#define TIME_T_MAX (__kernel_old_time_t)((1UL << ((sizeof(__kernel_old_time_t) << 3) - 1)) - 1)
-
typedef s32 old_time32_t;
struct old_timespec32 {
@@ -73,162 +71,12 @@ struct __kernel_timex;
int get_old_timex32(struct __kernel_timex *, const struct old_timex32 __user *);
int put_old_timex32(struct old_timex32 __user *, const struct __kernel_timex *);
-#if __BITS_PER_LONG == 64
-
-/* timespec64 is defined as timespec here */
-static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64)
-{
- return *(const struct timespec *)&ts64;
-}
-
-static inline struct timespec64 timespec_to_timespec64(const struct timespec ts)
-{
- return *(const struct timespec64 *)&ts;
-}
-
-#else
-static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64)
-{
- struct timespec ret;
-
- ret.tv_sec = (time_t)ts64.tv_sec;
- ret.tv_nsec = ts64.tv_nsec;
- return ret;
-}
-
-static inline struct timespec64 timespec_to_timespec64(const struct timespec ts)
-{
- struct timespec64 ret;
-
- ret.tv_sec = ts.tv_sec;
- ret.tv_nsec = ts.tv_nsec;
- return ret;
-}
-#endif
-
-static inline int timespec_equal(const struct timespec *a,
- const struct timespec *b)
-{
- return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec);
-}
-
-/*
- * lhs < rhs: return <0
- * lhs == rhs: return 0
- * lhs > rhs: return >0
- */
-static inline int timespec_compare(const struct timespec *lhs, const struct timespec *rhs)
-{
- if (lhs->tv_sec < rhs->tv_sec)
- return -1;
- if (lhs->tv_sec > rhs->tv_sec)
- return 1;
- return lhs->tv_nsec - rhs->tv_nsec;
-}
-
-/*
- * Returns true if the timespec is norm, false if denorm:
- */
-static inline bool timespec_valid(const struct timespec *ts)
-{
- /* Dates before 1970 are bogus */
- if (ts->tv_sec < 0)
- return false;
- /* Can't have more nanoseconds then a second */
- if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
- return false;
- return true;
-}
-
/**
- * timespec_to_ns - Convert timespec to nanoseconds
- * @ts: pointer to the timespec variable to be converted
- *
- * Returns the scalar nanosecond representation of the timespec
- * parameter.
- */
-static inline s64 timespec_to_ns(const struct timespec *ts)
-{
- return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
-}
-
-/**
- * ns_to_timespec - Convert nanoseconds to timespec
- * @nsec: the nanoseconds value to be converted
- *
- * Returns the timespec representation of the nsec parameter.
- */
-extern struct timespec ns_to_timespec(const s64 nsec);
-
-/**
- * timespec_add_ns - Adds nanoseconds to a timespec
- * @a: pointer to timespec to be incremented
- * @ns: unsigned nanoseconds value to be added
- *
- * This must always be inlined because its used from the x86-64 vdso,
- * which cannot call other kernel functions.
- */
-static __always_inline void timespec_add_ns(struct timespec *a, u64 ns)
-{
- a->tv_sec += __iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns);
- a->tv_nsec = ns;
-}
-
-static inline unsigned long mktime(const unsigned int year,
- const unsigned int mon, const unsigned int day,
- const unsigned int hour, const unsigned int min,
- const unsigned int sec)
-{
- return mktime64(year, mon, day, hour, min, sec);
-}
-
-static inline bool timeval_valid(const struct timeval *tv)
-{
- /* Dates before 1970 are bogus */
- if (tv->tv_sec < 0)
- return false;
-
- /* Can't have more microseconds then a second */
- if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC)
- return false;
-
- return true;
-}
-
-/**
- * timeval_to_ns - Convert timeval to nanoseconds
- * @ts: pointer to the timeval variable to be converted
- *
- * Returns the scalar nanosecond representation of the timeval
- * parameter.
- */
-static inline s64 timeval_to_ns(const struct timeval *tv)
-{
- return ((s64) tv->tv_sec * NSEC_PER_SEC) +
- tv->tv_usec * NSEC_PER_USEC;
-}
-
-/**
- * ns_to_timeval - Convert nanoseconds to timeval
+ * ns_to_kernel_old_timeval - Convert nanoseconds to timeval
* @nsec: the nanoseconds value to be converted
*
* Returns the timeval representation of the nsec parameter.
*/
-extern struct timeval ns_to_timeval(const s64 nsec);
extern struct __kernel_old_timeval ns_to_kernel_old_timeval(s64 nsec);
-/*
- * Old names for the 32-bit time_t interfaces, these will be removed
- * when everything uses the new names.
- */
-#define compat_time_t old_time32_t
-#define compat_timeval old_timeval32
-#define compat_timespec old_timespec32
-#define compat_itimerspec old_itimerspec32
-#define ns_to_compat_timeval ns_to_old_timeval32
-#define get_compat_itimerspec64 get_old_itimerspec32
-#define put_compat_itimerspec64 put_old_itimerspec32
-#define compat_get_timespec64 get_old_timespec32
-#define compat_put_timespec64 put_old_timespec32
-
#endif
diff --git a/include/linux/timekeeping32.h b/include/linux/timekeeping32.h
index cc59cc9..266017f 100644
--- a/include/linux/timekeeping32.h
+++ b/include/linux/timekeeping32.h
@@ -11,36 +11,4 @@ static inline unsigned long get_seconds(void)
return ktime_get_real_seconds();
}
-static inline void getnstimeofday(struct timespec *ts)
-{
- struct timespec64 ts64;
-
- ktime_get_real_ts64(&ts64);
- *ts = timespec64_to_timespec(ts64);
-}
-
-static inline void ktime_get_ts(struct timespec *ts)
-{
- struct timespec64 ts64;
-
- ktime_get_ts64(&ts64);
- *ts = timespec64_to_timespec(ts64);
-}
-
-static inline void getrawmonotonic(struct timespec *ts)
-{
- struct timespec64 ts64;
-
- ktime_get_raw_ts64(&ts64);
- *ts = timespec64_to_timespec(ts64);
-}
-
-static inline void getboottime(struct timespec *ts)
-{
- struct timespec64 ts64;
-
- getboottime64(&ts64);
- *ts = timespec64_to_timespec(ts64);
-}
-
#endif
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index af2c85d..6c7a10a 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -440,7 +440,7 @@ struct synth_event_trace_state {
struct synth_event *event;
unsigned int cur_field;
unsigned int n_u64;
- bool enabled;
+ bool disabled;
bool add_next;
bool add_name;
};
diff --git a/include/linux/tty.h b/include/linux/tty.h
index bfa4e2e..bd5fe0e 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -225,6 +225,8 @@ struct tty_port_client_operations {
void (*write_wakeup)(struct tty_port *port);
};
+extern const struct tty_port_client_operations tty_port_default_client_ops;
+
struct tty_port {
struct tty_bufhead buf; /* Locked internally */
struct tty_struct *tty; /* Back pointer */
diff --git a/include/linux/types.h b/include/linux/types.h
index eb870ad..d3021c8 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -65,11 +65,6 @@ typedef __kernel_ssize_t ssize_t;
typedef __kernel_ptrdiff_t ptrdiff_t;
#endif
-#ifndef _TIME_T
-#define _TIME_T
-typedef __kernel_old_time_t time_t;
-#endif
-
#ifndef _CLOCK_T
#define _CLOCK_T
typedef __kernel_clock_t clock_t;
diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index a1be64c..22c1f57 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -69,4 +69,7 @@
/* Hub needs extra delay after resetting its port. */
#define USB_QUIRK_HUB_SLOW_RESET BIT(14)
+/* device has blacklisted endpoints */
+#define USB_QUIRK_ENDPOINT_BLACKLIST BIT(15)
+
#endif /* __LINUX_USB_QUIRKS_H */
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index ec38132..0507a162 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -141,8 +141,9 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
unsigned long pgoff);
-void vmalloc_sync_all(void);
-
+void vmalloc_sync_mappings(void);
+void vmalloc_sync_unmappings(void);
+
/*
* Lowlevel-APIs (not for driver use!)
*/
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 4261d1c..e48554e 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -487,6 +487,19 @@ extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task);
*
* We queue the work to the CPU on which it was submitted, but if the CPU dies
* it can be processed by another CPU.
+ *
+ * Memory-ordering properties: If it returns %true, guarantees that all stores
+ * preceding the call to queue_work() in the program order will be visible from
+ * the CPU which will execute @work by the time such work executes, e.g.,
+ *
+ * { x is initially 0 }
+ *
+ * CPU0 CPU1
+ *
+ * WRITE_ONCE(x, 1); [ @work is being executed ]
+ * r0 = queue_work(wq, work); r1 = READ_ONCE(x);
+ *
+ * Forbids: r0 == true && r1 == 0
*/
static inline bool queue_work(struct workqueue_struct *wq,
struct work_struct *work)
@@ -546,6 +559,9 @@ static inline bool schedule_work_on(int cpu, struct work_struct *work)
* This puts a job in the kernel-global workqueue if it was not already
* queued and leaves it in the same position on the kernel-global
* workqueue otherwise.
+ *
+ * Shares the same memory-ordering properties of queue_work(), cf. the
+ * DocBook header of queue_work().
*/
static inline bool schedule_work(struct work_struct *work)
{
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 1abae3c..04e97ba 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -16,6 +16,12 @@ struct sock;
struct socket;
struct rxrpc_call;
+enum rxrpc_interruptibility {
+ RXRPC_INTERRUPTIBLE, /* Call is interruptible */
+ RXRPC_PREINTERRUPTIBLE, /* Call can be cancelled whilst waiting for a slot */
+ RXRPC_UNINTERRUPTIBLE, /* Call should not be interruptible at all */
+};
+
/*
* Debug ID counter for tracing.
*/
@@ -41,7 +47,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
gfp_t,
rxrpc_notify_rx_t,
bool,
- bool,
+ enum rxrpc_interruptibility,
unsigned int);
int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
struct msghdr *, size_t,
@@ -58,9 +64,7 @@ int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
rxrpc_user_attach_call_t, unsigned long, gfp_t,
unsigned int);
void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
-bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *,
- u32 *);
-void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *);
+bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *);
u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *);
bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *,
ktime_t *);
diff --git a/include/net/compat.h b/include/net/compat.h
index f277653..e341260 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -38,6 +38,9 @@ struct compat_cmsghdr {
#define compat_mmsghdr mmsghdr
#endif /* defined(CONFIG_COMPAT) */
+int __get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg,
+ struct sockaddr __user **save_addr, compat_uptr_t *ptr,
+ compat_size_t *len);
int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *,
struct sockaddr __user **, struct iovec **);
struct sock_fprog __user *get_compat_bpf_fprog(char __user *optval);
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 54e227e..a259050 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -108,6 +108,7 @@ struct fib_rule_notifier_info {
[FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
[FRA_PRIORITY] = { .type = NLA_U32 }, \
[FRA_FWMARK] = { .type = NLA_U32 }, \
+ [FRA_TUN_ID] = { .type = NLA_U64 }, \
[FRA_FWMASK] = { .type = NLA_U32 }, \
[FRA_TABLE] = { .type = NLA_U32 }, \
[FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index d93017a..6283839 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/in6.h>
#include <linux/siphash.h>
+#include <linux/string.h>
#include <uapi/linux/if_ether.h>
struct sk_buff;
@@ -33,7 +34,6 @@ enum flow_dissect_ret {
/**
* struct flow_dissector_key_basic:
- * @thoff: Transport header offset
* @n_proto: Network header protocol (eg. IPv4/IPv6)
* @ip_proto: Transport header protocol (eg. TCP/UDP)
*/
@@ -349,4 +349,12 @@ struct bpf_flow_dissector {
void *data_end;
};
+static inline void
+flow_dissector_init_keys(struct flow_dissector_key_control *key_control,
+ struct flow_dissector_key_basic *key_basic)
+{
+ memset(key_control, 0, sizeof(*key_control));
+ memset(key_basic, 0, sizeof(*key_basic));
+}
+
#endif
diff --git a/include/net/icmp.h b/include/net/icmp.h
index 5d4bfdb..9ac2d26 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -43,6 +43,12 @@ static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32
__icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
}
+#if IS_ENABLED(CONFIG_NF_NAT)
+void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info);
+#else
+#define icmp_ndo_send icmp_send
+#endif
+
int icmp_rcv(struct sk_buff *skb);
int icmp_err(struct sk_buff *skb, u32 info);
int icmp_init(void);
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index aa14580..77e6b5a 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1004,12 +1004,11 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate)
struct ieee80211_tx_info {
/* common information */
u32 flags;
- u8 band;
-
- u8 hw_queue;
-
- u16 ack_frame_id:6;
- u16 tx_time_est:10;
+ u32 band:3,
+ ack_frame_id:13,
+ hw_queue:4,
+ tx_time_est:10;
+ /* 2 free bits */
union {
struct {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 1512087..c30f914 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -675,22 +675,6 @@ void __qdisc_calculate_pkt_len(struct sk_buff *skb,
const struct qdisc_size_table *stab);
int skb_do_redirect(struct sk_buff *);
-static inline void skb_reset_tc(struct sk_buff *skb)
-{
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_redirected = 0;
-#endif
-}
-
-static inline bool skb_is_tc_redirected(const struct sk_buff *skb)
-{
-#ifdef CONFIG_NET_CLS_ACT
- return skb->tc_redirected;
-#else
- return false;
-#endif
-}
-
static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_CLS_ACT
diff --git a/include/net/sock.h b/include/net/sock.h
index 02162b0..3285645 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -117,19 +117,26 @@ typedef __u64 __bitwise __addrpair;
* struct sock_common - minimal network layer representation of sockets
* @skc_daddr: Foreign IPv4 addr
* @skc_rcv_saddr: Bound local IPv4 addr
+ * @skc_addrpair: 8-byte-aligned __u64 union of @skc_daddr & @skc_rcv_saddr
* @skc_hash: hash value used with various protocol lookup tables
* @skc_u16hashes: two u16 hash values used by UDP lookup tables
* @skc_dport: placeholder for inet_dport/tw_dport
* @skc_num: placeholder for inet_num/tw_num
+ * @skc_portpair: __u32 union of @skc_dport & @skc_num
* @skc_family: network address family
* @skc_state: Connection state
* @skc_reuse: %SO_REUSEADDR setting
* @skc_reuseport: %SO_REUSEPORT setting
+ * @skc_ipv6only: socket is IPV6 only
+ * @skc_net_refcnt: socket is using net ref counting
* @skc_bound_dev_if: bound device index if != 0
* @skc_bind_node: bind hash linkage for various protocol lookup tables
* @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol
* @skc_prot: protocol handlers inside a network family
* @skc_net: reference to the network namespace of this socket
+ * @skc_v6_daddr: IPV6 destination address
+ * @skc_v6_rcv_saddr: IPV6 source address
+ * @skc_cookie: socket's cookie value
* @skc_node: main hash linkage for various protocol lookup tables
* @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
* @skc_tx_queue_mapping: tx queue number for this connection
@@ -137,7 +144,15 @@ typedef __u64 __bitwise __addrpair;
* @skc_flags: place holder for sk_flags
* %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
* %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
+ * @skc_listener: connection request listener socket (aka rsk_listener)
+ * [union with @skc_flags]
+ * @skc_tw_dr: (aka tw_dr) ptr to &struct inet_timewait_death_row
+ * [union with @skc_flags]
* @skc_incoming_cpu: record/match cpu processing incoming packets
+ * @skc_rcv_wnd: (aka rsk_rcv_wnd) TCP receive window size (possibly scaled)
+ * [union with @skc_incoming_cpu]
+ * @skc_tw_rcv_nxt: (aka tw_rcv_nxt) TCP window next expected seq number
+ * [union with @skc_incoming_cpu]
* @skc_refcnt: reference count
*
* This is the minimal network layer representation of sockets, the header
@@ -245,6 +260,7 @@ struct bpf_sk_storage;
* @sk_dst_cache: destination cache
* @sk_dst_pending_confirm: need to confirm neighbour
* @sk_policy: flow policy
+ * @sk_rx_skb_cache: cache copy of recently accessed RX skb
* @sk_receive_queue: incoming packets
* @sk_wmem_alloc: transmit queue bytes committed
* @sk_tsq_flags: TCP Small Queues flags
@@ -265,6 +281,8 @@ struct bpf_sk_storage;
* @sk_no_check_rx: allow zero checksum in RX packets
* @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
* @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
+ * @sk_route_forced_caps: static, forced route capabilities
+ * (set in tcp_init_sock())
* @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
* @sk_gso_max_size: Maximum GSO segment size to build
* @sk_gso_max_segs: Maximum number of GSO segments
@@ -303,6 +321,8 @@ struct bpf_sk_storage;
* @sk_frag: cached page frag
* @sk_peek_off: current peek_offset value
* @sk_send_head: front of stuff to transmit
+ * @tcp_rtx_queue: TCP re-transmit queue [union with @sk_send_head]
+ * @sk_tx_skb_cache: cache copy of recently accessed TX skb
* @sk_security: used by security modules
* @sk_mark: generic packet mark
* @sk_cgrp_data: cgroup data for this cgroup
@@ -313,11 +333,14 @@ struct bpf_sk_storage;
* @sk_write_space: callback to indicate there is bf sending space available
* @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE)
* @sk_backlog_rcv: callback to process the backlog
+ * @sk_validate_xmit_skb: ptr to an optional validate function
* @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0
* @sk_reuseport_cb: reuseport group container
+ * @sk_bpf_storage: ptr to cache and control for bpf_sk_storage
* @sk_rcu: used during RCU grace period
* @sk_clockid: clockid used by time-based scheduling (SO_TXTIME)
* @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME
+ * @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
*/
struct sock {
@@ -393,7 +416,9 @@ struct sock {
struct sk_filter __rcu *sk_filter;
union {
struct socket_wq __rcu *sk_wq;
+ /* private: */
struct socket_wq *sk_wq_raw;
+ /* public: */
};
#ifdef CONFIG_XFRM
struct xfrm_policy __rcu *sk_policy[2];
@@ -2017,7 +2042,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
* sk_wmem_alloc_get - returns write allocations
* @sk: socket
*
- * Returns sk_wmem_alloc minus initial offset of one
+ * Return: sk_wmem_alloc minus initial offset of one
*/
static inline int sk_wmem_alloc_get(const struct sock *sk)
{
@@ -2028,7 +2053,7 @@ static inline int sk_wmem_alloc_get(const struct sock *sk)
* sk_rmem_alloc_get - returns read allocations
* @sk: socket
*
- * Returns sk_rmem_alloc
+ * Return: sk_rmem_alloc
*/
static inline int sk_rmem_alloc_get(const struct sock *sk)
{
@@ -2039,7 +2064,7 @@ static inline int sk_rmem_alloc_get(const struct sock *sk)
* sk_has_allocations - check if allocations are outstanding
* @sk: socket
*
- * Returns true if socket has write or read allocations
+ * Return: true if socket has write or read allocations
*/
static inline bool sk_has_allocations(const struct sock *sk)
{
@@ -2050,7 +2075,7 @@ static inline bool sk_has_allocations(const struct sock *sk)
* skwq_has_sleeper - check if there are any waiting processes
* @wq: struct socket_wq
*
- * Returns true if socket_wq has waiting processes
+ * Return: true if socket_wq has waiting processes
*
* The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memory
* barrier call. They were added due to the race found within the tcp code.
@@ -2238,6 +2263,9 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
* gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
* inside other socket operations and end up recursing into sk_page_frag()
* while it's already in use.
+ *
+ * Return: a per task page_frag if context allows that,
+ * otherwise a per socket one.
*/
static inline struct page_frag *sk_page_frag(struct sock *sk)
{
@@ -2432,6 +2460,7 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags)
&skb_shinfo(skb)->tskey);
}
+DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
/**
* sk_eat_skb - Release a skb if it is no longer needed
* @sk: socket to eat this skb from
@@ -2440,7 +2469,6 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags)
* This routine must be called with interrupts disabled or with the socket
* locked so that the sk_buff queue operation is ok.
*/
-DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
{
__skb_unlink(skb, &sk->sk_receive_queue);
diff --git a/include/scsi/iscsi_proto.h b/include/scsi/iscsi_proto.h
index 533f567..b71b5c4f 100644
--- a/include/scsi/iscsi_proto.h
+++ b/include/scsi/iscsi_proto.h
@@ -627,7 +627,6 @@ struct iscsi_reject {
#define ISCSI_REASON_BOOKMARK_INVALID 9
#define ISCSI_REASON_BOOKMARK_NO_RESOURCES 10
#define ISCSI_REASON_NEGOTIATION_RESET 11
-#define ISCSI_REASON_WAITING_FOR_LOGOUT 12
/* Max. number of Key=Value pairs in a text message */
#define MAX_KEY_VALUE_PAIRS 8192
diff --git a/include/scsi/scsicam.h b/include/scsi/scsicam.h
index 57c7292..08edd60 100644
--- a/include/scsi/scsicam.h
+++ b/include/scsi/scsicam.h
@@ -13,8 +13,7 @@
#ifndef SCSICAM_H
#define SCSICAM_H
-extern int scsicam_bios_param (struct block_device *bdev, sector_t capacity, int *ip);
-extern int scsi_partsize(unsigned char *buf, unsigned long capacity,
- unsigned int *cyls, unsigned int *hds, unsigned int *secs);
-extern unsigned char *scsi_bios_ptable(struct block_device *bdev);
+int scsicam_bios_param(struct block_device *bdev, sector_t capacity, int *ip);
+bool scsi_partsize(struct block_device *bdev, sector_t capacity, int geom[3]);
+unsigned char *scsi_bios_ptable(struct block_device *bdev);
#endif /* def SCSICAM_H */
diff --git a/include/soc/mscc/ocelot_dev.h b/include/soc/mscc/ocelot_dev.h
index 0a50d53..7c08437 100644
--- a/include/soc/mscc/ocelot_dev.h
+++ b/include/soc/mscc/ocelot_dev.h
@@ -74,7 +74,7 @@
#define DEV_MAC_TAGS_CFG_TAG_ID_M GENMASK(31, 16)
#define DEV_MAC_TAGS_CFG_TAG_ID_X(x) (((x) & GENMASK(31, 16)) >> 16)
#define DEV_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA BIT(2)
-#define DEV_MAC_TAGS_CFG_PB_ENA BIT(1)
+#define DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA BIT(1)
#define DEV_MAC_TAGS_CFG_VLAN_AWR_ENA BIT(0)
#define DEV_MAC_ADV_CHK_CFG 0x2c
diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h
index 40ab204..a36b722 100644
--- a/include/sound/rawmidi.h
+++ b/include/sound/rawmidi.h
@@ -77,9 +77,9 @@ struct snd_rawmidi_substream {
struct list_head list; /* list of all substream for given stream */
int stream; /* direction */
int number; /* substream number */
- unsigned int opened: 1, /* open flag */
- append: 1, /* append flag (merge more streams) */
- active_sensing: 1; /* send active sensing when close */
+ bool opened; /* open flag */
+ bool append; /* append flag (merge more streams) */
+ bool active_sensing; /* send active sensing when close */
int use_count; /* use counter (for output) */
size_t bytes;
struct snd_rawmidi *rmidi;
diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index 2a306c6..1b6afbc 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -392,8 +392,6 @@ int snd_soc_dapm_get_enum_double(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol);
int snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol);
-int snd_soc_dapm_put_enum_double_locked(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol);
int snd_soc_dapm_info_pin_switch(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_info *uinfo);
int snd_soc_dapm_get_pin_switch(struct snd_kcontrol *kcontrol,
diff --git a/include/sound/soc.h b/include/sound/soc.h
index f0e4f36..8a22666 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1157,7 +1157,7 @@ struct snd_soc_pcm_runtime {
((i) < rtd->num_codecs) && ((dai) = rtd->codec_dais[i]); \
(i)++)
#define for_each_rtd_codec_dai_rollback(rtd, i, dai) \
- for (; ((--i) >= 0) && ((dai) = rtd->codec_dais[i]);)
+ for (; (--(i) >= 0) && ((dai) = rtd->codec_dais[i]);)
void snd_soc_close_delayed_work(struct snd_soc_pcm_runtime *rtd);
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 564ba1b..c612cab 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -233,7 +233,7 @@ enum afs_cb_break_reason {
EM(afs_call_trace_get, "GET ") \
EM(afs_call_trace_put, "PUT ") \
EM(afs_call_trace_wake, "WAKE ") \
- E_(afs_call_trace_work, "WORK ")
+ E_(afs_call_trace_work, "QUEUE")
#define afs_server_traces \
EM(afs_server_trace_alloc, "ALLOC ") \
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 27bd9e4..9f0d3b7 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -357,6 +357,109 @@ TRACE_EVENT(io_uring_submit_sqe,
__entry->force_nonblock, __entry->sq_thread)
);
+TRACE_EVENT(io_uring_poll_arm,
+
+ TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask, int events),
+
+ TP_ARGS(ctx, opcode, user_data, mask, events),
+
+ TP_STRUCT__entry (
+ __field( void *, ctx )
+ __field( u8, opcode )
+ __field( u64, user_data )
+ __field( int, mask )
+ __field( int, events )
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = ctx;
+ __entry->opcode = opcode;
+ __entry->user_data = user_data;
+ __entry->mask = mask;
+ __entry->events = events;
+ ),
+
+ TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
+ __entry->ctx, __entry->opcode,
+ (unsigned long long) __entry->user_data,
+ __entry->mask, __entry->events)
+);
+
+TRACE_EVENT(io_uring_poll_wake,
+
+ TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask),
+
+ TP_ARGS(ctx, opcode, user_data, mask),
+
+ TP_STRUCT__entry (
+ __field( void *, ctx )
+ __field( u8, opcode )
+ __field( u64, user_data )
+ __field( int, mask )
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = ctx;
+ __entry->opcode = opcode;
+ __entry->user_data = user_data;
+ __entry->mask = mask;
+ ),
+
+ TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x",
+ __entry->ctx, __entry->opcode,
+ (unsigned long long) __entry->user_data,
+ __entry->mask)
+);
+
+TRACE_EVENT(io_uring_task_add,
+
+ TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask),
+
+ TP_ARGS(ctx, opcode, user_data, mask),
+
+ TP_STRUCT__entry (
+ __field( void *, ctx )
+ __field( u8, opcode )
+ __field( u64, user_data )
+ __field( int, mask )
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = ctx;
+ __entry->opcode = opcode;
+ __entry->user_data = user_data;
+ __entry->mask = mask;
+ ),
+
+ TP_printk("ring %p, op %d, data 0x%llx, mask %x",
+ __entry->ctx, __entry->opcode,
+ (unsigned long long) __entry->user_data,
+ __entry->mask)
+);
+
+TRACE_EVENT(io_uring_task_run,
+
+ TP_PROTO(void *ctx, u8 opcode, u64 user_data),
+
+ TP_ARGS(ctx, opcode, user_data),
+
+ TP_STRUCT__entry (
+ __field( void *, ctx )
+ __field( u8, opcode )
+ __field( u64, user_data )
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = ctx;
+ __entry->opcode = opcode;
+ __entry->user_data = user_data;
+ ),
+
+ TP_printk("ring %p, op %d, data 0x%llx",
+ __entry->ctx, __entry->opcode,
+ (unsigned long long) __entry->user_data)
+);
+
#endif /* _TRACE_IO_URING_H */
/* This part must be outside protection */
diff --git a/include/uapi/asm-generic/posix_types.h b/include/uapi/asm-generic/posix_types.h
index 2f9c805..b5f7594 100644
--- a/include/uapi/asm-generic/posix_types.h
+++ b/include/uapi/asm-generic/posix_types.h
@@ -87,7 +87,9 @@ typedef struct {
typedef __kernel_long_t __kernel_off_t;
typedef long long __kernel_loff_t;
typedef __kernel_long_t __kernel_old_time_t;
+#ifndef __KERNEL__
typedef __kernel_long_t __kernel_time_t;
+#endif
typedef long long __kernel_time64_t;
typedef __kernel_long_t __kernel_clock_t;
typedef int __kernel_timer_t;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f1d74a2..22f2352 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1045,9 +1045,9 @@ union bpf_attr {
* supports redirection to the egress interface, and accepts no
* flag at all.
*
- * The same effect can be attained with the more generic
- * **bpf_redirect_map**\ (), which requires specific maps to be
- * used but offers better performance.
+ * The same effect can also be attained with the more generic
+ * **bpf_redirect_map**\ (), which uses a BPF map to store the
+ * redirect target instead of providing it directly to the helper.
* Return
* For XDP, the helper returns **XDP_REDIRECT** on success or
* **XDP_ABORTED** on error. For other program types, the values
@@ -1611,13 +1611,11 @@ union bpf_attr {
* the caller. Any higher bits in the *flags* argument must be
* unset.
*
- * When used to redirect packets to net devices, this helper
- * provides a high performance increase over **bpf_redirect**\ ().
- * This is due to various implementation details of the underlying
- * mechanisms, one of which is the fact that **bpf_redirect_map**\
- * () tries to send packet as a "bulk" to the device.
+ * See also bpf_redirect(), which only supports redirecting to an
+ * ifindex, but doesn't require a map to do so.
* Return
- * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ * **XDP_REDIRECT** on success, or the value of the two lower bits
+ * of the **flags* argument on error.
*
* int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
* Description
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index 2df8cec..6622912 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -272,9 +272,9 @@ enum {
#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
#define DM_VERSION_MAJOR 4
-#define DM_VERSION_MINOR 41
+#define DM_VERSION_MINOR 42
#define DM_VERSION_PATCHLEVEL 0
-#define DM_VERSION_EXTRA "-ioctl (2019-09-16)"
+#define DM_VERSION_EXTRA "-ioctl (2020-02-27)"
/* Status bits */
#define DM_READONLY_FLAG (1 << 0) /* In/Out */
diff --git a/include/uapi/linux/fdreg.h b/include/uapi/linux/fdreg.h
index 5e2981d..1318881 100644
--- a/include/uapi/linux/fdreg.h
+++ b/include/uapi/linux/fdreg.h
@@ -7,26 +7,18 @@
* Handbook", Sanches and Canton.
*/
-#ifdef FDPATCHES
-#define FD_IOPORT fdc_state[fdc].address
-#else
-/* It would be a lot saner just to force fdc_state[fdc].address to always
- be set ! FIXME */
-#define FD_IOPORT 0x3f0
-#endif
-
/* Fd controller regs. S&C, about page 340 */
-#define FD_STATUS (4 + FD_IOPORT )
-#define FD_DATA (5 + FD_IOPORT )
+#define FD_STATUS 4
+#define FD_DATA 5
/* Digital Output Register */
-#define FD_DOR (2 + FD_IOPORT )
+#define FD_DOR 2
/* Digital Input Register (read) */
-#define FD_DIR (7 + FD_IOPORT )
+#define FD_DIR 7
/* Diskette Control Register (write)*/
-#define FD_DCR (7 + FD_IOPORT )
+#define FD_DCR 7
/* Bits of main status register */
#define STATUS_BUSYMASK 0x0F /* drive busy mask */
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index 1521073..8533bf07 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -74,6 +74,8 @@ enum {
#define IPPROTO_UDPLITE IPPROTO_UDPLITE
IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */
#define IPPROTO_MPLS IPPROTO_MPLS
+ IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */
+#define IPPROTO_ETHERNET IPPROTO_ETHERNET
IPPROTO_RAW = 255, /* Raw IP packets */
#define IPPROTO_RAW IPPROTO_RAW
IPPROTO_MPTCP = 262, /* Multipath TCP connection */
diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h
index 0f1db1c..6923dc7 100644
--- a/include/uapi/linux/input-event-codes.h
+++ b/include/uapi/linux/input-event-codes.h
@@ -652,6 +652,9 @@
/* Electronic privacy screen control */
#define KEY_PRIVACY_SCREEN_TOGGLE 0x279
+/* Select an area of screen to be copied */
+#define KEY_SELECTIVE_SCREENSHOT 0x27a
+
/*
* Some keyboards have keys which do not have a defined meaning, these keys
* are intended to be programmed / bound to macros by the user. For most
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 3f7961c..e48d746 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
/*
* Header file for the io_uring interface.
*
@@ -23,7 +23,10 @@ struct io_uring_sqe {
__u64 off; /* offset into file */
__u64 addr2;
};
- __u64 addr; /* pointer to buffer or iovecs */
+ union {
+ __u64 addr; /* pointer to buffer or iovecs */
+ __u64 splice_off_in;
+ };
__u32 len; /* buffer size or number of iovecs */
union {
__kernel_rwf_t rw_flags;
@@ -37,14 +40,21 @@ struct io_uring_sqe {
__u32 open_flags;
__u32 statx_flags;
__u32 fadvise_advice;
+ __u32 splice_flags;
};
__u64 user_data; /* data to be passed back at completion time */
union {
struct {
- /* index into fixed buffers, if used */
- __u16 buf_index;
+ /* pack this to avoid bogus arm OABI complaints */
+ union {
+ /* index into fixed buffers, if used */
+ __u16 buf_index;
+ /* for grouped buffer selection */
+ __u16 buf_group;
+ } __attribute__((packed));
/* personality to use, if used */
__u16 personality;
+ __s32 splice_fd_in;
};
__u64 __pad2[3];
};
@@ -56,6 +66,7 @@ enum {
IOSQE_IO_LINK_BIT,
IOSQE_IO_HARDLINK_BIT,
IOSQE_ASYNC_BIT,
+ IOSQE_BUFFER_SELECT_BIT,
};
/*
@@ -71,6 +82,8 @@ enum {
#define IOSQE_IO_HARDLINK (1U << IOSQE_IO_HARDLINK_BIT)
/* always go async */
#define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT)
+/* select buffer from sqe->buf_group */
+#define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT)
/*
* io_uring_setup() flags
@@ -113,6 +126,9 @@ enum {
IORING_OP_RECV,
IORING_OP_OPENAT2,
IORING_OP_EPOLL_CTL,
+ IORING_OP_SPLICE,
+ IORING_OP_PROVIDE_BUFFERS,
+ IORING_OP_REMOVE_BUFFERS,
/* this goes last, obviously */
IORING_OP_LAST,
@@ -129,6 +145,12 @@ enum {
#define IORING_TIMEOUT_ABS (1U << 0)
/*
+ * sqe->splice_flags
+ * extends splice(2) flags
+ */
+#define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */
+
+/*
* IO completion data structure (Completion Queue Entry)
*/
struct io_uring_cqe {
@@ -138,6 +160,17 @@ struct io_uring_cqe {
};
/*
+ * cqe->flags
+ *
+ * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
+ */
+#define IORING_CQE_F_BUFFER (1U << 0)
+
+enum {
+ IORING_CQE_BUFFER_SHIFT = 16,
+};
+
+/*
* Magic offsets for the application to mmap the data it needs
*/
#define IORING_OFF_SQ_RING 0ULL
@@ -204,6 +237,7 @@ struct io_uring_params {
#define IORING_FEAT_SUBMIT_STABLE (1U << 2)
#define IORING_FEAT_RW_CUR_POS (1U << 3)
#define IORING_FEAT_CUR_PERSONALITY (1U << 4)
+#define IORING_FEAT_FAST_POLL (1U << 5)
/*
* io_uring_register(2) opcodes and arguments
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 336014b..b6f0bb1 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -97,6 +97,15 @@ enum ip_conntrack_status {
IPS_UNTRACKED_BIT = 12,
IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
+#ifdef __KERNEL__
+ /* Re-purposed for in-kernel use:
+ * Tags a conntrack entry that clashed with an existing entry
+ * on insert.
+ */
+ IPS_NAT_CLASH_BIT = IPS_UNTRACKED_BIT,
+ IPS_NAT_CLASH = IPS_UNTRACKED,
+#endif
+
/* Conntrack got a helper explicitly attached via CT target. */
IPS_HELPER_BIT = 13,
IPS_HELPER = (1 << IPS_HELPER_BIT),
@@ -110,7 +119,8 @@ enum ip_conntrack_status {
*/
IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
- IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
+ IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_UNTRACKED |
+ IPS_OFFLOAD),
__IPS_MAX_BIT = 15,
};
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index be84d87..c173545 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -22,6 +22,7 @@
#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
+#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
/*
* All BPF programs must return a 32-bit value.
diff --git a/include/uapi/linux/serio.h b/include/uapi/linux/serio.h
index 50e9919..ed2a96f 100644
--- a/include/uapi/linux/serio.h
+++ b/include/uapi/linux/serio.h
@@ -9,7 +9,7 @@
#ifndef _UAPI_SERIO_H
#define _UAPI_SERIO_H
-
+#include <linux/const.h>
#include <linux/ioctl.h>
#define SPIOCSTYPE _IOW('q', 0x01, unsigned long)
@@ -18,10 +18,10 @@
/*
* bit masks for use in "interrupt" flags (3rd argument)
*/
-#define SERIO_TIMEOUT BIT(0)
-#define SERIO_PARITY BIT(1)
-#define SERIO_FRAME BIT(2)
-#define SERIO_OOB_DATA BIT(3)
+#define SERIO_TIMEOUT _BITUL(0)
+#define SERIO_PARITY _BITUL(1)
+#define SERIO_FRAME _BITUL(2)
+#define SERIO_OOB_DATA _BITUL(3)
/*
* Serio types
diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h
index fa7f97d..7272f85 100644
--- a/include/uapi/linux/swab.h
+++ b/include/uapi/linux/swab.h
@@ -135,9 +135,9 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val)
static __always_inline unsigned long __swab(const unsigned long y)
{
-#if BITS_PER_LONG == 64
+#if __BITS_PER_LONG == 64
return __swab64(y);
-#else /* BITS_PER_LONG == 32 */
+#else /* __BITS_PER_LONG == 32 */
return __swab32(y);
#endif
}
diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h
index a655aa2..4f4b6e4 100644
--- a/include/uapi/linux/time.h
+++ b/include/uapi/linux/time.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/time_types.h>
+#ifndef __KERNEL__
#ifndef _STRUCT_TIMESPEC
#define _STRUCT_TIMESPEC
struct timespec {
@@ -18,6 +19,17 @@ struct timeval {
__kernel_suseconds_t tv_usec; /* microseconds */
};
+struct itimerspec {
+ struct timespec it_interval;/* timer period */
+ struct timespec it_value; /* timer expiration */
+};
+
+struct itimerval {
+ struct timeval it_interval;/* timer interval */
+ struct timeval it_value; /* current value */
+};
+#endif
+
struct timezone {
int tz_minuteswest; /* minutes west of Greenwich */
int tz_dsttime; /* type of dst correction */
@@ -31,16 +43,6 @@ struct timezone {
#define ITIMER_VIRTUAL 1
#define ITIMER_PROF 2
-struct itimerspec {
- struct timespec it_interval; /* timer period */
- struct timespec it_value; /* timer expiration */
-};
-
-struct itimerval {
- struct timeval it_interval; /* timer interval */
- struct timeval it_value; /* current value */
-};
-
/*
* The IDs of the various system clocks (for POSIX.1b interval timers):
*/
diff --git a/include/uapi/linux/usb/charger.h b/include/uapi/linux/usb/charger.h
index 5f72af3..ad22079 100644
--- a/include/uapi/linux/usb/charger.h
+++ b/include/uapi/linux/usb/charger.h
@@ -14,18 +14,18 @@
* ACA (Accessory Charger Adapters)
*/
enum usb_charger_type {
- UNKNOWN_TYPE,
- SDP_TYPE,
- DCP_TYPE,
- CDP_TYPE,
- ACA_TYPE,
+ UNKNOWN_TYPE = 0,
+ SDP_TYPE = 1,
+ DCP_TYPE = 2,
+ CDP_TYPE = 3,
+ ACA_TYPE = 4,
};
/* USB charger state */
enum usb_charger_state {
- USB_CHARGER_DEFAULT,
- USB_CHARGER_PRESENT,
- USB_CHARGER_ABSENT,
+ USB_CHARGER_DEFAULT = 0,
+ USB_CHARGER_PRESENT = 1,
+ USB_CHARGER_ABSENT = 2,
};
#endif /* _UAPI__LINUX_USB_CHARGER_H */
diff --git a/include/xen/interface/io/tpmif.h b/include/xen/interface/io/tpmif.h
index 28e7dcd..f8aa8ba 100644
--- a/include/xen/interface/io/tpmif.h
+++ b/include/xen/interface/io/tpmif.h
@@ -46,7 +46,7 @@ struct vtpm_shared_page {
uint8_t pad;
uint8_t nr_extra_pages; /* extra pages for long packets; may be zero */
- uint32_t extra_pages[0]; /* grant IDs; length in nr_extra_pages */
+ uint32_t extra_pages[]; /* grant IDs; length in nr_extra_pages */
};
#endif
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index 89a8895..850a43b 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -42,6 +42,7 @@
#include <linux/completion.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/semaphore.h>
#include <xen/interface/xen.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/xenbus.h>
@@ -76,7 +77,7 @@ struct xenbus_device {
enum xenbus_state state;
struct completion down;
struct work_struct work;
- spinlock_t reclaim_lock;
+ struct semaphore reclaim_sem;
};
static inline struct xenbus_device *to_xenbus_device(struct device *dev)
diff --git a/init/Kconfig b/init/Kconfig
index cfee56c..4f717bf 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -767,8 +767,7 @@
bool
config CC_HAS_INT128
- def_bool y
- depends on !$(cc-option,-D__SIZEOF_INT128__=0)
+ def_bool !$(cc-option,$(m64-flag) -D__SIZEOF_INT128__=0) && 64BIT
#
# For architectures that know their GCC __int128 support is sound
@@ -1226,14 +1225,12 @@
config BOOT_CONFIG
bool "Boot config support"
- depends on BLK_DEV_INITRD
- select LIBXBC
- default y
+ select BLK_DEV_INITRD
help
Extra boot config allows system admin to pass a config file as
complemental extension of kernel cmdline when booting.
The boot config file must be attached at the end of initramfs
- with checksum and size.
+ with checksum, size and magic word.
See <file:Documentation/admin-guide/bootconfig.rst> for details.
If unsure, say Y.
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 0ae9cc2..29d326b 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -429,12 +429,10 @@ void __init mount_block_root(char *name, int flags)
struct page *page = alloc_page(GFP_KERNEL);
char *fs_names = page_address(page);
char *p;
-#ifdef CONFIG_BLOCK
char b[BDEVNAME_SIZE];
-#else
- const char *b = name;
-#endif
+ scnprintf(b, BDEVNAME_SIZE, "unknown-block(%u,%u)",
+ MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
get_fs_names(fs_names);
retry:
for (p = fs_names; *p; p += strlen(p)+1) {
@@ -451,9 +449,6 @@ void __init mount_block_root(char *name, int flags)
* and bad superblock on root device.
* and give them a list of the available devices
*/
-#ifdef CONFIG_BLOCK
- __bdevname(ROOT_DEV, b);
-#endif
printk("VFS: Cannot open root device \"%s\" or %s: error %d\n",
root_device_name, b, err);
printk("Please append a correct \"root=\" boot option; here are the available partitions:\n");
@@ -476,9 +471,6 @@ void __init mount_block_root(char *name, int flags)
for (p = fs_names; *p; p += strlen(p)+1)
printk(" %s", p);
printk("\n");
-#ifdef CONFIG_BLOCK
- __bdevname(ROOT_DEV, b);
-#endif
panic("VFS: Unable to mount root fs on %s", b);
out:
put_page(page);
diff --git a/init/main.c b/init/main.c
index cc0ee48..ee4947a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -142,6 +142,15 @@ static char *extra_command_line;
/* Extra init arguments */
static char *extra_init_args;
+#ifdef CONFIG_BOOT_CONFIG
+/* Is bootconfig on command line? */
+static bool bootconfig_found;
+static bool initargs_found;
+#else
+# define bootconfig_found false
+# define initargs_found false
+#endif
+
static char *execute_command;
static char *ramdisk_execute_command;
@@ -259,7 +268,6 @@ static int __init xbc_snprint_cmdline(char *buf, size_t size,
{
struct xbc_node *knode, *vnode;
char *end = buf + size;
- char c = '\"';
const char *val;
int ret;
@@ -270,25 +278,20 @@ static int __init xbc_snprint_cmdline(char *buf, size_t size,
return ret;
vnode = xbc_node_get_child(knode);
- ret = snprintf(buf, rest(buf, end), "%s%c", xbc_namebuf,
- vnode ? '=' : ' ');
- if (ret < 0)
- return ret;
- buf += ret;
- if (!vnode)
- continue;
-
- c = '\"';
- xbc_array_for_each_value(vnode, val) {
- ret = snprintf(buf, rest(buf, end), "%c%s", c, val);
+ if (!vnode) {
+ ret = snprintf(buf, rest(buf, end), "%s ", xbc_namebuf);
if (ret < 0)
return ret;
buf += ret;
- c = ',';
+ continue;
}
- if (rest(buf, end) > 2)
- strcpy(buf, "\" ");
- buf += 2;
+ xbc_array_for_each_value(vnode, val) {
+ ret = snprintf(buf, rest(buf, end), "%s=\"%s\" ",
+ xbc_namebuf, val);
+ if (ret < 0)
+ return ret;
+ buf += ret;
+ }
}
return buf - (end - size);
@@ -326,7 +329,7 @@ static char * __init xbc_make_cmdline(const char *key)
return new_cmdline;
}
-u32 boot_config_checksum(unsigned char *p, u32 size)
+static u32 boot_config_checksum(unsigned char *p, u32 size)
{
u32 ret = 0;
@@ -336,23 +339,40 @@ u32 boot_config_checksum(unsigned char *p, u32 size)
return ret;
}
+static int __init bootconfig_params(char *param, char *val,
+ const char *unused, void *arg)
+{
+ if (strcmp(param, "bootconfig") == 0) {
+ bootconfig_found = true;
+ } else if (strcmp(param, "--") == 0) {
+ initargs_found = true;
+ }
+ return 0;
+}
+
static void __init setup_boot_config(const char *cmdline)
{
+ static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
u32 size, csum;
char *data, *copy;
- const char *p;
u32 *hdr;
int ret;
- p = strstr(cmdline, "bootconfig");
- if (!p || (p != cmdline && !isspace(*(p-1))) ||
- (p[10] && !isspace(p[10])))
+ strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+ parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL,
+ bootconfig_params);
+
+ if (!bootconfig_found)
return;
if (!initrd_end)
goto not_found;
- hdr = (u32 *)(initrd_end - 8);
+ data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN;
+ if (memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN))
+ goto not_found;
+
+ hdr = (u32 *)(data - 8);
size = hdr[0];
csum = hdr[1];
@@ -396,6 +416,14 @@ static void __init setup_boot_config(const char *cmdline)
}
#else
#define setup_boot_config(cmdline) do { } while (0)
+
+static int __init warn_bootconfig(char *str)
+{
+ pr_warn("WARNING: 'bootconfig' found on the kernel command line but CONFIG_BOOTCONFIG is not set.\n");
+ return 0;
+}
+early_param("bootconfig", warn_bootconfig);
+
#endif
/* Change NUL term back to "=", to make "param" the whole string. */
@@ -562,11 +590,12 @@ static void __init setup_command_line(char *command_line)
* to init.
*/
len = strlen(saved_command_line);
- if (!strstr(boot_command_line, " -- ")) {
+ if (initargs_found) {
+ saved_command_line[len++] = ' ';
+ } else {
strcpy(saved_command_line + len, " -- ");
len += 4;
- } else
- saved_command_line[len++] = ' ';
+ }
strcpy(saved_command_line + len, extra_init_args);
}
diff --git a/ipc/sem.c b/ipc/sem.c
index 4f4303f..3687b71 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -2384,11 +2384,9 @@ void exit_sem(struct task_struct *tsk)
ipc_assert_locked_object(&sma->sem_perm);
list_del(&un->list_id);
- /* we are the last process using this ulp, acquiring ulp->lock
- * isn't required. Besides that, we are also protected against
- * IPC_RMID as we hold sma->sem_perm lock now
- */
+ spin_lock(&ulp->lock);
list_del_rcu(&un->list_proc);
+ spin_unlock(&ulp->lock);
/* perform adjustments registered in un */
for (i = 0; i < sma->sem_nsems; i++) {
diff --git a/kernel/audit.c b/kernel/audit.c
index 17b0d52..9ddfe2a 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1101,13 +1101,11 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature
audit_log_end(ab);
}
-static int audit_set_feature(struct sk_buff *skb)
+static int audit_set_feature(struct audit_features *uaf)
{
- struct audit_features *uaf;
int i;
BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > ARRAY_SIZE(audit_feature_names));
- uaf = nlmsg_data(nlmsg_hdr(skb));
/* if there is ever a version 2 we should handle that here */
@@ -1175,6 +1173,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
u32 seq;
void *data;
+ int data_len;
int err;
struct audit_buffer *ab;
u16 msg_type = nlh->nlmsg_type;
@@ -1188,6 +1187,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
seq = nlh->nlmsg_seq;
data = nlmsg_data(nlh);
+ data_len = nlmsg_len(nlh);
switch (msg_type) {
case AUDIT_GET: {
@@ -1211,7 +1211,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
struct audit_status s;
memset(&s, 0, sizeof(s));
/* guard against past and future API changes */
- memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh)));
+ memcpy(&s, data, min_t(size_t, sizeof(s), data_len));
if (s.mask & AUDIT_STATUS_ENABLED) {
err = audit_set_enabled(s.enabled);
if (err < 0)
@@ -1315,7 +1315,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return err;
break;
case AUDIT_SET_FEATURE:
- err = audit_set_feature(skb);
+ if (data_len < sizeof(struct audit_features))
+ return -EINVAL;
+ err = audit_set_feature(data);
if (err)
return err;
break;
@@ -1327,6 +1329,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
err = audit_filter(msg_type, AUDIT_FILTER_USER);
if (err == 1) { /* match or error */
+ char *str = data;
+
err = 0;
if (msg_type == AUDIT_USER_TTY) {
err = tty_audit_push();
@@ -1334,26 +1338,24 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
break;
}
audit_log_user_recv_msg(&ab, msg_type);
- if (msg_type != AUDIT_USER_TTY)
+ if (msg_type != AUDIT_USER_TTY) {
+ /* ensure NULL termination */
+ str[data_len - 1] = '\0';
audit_log_format(ab, " msg='%.*s'",
AUDIT_MESSAGE_TEXT_MAX,
- (char *)data);
- else {
- int size;
-
+ str);
+ } else {
audit_log_format(ab, " data=");
- size = nlmsg_len(nlh);
- if (size > 0 &&
- ((unsigned char *)data)[size - 1] == '\0')
- size--;
- audit_log_n_untrustedstring(ab, data, size);
+ if (data_len > 0 && str[data_len - 1] == '\0')
+ data_len--;
+ audit_log_n_untrustedstring(ab, str, data_len);
}
audit_log_end(ab);
}
break;
case AUDIT_ADD_RULE:
case AUDIT_DEL_RULE:
- if (nlmsg_len(nlh) < sizeof(struct audit_rule_data))
+ if (data_len < sizeof(struct audit_rule_data))
return -EINVAL;
if (audit_enabled == AUDIT_LOCKED) {
audit_log_common_recv_msg(audit_context(), &ab,
@@ -1365,7 +1367,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
audit_log_end(ab);
return -EPERM;
}
- err = audit_rule_change(msg_type, seq, data, nlmsg_len(nlh));
+ err = audit_rule_change(msg_type, seq, data, data_len);
break;
case AUDIT_LIST_RULES:
err = audit_list_rules_send(skb, seq);
@@ -1380,7 +1382,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
case AUDIT_MAKE_EQUIV: {
void *bufp = data;
u32 sizes[2];
- size_t msglen = nlmsg_len(nlh);
+ size_t msglen = data_len;
char *old, *new;
err = -EINVAL;
@@ -1456,7 +1458,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
memset(&s, 0, sizeof(s));
/* guard against past and future API changes */
- memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh)));
+ memcpy(&s, data, min_t(size_t, sizeof(s), data_len));
/* check if new data is valid */
if ((s.enabled != 0 && s.enabled != 1) ||
(s.log_passwd != 0 && s.log_passwd != 1))
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index b0126e9..026e34d 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -456,6 +456,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
bufp = data->buf;
for (i = 0; i < data->field_count; i++) {
struct audit_field *f = &entry->rule.fields[i];
+ u32 f_val;
err = -EINVAL;
@@ -464,12 +465,12 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
goto exit_free;
f->type = data->fields[i];
- f->val = data->values[i];
+ f_val = data->values[i];
/* Support legacy tests for a valid loginuid */
- if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) {
+ if ((f->type == AUDIT_LOGINUID) && (f_val == AUDIT_UID_UNSET)) {
f->type = AUDIT_LOGINUID_SET;
- f->val = 0;
+ f_val = 0;
entry->rule.pflags |= AUDIT_LOGINUID_LEGACY;
}
@@ -485,7 +486,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
case AUDIT_SUID:
case AUDIT_FSUID:
case AUDIT_OBJ_UID:
- f->uid = make_kuid(current_user_ns(), f->val);
+ f->uid = make_kuid(current_user_ns(), f_val);
if (!uid_valid(f->uid))
goto exit_free;
break;
@@ -494,11 +495,12 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
case AUDIT_SGID:
case AUDIT_FSGID:
case AUDIT_OBJ_GID:
- f->gid = make_kgid(current_user_ns(), f->val);
+ f->gid = make_kgid(current_user_ns(), f_val);
if (!gid_valid(f->gid))
goto exit_free;
break;
case AUDIT_ARCH:
+ f->val = f_val;
entry->rule.arch_f = f;
break;
case AUDIT_SUBJ_USER:
@@ -511,11 +513,13 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
case AUDIT_OBJ_TYPE:
case AUDIT_OBJ_LEV_LOW:
case AUDIT_OBJ_LEV_HIGH:
- str = audit_unpack_string(&bufp, &remain, f->val);
- if (IS_ERR(str))
+ str = audit_unpack_string(&bufp, &remain, f_val);
+ if (IS_ERR(str)) {
+ err = PTR_ERR(str);
goto exit_free;
- entry->rule.buflen += f->val;
-
+ }
+ entry->rule.buflen += f_val;
+ f->lsm_str = str;
err = security_audit_rule_init(f->type, f->op, str,
(void **)&f->lsm_rule);
/* Keep currently invalid fields around in case they
@@ -524,68 +528,71 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
pr_warn("audit rule for LSM \'%s\' is invalid\n",
str);
err = 0;
- }
- if (err) {
- kfree(str);
+ } else if (err)
goto exit_free;
- } else
- f->lsm_str = str;
break;
case AUDIT_WATCH:
- str = audit_unpack_string(&bufp, &remain, f->val);
- if (IS_ERR(str))
+ str = audit_unpack_string(&bufp, &remain, f_val);
+ if (IS_ERR(str)) {
+ err = PTR_ERR(str);
goto exit_free;
- entry->rule.buflen += f->val;
-
- err = audit_to_watch(&entry->rule, str, f->val, f->op);
+ }
+ err = audit_to_watch(&entry->rule, str, f_val, f->op);
if (err) {
kfree(str);
goto exit_free;
}
+ entry->rule.buflen += f_val;
break;
case AUDIT_DIR:
- str = audit_unpack_string(&bufp, &remain, f->val);
- if (IS_ERR(str))
+ str = audit_unpack_string(&bufp, &remain, f_val);
+ if (IS_ERR(str)) {
+ err = PTR_ERR(str);
goto exit_free;
- entry->rule.buflen += f->val;
-
+ }
err = audit_make_tree(&entry->rule, str, f->op);
kfree(str);
if (err)
goto exit_free;
+ entry->rule.buflen += f_val;
break;
case AUDIT_INODE:
+ f->val = f_val;
err = audit_to_inode(&entry->rule, f);
if (err)
goto exit_free;
break;
case AUDIT_FILTERKEY:
- if (entry->rule.filterkey || f->val > AUDIT_MAX_KEY_LEN)
+ if (entry->rule.filterkey || f_val > AUDIT_MAX_KEY_LEN)
goto exit_free;
- str = audit_unpack_string(&bufp, &remain, f->val);
- if (IS_ERR(str))
- goto exit_free;
- entry->rule.buflen += f->val;
- entry->rule.filterkey = str;
- break;
- case AUDIT_EXE:
- if (entry->rule.exe || f->val > PATH_MAX)
- goto exit_free;
- str = audit_unpack_string(&bufp, &remain, f->val);
+ str = audit_unpack_string(&bufp, &remain, f_val);
if (IS_ERR(str)) {
err = PTR_ERR(str);
goto exit_free;
}
- entry->rule.buflen += f->val;
-
- audit_mark = audit_alloc_mark(&entry->rule, str, f->val);
+ entry->rule.buflen += f_val;
+ entry->rule.filterkey = str;
+ break;
+ case AUDIT_EXE:
+ if (entry->rule.exe || f_val > PATH_MAX)
+ goto exit_free;
+ str = audit_unpack_string(&bufp, &remain, f_val);
+ if (IS_ERR(str)) {
+ err = PTR_ERR(str);
+ goto exit_free;
+ }
+ audit_mark = audit_alloc_mark(&entry->rule, str, f_val);
if (IS_ERR(audit_mark)) {
kfree(str);
err = PTR_ERR(audit_mark);
goto exit_free;
}
+ entry->rule.buflen += f_val;
entry->rule.exe = audit_mark;
break;
+ default:
+ f->val = f_val;
+ break;
}
}
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 042f955..68a89a9 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -482,13 +482,21 @@ static int bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
prev_state = cmpxchg(&st_map->kvalue.state,
BPF_STRUCT_OPS_STATE_INUSE,
BPF_STRUCT_OPS_STATE_TOBEFREE);
- if (prev_state == BPF_STRUCT_OPS_STATE_INUSE) {
+ switch (prev_state) {
+ case BPF_STRUCT_OPS_STATE_INUSE:
st_map->st_ops->unreg(&st_map->kvalue.data);
if (refcount_dec_and_test(&st_map->kvalue.refcnt))
bpf_map_put(map);
+ return 0;
+ case BPF_STRUCT_OPS_STATE_TOBEFREE:
+ return -EINPROGRESS;
+ case BPF_STRUCT_OPS_STATE_INIT:
+ return -ENOENT;
+ default:
+ WARN_ON_ONCE(1);
+ /* Should never happen. Treat it as not found. */
+ return -ENOENT;
}
-
- return 0;
}
static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key,
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 805c43b..7787bdc 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -2418,7 +2418,7 @@ static int btf_enum_check_member(struct btf_verifier_env *env,
struct_size = struct_type->size;
bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
- if (struct_size - bytes_offset < sizeof(int)) {
+ if (struct_size - bytes_offset < member_type->size) {
btf_verifier_log_member(env, struct_type, member,
"Member exceeds struct_size");
return -EINVAL;
@@ -4142,9 +4142,9 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
* EFAULT - verifier bug
* 0 - 99% match. The last 1% is validated by the verifier.
*/
-int btf_check_func_type_match(struct bpf_verifier_log *log,
- struct btf *btf1, const struct btf_type *t1,
- struct btf *btf2, const struct btf_type *t2)
+static int btf_check_func_type_match(struct bpf_verifier_log *log,
+ struct btf *btf1, const struct btf_type *t1,
+ struct btf *btf2, const struct btf_type *t2)
{
const struct btf_param *args1, *args2;
const char *fn1, *fn2, *s1, *s2;
@@ -4564,7 +4564,7 @@ int btf_get_info_by_fd(const struct btf *btf,
union bpf_attr __user *uattr)
{
struct bpf_btf_info __user *uinfo;
- struct bpf_btf_info info = {};
+ struct bpf_btf_info info;
u32 info_copy, btf_copy;
void __user *ubtf;
u32 uinfo_len;
@@ -4573,6 +4573,7 @@ int btf_get_info_by_fd(const struct btf *btf,
uinfo_len = attr->info.info_len;
info_copy = min_t(u32, uinfo_len, sizeof(info));
+ memset(&info, 0, sizeof(info));
if (copy_from_user(&info, uinfo, info_copy))
return -EFAULT;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 9a500fa..4f14724 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -227,6 +227,9 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
for (i = 0; i < NR; i++)
bpf_prog_array_free(arrays[i]);
+ for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
+ cgroup_bpf_put(p);
+
percpu_ref_exit(&cgrp->bpf.refcnt);
return -ENOMEM;
@@ -302,8 +305,8 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
struct list_head *progs = &cgrp->bpf.progs[type];
struct bpf_prog *old_prog = NULL;
- struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
- *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
+ struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
+ struct bpf_cgroup_storage *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
struct bpf_prog_list *pl, *replace_pl = NULL;
enum bpf_cgroup_storage_type stype;
int err;
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 2d182c4..a1468e3 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -56,6 +56,7 @@ struct htab_elem {
union {
struct bpf_htab *htab;
struct pcpu_freelist_node fnode;
+ struct htab_elem *batch_flink;
};
};
};
@@ -126,6 +127,17 @@ static void htab_free_elems(struct bpf_htab *htab)
bpf_map_area_free(htab->elems);
}
+/* The LRU list has a lock (lru_lock). Each htab bucket has a lock
+ * (bucket_lock). If both locks need to be acquired together, the lock
+ * order is always lru_lock -> bucket_lock and this only happens in
+ * bpf_lru_list.c logic. For example, certain code path of
+ * bpf_lru_pop_free(), which is called by function prealloc_lru_pop(),
+ * will acquire lru_lock first followed by acquiring bucket_lock.
+ *
+ * In hashtab.c, to avoid deadlock, lock acquisition of
+ * bucket_lock followed by lru_lock is not allowed. In such cases,
+ * bucket_lock needs to be released first before acquiring lru_lock.
+ */
static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
u32 hash)
{
@@ -1256,10 +1268,12 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
void *ubatch = u64_to_user_ptr(attr->batch.in_batch);
u32 batch, max_count, size, bucket_size;
+ struct htab_elem *node_to_free = NULL;
u64 elem_map_flags, map_flags;
struct hlist_nulls_head *head;
struct hlist_nulls_node *n;
- unsigned long flags;
+ unsigned long flags = 0;
+ bool locked = false;
struct htab_elem *l;
struct bucket *b;
int ret = 0;
@@ -1319,15 +1333,25 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
dst_val = values;
b = &htab->buckets[batch];
head = &b->head;
- raw_spin_lock_irqsave(&b->lock, flags);
+ /* do not grab the lock unless need it (bucket_cnt > 0). */
+ if (locked)
+ raw_spin_lock_irqsave(&b->lock, flags);
bucket_cnt = 0;
hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
bucket_cnt++;
+ if (bucket_cnt && !locked) {
+ locked = true;
+ goto again_nocopy;
+ }
+
if (bucket_cnt > (max_count - total)) {
if (total == 0)
ret = -ENOSPC;
+ /* Note that since bucket_cnt > 0 here, it is implicit
+ * that the locked was grabbed, so release it.
+ */
raw_spin_unlock_irqrestore(&b->lock, flags);
rcu_read_unlock();
this_cpu_dec(bpf_prog_active);
@@ -1337,6 +1361,9 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
if (bucket_cnt > bucket_size) {
bucket_size = bucket_cnt;
+ /* Note that since bucket_cnt > 0 here, it is implicit
+ * that the locked was grabbed, so release it.
+ */
raw_spin_unlock_irqrestore(&b->lock, flags);
rcu_read_unlock();
this_cpu_dec(bpf_prog_active);
@@ -1346,6 +1373,10 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
goto alloc;
}
+ /* Next block is only safe to run if you have grabbed the lock */
+ if (!locked)
+ goto next_batch;
+
hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
memcpy(dst_key, l->key, key_size);
@@ -1370,16 +1401,33 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
}
if (do_delete) {
hlist_nulls_del_rcu(&l->hash_node);
- if (is_lru_map)
- bpf_lru_push_free(&htab->lru, &l->lru_node);
- else
+
+ /* bpf_lru_push_free() will acquire lru_lock, which
+ * may cause deadlock. See comments in function
+ * prealloc_lru_pop(). Let us do bpf_lru_push_free()
+ * after releasing the bucket lock.
+ */
+ if (is_lru_map) {
+ l->batch_flink = node_to_free;
+ node_to_free = l;
+ } else {
free_htab_elem(htab, l);
+ }
}
dst_key += key_size;
dst_val += value_size;
}
raw_spin_unlock_irqrestore(&b->lock, flags);
+ locked = false;
+
+ while (node_to_free) {
+ l = node_to_free;
+ node_to_free = node_to_free->batch_flink;
+ bpf_lru_push_free(&htab->lru, &l->lru_node);
+ }
+
+next_batch:
/* If we are not copying data, we can go to next bucket and avoid
* unlocking the rcu.
*/
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 2c5dc65..bd09290 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -321,7 +321,7 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
ulen = info->jited_prog_len;
info->jited_prog_len = aux->offload->jited_len;
- if (info->jited_prog_len & ulen) {
+ if (info->jited_prog_len && ulen) {
uinsns = u64_to_user_ptr(info->jited_prog_insns);
ulen = min_t(u32, info->jited_prog_len, ulen);
if (copy_to_user(uinsns, aux->offload->jited_image, ulen)) {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a91ad51..966b7b3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -696,14 +696,15 @@ int bpf_get_file_flag(int flags)
offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
sizeof(attr->CMD##_LAST_FIELD)) != NULL
-/* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes.
- * Return 0 on success and < 0 on error.
+/* dst and src must have at least "size" number of bytes.
+ * Return strlen on success and < 0 on error.
*/
-static int bpf_obj_name_cpy(char *dst, const char *src)
+int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
{
- const char *end = src + BPF_OBJ_NAME_LEN;
+ const char *end = src + size;
+ const char *orig_src = src;
- memset(dst, 0, BPF_OBJ_NAME_LEN);
+ memset(dst, 0, size);
/* Copy all isalnum(), '_' and '.' chars. */
while (src < end && *src) {
if (!isalnum(*src) &&
@@ -712,11 +713,11 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
*dst++ = *src++;
}
- /* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */
+ /* No '\0' found in "size" number of bytes */
if (src == end)
return -EINVAL;
- return 0;
+ return src - orig_src;
}
int map_check_no_btf(const struct bpf_map *map,
@@ -810,8 +811,9 @@ static int map_create(union bpf_attr *attr)
if (IS_ERR(map))
return PTR_ERR(map);
- err = bpf_obj_name_cpy(map->name, attr->map_name);
- if (err)
+ err = bpf_obj_name_cpy(map->name, attr->map_name,
+ sizeof(attr->map_name));
+ if (err < 0)
goto free_map;
atomic64_set(&map->refcnt, 1);
@@ -1510,6 +1512,11 @@ static int map_freeze(const union bpf_attr *attr)
if (IS_ERR(map))
return PTR_ERR(map);
+ if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
+ fdput(f);
+ return -ENOTSUPP;
+ }
+
mutex_lock(&map->freeze_mutex);
if (map->writecnt) {
@@ -2093,8 +2100,9 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
goto free_prog;
prog->aux->load_time = ktime_get_boottime_ns();
- err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name);
- if (err)
+ err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name,
+ sizeof(attr->prog_name));
+ if (err < 0)
goto free_prog;
/* run eBPF verifier */
@@ -2787,7 +2795,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
union bpf_attr __user *uattr)
{
struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
- struct bpf_prog_info info = {};
+ struct bpf_prog_info info;
u32 info_len = attr->info.info_len;
struct bpf_prog_stats stats;
char __user *uinsns;
@@ -2799,6 +2807,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
return err;
info_len = min_t(u32, sizeof(info), info_len);
+ memset(&info, 0, sizeof(info));
if (copy_from_user(&info, uinfo, info_len))
return -EFAULT;
@@ -3062,7 +3071,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
union bpf_attr __user *uattr)
{
struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
- struct bpf_map_info info = {};
+ struct bpf_map_info info;
u32 info_len = attr->info.info_len;
int err;
@@ -3071,6 +3080,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
return err;
info_len = min_t(u32, sizeof(info), info_len);
+ memset(&info, 0, sizeof(info));
info.type = map->map_type;
info.id = map->id;
info.key_size = map->key_size;
@@ -3354,7 +3364,7 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
- union bpf_attr attr = {};
+ union bpf_attr attr;
int err;
if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
@@ -3366,6 +3376,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
size = min_t(u32, size, sizeof(attr));
/* copy attributes from user space, may be less than sizeof(bpf_attr) */
+ memset(&attr, 0, sizeof(attr));
if (copy_from_user(&attr, uattr, size) != 0)
return -EFAULT;
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index be1a1c8..f2d7cea 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -471,6 +471,7 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
*/
p++;
if (p >= end) {
+ (*pos)++;
return NULL;
} else {
*pos = *p;
@@ -782,7 +783,7 @@ void cgroup1_release_agent(struct work_struct *work)
pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
- if (!pathbuf || !agentbuf)
+ if (!pathbuf || !agentbuf || !strlen(agentbuf))
goto out;
spin_lock_irq(&css_set_lock);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index db552b9..3dead04 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3542,21 +3542,21 @@ static int cpu_stat_show(struct seq_file *seq, void *v)
static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
{
struct cgroup *cgrp = seq_css(seq)->cgroup;
- struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi;
+ struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
return psi_show(seq, psi, PSI_IO);
}
static int cgroup_memory_pressure_show(struct seq_file *seq, void *v)
{
struct cgroup *cgrp = seq_css(seq)->cgroup;
- struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi;
+ struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
return psi_show(seq, psi, PSI_MEM);
}
static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
{
struct cgroup *cgrp = seq_css(seq)->cgroup;
- struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi;
+ struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
return psi_show(seq, psi, PSI_CPU);
}
@@ -4400,12 +4400,16 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
}
} while (!css_set_populated(cset) && list_empty(&cset->dying_tasks));
- if (!list_empty(&cset->tasks))
+ if (!list_empty(&cset->tasks)) {
it->task_pos = cset->tasks.next;
- else if (!list_empty(&cset->mg_tasks))
+ it->cur_tasks_head = &cset->tasks;
+ } else if (!list_empty(&cset->mg_tasks)) {
it->task_pos = cset->mg_tasks.next;
- else
+ it->cur_tasks_head = &cset->mg_tasks;
+ } else {
it->task_pos = cset->dying_tasks.next;
+ it->cur_tasks_head = &cset->dying_tasks;
+ }
it->tasks_head = &cset->tasks;
it->mg_tasks_head = &cset->mg_tasks;
@@ -4463,10 +4467,14 @@ static void css_task_iter_advance(struct css_task_iter *it)
else
it->task_pos = it->task_pos->next;
- if (it->task_pos == it->tasks_head)
+ if (it->task_pos == it->tasks_head) {
it->task_pos = it->mg_tasks_head->next;
- if (it->task_pos == it->mg_tasks_head)
+ it->cur_tasks_head = it->mg_tasks_head;
+ }
+ if (it->task_pos == it->mg_tasks_head) {
it->task_pos = it->dying_tasks_head->next;
+ it->cur_tasks_head = it->dying_tasks_head;
+ }
if (it->task_pos == it->dying_tasks_head)
css_task_iter_advance_css_set(it);
} else {
@@ -4485,11 +4493,12 @@ static void css_task_iter_advance(struct css_task_iter *it)
goto repeat;
/* and dying leaders w/o live member threads */
- if (!atomic_read(&task->signal->live))
+ if (it->cur_tasks_head == it->dying_tasks_head &&
+ !atomic_read(&task->signal->live))
goto repeat;
} else {
/* skip all dying ones */
- if (task->flags & PF_EXITING)
+ if (it->cur_tasks_head == it->dying_tasks_head)
goto repeat;
}
}
@@ -4595,6 +4604,9 @@ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
struct kernfs_open_file *of = s->private;
struct css_task_iter *it = of->priv;
+ if (pos)
+ (*pos)++;
+
return css_task_iter_next(it);
}
@@ -4610,7 +4622,7 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
* from position 0, so we can simply keep iterating on !0 *pos.
*/
if (!it) {
- if (WARN_ON_ONCE((*pos)++))
+ if (WARN_ON_ONCE((*pos)))
return ERR_PTR(-EINVAL);
it = kzalloc(sizeof(*it), GFP_KERNEL);
@@ -4618,10 +4630,11 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
return ERR_PTR(-ENOMEM);
of->priv = it;
css_task_iter_start(&cgrp->self, iter_flags, it);
- } else if (!(*pos)++) {
+ } else if (!(*pos)) {
css_task_iter_end(it);
css_task_iter_start(&cgrp->self, iter_flags, it);
- }
+ } else
+ return it->cur_task;
return cgroup_procs_next(s, NULL, NULL);
}
@@ -5927,11 +5940,14 @@ void cgroup_post_fork(struct task_struct *child)
spin_lock_irq(&css_set_lock);
- WARN_ON_ONCE(!list_empty(&child->cg_list));
- cset = task_css_set(current); /* current is @child's parent */
- get_css_set(cset);
- cset->nr_tasks++;
- css_set_move_task(child, NULL, cset, false);
+ /* init tasks are special, only link regular threads */
+ if (likely(child->pid)) {
+ WARN_ON_ONCE(!list_empty(&child->cg_list));
+ cset = task_css_set(current); /* current is @child's parent */
+ get_css_set(cset);
+ cset->nr_tasks++;
+ css_set_move_task(child, NULL, cset, false);
+ }
/*
* If the cgroup has to be frozen, the new task has too. Let's set
@@ -6255,6 +6271,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
return;
}
+ /* Don't associate the sock with unrelated interrupted task's cgroup. */
+ if (in_interrupt())
+ return;
+
rcu_read_lock();
while (true) {
diff --git a/kernel/compat.c b/kernel/compat.c
index 95005f8..843dd17 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -26,70 +26,6 @@
#include <linux/uaccess.h>
-static int __compat_get_timeval(struct timeval *tv, const struct old_timeval32 __user *ctv)
-{
- return (!access_ok(ctv, sizeof(*ctv)) ||
- __get_user(tv->tv_sec, &ctv->tv_sec) ||
- __get_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0;
-}
-
-static int __compat_put_timeval(const struct timeval *tv, struct old_timeval32 __user *ctv)
-{
- return (!access_ok(ctv, sizeof(*ctv)) ||
- __put_user(tv->tv_sec, &ctv->tv_sec) ||
- __put_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0;
-}
-
-static int __compat_get_timespec(struct timespec *ts, const struct old_timespec32 __user *cts)
-{
- return (!access_ok(cts, sizeof(*cts)) ||
- __get_user(ts->tv_sec, &cts->tv_sec) ||
- __get_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
-}
-
-static int __compat_put_timespec(const struct timespec *ts, struct old_timespec32 __user *cts)
-{
- return (!access_ok(cts, sizeof(*cts)) ||
- __put_user(ts->tv_sec, &cts->tv_sec) ||
- __put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
-}
-
-int compat_get_timeval(struct timeval *tv, const void __user *utv)
-{
- if (COMPAT_USE_64BIT_TIME)
- return copy_from_user(tv, utv, sizeof(*tv)) ? -EFAULT : 0;
- else
- return __compat_get_timeval(tv, utv);
-}
-EXPORT_SYMBOL_GPL(compat_get_timeval);
-
-int compat_put_timeval(const struct timeval *tv, void __user *utv)
-{
- if (COMPAT_USE_64BIT_TIME)
- return copy_to_user(utv, tv, sizeof(*tv)) ? -EFAULT : 0;
- else
- return __compat_put_timeval(tv, utv);
-}
-EXPORT_SYMBOL_GPL(compat_put_timeval);
-
-int compat_get_timespec(struct timespec *ts, const void __user *uts)
-{
- if (COMPAT_USE_64BIT_TIME)
- return copy_from_user(ts, uts, sizeof(*ts)) ? -EFAULT : 0;
- else
- return __compat_get_timespec(ts, uts);
-}
-EXPORT_SYMBOL_GPL(compat_get_timespec);
-
-int compat_put_timespec(const struct timespec *ts, void __user *uts)
-{
- if (COMPAT_USE_64BIT_TIME)
- return copy_to_user(uts, ts, sizeof(*ts)) ? -EFAULT : 0;
- else
- return __compat_put_timespec(ts, uts);
-}
-EXPORT_SYMBOL_GPL(compat_put_timespec);
-
#ifdef __ARCH_WANT_SYS_SIGPROCMASK
/*
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index daa4e6e..8bc6f2d 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -302,9 +302,16 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem)
phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
phys_addr_t mask = align - 1;
unsigned long node = rmem->fdt_node;
+ bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
struct cma *cma;
int err;
+ if (size_cmdline != -1 && default_cma) {
+ pr_info("Reserved memory: bypass %s node, using cmdline CMA params instead\n",
+ rmem->name);
+ return -EBUSY;
+ }
+
if (!of_get_flat_dt_prop(node, "reusable", NULL) ||
of_get_flat_dt_prop(node, "no-map", NULL))
return -EINVAL;
@@ -322,7 +329,7 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem)
/* Architecture specific contiguous memory fixup. */
dma_contiguous_early_fixup(rmem->base, rmem->size);
- if (of_get_flat_dt_prop(node, "linux,cma-default", NULL))
+ if (default_cma)
dma_contiguous_set_default(cma);
rmem->ops = &rmem_cma_ops;
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 6af7ae8..ac7956c 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -23,18 +23,6 @@
*/
unsigned int zone_dma_bits __ro_after_init = 24;
-static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size)
-{
- if (!dev->dma_mask) {
- dev_err_once(dev, "DMA map on device without dma_mask\n");
- } else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_limit) {
- dev_err_once(dev,
- "overflow %pad+%zu of DMA mask %llx bus limit %llx\n",
- &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
- }
- WARN_ON_ONCE(1);
-}
-
static inline dma_addr_t phys_to_dma_direct(struct device *dev,
phys_addr_t phys)
{
@@ -357,13 +345,6 @@ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
EXPORT_SYMBOL(dma_direct_unmap_sg);
#endif
-static inline bool dma_direct_possible(struct device *dev, dma_addr_t dma_addr,
- size_t size)
-{
- return swiotlb_force != SWIOTLB_FORCE &&
- dma_capable(dev, dma_addr, size, true);
-}
-
dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
unsigned long attrs)
@@ -371,9 +352,16 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
phys_addr_t phys = page_to_phys(page) + offset;
dma_addr_t dma_addr = phys_to_dma(dev, phys);
- if (unlikely(!dma_direct_possible(dev, dma_addr, size)) &&
- !swiotlb_map(dev, &phys, &dma_addr, size, dir, attrs)) {
- report_addr(dev, dma_addr, size);
+ if (unlikely(swiotlb_force == SWIOTLB_FORCE))
+ return swiotlb_map(dev, phys, size, dir, attrs);
+
+ if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
+ if (swiotlb_force != SWIOTLB_NO_FORCE)
+ return swiotlb_map(dev, phys, size, dir, attrs);
+
+ dev_WARN_ONCE(dev, 1,
+ "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
+ &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
return DMA_MAPPING_ERROR;
}
@@ -411,7 +399,10 @@ dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
dma_addr_t dma_addr = paddr;
if (unlikely(!dma_capable(dev, dma_addr, size, false))) {
- report_addr(dev, dma_addr, size);
+ dev_err_once(dev,
+ "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
+ &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
+ WARN_ON_ONCE(1);
return DMA_MAPPING_ERROR;
}
@@ -472,28 +463,26 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma,
}
#endif /* CONFIG_MMU */
-/*
- * Because 32-bit DMA masks are so common we expect every architecture to be
- * able to satisfy them - either by not supporting more physical memory, or by
- * providing a ZONE_DMA32. If neither is the case, the architecture needs to
- * use an IOMMU instead of the direct mapping.
- */
int dma_direct_supported(struct device *dev, u64 mask)
{
- u64 min_mask;
+ u64 min_mask = (max_pfn - 1) << PAGE_SHIFT;
- if (IS_ENABLED(CONFIG_ZONE_DMA))
- min_mask = DMA_BIT_MASK(zone_dma_bits);
- else
- min_mask = DMA_BIT_MASK(32);
-
- min_mask = min_t(u64, min_mask, (max_pfn - 1) << PAGE_SHIFT);
+ /*
+ * Because 32-bit DMA masks are so common we expect every architecture
+ * to be able to satisfy them - either by not supporting more physical
+ * memory, or by providing a ZONE_DMA32. If neither is the case, the
+ * architecture needs to use an IOMMU instead of the direct mapping.
+ */
+ if (mask >= DMA_BIT_MASK(32))
+ return 1;
/*
* This check needs to be against the actual bit mask value, so
* use __phys_to_dma() here so that the SME encryption mask isn't
* part of the check.
*/
+ if (IS_ENABLED(CONFIG_ZONE_DMA))
+ min_mask = min_t(u64, min_mask, DMA_BIT_MASK(zone_dma_bits));
return mask >= __phys_to_dma(dev, min_mask);
}
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 9280d6f..c19379fa 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -22,6 +22,7 @@
#include <linux/cache.h>
#include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/spinlock.h>
@@ -656,35 +657,38 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
}
/*
- * Create a swiotlb mapping for the buffer at @phys, and in case of DMAing
+ * Create a swiotlb mapping for the buffer at @paddr, and in case of DMAing
* to the device copy the data into it as well.
*/
-bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr,
- size_t size, enum dma_data_direction dir, unsigned long attrs)
+dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
{
- trace_swiotlb_bounced(dev, *dma_addr, size, swiotlb_force);
+ phys_addr_t swiotlb_addr;
+ dma_addr_t dma_addr;
- if (unlikely(swiotlb_force == SWIOTLB_NO_FORCE)) {
- dev_warn_ratelimited(dev,
- "Cannot do DMA to address %pa\n", phys);
- return false;
- }
+ trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size,
+ swiotlb_force);
- /* Oh well, have to allocate and map a bounce buffer. */
- *phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start),
- *phys, size, size, dir, attrs);
- if (*phys == (phys_addr_t)DMA_MAPPING_ERROR)
- return false;
+ swiotlb_addr = swiotlb_tbl_map_single(dev,
+ __phys_to_dma(dev, io_tlb_start),
+ paddr, size, size, dir, attrs);
+ if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
+ return DMA_MAPPING_ERROR;
/* Ensure that the address returned is DMA'ble */
- *dma_addr = __phys_to_dma(dev, *phys);
- if (unlikely(!dma_capable(dev, *dma_addr, size, true))) {
- swiotlb_tbl_unmap_single(dev, *phys, size, size, dir,
+ dma_addr = __phys_to_dma(dev, swiotlb_addr);
+ if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
+ swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC);
- return false;
+ dev_WARN_ONCE(dev, 1,
+ "swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
+ &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
+ return DMA_MAPPING_ERROR;
}
- return true;
+ if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ arch_sync_dma_for_device(swiotlb_addr, size, dir);
+ return dma_addr;
}
size_t swiotlb_max_mapping_size(struct device *dev)
diff --git a/kernel/exit.c b/kernel/exit.c
index 2833ffb..0b81b26 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -619,8 +619,8 @@ static void forget_original_parent(struct task_struct *father,
reaper = find_new_reaper(father, reaper);
list_for_each_entry(p, &father->children, sibling) {
for_each_thread(p, t) {
- t->real_parent = reaper;
- BUG_ON((!t->ptrace) != (t->parent == father));
+ RCU_INIT_POINTER(t->real_parent, reaper);
+ BUG_ON((!t->ptrace) != (rcu_access_pointer(t->parent) == father));
if (likely(!t->ptrace))
t->parent = t->real_parent;
if (t->pdeath_signal)
diff --git a/kernel/fork.c b/kernel/fork.c
index 60a1295..d90af13 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -397,8 +397,8 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
THREAD_SIZE / 1024 * account);
- mod_memcg_page_state(first_page, MEMCG_KERNEL_STACK_KB,
- account * (THREAD_SIZE / 1024));
+ mod_memcg_obj_state(stack, MEMCG_KERNEL_STACK_KB,
+ account * (THREAD_SIZE / 1024));
}
}
@@ -1508,7 +1508,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
return 0;
}
sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
- rcu_assign_pointer(tsk->sighand, sig);
+ RCU_INIT_POINTER(tsk->sighand, sig);
if (!sig)
return -ENOMEM;
diff --git a/kernel/futex.c b/kernel/futex.c
index 0cf84c8..82dfacb 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -385,9 +385,9 @@ static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
*/
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
- u32 hash = jhash2((u32*)&key->both.word,
- (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
+ u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
key->both.offset);
+
return &futex_queues[hash & (futex_hashsize - 1)];
}
@@ -429,7 +429,7 @@ static void get_futex_key_refs(union futex_key *key)
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
- ihold(key->shared.inode); /* implies smp_mb(); (B) */
+ smp_mb(); /* explicit smp_mb(); (B) */
break;
case FUT_OFF_MMSHARED:
futex_get_mm(key); /* implies smp_mb(); (B) */
@@ -463,7 +463,6 @@ static void drop_futex_key_refs(union futex_key *key)
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
- iput(key->shared.inode);
break;
case FUT_OFF_MMSHARED:
mmdrop(key->private.mm);
@@ -505,6 +504,46 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
return timeout;
}
+/*
+ * Generate a machine wide unique identifier for this inode.
+ *
+ * This relies on u64 not wrapping in the life-time of the machine; which with
+ * 1ns resolution means almost 585 years.
+ *
+ * This further relies on the fact that a well formed program will not unmap
+ * the file while it has a (shared) futex waiting on it. This mapping will have
+ * a file reference which pins the mount and inode.
+ *
+ * If for some reason an inode gets evicted and read back in again, it will get
+ * a new sequence number and will _NOT_ match, even though it is the exact same
+ * file.
+ *
+ * It is important that match_futex() will never have a false-positive, esp.
+ * for PI futexes that can mess up the state. The above argues that false-negatives
+ * are only possible for malformed programs.
+ */
+static u64 get_inode_sequence_number(struct inode *inode)
+{
+ static atomic64_t i_seq;
+ u64 old;
+
+ /* Does the inode already have a sequence number? */
+ old = atomic64_read(&inode->i_sequence);
+ if (likely(old))
+ return old;
+
+ for (;;) {
+ u64 new = atomic64_add_return(1, &i_seq);
+ if (WARN_ON_ONCE(!new))
+ continue;
+
+ old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
+ if (old)
+ return old;
+ return new;
+ }
+}
+
/**
* get_futex_key() - Get parameters which are the keys for a futex
* @uaddr: virtual address of the futex
@@ -517,9 +556,15 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
*
* The key words are stored in @key on success.
*
- * For shared mappings, it's (page->index, file_inode(vma->vm_file),
- * offset_within_page). For private mappings, it's (uaddr, current->mm).
- * We can usually work out the index without swapping in the page.
+ * For shared mappings (when @fshared), the key is:
+ * ( inode->i_sequence, page->index, offset_within_page )
+ * [ also see get_inode_sequence_number() ]
+ *
+ * For private mappings (or when !@fshared), the key is:
+ * ( current->mm, address, 0 )
+ *
+ * This allows (cross process, where applicable) identification of the futex
+ * without keeping the page pinned for the duration of the FUTEX_WAIT.
*
* lock_page() might sleep, the caller should not hold a spinlock.
*/
@@ -659,8 +704,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
key->private.mm = mm;
key->private.address = address;
- get_futex_key_refs(key); /* implies smp_mb(); (B) */
-
} else {
struct inode *inode;
@@ -692,40 +735,14 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
goto again;
}
- /*
- * Take a reference unless it is about to be freed. Previously
- * this reference was taken by ihold under the page lock
- * pinning the inode in place so i_lock was unnecessary. The
- * only way for this check to fail is if the inode was
- * truncated in parallel which is almost certainly an
- * application bug. In such a case, just retry.
- *
- * We are not calling into get_futex_key_refs() in file-backed
- * cases, therefore a successful atomic_inc return below will
- * guarantee that get_futex_key() will still imply smp_mb(); (B).
- */
- if (!atomic_inc_not_zero(&inode->i_count)) {
- rcu_read_unlock();
- put_page(page);
-
- goto again;
- }
-
- /* Should be impossible but lets be paranoid for now */
- if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
- err = -EFAULT;
- rcu_read_unlock();
- iput(inode);
-
- goto out;
- }
-
key->both.offset |= FUT_OFF_INODE; /* inode-based key */
- key->shared.inode = inode;
+ key->shared.i_seq = get_inode_sequence_number(inode);
key->shared.pgoff = basepage_index(tail);
rcu_read_unlock();
}
+ get_futex_key_refs(key); /* implies smp_mb(); (B) */
+
out:
put_page(page);
return err;
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 3924fbe..c9d8eb7 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -128,8 +128,6 @@ static inline void unregister_handler_proc(unsigned int irq,
extern bool irq_can_set_affinity_usr(unsigned int irq);
-extern int irq_select_affinity_usr(unsigned int irq);
-
extern void irq_set_thread_affinity(struct irq_desc *desc);
extern int irq_do_set_affinity(struct irq_data *data,
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 3089a60..fe40c65 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -323,7 +323,11 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask,
if (desc->affinity_notify) {
kref_get(&desc->affinity_notify->kref);
- schedule_work(&desc->affinity_notify->work);
+ if (!schedule_work(&desc->affinity_notify->work)) {
+ /* Work was already scheduled, drop our extra ref */
+ kref_put(&desc->affinity_notify->kref,
+ desc->affinity_notify->release);
+ }
}
irqd_set(data, IRQD_AFFINITY_SET);
@@ -423,7 +427,10 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
raw_spin_unlock_irqrestore(&desc->lock, flags);
if (old_notify) {
- cancel_work_sync(&old_notify->work);
+ if (cancel_work_sync(&old_notify->work)) {
+ /* Pending work had a ref, put that one too */
+ kref_put(&old_notify->kref, old_notify->release);
+ }
kref_put(&old_notify->kref, old_notify->release);
}
@@ -481,23 +488,9 @@ int irq_setup_affinity(struct irq_desc *desc)
{
return irq_select_affinity(irq_desc_get_irq(desc));
}
-#endif
+#endif /* CONFIG_AUTO_IRQ_AFFINITY */
+#endif /* CONFIG_SMP */
-/*
- * Called when a bogus affinity is set via /proc/irq
- */
-int irq_select_affinity_usr(unsigned int irq)
-{
- struct irq_desc *desc = irq_to_desc(irq);
- unsigned long flags;
- int ret;
-
- raw_spin_lock_irqsave(&desc->lock, flags);
- ret = irq_setup_affinity(desc);
- raw_spin_unlock_irqrestore(&desc->lock, flags);
- return ret;
-}
-#endif
/**
* irq_set_vcpu_affinity - Set vcpu affinity for the interrupt
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 9e5783d..32c071d 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -111,6 +111,28 @@ static int irq_affinity_list_proc_show(struct seq_file *m, void *v)
return show_irq_affinity(AFFINITY_LIST, m);
}
+#ifndef CONFIG_AUTO_IRQ_AFFINITY
+static inline int irq_select_affinity_usr(unsigned int irq)
+{
+ /*
+ * If the interrupt is started up already then this fails. The
+ * interrupt is assigned to an online CPU already. There is no
+ * point to move it around randomly. Tell user space that the
+ * selected mask is bogus.
+ *
+ * If not then any change to the affinity is pointless because the
+ * startup code invokes irq_setup_affinity() which will select
+ * a online CPU anyway.
+ */
+ return -EINVAL;
+}
+#else
+/* ALPHA magic affinity auto selector. Keep it for historical reasons. */
+static inline int irq_select_affinity_usr(unsigned int irq)
+{
+ return irq_select_affinity(irq);
+}
+#endif
static ssize_t write_irq_affinity(int type, struct file *file,
const char __user *buffer, size_t count, loff_t *pos)
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 63d7501..5989bbb 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -519,7 +519,7 @@ NOKPROBE_SYMBOL(notify_die);
int register_die_notifier(struct notifier_block *nb)
{
- vmalloc_sync_all();
+ vmalloc_sync_mappings();
return atomic_notifier_chain_register(&die_chain, nb);
}
EXPORT_SYMBOL_GPL(register_die_notifier);
diff --git a/kernel/pid.c b/kernel/pid.c
index 0f4ecb5..647b4bb 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -247,6 +247,16 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
tmp = tmp->parent;
}
+ /*
+ * ENOMEM is not the most obvious choice especially for the case
+ * where the child subreaper has already exited and the pid
+ * namespace denies the creation of any new processes. But ENOMEM
+ * is what we have exposed to userspace for a long time and it is
+ * documented behavior for pid namespaces. So we can't easily
+ * change it even if there were an error code better suited.
+ */
+ retval = -ENOMEM;
+
if (unlikely(is_child_reaper(pid))) {
if (pid_ns_prepare_proc(ns))
goto out_free;
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index ddade80..d82b7b8 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1681,7 +1681,7 @@ static unsigned long minimum_image_size(unsigned long saveable)
* hibernation for allocations made while saving the image and for device
* drivers, in case they need to allocate memory from their hibernation
* callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough
- * estimate) and reserverd_size divided by PAGE_SIZE (which is tunable through
+ * estimate) and reserved_size divided by PAGE_SIZE (which is tunable through
* /sys/power/reserved_size, respectively). To make this happen, we compute the
* total number of available page frames and allocate at least
*
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 2c47280..8b1bb5e 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -131,11 +131,12 @@ static void s2idle_loop(void)
* to avoid them upfront.
*/
for (;;) {
- if (s2idle_ops && s2idle_ops->wake)
- s2idle_ops->wake();
-
- if (pm_wakeup_pending())
+ if (s2idle_ops && s2idle_ops->wake) {
+ if (s2idle_ops->wake())
+ break;
+ } else if (pm_wakeup_pending()) {
break;
+ }
pm_wakeup_clear(false);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fc1dfc0..1a9983d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -552,27 +552,32 @@ void resched_cpu(int cpu)
*/
int get_nohz_timer_target(void)
{
- int i, cpu = smp_processor_id();
+ int i, cpu = smp_processor_id(), default_cpu = -1;
struct sched_domain *sd;
- if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER))
- return cpu;
+ if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) {
+ if (!idle_cpu(cpu))
+ return cpu;
+ default_cpu = cpu;
+ }
rcu_read_lock();
for_each_domain(cpu, sd) {
- for_each_cpu(i, sched_domain_span(sd)) {
+ for_each_cpu_and(i, sched_domain_span(sd),
+ housekeeping_cpumask(HK_FLAG_TIMER)) {
if (cpu == i)
continue;
- if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) {
+ if (!idle_cpu(i)) {
cpu = i;
goto unlock;
}
}
}
- if (!housekeeping_cpu(cpu, HK_FLAG_TIMER))
- cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
+ if (default_cpu == -1)
+ default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
+ cpu = default_cpu;
unlock:
rcu_read_unlock();
return cpu;
@@ -1442,17 +1447,6 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
#ifdef CONFIG_SMP
-static inline bool is_per_cpu_kthread(struct task_struct *p)
-{
- if (!(p->flags & PF_KTHREAD))
- return false;
-
- if (p->nr_cpus_allowed != 1)
- return false;
-
- return true;
-}
-
/*
* Per-CPU kthreads are allowed to run on !active && online CPUs, see
* __set_cpus_allowed_ptr() and select_fallback_rq().
@@ -3669,28 +3663,32 @@ static void sched_tick_remote(struct work_struct *work)
* statistics and checks timeslices in a time-independent way, regardless
* of when exactly it is running.
*/
- if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu))
+ if (!tick_nohz_tick_stopped_cpu(cpu))
goto out_requeue;
rq_lock_irq(rq, &rf);
curr = rq->curr;
- if (is_idle_task(curr) || cpu_is_offline(cpu))
+ if (cpu_is_offline(cpu))
goto out_unlock;
+ curr = rq->curr;
update_rq_clock(rq);
- delta = rq_clock_task(rq) - curr->se.exec_start;
- /*
- * Make sure the next tick runs within a reasonable
- * amount of time.
- */
- WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
+ if (!is_idle_task(curr)) {
+ /*
+ * Make sure the next tick runs within a reasonable
+ * amount of time.
+ */
+ delta = rq_clock_task(rq) - curr->se.exec_start;
+ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
+ }
curr->sched_class->task_tick(rq, curr, 0);
+ calc_load_nohz_remote(rq);
out_unlock:
rq_unlock_irq(rq, &rf);
-
out_requeue:
+
/*
* Run the remote tick once per second (1Hz). This arbitrary
* frequency is large enough to avoid overload but short enough
@@ -7063,8 +7061,15 @@ void sched_move_task(struct task_struct *tsk)
if (queued)
enqueue_task(rq, tsk, queue_flags);
- if (running)
+ if (running) {
set_next_task(rq, tsk);
+ /*
+ * After changing group, the running task may have joined a
+ * throttled one but it's still the running task. Trigger a
+ * resched to make sure that task can still run.
+ */
+ resched_curr(rq);
+ }
task_rq_unlock(rq, tsk, &rf);
}
@@ -7260,7 +7265,7 @@ capacity_from_percent(char *buf)
&req.percent);
if (req.ret)
return req;
- if (req.percent > UCLAMP_PERCENT_SCALE) {
+ if ((u64)req.percent > UCLAMP_PERCENT_SCALE) {
req.ret = -ERANGE;
return req;
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fe4e0d7..c1217bf 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3516,7 +3516,6 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
* attach_entity_load_avg - attach this entity to its cfs_rq load avg
* @cfs_rq: cfs_rq to attach to
* @se: sched_entity to attach
- * @flags: migration hints
*
* Must call update_cfs_rq_load_avg() before this, since we rely on
* cfs_rq->avg.last_update_time being current.
@@ -5912,6 +5911,20 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
(available_idle_cpu(prev) || sched_idle_cpu(prev)))
return prev;
+ /*
+ * Allow a per-cpu kthread to stack with the wakee if the
+ * kworker thread and the tasks previous CPUs are the same.
+ * The assumption is that the wakee queued work for the
+ * per-cpu kthread that is now complete and the wakeup is
+ * essentially a sync wakeup. An obvious example of this
+ * pattern is IO completions.
+ */
+ if (is_per_cpu_kthread(current) &&
+ prev == smp_processor_id() &&
+ this_rq()->nr_running <= 1) {
+ return prev;
+ }
+
/* Check a recently used CPU as a potential idle candidate: */
recent_used_cpu = p->recent_used_cpu;
if (recent_used_cpu != prev &&
@@ -8324,6 +8337,8 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,
sgs->group_capacity = group->sgc->capacity;
+ sgs->group_weight = group->group_weight;
+
sgs->group_type = group_classify(sd->imbalance_pct, group, sgs);
/*
@@ -8658,10 +8673,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
/*
* Try to use spare capacity of local group without overloading it or
* emptying busiest.
- * XXX Spreading tasks across NUMA nodes is not always the best policy
- * and special care should be taken for SD_NUMA domain level before
- * spreading the tasks. For now, load_balance() fully relies on
- * NUMA_BALANCING and fbq_classify_group/rq to override the decision.
*/
if (local->group_type == group_has_spare) {
if (busiest->group_type > group_fully_busy) {
@@ -8701,16 +8712,37 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
env->migration_type = migrate_task;
lsub_positive(&nr_diff, local->sum_nr_running);
env->imbalance = nr_diff >> 1;
- return;
+ } else {
+
+ /*
+ * If there is no overload, we just want to even the number of
+ * idle cpus.
+ */
+ env->migration_type = migrate_task;
+ env->imbalance = max_t(long, 0, (local->idle_cpus -
+ busiest->idle_cpus) >> 1);
}
- /*
- * If there is no overload, we just want to even the number of
- * idle cpus.
- */
- env->migration_type = migrate_task;
- env->imbalance = max_t(long, 0, (local->idle_cpus -
- busiest->idle_cpus) >> 1);
+ /* Consider allowing a small imbalance between NUMA groups */
+ if (env->sd->flags & SD_NUMA) {
+ unsigned int imbalance_min;
+
+ /*
+ * Compute an allowed imbalance based on a simple
+ * pair of communicating tasks that should remain
+ * local and ignore them.
+ *
+ * NOTE: Generally this would have been based on
+ * the domain size and this was evaluated. However,
+ * the benefit is similar across a range of workloads
+ * and machines but scaling by the domain size adds
+ * the risk that lower domains have to be rebalanced.
+ */
+ imbalance_min = 2;
+ if (busiest->sum_nr_running <= imbalance_min)
+ env->imbalance = 0;
+ }
+
return;
}
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index 28a5165..de22da6 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -231,16 +231,11 @@ static inline int calc_load_read_idx(void)
return calc_load_idx & 1;
}
-void calc_load_nohz_start(void)
+static void calc_load_nohz_fold(struct rq *rq)
{
- struct rq *this_rq = this_rq();
long delta;
- /*
- * We're going into NO_HZ mode, if there's any pending delta, fold it
- * into the pending NO_HZ delta.
- */
- delta = calc_load_fold_active(this_rq, 0);
+ delta = calc_load_fold_active(rq, 0);
if (delta) {
int idx = calc_load_write_idx();
@@ -248,6 +243,24 @@ void calc_load_nohz_start(void)
}
}
+void calc_load_nohz_start(void)
+{
+ /*
+ * We're going into NO_HZ mode, if there's any pending delta, fold it
+ * into the pending NO_HZ delta.
+ */
+ calc_load_nohz_fold(this_rq());
+}
+
+/*
+ * Keep track of the load for NOHZ_FULL, must be called between
+ * calc_load_nohz_{start,stop}().
+ */
+void calc_load_nohz_remote(struct rq *rq)
+{
+ calc_load_nohz_fold(rq);
+}
+
void calc_load_nohz_stop(void)
{
struct rq *this_rq = this_rq();
@@ -268,7 +281,7 @@ void calc_load_nohz_stop(void)
this_rq->calc_load_update += LOAD_FREQ;
}
-static long calc_load_nohz_fold(void)
+static long calc_load_nohz_read(void)
{
int idx = calc_load_read_idx();
long delta = 0;
@@ -323,7 +336,7 @@ static void calc_global_nohz(void)
}
#else /* !CONFIG_NO_HZ_COMMON */
-static inline long calc_load_nohz_fold(void) { return 0; }
+static inline long calc_load_nohz_read(void) { return 0; }
static inline void calc_global_nohz(void) { }
#endif /* CONFIG_NO_HZ_COMMON */
@@ -346,7 +359,7 @@ void calc_global_load(unsigned long ticks)
/*
* Fold the 'old' NO_HZ-delta to include all NO_HZ CPUs.
*/
- delta = calc_load_nohz_fold();
+ delta = calc_load_nohz_read();
if (delta)
atomic_long_add(delta, &calc_load_tasks);
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index ac4bd0c..0285207 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -1199,6 +1199,9 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
if (static_branch_likely(&psi_disabled))
return -EOPNOTSUPP;
+ if (!nbytes)
+ return -EINVAL;
+
buf_size = min(nbytes, sizeof(buf));
if (copy_from_user(buf, user_buf, buf_size))
return -EFAULT;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 1a88dc8..9ea6478 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -896,7 +896,7 @@ struct rq {
*/
unsigned long nr_uninterruptible;
- struct task_struct *curr;
+ struct task_struct __rcu *curr;
struct task_struct *idle;
struct task_struct *stop;
unsigned long next_balance;
@@ -2479,3 +2479,16 @@ static inline void membarrier_switch_mm(struct rq *rq,
{
}
#endif
+
+#ifdef CONFIG_SMP
+static inline bool is_per_cpu_kthread(struct task_struct *p)
+{
+ if (!(p->flags & PF_KTHREAD))
+ return false;
+
+ if (p->nr_cpus_allowed != 1)
+ return false;
+
+ return true;
+}
+#endif
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index b6ea3dc..ec5c606 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -528,8 +528,12 @@ static long seccomp_attach_filter(unsigned int flags,
int ret;
ret = seccomp_can_sync_threads();
- if (ret)
- return ret;
+ if (ret) {
+ if (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
+ return -ESRCH;
+ else
+ return ret;
+ }
}
/* Set log flag, if present. */
@@ -1221,6 +1225,7 @@ static const struct file_operations seccomp_notify_ops = {
.poll = seccomp_notify_poll,
.release = seccomp_notify_release,
.unlocked_ioctl = seccomp_notify_ioctl,
+ .compat_ioctl = seccomp_notify_ioctl,
};
static struct file *init_listener(struct seccomp_filter *filter)
@@ -1288,10 +1293,12 @@ static long seccomp_set_mode_filter(unsigned int flags,
* In the successful case, NEW_LISTENER returns the new listener fd.
* But in the failure case, TSYNC returns the thread that died. If you
* combine these two flags, there's no way to tell whether something
- * succeeded or failed. So, let's disallow this combination.
+ * succeeded or failed. So, let's disallow this combination if the user
+ * has not explicitly requested no errors from TSYNC.
*/
if ((flags & SECCOMP_FILTER_FLAG_TSYNC) &&
- (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER))
+ (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) &&
+ ((flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH) == 0))
return -EINVAL;
/* Prepare the new filter before holding any locks. */
diff --git a/kernel/signal.c b/kernel/signal.c
index 9ad8dea..5b23963 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -413,27 +413,32 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
{
struct sigqueue *q = NULL;
struct user_struct *user;
+ int sigpending;
/*
* Protect access to @t credentials. This can go away when all
* callers hold rcu read lock.
+ *
+ * NOTE! A pending signal will hold on to the user refcount,
+ * and we get/put the refcount only when the sigpending count
+ * changes from/to zero.
*/
rcu_read_lock();
- user = get_uid(__task_cred(t)->user);
- atomic_inc(&user->sigpending);
+ user = __task_cred(t)->user;
+ sigpending = atomic_inc_return(&user->sigpending);
+ if (sigpending == 1)
+ get_uid(user);
rcu_read_unlock();
- if (override_rlimit ||
- atomic_read(&user->sigpending) <=
- task_rlimit(t, RLIMIT_SIGPENDING)) {
+ if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
q = kmem_cache_alloc(sigqueue_cachep, flags);
} else {
print_dropped_signal(sig);
}
if (unlikely(q == NULL)) {
- atomic_dec(&user->sigpending);
- free_uid(user);
+ if (atomic_dec_and_test(&user->sigpending))
+ free_uid(user);
} else {
INIT_LIST_HEAD(&q->list);
q->flags = 0;
@@ -447,8 +452,8 @@ static void __sigqueue_free(struct sigqueue *q)
{
if (q->flags & SIGQUEUE_PREALLOC)
return;
- atomic_dec(&q->user->sigpending);
- free_uid(q->user);
+ if (atomic_dec_and_test(&q->user->sigpending))
+ free_uid(q->user);
kmem_cache_free(sigqueue_cachep, q);
}
diff --git a/kernel/sys.c b/kernel/sys.c
index f9bc5c3..d325f3a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -47,6 +47,7 @@
#include <linux/syscalls.h>
#include <linux/kprobes.h>
#include <linux/user_namespace.h>
+#include <linux/time_namespace.h>
#include <linux/binfmts.h>
#include <linux/sched.h>
@@ -2546,6 +2547,7 @@ static int do_sysinfo(struct sysinfo *info)
memset(info, 0, sizeof(struct sysinfo));
ktime_get_boottime_ts64(&tp);
+ timens_add_boottime(&tp);
info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d396aaa..ad5b88a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -805,15 +805,6 @@ static struct ctl_table kern_table[] = {
.extra2 = &maxolduid,
},
#ifdef CONFIG_S390
-#ifdef CONFIG_MATHEMU
- {
- .procname = "ieee_emulation_warnings",
- .data = &sysctl_ieee_emulation_warnings,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#endif
{
.procname = "userprocess_debug",
.data = &show_unhandled_signals,
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 0fef395..825f282 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -97,16 +97,26 @@ void task_work_run(void)
* work->func() can do task_work_add(), do not set
* work_exited unless the list is empty.
*/
- raw_spin_lock_irq(&task->pi_lock);
do {
+ head = NULL;
work = READ_ONCE(task->task_works);
- head = !work && (task->flags & PF_EXITING) ?
- &work_exited : NULL;
+ if (!work) {
+ if (task->flags & PF_EXITING)
+ head = &work_exited;
+ else
+ break;
+ }
} while (cmpxchg(&task->task_works, work, head) != work);
- raw_spin_unlock_irq(&task->pi_lock);
if (!work)
break;
+ /*
+ * Synchronize with task_work_cancel(). It can not remove
+ * the first entry == work, cmpxchg(task_works) must fail.
+ * But it can remove another entry from the ->next list.
+ */
+ raw_spin_lock_irq(&task->pi_lock);
+ raw_spin_unlock_irq(&task->pi_lock);
do {
next = work->next;
diff --git a/kernel/time/time.c b/kernel/time/time.c
index cdd7386..3985b2b 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -449,49 +449,6 @@ time64_t mktime64(const unsigned int year0, const unsigned int mon0,
}
EXPORT_SYMBOL(mktime64);
-/**
- * ns_to_timespec - Convert nanoseconds to timespec
- * @nsec: the nanoseconds value to be converted
- *
- * Returns the timespec representation of the nsec parameter.
- */
-struct timespec ns_to_timespec(const s64 nsec)
-{
- struct timespec ts;
- s32 rem;
-
- if (!nsec)
- return (struct timespec) {0, 0};
-
- ts.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &rem);
- if (unlikely(rem < 0)) {
- ts.tv_sec--;
- rem += NSEC_PER_SEC;
- }
- ts.tv_nsec = rem;
-
- return ts;
-}
-EXPORT_SYMBOL(ns_to_timespec);
-
-/**
- * ns_to_timeval - Convert nanoseconds to timeval
- * @nsec: the nanoseconds value to be converted
- *
- * Returns the timeval representation of the nsec parameter.
- */
-struct timeval ns_to_timeval(const s64 nsec)
-{
- struct timespec ts = ns_to_timespec(nsec);
- struct timeval tv;
-
- tv.tv_sec = ts.tv_sec;
- tv.tv_usec = (suseconds_t) ts.tv_nsec / 1000;
-
- return tv;
-}
-EXPORT_SYMBOL(ns_to_timeval);
-
struct __kernel_old_timeval ns_to_kernel_old_timeval(const s64 nsec)
{
struct timespec64 ts = ns_to_timespec64(nsec);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 91e8851..402eef8 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -143,8 +143,8 @@
config BOOTTIME_TRACING
bool "Boot-time Tracing support"
- depends on BOOT_CONFIG && TRACING
- default y
+ depends on TRACING
+ select BOOT_CONFIG
help
Enable developer to setup ftrace subsystem via supplemental
kernel cmdline at boot time for debugging (tracing) driver
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 0735ae8..ca39dc3 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -335,6 +335,7 @@ static void put_probe_ref(void)
static void blk_trace_cleanup(struct blk_trace *bt)
{
+ synchronize_rcu();
blk_trace_free(bt);
put_probe_ref();
}
@@ -629,8 +630,10 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
static int __blk_trace_startstop(struct request_queue *q, int start)
{
int ret;
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
+ bt = rcu_dereference_protected(q->blk_trace,
+ lockdep_is_held(&q->blk_trace_mutex));
if (bt == NULL)
return -EINVAL;
@@ -740,8 +743,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
void blk_trace_shutdown(struct request_queue *q)
{
mutex_lock(&q->blk_trace_mutex);
-
- if (q->blk_trace) {
+ if (rcu_dereference_protected(q->blk_trace,
+ lockdep_is_held(&q->blk_trace_mutex))) {
__blk_trace_startstop(q, 0);
__blk_trace_remove(q);
}
@@ -752,8 +755,10 @@ void blk_trace_shutdown(struct request_queue *q)
#ifdef CONFIG_BLK_CGROUP
static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
{
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
+ /* We don't use the 'bt' value here except as an optimization... */
+ bt = rcu_dereference_protected(q->blk_trace, 1);
if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
return 0;
@@ -796,10 +801,14 @@ blk_trace_request_get_cgid(struct request_queue *q, struct request *rq)
static void blk_add_trace_rq(struct request *rq, int error,
unsigned int nr_bytes, u32 what, u64 cgid)
{
- struct blk_trace *bt = rq->q->blk_trace;
+ struct blk_trace *bt;
- if (likely(!bt))
+ rcu_read_lock();
+ bt = rcu_dereference(rq->q->blk_trace);
+ if (likely(!bt)) {
+ rcu_read_unlock();
return;
+ }
if (blk_rq_is_passthrough(rq))
what |= BLK_TC_ACT(BLK_TC_PC);
@@ -808,6 +817,7 @@ static void blk_add_trace_rq(struct request *rq, int error,
__blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq),
rq->cmd_flags, what, error, 0, NULL, cgid);
+ rcu_read_unlock();
}
static void blk_add_trace_rq_insert(void *ignore,
@@ -853,14 +863,19 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
u32 what, int error)
{
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
- if (likely(!bt))
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
+ if (likely(!bt)) {
+ rcu_read_unlock();
return;
+ }
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
bio_op(bio), bio->bi_opf, what, error, 0, NULL,
blk_trace_bio_get_cgid(q, bio));
+ rcu_read_unlock();
}
static void blk_add_trace_bio_bounce(void *ignore,
@@ -905,11 +920,14 @@ static void blk_add_trace_getrq(void *ignore,
if (bio)
blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
else {
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
if (bt)
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0,
NULL, 0);
+ rcu_read_unlock();
}
}
@@ -921,27 +939,35 @@ static void blk_add_trace_sleeprq(void *ignore,
if (bio)
blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
else {
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
if (bt)
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ,
0, 0, NULL, 0);
+ rcu_read_unlock();
}
}
static void blk_add_trace_plug(void *ignore, struct request_queue *q)
{
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
if (bt)
__blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0);
+ rcu_read_unlock();
}
static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
unsigned int depth, bool explicit)
{
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
if (bt) {
__be64 rpdu = cpu_to_be64(depth);
u32 what;
@@ -953,14 +979,17 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
__blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0);
}
+ rcu_read_unlock();
}
static void blk_add_trace_split(void *ignore,
struct request_queue *q, struct bio *bio,
unsigned int pdu)
{
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
if (bt) {
__be64 rpdu = cpu_to_be64(pdu);
@@ -969,6 +998,7 @@ static void blk_add_trace_split(void *ignore,
BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
&rpdu, blk_trace_bio_get_cgid(q, bio));
}
+ rcu_read_unlock();
}
/**
@@ -988,11 +1018,15 @@ static void blk_add_trace_bio_remap(void *ignore,
struct request_queue *q, struct bio *bio,
dev_t dev, sector_t from)
{
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
struct blk_io_trace_remap r;
- if (likely(!bt))
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
+ if (likely(!bt)) {
+ rcu_read_unlock();
return;
+ }
r.device_from = cpu_to_be32(dev);
r.device_to = cpu_to_be32(bio_dev(bio));
@@ -1001,6 +1035,7 @@ static void blk_add_trace_bio_remap(void *ignore,
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
sizeof(r), &r, blk_trace_bio_get_cgid(q, bio));
+ rcu_read_unlock();
}
/**
@@ -1021,11 +1056,15 @@ static void blk_add_trace_rq_remap(void *ignore,
struct request *rq, dev_t dev,
sector_t from)
{
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
struct blk_io_trace_remap r;
- if (likely(!bt))
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
+ if (likely(!bt)) {
+ rcu_read_unlock();
return;
+ }
r.device_from = cpu_to_be32(dev);
r.device_to = cpu_to_be32(disk_devt(rq->rq_disk));
@@ -1034,6 +1073,7 @@ static void blk_add_trace_rq_remap(void *ignore,
__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
sizeof(r), &r, blk_trace_request_get_cgid(q, rq));
+ rcu_read_unlock();
}
/**
@@ -1051,14 +1091,19 @@ void blk_add_driver_data(struct request_queue *q,
struct request *rq,
void *data, size_t len)
{
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
- if (likely(!bt))
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
+ if (likely(!bt)) {
+ rcu_read_unlock();
return;
+ }
__blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
BLK_TA_DRV_DATA, 0, len, data,
blk_trace_request_get_cgid(q, rq));
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(blk_add_driver_data);
@@ -1597,6 +1642,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
return -EINVAL;
put_probe_ref();
+ synchronize_rcu();
blk_trace_free(bt);
return 0;
}
@@ -1758,6 +1804,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
struct hd_struct *p = dev_to_part(dev);
struct request_queue *q;
struct block_device *bdev;
+ struct blk_trace *bt;
ssize_t ret = -ENXIO;
bdev = bdget(part_devt(p));
@@ -1770,21 +1817,23 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
mutex_lock(&q->blk_trace_mutex);
+ bt = rcu_dereference_protected(q->blk_trace,
+ lockdep_is_held(&q->blk_trace_mutex));
if (attr == &dev_attr_enable) {
- ret = sprintf(buf, "%u\n", !!q->blk_trace);
+ ret = sprintf(buf, "%u\n", !!bt);
goto out_unlock_bdev;
}
- if (q->blk_trace == NULL)
+ if (bt == NULL)
ret = sprintf(buf, "disabled\n");
else if (attr == &dev_attr_act_mask)
- ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
+ ret = blk_trace_mask2str(buf, bt->act_mask);
else if (attr == &dev_attr_pid)
- ret = sprintf(buf, "%u\n", q->blk_trace->pid);
+ ret = sprintf(buf, "%u\n", bt->pid);
else if (attr == &dev_attr_start_lba)
- ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
+ ret = sprintf(buf, "%llu\n", bt->start_lba);
else if (attr == &dev_attr_end_lba)
- ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
+ ret = sprintf(buf, "%llu\n", bt->end_lba);
out_unlock_bdev:
mutex_unlock(&q->blk_trace_mutex);
@@ -1801,6 +1850,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
struct block_device *bdev;
struct request_queue *q;
struct hd_struct *p;
+ struct blk_trace *bt;
u64 value;
ssize_t ret = -EINVAL;
@@ -1831,8 +1881,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
mutex_lock(&q->blk_trace_mutex);
+ bt = rcu_dereference_protected(q->blk_trace,
+ lockdep_is_held(&q->blk_trace_mutex));
if (attr == &dev_attr_enable) {
- if (!!value == !!q->blk_trace) {
+ if (!!value == !!bt) {
ret = 0;
goto out_unlock_bdev;
}
@@ -1844,18 +1896,21 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
}
ret = 0;
- if (q->blk_trace == NULL)
+ if (bt == NULL) {
ret = blk_trace_setup_queue(q, bdev);
+ bt = rcu_dereference_protected(q->blk_trace,
+ lockdep_is_held(&q->blk_trace_mutex));
+ }
if (ret == 0) {
if (attr == &dev_attr_act_mask)
- q->blk_trace->act_mask = value;
+ bt->act_mask = value;
else if (attr == &dev_attr_pid)
- q->blk_trace->pid = value;
+ bt->pid = value;
else if (attr == &dev_attr_start_lba)
- q->blk_trace->start_lba = value;
+ bt->start_lba = value;
else if (attr == &dev_attr_end_lba)
- q->blk_trace->end_lba = value;
+ bt->end_lba = value;
}
out_unlock_bdev:
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 19e793a..68250d4 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -732,7 +732,7 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type)
if (unlikely(!nmi_uaccess_okay()))
return -EPERM;
- if (in_nmi()) {
+ if (irqs_disabled()) {
/* Do an early check on signal validity. Otherwise,
* the error is lost in deferred irq_work.
*/
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 3f7ee10..fd81c7d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1547,6 +1547,8 @@ static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end)
rec = bsearch(&key, pg->records, pg->index,
sizeof(struct dyn_ftrace),
ftrace_cmp_recs);
+ if (rec)
+ break;
}
return rec;
}
diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c
index 4aefe00..7d56d62 100644
--- a/kernel/trace/synth_event_gen_test.c
+++ b/kernel/trace/synth_event_gen_test.c
@@ -111,11 +111,11 @@ static int __init test_gen_synth_cmd(void)
/* Create some bogus values just for testing */
vals[0] = 777; /* next_pid_field */
- vals[1] = (u64)"hula hoops"; /* next_comm_field */
+ vals[1] = (u64)(long)"hula hoops"; /* next_comm_field */
vals[2] = 1000000; /* ts_ns */
vals[3] = 1000; /* ts_ms */
- vals[4] = smp_processor_id(); /* cpu */
- vals[5] = (u64)"thneed"; /* my_string_field */
+ vals[4] = raw_smp_processor_id(); /* cpu */
+ vals[5] = (u64)(long)"thneed"; /* my_string_field */
vals[6] = 598; /* my_int_field */
/* Now generate a gen_synth_test event */
@@ -218,11 +218,11 @@ static int __init test_empty_synth_event(void)
/* Create some bogus values just for testing */
vals[0] = 777; /* next_pid_field */
- vals[1] = (u64)"tiddlywinks"; /* next_comm_field */
+ vals[1] = (u64)(long)"tiddlywinks"; /* next_comm_field */
vals[2] = 1000000; /* ts_ns */
vals[3] = 1000; /* ts_ms */
- vals[4] = smp_processor_id(); /* cpu */
- vals[5] = (u64)"thneed_2.0"; /* my_string_field */
+ vals[4] = raw_smp_processor_id(); /* cpu */
+ vals[5] = (u64)(long)"thneed_2.0"; /* my_string_field */
vals[6] = 399; /* my_int_field */
/* Now trace an empty_synth_test event */
@@ -290,11 +290,11 @@ static int __init test_create_synth_event(void)
/* Create some bogus values just for testing */
vals[0] = 777; /* next_pid_field */
- vals[1] = (u64)"tiddlywinks"; /* next_comm_field */
+ vals[1] = (u64)(long)"tiddlywinks"; /* next_comm_field */
vals[2] = 1000000; /* ts_ns */
vals[3] = 1000; /* ts_ms */
- vals[4] = smp_processor_id(); /* cpu */
- vals[5] = (u64)"thneed"; /* my_string_field */
+ vals[4] = raw_smp_processor_id(); /* cpu */
+ vals[5] = (u64)(long)"thneed"; /* my_string_field */
vals[6] = 398; /* my_int_field */
/* Now generate a create_synth_test event */
@@ -330,7 +330,7 @@ static int __init test_add_next_synth_val(void)
goto out;
/* next_comm_field */
- ret = synth_event_add_next_val((u64)"slinky", &trace_state);
+ ret = synth_event_add_next_val((u64)(long)"slinky", &trace_state);
if (ret)
goto out;
@@ -345,12 +345,12 @@ static int __init test_add_next_synth_val(void)
goto out;
/* cpu */
- ret = synth_event_add_next_val(smp_processor_id(), &trace_state);
+ ret = synth_event_add_next_val(raw_smp_processor_id(), &trace_state);
if (ret)
goto out;
/* my_string_field */
- ret = synth_event_add_next_val((u64)"thneed_2.01", &trace_state);
+ ret = synth_event_add_next_val((u64)(long)"thneed_2.01", &trace_state);
if (ret)
goto out;
@@ -388,7 +388,7 @@ static int __init test_add_synth_val(void)
if (ret)
goto out;
- ret = synth_event_add_val("cpu", smp_processor_id(), &trace_state);
+ ret = synth_event_add_val("cpu", raw_smp_processor_id(), &trace_state);
if (ret)
goto out;
@@ -396,12 +396,12 @@ static int __init test_add_synth_val(void)
if (ret)
goto out;
- ret = synth_event_add_val("next_comm_field", (u64)"silly putty",
+ ret = synth_event_add_val("next_comm_field", (u64)(long)"silly putty",
&trace_state);
if (ret)
goto out;
- ret = synth_event_add_val("my_string_field", (u64)"thneed_9",
+ ret = synth_event_add_val("my_string_field", (u64)(long)"thneed_9",
&trace_state);
if (ret)
goto out;
@@ -423,13 +423,13 @@ static int __init test_trace_synth_event(void)
/* Trace some bogus values just for testing */
ret = synth_event_trace(create_synth_test, 7, /* number of values */
- 444, /* next_pid_field */
- (u64)"clackers", /* next_comm_field */
- 1000000, /* ts_ns */
- 1000, /* ts_ms */
- smp_processor_id(), /* cpu */
- (u64)"Thneed", /* my_string_field */
- 999); /* my_int_field */
+ (u64)444, /* next_pid_field */
+ (u64)(long)"clackers", /* next_comm_field */
+ (u64)1000000, /* ts_ns */
+ (u64)1000, /* ts_ms */
+ (u64)raw_smp_processor_id(), /* cpu */
+ (u64)(long)"Thneed", /* my_string_field */
+ (u64)999); /* my_int_field */
return ret;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c797a15..6b11e4e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1837,6 +1837,7 @@ static __init int init_trace_selftests(void)
pr_info("Running postponed tracer tests:\n");
+ tracing_selftest_running = true;
list_for_each_entry_safe(p, n, &postponed_selftests, list) {
/* This loop can take minutes when sanitizers are enabled, so
* lets make sure we allow RCU processing.
@@ -1859,6 +1860,7 @@ static __init int init_trace_selftests(void)
list_del(&p->list);
kfree(p);
}
+ tracing_selftest_running = false;
out:
mutex_unlock(&trace_types_lock);
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index e7ce7cd..5f6834a 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -821,6 +821,29 @@ static const char *synth_field_fmt(char *type)
return fmt;
}
+static void print_synth_event_num_val(struct trace_seq *s,
+ char *print_fmt, char *name,
+ int size, u64 val, char *space)
+{
+ switch (size) {
+ case 1:
+ trace_seq_printf(s, print_fmt, name, (u8)val, space);
+ break;
+
+ case 2:
+ trace_seq_printf(s, print_fmt, name, (u16)val, space);
+ break;
+
+ case 4:
+ trace_seq_printf(s, print_fmt, name, (u32)val, space);
+ break;
+
+ default:
+ trace_seq_printf(s, print_fmt, name, val, space);
+ break;
+ }
+}
+
static enum print_line_t print_synth_event(struct trace_iterator *iter,
int flags,
struct trace_event *event)
@@ -859,10 +882,13 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
} else {
struct trace_print_flags __flags[] = {
__def_gfpflag_names, {-1, NULL} };
+ char *space = (i == se->n_fields - 1 ? "" : " ");
- trace_seq_printf(s, print_fmt, se->fields[i]->name,
- entry->fields[n_u64],
- i == se->n_fields - 1 ? "" : " ");
+ print_synth_event_num_val(s, print_fmt,
+ se->fields[i]->name,
+ se->fields[i]->size,
+ entry->fields[n_u64],
+ space);
if (strcmp(se->fields[i]->type, "gfp_t") == 0) {
trace_seq_puts(s, " (");
@@ -1798,6 +1824,62 @@ void synth_event_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen)
}
EXPORT_SYMBOL_GPL(synth_event_cmd_init);
+static inline int
+__synth_event_trace_start(struct trace_event_file *file,
+ struct synth_event_trace_state *trace_state)
+{
+ int entry_size, fields_size = 0;
+ int ret = 0;
+
+ memset(trace_state, '\0', sizeof(*trace_state));
+
+ /*
+ * Normal event tracing doesn't get called at all unless the
+ * ENABLED bit is set (which attaches the probe thus allowing
+ * this code to be called, etc). Because this is called
+ * directly by the user, we don't have that but we still need
+ * to honor not logging when disabled. For the the iterated
+ * trace case, we save the enabed state upon start and just
+ * ignore the following data calls.
+ */
+ if (!(file->flags & EVENT_FILE_FL_ENABLED) ||
+ trace_trigger_soft_disabled(file)) {
+ trace_state->disabled = true;
+ ret = -ENOENT;
+ goto out;
+ }
+
+ trace_state->event = file->event_call->data;
+
+ fields_size = trace_state->event->n_u64 * sizeof(u64);
+
+ /*
+ * Avoid ring buffer recursion detection, as this event
+ * is being performed within another event.
+ */
+ trace_state->buffer = file->tr->array_buffer.buffer;
+ ring_buffer_nest_start(trace_state->buffer);
+
+ entry_size = sizeof(*trace_state->entry) + fields_size;
+ trace_state->entry = trace_event_buffer_reserve(&trace_state->fbuffer,
+ file,
+ entry_size);
+ if (!trace_state->entry) {
+ ring_buffer_nest_end(trace_state->buffer);
+ ret = -EINVAL;
+ }
+out:
+ return ret;
+}
+
+static inline void
+__synth_event_trace_end(struct synth_event_trace_state *trace_state)
+{
+ trace_event_buffer_commit(&trace_state->fbuffer);
+
+ ring_buffer_nest_end(trace_state->buffer);
+}
+
/**
* synth_event_trace - Trace a synthetic event
* @file: The trace_event_file representing the synthetic event
@@ -1819,71 +1901,61 @@ EXPORT_SYMBOL_GPL(synth_event_cmd_init);
*/
int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...)
{
- struct trace_event_buffer fbuffer;
- struct synth_trace_event *entry;
- struct trace_buffer *buffer;
- struct synth_event *event;
+ struct synth_event_trace_state state;
unsigned int i, n_u64;
- int fields_size = 0;
va_list args;
- int ret = 0;
+ int ret;
- /*
- * Normal event generation doesn't get called at all unless
- * the ENABLED bit is set (which attaches the probe thus
- * allowing this code to be called, etc). Because this is
- * called directly by the user, we don't have that but we
- * still need to honor not logging when disabled.
- */
- if (!(file->flags & EVENT_FILE_FL_ENABLED))
- return 0;
+ ret = __synth_event_trace_start(file, &state);
+ if (ret) {
+ if (ret == -ENOENT)
+ ret = 0; /* just disabled, not really an error */
+ return ret;
+ }
- event = file->event_call->data;
-
- if (n_vals != event->n_fields)
- return -EINVAL;
-
- if (trace_trigger_soft_disabled(file))
- return -EINVAL;
-
- fields_size = event->n_u64 * sizeof(u64);
-
- /*
- * Avoid ring buffer recursion detection, as this event
- * is being performed within another event.
- */
- buffer = file->tr->array_buffer.buffer;
- ring_buffer_nest_start(buffer);
-
- entry = trace_event_buffer_reserve(&fbuffer, file,
- sizeof(*entry) + fields_size);
- if (!entry) {
+ if (n_vals != state.event->n_fields) {
ret = -EINVAL;
goto out;
}
va_start(args, n_vals);
- for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
+ for (i = 0, n_u64 = 0; i < state.event->n_fields; i++) {
u64 val;
val = va_arg(args, u64);
- if (event->fields[i]->is_string) {
+ if (state.event->fields[i]->is_string) {
char *str_val = (char *)(long)val;
- char *str_field = (char *)&entry->fields[n_u64];
+ char *str_field = (char *)&state.entry->fields[n_u64];
strscpy(str_field, str_val, STR_VAR_LEN_MAX);
n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
} else {
- entry->fields[n_u64] = val;
+ struct synth_field *field = state.event->fields[i];
+
+ switch (field->size) {
+ case 1:
+ *(u8 *)&state.entry->fields[n_u64] = (u8)val;
+ break;
+
+ case 2:
+ *(u16 *)&state.entry->fields[n_u64] = (u16)val;
+ break;
+
+ case 4:
+ *(u32 *)&state.entry->fields[n_u64] = (u32)val;
+ break;
+
+ default:
+ state.entry->fields[n_u64] = val;
+ break;
+ }
n_u64++;
}
}
va_end(args);
-
- trace_event_buffer_commit(&fbuffer);
out:
- ring_buffer_nest_end(buffer);
+ __synth_event_trace_end(&state);
return ret;
}
@@ -1910,64 +1982,55 @@ EXPORT_SYMBOL_GPL(synth_event_trace);
int synth_event_trace_array(struct trace_event_file *file, u64 *vals,
unsigned int n_vals)
{
- struct trace_event_buffer fbuffer;
- struct synth_trace_event *entry;
- struct trace_buffer *buffer;
- struct synth_event *event;
+ struct synth_event_trace_state state;
unsigned int i, n_u64;
- int fields_size = 0;
- int ret = 0;
+ int ret;
- /*
- * Normal event generation doesn't get called at all unless
- * the ENABLED bit is set (which attaches the probe thus
- * allowing this code to be called, etc). Because this is
- * called directly by the user, we don't have that but we
- * still need to honor not logging when disabled.
- */
- if (!(file->flags & EVENT_FILE_FL_ENABLED))
- return 0;
+ ret = __synth_event_trace_start(file, &state);
+ if (ret) {
+ if (ret == -ENOENT)
+ ret = 0; /* just disabled, not really an error */
+ return ret;
+ }
- event = file->event_call->data;
-
- if (n_vals != event->n_fields)
- return -EINVAL;
-
- if (trace_trigger_soft_disabled(file))
- return -EINVAL;
-
- fields_size = event->n_u64 * sizeof(u64);
-
- /*
- * Avoid ring buffer recursion detection, as this event
- * is being performed within another event.
- */
- buffer = file->tr->array_buffer.buffer;
- ring_buffer_nest_start(buffer);
-
- entry = trace_event_buffer_reserve(&fbuffer, file,
- sizeof(*entry) + fields_size);
- if (!entry) {
+ if (n_vals != state.event->n_fields) {
ret = -EINVAL;
goto out;
}
- for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
- if (event->fields[i]->is_string) {
+ for (i = 0, n_u64 = 0; i < state.event->n_fields; i++) {
+ if (state.event->fields[i]->is_string) {
char *str_val = (char *)(long)vals[i];
- char *str_field = (char *)&entry->fields[n_u64];
+ char *str_field = (char *)&state.entry->fields[n_u64];
strscpy(str_field, str_val, STR_VAR_LEN_MAX);
n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
} else {
- entry->fields[n_u64] = vals[i];
+ struct synth_field *field = state.event->fields[i];
+ u64 val = vals[i];
+
+ switch (field->size) {
+ case 1:
+ *(u8 *)&state.entry->fields[n_u64] = (u8)val;
+ break;
+
+ case 2:
+ *(u16 *)&state.entry->fields[n_u64] = (u16)val;
+ break;
+
+ case 4:
+ *(u32 *)&state.entry->fields[n_u64] = (u32)val;
+ break;
+
+ default:
+ state.entry->fields[n_u64] = val;
+ break;
+ }
n_u64++;
}
}
-
- trace_event_buffer_commit(&fbuffer);
out:
- ring_buffer_nest_end(buffer);
+ __synth_event_trace_end(&state);
return ret;
}
@@ -2004,58 +2067,15 @@ EXPORT_SYMBOL_GPL(synth_event_trace_array);
int synth_event_trace_start(struct trace_event_file *file,
struct synth_event_trace_state *trace_state)
{
- struct synth_trace_event *entry;
- int fields_size = 0;
- int ret = 0;
+ int ret;
- if (!trace_state) {
- ret = -EINVAL;
- goto out;
- }
+ if (!trace_state)
+ return -EINVAL;
- memset(trace_state, '\0', sizeof(*trace_state));
+ ret = __synth_event_trace_start(file, trace_state);
+ if (ret == -ENOENT)
+ ret = 0; /* just disabled, not really an error */
- /*
- * Normal event tracing doesn't get called at all unless the
- * ENABLED bit is set (which attaches the probe thus allowing
- * this code to be called, etc). Because this is called
- * directly by the user, we don't have that but we still need
- * to honor not logging when disabled. For the the iterated
- * trace case, we save the enabed state upon start and just
- * ignore the following data calls.
- */
- if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
- trace_state->enabled = false;
- goto out;
- }
-
- trace_state->enabled = true;
-
- trace_state->event = file->event_call->data;
-
- if (trace_trigger_soft_disabled(file)) {
- ret = -EINVAL;
- goto out;
- }
-
- fields_size = trace_state->event->n_u64 * sizeof(u64);
-
- /*
- * Avoid ring buffer recursion detection, as this event
- * is being performed within another event.
- */
- trace_state->buffer = file->tr->array_buffer.buffer;
- ring_buffer_nest_start(trace_state->buffer);
-
- entry = trace_event_buffer_reserve(&trace_state->fbuffer, file,
- sizeof(*entry) + fields_size);
- if (!entry) {
- ret = -EINVAL;
- goto out;
- }
-
- trace_state->entry = entry;
-out:
return ret;
}
EXPORT_SYMBOL_GPL(synth_event_trace_start);
@@ -2088,7 +2108,7 @@ static int __synth_event_add_val(const char *field_name, u64 val,
trace_state->add_next = true;
}
- if (!trace_state->enabled)
+ if (trace_state->disabled)
goto out;
event = trace_state->event;
@@ -2122,8 +2142,25 @@ static int __synth_event_add_val(const char *field_name, u64 val,
str_field = (char *)&entry->fields[field->offset];
strscpy(str_field, str_val, STR_VAR_LEN_MAX);
- } else
- entry->fields[field->offset] = val;
+ } else {
+ switch (field->size) {
+ case 1:
+ *(u8 *)&trace_state->entry->fields[field->offset] = (u8)val;
+ break;
+
+ case 2:
+ *(u16 *)&trace_state->entry->fields[field->offset] = (u16)val;
+ break;
+
+ case 4:
+ *(u32 *)&trace_state->entry->fields[field->offset] = (u32)val;
+ break;
+
+ default:
+ trace_state->entry->fields[field->offset] = val;
+ break;
+ }
+ }
out:
return ret;
}
@@ -2223,9 +2260,7 @@ int synth_event_trace_end(struct synth_event_trace_state *trace_state)
if (!trace_state)
return -EINVAL;
- trace_event_buffer_commit(&trace_state->fbuffer);
-
- ring_buffer_nest_end(trace_state->buffer);
+ __synth_event_trace_end(trace_state);
return 0;
}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index d8264eb..362cca5 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1012,7 +1012,7 @@ int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...)
{
struct dynevent_arg arg;
va_list args;
- int ret;
+ int ret = 0;
if (cmd->type != DYNEVENT_TYPE_KPROBE)
return -EINVAL;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 301db44..4e01c44 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1411,14 +1411,16 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
return;
rcu_read_lock();
retry:
- if (req_cpu == WORK_CPU_UNBOUND)
- cpu = wq_select_unbound_cpu(raw_smp_processor_id());
-
/* pwq which will be used unless @work is executing elsewhere */
- if (!(wq->flags & WQ_UNBOUND))
- pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
- else
+ if (wq->flags & WQ_UNBOUND) {
+ if (req_cpu == WORK_CPU_UNBOUND)
+ cpu = wq_select_unbound_cpu(raw_smp_processor_id());
pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
+ } else {
+ if (req_cpu == WORK_CPU_UNBOUND)
+ cpu = raw_smp_processor_id();
+ pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
+ }
/*
* If @work was previously on a different pool, it might still be
diff --git a/lib/Kconfig b/lib/Kconfig
index 0cf875f..bc7e563 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -573,9 +573,6 @@
config LIBFDT
bool
-config LIBXBC
- bool
-
config OID_REGISTRY
tristate
help
diff --git a/lib/Makefile b/lib/Makefile
index 5d64890..611872c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -230,7 +230,7 @@
$(eval CFLAGS_$(file) = -I $(srctree)/scripts/dtc/libfdt))
lib-$(CONFIG_LIBFDT) += $(libfdt_files)
-lib-$(CONFIG_LIBXBC) += bootconfig.o
+lib-$(CONFIG_BOOT_CONFIG) += bootconfig.o
obj-$(CONFIG_RBTREE_TEST) += rbtree_test.o
obj-$(CONFIG_INTERVAL_TREE_TEST) += interval_tree_test.o
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index afb2e76..ec3ce7f 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -6,12 +6,13 @@
#define pr_fmt(fmt) "bootconfig: " fmt
+#include <linux/bootconfig.h>
#include <linux/bug.h>
#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/kernel.h>
+#include <linux/memblock.h>
#include <linux/printk.h>
-#include <linux/bootconfig.h>
#include <linux/string.h>
/*
@@ -23,7 +24,7 @@
* node (for array).
*/
-static struct xbc_node xbc_nodes[XBC_NODE_MAX] __initdata;
+static struct xbc_node *xbc_nodes __initdata;
static int xbc_node_num __initdata;
static char *xbc_data __initdata;
static size_t xbc_data_size __initdata;
@@ -532,7 +533,7 @@ struct xbc_node *find_match_node(struct xbc_node *node, char *k)
static int __init __xbc_add_key(char *k)
{
- struct xbc_node *node;
+ struct xbc_node *node, *child;
if (!xbc_valid_keyword(k))
return xbc_parse_error("Invalid keyword", k);
@@ -542,8 +543,12 @@ static int __init __xbc_add_key(char *k)
if (!last_parent) /* the first level */
node = find_match_node(xbc_nodes, k);
- else
- node = find_match_node(xbc_node_get_child(last_parent), k);
+ else {
+ child = xbc_node_get_child(last_parent);
+ if (child && xbc_node_is_value(child))
+ return xbc_parse_error("Subkey is mixed with value", k);
+ node = find_match_node(child, k);
+ }
if (node)
last_parent = node;
@@ -573,10 +578,10 @@ static int __init __xbc_parse_keys(char *k)
return __xbc_add_key(k);
}
-static int __init xbc_parse_kv(char **k, char *v)
+static int __init xbc_parse_kv(char **k, char *v, int op)
{
struct xbc_node *prev_parent = last_parent;
- struct xbc_node *node;
+ struct xbc_node *child;
char *next;
int c, ret;
@@ -584,12 +589,19 @@ static int __init xbc_parse_kv(char **k, char *v)
if (ret)
return ret;
+ child = xbc_node_get_child(last_parent);
+ if (child) {
+ if (xbc_node_is_key(child))
+ return xbc_parse_error("Value is mixed with subkey", v);
+ else if (op == '=')
+ return xbc_parse_error("Value is redefined", v);
+ }
+
c = __xbc_parse_value(&v, &next);
if (c < 0)
return c;
- node = xbc_add_sibling(v, XBC_VALUE);
- if (!node)
+ if (!xbc_add_sibling(v, XBC_VALUE))
return -ENOMEM;
if (c == ',') { /* Array */
@@ -719,7 +731,8 @@ void __init xbc_destroy_all(void)
xbc_data = NULL;
xbc_data_size = 0;
xbc_node_num = 0;
- memset(xbc_nodes, 0, sizeof(xbc_nodes));
+ memblock_free(__pa(xbc_nodes), sizeof(struct xbc_node) * XBC_NODE_MAX);
+ xbc_nodes = NULL;
}
/**
@@ -748,13 +761,20 @@ int __init xbc_init(char *buf)
return -ERANGE;
}
+ xbc_nodes = memblock_alloc(sizeof(struct xbc_node) * XBC_NODE_MAX,
+ SMP_CACHE_BYTES);
+ if (!xbc_nodes) {
+ pr_err("Failed to allocate memory for bootconfig nodes.\n");
+ return -ENOMEM;
+ }
+ memset(xbc_nodes, 0, sizeof(struct xbc_node) * XBC_NODE_MAX);
xbc_data = buf;
xbc_data_size = ret + 1;
last_parent = NULL;
p = buf;
do {
- q = strpbrk(p, "{}=;\n#");
+ q = strpbrk(p, "{}=+;\n#");
if (!q) {
p = skip_spaces(p);
if (*p != '\0')
@@ -765,8 +785,15 @@ int __init xbc_init(char *buf)
c = *q;
*q++ = '\0';
switch (c) {
+ case '+':
+ if (*q++ != '=') {
+ ret = xbc_parse_error("Wrong '+' operator",
+ q - 2);
+ break;
+ }
+ /* Fall through */
case '=':
- ret = xbc_parse_kv(&p, q);
+ ret = xbc_parse_kv(&p, q, c);
break;
case '{':
ret = xbc_open_brace(&p, q);
diff --git a/lib/crypto/chacha20poly1305-selftest.c b/lib/crypto/chacha20poly1305-selftest.c
index c391a91..fa43ded 100644
--- a/lib/crypto/chacha20poly1305-selftest.c
+++ b/lib/crypto/chacha20poly1305-selftest.c
@@ -9028,10 +9028,15 @@ bool __init chacha20poly1305_selftest(void)
&& total_len <= 1 << 10; ++total_len) {
for (i = 0; i <= total_len; ++i) {
for (j = i; j <= total_len; ++j) {
+ k = 0;
sg_init_table(sg_src, 3);
- sg_set_buf(&sg_src[0], input, i);
- sg_set_buf(&sg_src[1], input + i, j - i);
- sg_set_buf(&sg_src[2], input + j, total_len - j);
+ if (i)
+ sg_set_buf(&sg_src[k++], input, i);
+ if (j - i)
+ sg_set_buf(&sg_src[k++], input + i, j - i);
+ if (total_len - j)
+ sg_set_buf(&sg_src[k++], input + j, total_len - j);
+ sg_init_marker(sg_src, k);
memset(computed_output, 0, total_len);
memset(input, 0, total_len);
diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c
index 6d83caf..ad0699c 100644
--- a/lib/crypto/chacha20poly1305.c
+++ b/lib/crypto/chacha20poly1305.c
@@ -235,6 +235,9 @@ bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src,
__le64 lens[2];
} b __aligned(16);
+ if (WARN_ON(src_len > INT_MAX))
+ return false;
+
chacha_load_key(b.k, key);
b.iv[0] = 0;
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index ed717dd..81c69c0 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -83,15 +83,19 @@ static bool init_stack_slab(void **prealloc)
return true;
if (stack_slabs[depot_index] == NULL) {
stack_slabs[depot_index] = *prealloc;
+ *prealloc = NULL;
} else {
- stack_slabs[depot_index + 1] = *prealloc;
+ /* If this is the last depot slab, do not touch the next one. */
+ if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) {
+ stack_slabs[depot_index + 1] = *prealloc;
+ *prealloc = NULL;
+ }
/*
* This smp_store_release pairs with smp_load_acquire() from
* |next_slab_inited| above and in stack_depot_save().
*/
smp_store_release(&next_slab_inited, 1);
}
- *prealloc = NULL;
return true;
}
diff --git a/lib/string.c b/lib/string.c
index f607b96..6012c38 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -699,6 +699,14 @@ EXPORT_SYMBOL(sysfs_streq);
* @n: number of strings in the array or -1 for NULL terminated arrays
* @string: string to match with
*
+ * This routine will look for a string in an array of strings up to the
+ * n-th element in the array or until the first NULL element.
+ *
+ * Historically the value of -1 for @n, was used to search in arrays that
+ * are NULL terminated. However, the function does not make a distinction
+ * when finishing the search: either @n elements have been compared OR
+ * the first NULL element was found.
+ *
* Return:
* index of a @string in the @array if matches, or %-EINVAL otherwise.
*/
@@ -727,6 +735,14 @@ EXPORT_SYMBOL(match_string);
*
* Returns index of @str in the @array or -EINVAL, just like match_string().
* Uses sysfs_streq instead of strcmp for matching.
+ *
+ * This routine will look for a string in an array of strings up to the
+ * n-th element in the array or until the first NULL element.
+ *
+ * Historically the value of -1 for @n, was used to search in arrays that
+ * are NULL terminated. However, the function does not make a distinction
+ * when finishing the search: either @n elements have been compared OR
+ * the first NULL element was found.
*/
int __sysfs_match_string(const char * const *array, size_t n, const char *str)
{
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index b08b199..24ad53b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3043,8 +3043,7 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
return;
flush_cache_range(vma, address, address + HPAGE_PMD_SIZE);
- pmdval = *pvmw->pmd;
- pmdp_invalidate(vma, address, pvmw->pmd);
+ pmdval = pmdp_invalidate(vma, address, pvmw->pmd);
if (pmd_dirty(pmdval))
set_page_dirty(page);
entry = make_migration_entry(page, pmd_write(pmdval));
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index e434b05..5280bcf 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -240,8 +240,7 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
if (!page_counter_try_charge(&h_cg->hugepage[idx], nr_pages,
&counter)) {
ret = -ENOMEM;
- hugetlb_event(hugetlb_cgroup_from_counter(counter, idx), idx,
- HUGETLB_MAX);
+ hugetlb_event(h_cg, idx, HUGETLB_MAX);
}
css_put(&h_cg->css);
done:
diff --git a/mm/madvise.c b/mm/madvise.c
index 43b47d3..4bb30ed 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -335,12 +335,14 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
}
page = pmd_page(orig_pmd);
+
+ /* Do not interfere with other mappings of this page */
+ if (page_mapcount(page) != 1)
+ goto huge_unlock;
+
if (next - addr != HPAGE_PMD_SIZE) {
int err;
- if (page_mapcount(page) != 1)
- goto huge_unlock;
-
get_page(page);
spin_unlock(ptl);
lock_page(page);
@@ -426,6 +428,10 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
continue;
}
+ /* Do not interfere with other mappings of this page */
+ if (page_mapcount(page) != 1)
+ continue;
+
VM_BUG_ON_PAGE(PageTransCompound(page), page);
if (pte_young(ptent)) {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6f6dc87..7ddf91c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -409,8 +409,10 @@ int memcg_expand_shrinker_maps(int new_id)
if (mem_cgroup_is_root(memcg))
continue;
ret = memcg_expand_one_shrinker_map(memcg, size, old_size);
- if (ret)
+ if (ret) {
+ mem_cgroup_iter_break(NULL, memcg);
goto unlock;
+ }
}
unlock:
if (!ret)
@@ -775,6 +777,17 @@ void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val)
rcu_read_unlock();
}
+void mod_memcg_obj_state(void *p, int idx, int val)
+{
+ struct mem_cgroup *memcg;
+
+ rcu_read_lock();
+ memcg = mem_cgroup_from_obj(p);
+ if (memcg)
+ mod_memcg_state(memcg, idx, val);
+ rcu_read_unlock();
+}
+
/**
* __count_memcg_events - account VM events in a cgroup
* @memcg: the memory cgroup
@@ -2295,28 +2308,41 @@ static void high_work_func(struct work_struct *work)
#define MEMCG_DELAY_SCALING_SHIFT 14
/*
- * Scheduled by try_charge() to be executed from the userland return path
- * and reclaims memory over the high limit.
+ * Get the number of jiffies that we should penalise a mischievous cgroup which
+ * is exceeding its memory.high by checking both it and its ancestors.
*/
-void mem_cgroup_handle_over_high(void)
+static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
+ unsigned int nr_pages)
{
- unsigned long usage, high, clamped_high;
- unsigned long pflags;
- unsigned long penalty_jiffies, overage;
- unsigned int nr_pages = current->memcg_nr_pages_over_high;
- struct mem_cgroup *memcg;
+ unsigned long penalty_jiffies;
+ u64 max_overage = 0;
- if (likely(!nr_pages))
- return;
+ do {
+ unsigned long usage, high;
+ u64 overage;
- memcg = get_mem_cgroup_from_mm(current->mm);
- reclaim_high(memcg, nr_pages, GFP_KERNEL);
- current->memcg_nr_pages_over_high = 0;
+ usage = page_counter_read(&memcg->memory);
+ high = READ_ONCE(memcg->high);
+
+ /*
+ * Prevent division by 0 in overage calculation by acting as if
+ * it was a threshold of 1 page
+ */
+ high = max(high, 1UL);
+
+ overage = usage - high;
+ overage <<= MEMCG_DELAY_PRECISION_SHIFT;
+ overage = div64_u64(overage, high);
+
+ if (overage > max_overage)
+ max_overage = overage;
+ } while ((memcg = parent_mem_cgroup(memcg)) &&
+ !mem_cgroup_is_root(memcg));
+
+ if (!max_overage)
+ return 0;
/*
- * memory.high is breached and reclaim is unable to keep up. Throttle
- * allocators proactively to slow down excessive growth.
- *
* We use overage compared to memory.high to calculate the number of
* jiffies to sleep (penalty_jiffies). Ideally this value should be
* fairly lenient on small overages, and increasingly harsh when the
@@ -2324,24 +2350,9 @@ void mem_cgroup_handle_over_high(void)
* its crazy behaviour, so we exponentially increase the delay based on
* overage amount.
*/
-
- usage = page_counter_read(&memcg->memory);
- high = READ_ONCE(memcg->high);
-
- if (usage <= high)
- goto out;
-
- /*
- * Prevent division by 0 in overage calculation by acting as if it was a
- * threshold of 1 page
- */
- clamped_high = max(high, 1UL);
-
- overage = div_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT,
- clamped_high);
-
- penalty_jiffies = ((u64)overage * overage * HZ)
- >> (MEMCG_DELAY_PRECISION_SHIFT + MEMCG_DELAY_SCALING_SHIFT);
+ penalty_jiffies = max_overage * max_overage * HZ;
+ penalty_jiffies >>= MEMCG_DELAY_PRECISION_SHIFT;
+ penalty_jiffies >>= MEMCG_DELAY_SCALING_SHIFT;
/*
* Factor in the task's own contribution to the overage, such that four
@@ -2358,7 +2369,32 @@ void mem_cgroup_handle_over_high(void)
* application moving forwards and also permit diagnostics, albeit
* extremely slowly.
*/
- penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+ return min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+}
+
+/*
+ * Scheduled by try_charge() to be executed from the userland return path
+ * and reclaims memory over the high limit.
+ */
+void mem_cgroup_handle_over_high(void)
+{
+ unsigned long penalty_jiffies;
+ unsigned long pflags;
+ unsigned int nr_pages = current->memcg_nr_pages_over_high;
+ struct mem_cgroup *memcg;
+
+ if (likely(!nr_pages))
+ return;
+
+ memcg = get_mem_cgroup_from_mm(current->mm);
+ reclaim_high(memcg, nr_pages, GFP_KERNEL);
+ current->memcg_nr_pages_over_high = 0;
+
+ /*
+ * memory.high is breached and reclaim is unable to keep up. Throttle
+ * allocators proactively to slow down excessive growth.
+ */
+ penalty_jiffies = calculate_high_delay(memcg, nr_pages);
/*
* Don't sleep if the amount of jiffies this memcg owes us is so low
@@ -2636,6 +2672,33 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
}
#ifdef CONFIG_MEMCG_KMEM
+/*
+ * Returns a pointer to the memory cgroup to which the kernel object is charged.
+ *
+ * The caller must ensure the memcg lifetime, e.g. by taking rcu_read_lock(),
+ * cgroup_mutex, etc.
+ */
+struct mem_cgroup *mem_cgroup_from_obj(void *p)
+{
+ struct page *page;
+
+ if (mem_cgroup_disabled())
+ return NULL;
+
+ page = virt_to_head_page(p);
+
+ /*
+ * Slab pages don't have page->mem_cgroup set because corresponding
+ * kmem caches can be reparented during the lifetime. That's why
+ * memcg_from_slab_page() should be used instead.
+ */
+ if (PageSlab(page))
+ return memcg_from_slab_page(page);
+
+ /* All other pages use page->mem_cgroup */
+ return page->mem_cgroup;
+}
+
static int memcg_alloc_cache_id(void)
{
int id, size;
@@ -4025,7 +4088,7 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
struct mem_cgroup_thresholds *thresholds;
struct mem_cgroup_threshold_ary *new;
unsigned long usage;
- int i, j, size;
+ int i, j, size, entries;
mutex_lock(&memcg->thresholds_lock);
@@ -4045,14 +4108,20 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
__mem_cgroup_threshold(memcg, type == _MEMSWAP);
/* Calculate new number of threshold */
- size = 0;
+ size = entries = 0;
for (i = 0; i < thresholds->primary->size; i++) {
if (thresholds->primary->entries[i].eventfd != eventfd)
size++;
+ else
+ entries++;
}
new = thresholds->spare;
+ /* If no items related to eventfd have been cleared, nothing to do */
+ if (!entries)
+ goto unlock;
+
/* Set thresholds array to NULL if we don't have thresholds */
if (!size) {
kfree(new);
@@ -6680,19 +6749,9 @@ void mem_cgroup_sk_alloc(struct sock *sk)
if (!mem_cgroup_sockets_enabled)
return;
- /*
- * Socket cloning can throw us here with sk_memcg already
- * filled. It won't however, necessarily happen from
- * process context. So the test for root memcg given
- * the current task's memcg won't help us in this case.
- *
- * Respecting the original socket's memcg is a better
- * decision in this case.
- */
- if (sk->sk_memcg) {
- css_get(&sk->sk_memcg->css);
+ /* Do not associate the sock with unrelated interrupted task's memcg. */
+ if (in_interrupt())
return;
- }
rcu_read_lock();
memcg = mem_cgroup_from_task(current);
diff --git a/mm/memory.c b/mm/memory.c
index 0bccc62..e8bfdf0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2257,7 +2257,7 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
bool ret;
void *kaddr;
void __user *uaddr;
- bool force_mkyoung;
+ bool locked = false;
struct vm_area_struct *vma = vmf->vma;
struct mm_struct *mm = vma->vm_mm;
unsigned long addr = vmf->address;
@@ -2282,11 +2282,11 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
* On architectures with software "accessed" bits, we would
* take a double page fault, so mark it accessed here.
*/
- force_mkyoung = arch_faults_on_old_pte() && !pte_young(vmf->orig_pte);
- if (force_mkyoung) {
+ if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) {
pte_t entry;
vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
+ locked = true;
if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
/*
* Other thread has already handled the fault
@@ -2310,18 +2310,37 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
* zeroes.
*/
if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
+ if (locked)
+ goto warn;
+
+ /* Re-validate under PTL if the page is still mapped */
+ vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
+ locked = true;
+ if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
+ /* The PTE changed under us. Retry page fault. */
+ ret = false;
+ goto pte_unlock;
+ }
+
/*
- * Give a warn in case there can be some obscure
- * use-case
+ * The same page can be mapped back since last copy attampt.
+ * Try to copy again under PTL.
*/
- WARN_ON_ONCE(1);
- clear_page(kaddr);
+ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
+ /*
+ * Give a warn in case there can be some obscure
+ * use-case
+ */
+warn:
+ WARN_ON_ONCE(1);
+ clear_page(kaddr);
+ }
}
ret = true;
pte_unlock:
- if (force_mkyoung)
+ if (locked)
pte_unmap_unlock(vmf->pte, vmf->ptl);
kunmap_atomic(kaddr);
flush_dcache_page(dst);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 0a54ffa..19389cd 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -574,7 +574,13 @@ EXPORT_SYMBOL_GPL(restore_online_page_callback);
void generic_online_page(struct page *page, unsigned int order)
{
- kernel_map_pages(page, 1 << order, 1);
+ /*
+ * Freeing the page with debug_pagealloc enabled will try to unmap it,
+ * so we should map it first. This is better than introducing a special
+ * case in page freeing fast path.
+ */
+ if (debug_pagealloc_enabled_static())
+ kernel_map_pages(page, 1 << order, 1);
__free_pages_core(page, order);
totalram_pages_add(1UL << order);
#ifdef CONFIG_HIGHMEM
diff --git a/mm/mmap.c b/mm/mmap.c
index 6756b8b..d681a20 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -195,8 +195,6 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
bool downgraded = false;
LIST_HEAD(uf);
- brk = untagged_addr(brk);
-
if (down_write_killable(&mm->mmap_sem))
return -EINTR;
@@ -1557,8 +1555,6 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
struct file *file = NULL;
unsigned long retval;
- addr = untagged_addr(addr);
-
if (!(flags & MAP_ANONYMOUS)) {
audit_mmap_fd(fd, flags);
file = fget(fd);
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index ef3973a..06852b8 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -307,7 +307,8 @@ static void mn_hlist_release(struct mmu_notifier_subscriptions *subscriptions,
* ->release returns.
*/
id = srcu_read_lock(&srcu);
- hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist)
+ hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist,
+ srcu_read_lock_held(&srcu))
/*
* If ->release runs before mmu_notifier_unregister it must be
* handled, as it's the only way for the driver to flush all
@@ -370,7 +371,8 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(subscription,
- &mm->notifier_subscriptions->list, hlist) {
+ &mm->notifier_subscriptions->list, hlist,
+ srcu_read_lock_held(&srcu)) {
if (subscription->ops->clear_flush_young)
young |= subscription->ops->clear_flush_young(
subscription, mm, start, end);
@@ -389,7 +391,8 @@ int __mmu_notifier_clear_young(struct mm_struct *mm,
id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(subscription,
- &mm->notifier_subscriptions->list, hlist) {
+ &mm->notifier_subscriptions->list, hlist,
+ srcu_read_lock_held(&srcu)) {
if (subscription->ops->clear_young)
young |= subscription->ops->clear_young(subscription,
mm, start, end);
@@ -407,7 +410,8 @@ int __mmu_notifier_test_young(struct mm_struct *mm,
id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(subscription,
- &mm->notifier_subscriptions->list, hlist) {
+ &mm->notifier_subscriptions->list, hlist,
+ srcu_read_lock_held(&srcu)) {
if (subscription->ops->test_young) {
young = subscription->ops->test_young(subscription, mm,
address);
@@ -428,7 +432,8 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(subscription,
- &mm->notifier_subscriptions->list, hlist) {
+ &mm->notifier_subscriptions->list, hlist,
+ srcu_read_lock_held(&srcu)) {
if (subscription->ops->change_pte)
subscription->ops->change_pte(subscription, mm, address,
pte);
@@ -476,7 +481,8 @@ static int mn_hlist_invalidate_range_start(
int id;
id = srcu_read_lock(&srcu);
- hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist) {
+ hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist,
+ srcu_read_lock_held(&srcu)) {
const struct mmu_notifier_ops *ops = subscription->ops;
if (ops->invalidate_range_start) {
@@ -528,7 +534,8 @@ mn_hlist_invalidate_end(struct mmu_notifier_subscriptions *subscriptions,
int id;
id = srcu_read_lock(&srcu);
- hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist) {
+ hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist,
+ srcu_read_lock_held(&srcu)) {
/*
* Call invalidate_range here too to avoid the need for the
* subsystem of having to register an invalidate_range_end
@@ -582,7 +589,8 @@ void __mmu_notifier_invalidate_range(struct mm_struct *mm,
id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(subscription,
- &mm->notifier_subscriptions->list, hlist) {
+ &mm->notifier_subscriptions->list, hlist,
+ srcu_read_lock_held(&srcu)) {
if (subscription->ops->invalidate_range)
subscription->ops->invalidate_range(subscription, mm,
start, end);
@@ -714,7 +722,8 @@ find_get_mmu_notifier(struct mm_struct *mm, const struct mmu_notifier_ops *ops)
spin_lock(&mm->notifier_subscriptions->lock);
hlist_for_each_entry_rcu(subscription,
- &mm->notifier_subscriptions->list, hlist) {
+ &mm->notifier_subscriptions->list, hlist,
+ lockdep_is_held(&mm->notifier_subscriptions->lock)) {
if (subscription->ops != ops)
continue;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 7a8e84f..311c0da 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -161,6 +161,31 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
return pages;
}
+/*
+ * Used when setting automatic NUMA hinting protection where it is
+ * critical that a numa hinting PMD is not confused with a bad PMD.
+ */
+static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd)
+{
+ pmd_t pmdval = pmd_read_atomic(pmd);
+
+ /* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ barrier();
+#endif
+
+ if (pmd_none(pmdval))
+ return 1;
+ if (pmd_trans_huge(pmdval))
+ return 0;
+ if (unlikely(pmd_bad(pmdval))) {
+ pmd_clear_bad(pmd);
+ return 1;
+ }
+
+ return 0;
+}
+
static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
pud_t *pud, unsigned long addr, unsigned long end,
pgprot_t newprot, int dirty_accountable, int prot_numa)
@@ -178,8 +203,17 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
unsigned long this_pages;
next = pmd_addr_end(addr, end);
- if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)
- && pmd_none_or_clear_bad(pmd))
+
+ /*
+ * Automatic NUMA balancing walks the tables with mmap_sem
+ * held for read. It's possible a parallel update to occur
+ * between pmd_trans_huge() and a pmd_none_or_clear_bad()
+ * check leading to a false positive and clearing.
+ * Hence, it's necessary to atomically read the PMD value
+ * for all the checks.
+ */
+ if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) &&
+ pmd_none_or_clear_bad_unless_trans_huge(pmd))
goto next;
/* invoke the mmu notifier if the pmd is populated */
diff --git a/mm/mremap.c b/mm/mremap.c
index 122938d..af36306 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -607,7 +607,6 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
LIST_HEAD(uf_unmap);
addr = untagged_addr(addr);
- new_addr = untagged_addr(new_addr);
if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
return ret;
diff --git a/mm/nommu.c b/mm/nommu.c
index bd2b4e5e..318df4e 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -370,10 +370,14 @@ void vm_unmap_aliases(void)
EXPORT_SYMBOL_GPL(vm_unmap_aliases);
/*
- * Implement a stub for vmalloc_sync_all() if the architecture chose not to
- * have one.
+ * Implement a stub for vmalloc_sync_[un]mapping() if the architecture
+ * chose not to have one.
*/
-void __weak vmalloc_sync_all(void)
+void __weak vmalloc_sync_mappings(void)
+{
+}
+
+void __weak vmalloc_sync_unmappings(void)
{
}
diff --git a/mm/shmem.c b/mm/shmem.c
index c8f7540..aad3ba7 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3386,8 +3386,6 @@ static const struct constant_table shmem_param_enums_huge[] = {
{"always", SHMEM_HUGE_ALWAYS },
{"within_size", SHMEM_HUGE_WITHIN_SIZE },
{"advise", SHMEM_HUGE_ADVISE },
- {"deny", SHMEM_HUGE_DENY },
- {"force", SHMEM_HUGE_FORCE },
{}
};
diff --git a/mm/slub.c b/mm/slub.c
index 17dc00e..6589b41 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1973,8 +1973,6 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
if (node == NUMA_NO_NODE)
searchnode = numa_mem_id();
- else if (!node_present_pages(node))
- searchnode = node_to_mem_node(node);
object = get_partial_node(s, get_node(s, searchnode), c, flags);
if (object || node != NUMA_NO_NODE)
@@ -2563,17 +2561,27 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
struct page *page;
page = c->page;
- if (!page)
+ if (!page) {
+ /*
+ * if the node is not online or has no normal memory, just
+ * ignore the node constraint
+ */
+ if (unlikely(node != NUMA_NO_NODE &&
+ !node_state(node, N_NORMAL_MEMORY)))
+ node = NUMA_NO_NODE;
goto new_slab;
+ }
redo:
if (unlikely(!node_match(page, node))) {
- int searchnode = node;
-
- if (node != NUMA_NO_NODE && !node_present_pages(node))
- searchnode = node_to_mem_node(node);
-
- if (unlikely(!node_match(page, searchnode))) {
+ /*
+ * same as above but node_match() being false already
+ * implies node != NUMA_NO_NODE
+ */
+ if (!node_state(node, N_NORMAL_MEMORY)) {
+ node = NUMA_NO_NODE;
+ goto redo;
+ } else {
stat(s, ALLOC_NODE_MISMATCH);
deactivate_slab(s, page, c->freelist, c);
goto new_slab;
@@ -2997,11 +3005,13 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
barrier();
if (likely(page == c->page)) {
- set_freepointer(s, tail_obj, c->freelist);
+ void **freelist = READ_ONCE(c->freelist);
+
+ set_freepointer(s, tail_obj, freelist);
if (unlikely(!this_cpu_cmpxchg_double(
s->cpu_slab->freelist, s->cpu_slab->tid,
- c->freelist, tid,
+ freelist, tid,
head, next_tid(tid)))) {
note_cmpxchg_failure("slab_free", s, tid);
@@ -3175,6 +3185,15 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
if (unlikely(!object)) {
/*
+ * We may have removed an object from c->freelist using
+ * the fastpath in the previous iteration; in that case,
+ * c->tid has not been bumped yet.
+ * Since ___slab_alloc() may reenable interrupts while
+ * allocating memory, we should bump c->tid now.
+ */
+ c->tid = next_tid(c->tid);
+
+ /*
* Invoking slow path likely have side-effect
* of re-populating per CPU c->freelist
*/
diff --git a/mm/sparse.c b/mm/sparse.c
index c184b69..65599e8 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -734,6 +734,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
struct mem_section *ms = __pfn_to_section(pfn);
bool section_is_early = early_section(ms);
struct page *memmap = NULL;
+ bool empty;
unsigned long *subsection_map = ms->usage
? &ms->usage->subsection_map[0] : NULL;
@@ -764,7 +765,8 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
* For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
*/
bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
- if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) {
+ empty = bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION);
+ if (empty) {
unsigned long section_nr = pfn_to_section_nr(pfn);
/*
@@ -779,13 +781,21 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
ms->usage = NULL;
}
memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
- ms->section_mem_map = (unsigned long)NULL;
+ /*
+ * Mark the section invalid so that valid_section()
+ * return false. This prevents code from dereferencing
+ * ms->usage array.
+ */
+ ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
}
if (section_is_early && memmap)
free_map_bootmem(memmap);
else
depopulate_section_memmap(pfn, nr_pages, altmap);
+
+ if (empty)
+ ms->section_mem_map = (unsigned long)NULL;
}
static struct page * __meminit section_activate(int nid, unsigned long pfn,
@@ -876,7 +886,7 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn,
* Poison uninitialized struct pages in order to catch invalid flags
* combinations.
*/
- page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages);
+ page_init_poison(memmap, sizeof(struct page) * nr_pages);
ms = __nr_to_section(section_nr);
set_section_nid(section_nr, nid);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2c33ff4..be33e61 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2899,10 +2899,6 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
p->bdev = inode->i_sb->s_bdev;
}
- inode_lock(inode);
- if (IS_SWAPFILE(inode))
- return -EBUSY;
-
return 0;
}
@@ -3157,36 +3153,41 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
mapping = swap_file->f_mapping;
inode = mapping->host;
- /* If S_ISREG(inode->i_mode) will do inode_lock(inode); */
error = claim_swapfile(p, inode);
if (unlikely(error))
goto bad_swap;
+ inode_lock(inode);
+ if (IS_SWAPFILE(inode)) {
+ error = -EBUSY;
+ goto bad_swap_unlock_inode;
+ }
+
/*
* Read the swap header.
*/
if (!mapping->a_ops->readpage) {
error = -EINVAL;
- goto bad_swap;
+ goto bad_swap_unlock_inode;
}
page = read_mapping_page(mapping, 0, swap_file);
if (IS_ERR(page)) {
error = PTR_ERR(page);
- goto bad_swap;
+ goto bad_swap_unlock_inode;
}
swap_header = kmap(page);
maxpages = read_swap_header(p, swap_header, inode);
if (unlikely(!maxpages)) {
error = -EINVAL;
- goto bad_swap;
+ goto bad_swap_unlock_inode;
}
/* OK, set up the swap map and apply the bad block list */
swap_map = vzalloc(maxpages);
if (!swap_map) {
error = -ENOMEM;
- goto bad_swap;
+ goto bad_swap_unlock_inode;
}
if (bdi_cap_stable_pages_required(inode_to_bdi(inode)))
@@ -3211,7 +3212,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
GFP_KERNEL);
if (!cluster_info) {
error = -ENOMEM;
- goto bad_swap;
+ goto bad_swap_unlock_inode;
}
for (ci = 0; ci < nr_cluster; ci++)
@@ -3220,7 +3221,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
p->percpu_cluster = alloc_percpu(struct percpu_cluster);
if (!p->percpu_cluster) {
error = -ENOMEM;
- goto bad_swap;
+ goto bad_swap_unlock_inode;
}
for_each_possible_cpu(cpu) {
struct percpu_cluster *cluster;
@@ -3234,13 +3235,13 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = swap_cgroup_swapon(p->type, maxpages);
if (error)
- goto bad_swap;
+ goto bad_swap_unlock_inode;
nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map,
cluster_info, maxpages, &span);
if (unlikely(nr_extents < 0)) {
error = nr_extents;
- goto bad_swap;
+ goto bad_swap_unlock_inode;
}
/* frontswap enabled? set up bit-per-page map for frontswap */
if (IS_ENABLED(CONFIG_FRONTSWAP))
@@ -3280,7 +3281,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = init_swap_address_space(p->type, maxpages);
if (error)
- goto bad_swap;
+ goto bad_swap_unlock_inode;
/*
* Flush any pending IO and dirty mappings before we start using this
@@ -3290,7 +3291,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = inode_drain_writes(inode);
if (error) {
inode->i_flags &= ~S_SWAPFILE;
- goto bad_swap;
+ goto bad_swap_unlock_inode;
}
mutex_lock(&swapon_mutex);
@@ -3315,6 +3316,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = 0;
goto out;
+bad_swap_unlock_inode:
+ inode_unlock(inode);
bad_swap:
free_percpu(p->percpu_cluster);
p->percpu_cluster = NULL;
@@ -3322,6 +3325,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
set_blocksize(p->bdev, p->old_block_size);
blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
}
+ inode = NULL;
destroy_swap_extents(p);
swap_cgroup_swapoff(p->type);
spin_lock(&swap_lock);
@@ -3333,13 +3337,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
kvfree(frontswap_map);
if (inced_nr_rotate_swap)
atomic_dec(&nr_rotate_swap);
- if (swap_file) {
- if (inode) {
- inode_unlock(inode);
- inode = NULL;
- }
+ if (swap_file)
filp_close(swap_file, NULL);
- }
out:
if (page && !IS_ERR(page)) {
kunmap(page);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 1f46c3b..6b8eeb0 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1295,7 +1295,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
* First make sure the mappings are removed from all page-tables
* before they are freed.
*/
- vmalloc_sync_all();
+ vmalloc_sync_unmappings();
/*
* TODO: to calculate a flush range without looping.
@@ -3128,16 +3128,19 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
EXPORT_SYMBOL(remap_vmalloc_range);
/*
- * Implement a stub for vmalloc_sync_all() if the architecture chose not to
- * have one.
+ * Implement stubs for vmalloc_sync_[un]mappings () if the architecture chose
+ * not to have one.
*
* The purpose of this function is to make sure the vmalloc area
* mappings are identical in all page-tables in the system.
*/
-void __weak vmalloc_sync_all(void)
+void __weak vmalloc_sync_mappings(void)
{
}
+void __weak vmalloc_sync_unmappings(void)
+{
+}
static int f(pte_t *pte, unsigned long addr, void *data)
{
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c05eb9e..8763705 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2415,10 +2415,13 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
/*
* Scan types proportional to swappiness and
* their relative recent reclaim efficiency.
- * Make sure we don't miss the last page
- * because of a round-off error.
+ * Make sure we don't miss the last page on
+ * the offlined memory cgroups because of a
+ * round-off error.
*/
- scan = DIV64_U64_ROUND_UP(scan * fraction[file],
+ scan = mem_cgroup_online(memcg) ?
+ div64_u64(scan * fraction[file], denominator) :
+ DIV64_U64_ROUND_UP(scan * fraction[file],
denominator);
break;
case SCAN_FILE:
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 43754d8..42f31c4 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -41,7 +41,6 @@
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <linux/rwlock.h>
#include <linux/zpool.h>
#include <linux/magic.h>
diff --git a/net/Kconfig b/net/Kconfig
index b0937a7..df8d8c9 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -52,6 +52,9 @@
config NET_EGRESS
bool
+config NET_REDIRECT
+ bool
+
config SKB_EXTENSIONS
bool
@@ -189,7 +192,6 @@
depends on NETFILTER_ADVANCED
select NETFILTER_FAMILY_BRIDGE
select SKB_EXTENSIONS
- default m
---help---
Enabling this option will let arptables resp. iptables see bridged
ARP resp. IP traffic. If you want a bridging firewall, you probably
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index f020950..a7c8dd7 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -789,6 +789,10 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex);
+ /* interface already disabled by batadv_iv_ogm_iface_disable */
+ if (!*ogm_buff)
+ return;
+
/* the interface gets activated here to avoid race conditions between
* the moment of activating the interface in
* hardif_activate_interface() where the originator mac is set and
diff --git a/net/bpfilter/main.c b/net/bpfilter/main.c
index 77396a0..efea487 100644
--- a/net/bpfilter/main.c
+++ b/net/bpfilter/main.c
@@ -10,7 +10,7 @@
#include <asm/unistd.h>
#include "msgfmt.h"
-int debug_fd;
+FILE *debug_f;
static int handle_get_cmd(struct mbox_request *cmd)
{
@@ -35,9 +35,10 @@ static void loop(void)
struct mbox_reply reply;
int n;
+ fprintf(debug_f, "testing the buffer\n");
n = read(0, &req, sizeof(req));
if (n != sizeof(req)) {
- dprintf(debug_fd, "invalid request %d\n", n);
+ fprintf(debug_f, "invalid request %d\n", n);
return;
}
@@ -47,7 +48,7 @@ static void loop(void)
n = write(1, &reply, sizeof(reply));
if (n != sizeof(reply)) {
- dprintf(debug_fd, "reply failed %d\n", n);
+ fprintf(debug_f, "reply failed %d\n", n);
return;
}
}
@@ -55,9 +56,10 @@ static void loop(void)
int main(void)
{
- debug_fd = open("/dev/kmsg", 00000002);
- dprintf(debug_fd, "Started bpfilter\n");
+ debug_f = fopen("/dev/kmsg", "w");
+ setvbuf(debug_f, 0, _IOLBF, 0);
+ fprintf(debug_f, "Started bpfilter\n");
loop();
- close(debug_fd);
+ fclose(debug_f);
return 0;
}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index dc3d2c1..0e3dbc5 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -34,7 +34,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
const struct nf_br_ops *nf_ops;
u8 state = BR_STATE_FORWARDING;
const unsigned char *dest;
- struct ethhdr *eth;
u16 vid = 0;
rcu_read_lock();
@@ -54,15 +53,14 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
BR_INPUT_SKB_CB(skb)->frag_max_size = 0;
skb_reset_mac_header(skb);
- eth = eth_hdr(skb);
skb_pull(skb, ETH_HLEN);
if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid, &state))
goto out;
if (IS_ENABLED(CONFIG_INET) &&
- (eth->h_proto == htons(ETH_P_ARP) ||
- eth->h_proto == htons(ETH_P_RARP)) &&
+ (eth_hdr(skb)->h_proto == htons(ETH_P_ARP) ||
+ eth_hdr(skb)->h_proto == htons(ETH_P_RARP)) &&
br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) {
br_do_proxy_suppress_arp(skb, br, vid, NULL);
} else if (IS_ENABLED(CONFIG_IPV6) &&
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 6856a6d..1f14b84 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -63,7 +63,8 @@ struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no)
{
struct net_bridge_port *p;
- list_for_each_entry_rcu(p, &br->port_list, list) {
+ list_for_each_entry_rcu(p, &br->port_list, list,
+ lockdep_is_held(&br->lock)) {
if (p->port_no == port_no)
return p;
}
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 03c7cdd..195d2d6 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -112,7 +112,8 @@ static struct caif_device_entry *caif_get(struct net_device *dev)
caif_device_list(dev_net(dev));
struct caif_device_entry *caifd;
- list_for_each_entry_rcu(caifd, &caifdevs->list, list) {
+ list_for_each_entry_rcu(caifd, &caifdevs->list, list,
+ lockdep_rtnl_is_held()) {
if (caifd->netdev == dev)
return caifd;
}
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 5b4bd82..f8ca5ed 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -3248,12 +3248,16 @@ static struct ceph_msg_data *ceph_msg_data_add(struct ceph_msg *msg)
static void ceph_msg_data_destroy(struct ceph_msg_data *data)
{
- if (data->type == CEPH_MSG_DATA_PAGELIST)
+ if (data->type == CEPH_MSG_DATA_PAGES && data->own_pages) {
+ int num_pages = calc_pages_for(data->alignment, data->length);
+ ceph_release_page_vector(data->pages, num_pages);
+ } else if (data->type == CEPH_MSG_DATA_PAGELIST) {
ceph_pagelist_release(data->pagelist);
+ }
}
void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
- size_t length, size_t alignment)
+ size_t length, size_t alignment, bool own_pages)
{
struct ceph_msg_data *data;
@@ -3265,6 +3269,7 @@ void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
data->pages = pages;
data->length = length;
data->alignment = alignment & ~PAGE_MASK;
+ data->own_pages = own_pages;
msg->data_length += length;
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b68b376..af868d3 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -962,7 +962,7 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
BUG_ON(length > (u64) SIZE_MAX);
if (length)
ceph_msg_data_add_pages(msg, osd_data->pages,
- length, osd_data->alignment);
+ length, osd_data->alignment, false);
} else if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGELIST) {
BUG_ON(!length);
ceph_msg_data_add_pagelist(msg, osd_data->pagelist);
@@ -4436,9 +4436,7 @@ static void handle_watch_notify(struct ceph_osd_client *osdc,
CEPH_MSG_DATA_PAGES);
*lreq->preply_pages = data->pages;
*lreq->preply_len = data->length;
- } else {
- ceph_release_page_vector(data->pages,
- calc_pages_for(0, data->length));
+ data->own_pages = false;
}
}
lreq->notify_finish_error = return_code;
@@ -5506,9 +5504,6 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
return m;
}
-/*
- * TODO: switch to a msg-owned pagelist
- */
static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr)
{
struct ceph_msg *m;
@@ -5522,7 +5517,6 @@ static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr)
if (data_len) {
struct page **pages;
- struct ceph_osd_data osd_data;
pages = ceph_alloc_page_vector(calc_pages_for(0, data_len),
GFP_NOIO);
@@ -5531,9 +5525,7 @@ static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr)
return NULL;
}
- ceph_osd_data_pages_init(&osd_data, pages, data_len, 0, false,
- false);
- ceph_osdc_msg_data_add(m, &osd_data);
+ ceph_msg_data_add_pages(m, pages, data_len, 0, true);
}
return m;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 4e0de14..2a6e63a 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -710,6 +710,15 @@ int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
}
EXPORT_SYMBOL(ceph_pg_poolid_by_name);
+u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id)
+{
+ struct ceph_pg_pool_info *pi;
+
+ pi = __lookup_pg_pool(&map->pg_pools, id);
+ return pi ? pi->flags : 0;
+}
+EXPORT_SYMBOL(ceph_pg_pool_flags);
+
static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
{
rb_erase(&pi->node, root);
diff --git a/net/compat.c b/net/compat.c
index 47d99c7..4bed96e 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -33,10 +33,10 @@
#include <linux/uaccess.h>
#include <net/compat.h>
-int get_compat_msghdr(struct msghdr *kmsg,
- struct compat_msghdr __user *umsg,
- struct sockaddr __user **save_addr,
- struct iovec **iov)
+int __get_compat_msghdr(struct msghdr *kmsg,
+ struct compat_msghdr __user *umsg,
+ struct sockaddr __user **save_addr,
+ compat_uptr_t *ptr, compat_size_t *len)
{
struct compat_msghdr msg;
ssize_t err;
@@ -79,10 +79,26 @@ int get_compat_msghdr(struct msghdr *kmsg,
return -EMSGSIZE;
kmsg->msg_iocb = NULL;
+ *ptr = msg.msg_iov;
+ *len = msg.msg_iovlen;
+ return 0;
+}
- err = compat_import_iovec(save_addr ? READ : WRITE,
- compat_ptr(msg.msg_iov), msg.msg_iovlen,
- UIO_FASTIOV, iov, &kmsg->msg_iter);
+int get_compat_msghdr(struct msghdr *kmsg,
+ struct compat_msghdr __user *umsg,
+ struct sockaddr __user **save_addr,
+ struct iovec **iov)
+{
+ compat_uptr_t ptr;
+ compat_size_t len;
+ ssize_t err;
+
+ err = __get_compat_msghdr(kmsg, umsg, save_addr, &ptr, &len);
+ if (err)
+ return err;
+
+ err = compat_import_iovec(save_addr ? READ : WRITE, compat_ptr(ptr),
+ len, UIO_FASTIOV, iov, &kmsg->msg_iter);
return err < 0 ? err : 0;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index a69e8bd..500bba8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -146,7 +146,6 @@
#include "net-sysfs.h"
#define MAX_GRO_SKBS 8
-#define MAX_NEST_DEV 8
/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)
@@ -331,6 +330,12 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
name_node = netdev_name_node_lookup(net, name);
if (!name_node)
return -ENOENT;
+ /* lookup might have found our primary name or a name belonging
+ * to another device.
+ */
+ if (name_node == dev->name_node || name_node->dev != dev)
+ return -EINVAL;
+
__netdev_name_node_alt_destroy(name_node);
return 0;
@@ -3071,6 +3076,8 @@ static u16 skb_tx_hash(const struct net_device *dev,
if (skb_rx_queue_recorded(skb)) {
hash = skb_get_rx_queue(skb);
+ if (hash >= qoffset)
+ hash -= qoffset;
while (unlikely(hash >= qcount))
hash -= qcount;
return hash + qoffset;
@@ -3657,26 +3664,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_calculate_pkt_len(skb, q);
if (q->flags & TCQ_F_NOLOCK) {
- if ((q->flags & TCQ_F_CAN_BYPASS) && READ_ONCE(q->empty) &&
- qdisc_run_begin(q)) {
- if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
- &q->state))) {
- __qdisc_drop(skb, &to_free);
- rc = NET_XMIT_DROP;
- goto end_run;
- }
- qdisc_bstats_cpu_update(q, skb);
-
- rc = NET_XMIT_SUCCESS;
- if (sch_direct_xmit(skb, q, dev, txq, NULL, true))
- __qdisc_run(q);
-
-end_run:
- qdisc_run_end(q);
- } else {
- rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
- qdisc_run(q);
- }
+ rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+ qdisc_run(q);
if (unlikely(to_free))
kfree_skb_list(to_free);
@@ -4527,14 +4516,14 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
/* Reinjected packets coming from act_mirred or similar should
* not get XDP generic processing.
*/
- if (skb_cloned(skb) || skb_is_tc_redirected(skb))
+ if (skb_is_redirected(skb))
return XDP_PASS;
/* XDP packets must be linear and must have sufficient headroom
* of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
* native XDP provides, thus we need to do it here as well.
*/
- if (skb_is_nonlinear(skb) ||
+ if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
skb_headroom(skb) < XDP_PACKET_HEADROOM) {
int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
int troom = skb->tail + skb->data_len - skb->end;
@@ -5074,7 +5063,7 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
goto out;
}
#endif
- skb_reset_tc(skb);
+ skb_reset_redirect(skb);
skip_classify:
if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
goto drop;
@@ -5206,7 +5195,7 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
*
* More direct receive version of netif_receive_skb(). It should
* only be used by callers that have a need to skip RPS and Generic XDP.
- * Caller must also take care of handling if (page_is_)pfmemalloc.
+ * Caller must also take care of handling if ``(page_is_)pfmemalloc``.
*
* This function may only be called from softirq context and interrupts
* should be enabled.
@@ -7201,8 +7190,8 @@ static int __netdev_walk_all_lower_dev(struct net_device *dev,
return 0;
}
-static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
- struct list_head **iter)
+struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
{
struct netdev_adjacent *lower;
@@ -7214,6 +7203,7 @@ static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
return lower->dev;
}
+EXPORT_SYMBOL(netdev_next_lower_dev_rcu);
static u8 __netdev_upper_depth(struct net_device *dev)
{
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 549ee56..b831c55 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2103,11 +2103,11 @@ static int devlink_dpipe_entry_put(struct sk_buff *skb,
static struct devlink_dpipe_table *
devlink_dpipe_table_find(struct list_head *dpipe_tables,
- const char *table_name)
+ const char *table_name, struct devlink *devlink)
{
struct devlink_dpipe_table *table;
-
- list_for_each_entry_rcu(table, dpipe_tables, list) {
+ list_for_each_entry_rcu(table, dpipe_tables, list,
+ lockdep_is_held(&devlink->lock)) {
if (!strcmp(table->name, table_name))
return table;
}
@@ -2226,7 +2226,7 @@ static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table)
return -EINVAL;
@@ -2382,7 +2382,7 @@ static int devlink_dpipe_table_counters_set(struct devlink *devlink,
struct devlink_dpipe_table *table;
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table)
return -EINVAL;
@@ -3352,34 +3352,41 @@ devlink_param_value_get_from_info(const struct devlink_param *param,
struct genl_info *info,
union devlink_param_value *value)
{
+ struct nlattr *param_data;
int len;
- if (param->type != DEVLINK_PARAM_TYPE_BOOL &&
- !info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])
+ param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA];
+
+ if (param->type != DEVLINK_PARAM_TYPE_BOOL && !param_data)
return -EINVAL;
switch (param->type) {
case DEVLINK_PARAM_TYPE_U8:
- value->vu8 = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ if (nla_len(param_data) != sizeof(u8))
+ return -EINVAL;
+ value->vu8 = nla_get_u8(param_data);
break;
case DEVLINK_PARAM_TYPE_U16:
- value->vu16 = nla_get_u16(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ if (nla_len(param_data) != sizeof(u16))
+ return -EINVAL;
+ value->vu16 = nla_get_u16(param_data);
break;
case DEVLINK_PARAM_TYPE_U32:
- value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ if (nla_len(param_data) != sizeof(u32))
+ return -EINVAL;
+ value->vu32 = nla_get_u32(param_data);
break;
case DEVLINK_PARAM_TYPE_STRING:
- len = strnlen(nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]),
- nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]));
- if (len == nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) ||
+ len = strnlen(nla_data(param_data), nla_len(param_data));
+ if (len == nla_len(param_data) ||
len >= __DEVLINK_PARAM_MAX_STRING_VALUE)
return -EINVAL;
- strcpy(value->vstr,
- nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]));
+ strcpy(value->vstr, nla_data(param_data));
break;
case DEVLINK_PARAM_TYPE_BOOL:
- value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ?
- true : false;
+ if (param_data && nla_len(param_data))
+ return -EINVAL;
+ value->vbool = nla_get_flag(param_data);
break;
}
return 0;
@@ -5951,6 +5958,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
[DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 },
[DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
@@ -6854,7 +6863,7 @@ bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
rcu_read_lock();
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
enabled = false;
if (table)
enabled = table->counters_enabled;
@@ -6878,26 +6887,34 @@ int devlink_dpipe_table_register(struct devlink *devlink,
void *priv, bool counter_control_extern)
{
struct devlink_dpipe_table *table;
-
- if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name))
- return -EEXIST;
+ int err = 0;
if (WARN_ON(!table_ops->size_get))
return -EINVAL;
+ mutex_lock(&devlink->lock);
+
+ if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name,
+ devlink)) {
+ err = -EEXIST;
+ goto unlock;
+ }
+
table = kzalloc(sizeof(*table), GFP_KERNEL);
- if (!table)
- return -ENOMEM;
+ if (!table) {
+ err = -ENOMEM;
+ goto unlock;
+ }
table->name = table_name;
table->table_ops = table_ops;
table->priv = priv;
table->counter_control_extern = counter_control_extern;
- mutex_lock(&devlink->lock);
list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
+unlock:
mutex_unlock(&devlink->lock);
- return 0;
+ return err;
}
EXPORT_SYMBOL_GPL(devlink_dpipe_table_register);
@@ -6914,7 +6931,7 @@ void devlink_dpipe_table_unregister(struct devlink *devlink,
mutex_lock(&devlink->lock);
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table)
goto unlock;
list_del_rcu(&table->list);
@@ -7071,7 +7088,7 @@ int devlink_dpipe_table_resource_set(struct devlink *devlink,
mutex_lock(&devlink->lock);
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table) {
err = -EINVAL;
goto out;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 3e7e152..bd7eba9 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -974,7 +974,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh = nlmsg_data(nlh);
frh->family = ops->family;
- frh->table = rule->table;
+ frh->table = rule->table < 256 ? rule->table : RT_TABLE_COMPAT;
if (nla_put_u32(skb, FRA_TABLE, rule->table))
goto nla_put_failure;
if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 0642f91..b4c87fe 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -53,30 +53,60 @@ static void cgrp_css_free(struct cgroup_subsys_state *css)
kfree(css_cls_state(css));
}
+/*
+ * To avoid freezing of sockets creation for tasks with big number of threads
+ * and opened sockets lets release file_lock every 1000 iterated descriptors.
+ * New sockets will already have been created with new classid.
+ */
+
+struct update_classid_context {
+ u32 classid;
+ unsigned int batch;
+};
+
+#define UPDATE_CLASSID_BATCH 1000
+
static int update_classid_sock(const void *v, struct file *file, unsigned n)
{
int err;
+ struct update_classid_context *ctx = (void *)v;
struct socket *sock = sock_from_file(file, &err);
if (sock) {
spin_lock(&cgroup_sk_update_lock);
- sock_cgroup_set_classid(&sock->sk->sk_cgrp_data,
- (unsigned long)v);
+ sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid);
spin_unlock(&cgroup_sk_update_lock);
}
+ if (--ctx->batch == 0) {
+ ctx->batch = UPDATE_CLASSID_BATCH;
+ return n + 1;
+ }
return 0;
}
+static void update_classid_task(struct task_struct *p, u32 classid)
+{
+ struct update_classid_context ctx = {
+ .classid = classid,
+ .batch = UPDATE_CLASSID_BATCH
+ };
+ unsigned int fd = 0;
+
+ do {
+ task_lock(p);
+ fd = iterate_fd(p->files, fd, update_classid_sock, &ctx);
+ task_unlock(p);
+ cond_resched();
+ } while (fd);
+}
+
static void cgrp_attach(struct cgroup_taskset *tset)
{
struct cgroup_subsys_state *css;
struct task_struct *p;
cgroup_taskset_for_each(p, css, tset) {
- task_lock(p);
- iterate_fd(p->files, 0, update_classid_sock,
- (void *)(unsigned long)css_cls_state(css)->classid);
- task_unlock(p);
+ update_classid_task(p, css_cls_state(css)->classid);
}
}
@@ -98,10 +128,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
css_task_iter_start(css, 0, &it);
while ((p = css_task_iter_next(&it))) {
- task_lock(p);
- iterate_fd(p->files, 0, update_classid_sock,
- (void *)(unsigned long)cs->classid);
- task_unlock(p);
+ update_classid_task(p, cs->classid);
cond_resched();
}
css_task_iter_end(&it);
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 9b7cbe3..10d2b25 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -99,8 +99,7 @@ EXPORT_SYMBOL(page_pool_create);
static void __page_pool_return_page(struct page_pool *pool, struct page *page);
noinline
-static struct page *page_pool_refill_alloc_cache(struct page_pool *pool,
- bool refill)
+static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
{
struct ptr_ring *r = &pool->ring;
struct page *page;
@@ -141,8 +140,7 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool,
page = NULL;
break;
}
- } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL &&
- refill);
+ } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);
/* Return last page */
if (likely(pool->alloc.count > 0))
@@ -155,20 +153,16 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool,
/* fast path */
static struct page *__page_pool_get_cached(struct page_pool *pool)
{
- bool refill = false;
struct page *page;
- /* Test for safe-context, caller should provide this guarantee */
- if (likely(in_serving_softirq())) {
- if (likely(pool->alloc.count)) {
- /* Fast-path */
- page = pool->alloc.cache[--pool->alloc.count];
- return page;
- }
- refill = true;
+ /* Caller MUST guarantee safe non-concurrent access, e.g. softirq */
+ if (likely(pool->alloc.count)) {
+ /* Fast-path */
+ page = pool->alloc.cache[--pool->alloc.count];
+ } else {
+ page = page_pool_refill_alloc_cache(pool);
}
- page = page_pool_refill_alloc_cache(pool, refill);
return page;
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index acc849d..d0641bb 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3362,7 +3362,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
/* skb was 'freed' by stack, so clean few
* bits and reuse it
*/
- skb_reset_tc(skb);
+ skb_reset_redirect(skb);
} while (--burst > 0);
goto out; /* Skips xmit_mode M_START_XMIT */
} else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 09c44bf..e1152f4 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3504,27 +3504,25 @@ static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr,
if (err)
return err;
- alt_ifname = nla_data(attr);
+ alt_ifname = nla_strdup(attr, GFP_KERNEL);
+ if (!alt_ifname)
+ return -ENOMEM;
+
if (cmd == RTM_NEWLINKPROP) {
- alt_ifname = kstrdup(alt_ifname, GFP_KERNEL);
- if (!alt_ifname)
- return -ENOMEM;
err = netdev_name_node_alt_create(dev, alt_ifname);
- if (err) {
- kfree(alt_ifname);
- return err;
- }
+ if (!err)
+ alt_ifname = NULL;
} else if (cmd == RTM_DELLINKPROP) {
err = netdev_name_node_alt_destroy(dev, alt_ifname);
- if (err)
- return err;
} else {
- WARN_ON(1);
- return 0;
+ WARN_ON_ONCE(1);
+ err = -EINVAL;
}
- *changed = true;
- return 0;
+ kfree(alt_ifname);
+ if (!err)
+ *changed = true;
+ return err;
}
static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 864cb9e..e1101a4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -467,7 +467,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
return NULL;
}
- /* use OR instead of assignment to avoid clearing of bits in mask */
if (pfmemalloc)
skb->pfmemalloc = 1;
skb->head_frag = 1;
@@ -527,7 +526,6 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
return NULL;
}
- /* use OR instead of assignment to avoid clearing of bits in mask */
if (nc->page.pfmemalloc)
skb->pfmemalloc = 1;
skb->head_frag = 1;
@@ -4805,9 +4803,9 @@ static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb,
typeof(IPPROTO_IP) proto,
unsigned int off)
{
- switch (proto) {
- int err;
+ int err;
+ switch (proto) {
case IPPROTO_TCP:
err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr),
off + MAX_TCP_HDR_LEN);
diff --git a/net/core/sock.c b/net/core/sock.c
index a4c8fac..8f71684 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1830,7 +1830,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
atomic_set(&newsk->sk_zckey, 0);
sock_reset_flag(newsk, SOCK_DONE);
- mem_cgroup_sk_alloc(newsk);
+
+ /* sk->sk_memcg will be populated at accept() time */
+ newsk->sk_memcg = NULL;
+
cgroup_sk_alloc(&newsk->sk_cgrp_data);
rcu_read_lock();
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 085cef5..b70c844 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -233,8 +233,11 @@ static void sock_map_free(struct bpf_map *map)
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
int i;
+ /* After the sync no updates or deletes will be in-flight so it
+ * is safe to walk map and remove entries without risking a race
+ * in EEXIST update case.
+ */
synchronize_rcu();
- raw_spin_lock_bh(&stab->lock);
for (i = 0; i < stab->map.max_entries; i++) {
struct sock **psk = &stab->sks[i];
struct sock *sk;
@@ -248,7 +251,6 @@ static void sock_map_free(struct bpf_map *map)
release_sock(sk);
}
}
- raw_spin_unlock_bh(&stab->lock);
/* wait for psock readers accessing its map link */
synchronize_rcu();
@@ -863,10 +865,13 @@ static void sock_hash_free(struct bpf_map *map)
struct hlist_node *node;
int i;
+ /* After the sync no updates or deletes will be in-flight so it
+ * is safe to walk map and remove entries without risking a race
+ * in EEXIST update case.
+ */
synchronize_rcu();
for (i = 0; i < htab->buckets_num; i++) {
bucket = sock_hash_select_bucket(htab, i);
- raw_spin_lock_bh(&bucket->lock);
hlist_for_each_entry_safe(elem, node, &bucket->head, node) {
hlist_del_rcu(&elem->node);
lock_sock(elem->sk);
@@ -875,7 +880,6 @@ static void sock_hash_free(struct bpf_map *map)
rcu_read_unlock();
release_sock(elem->sk);
}
- raw_spin_unlock_bh(&bucket->lock);
}
/* wait for psock readers accessing its map link */
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index a7662e7..760e6ea 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -117,7 +117,9 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
/* port.c */
int dsa_port_set_state(struct dsa_port *dp, u8 state,
struct switchdev_trans *trans);
+int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
+void dsa_port_disable_rt(struct dsa_port *dp);
void dsa_port_disable(struct dsa_port *dp);
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 774facb..ec13dc6 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -63,7 +63,7 @@ static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
}
-int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
+int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy)
{
struct dsa_switch *ds = dp->ds;
int port = dp->index;
@@ -78,14 +78,31 @@ int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
if (!dp->bridge_dev)
dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+ if (dp->pl)
+ phylink_start(dp->pl);
+
return 0;
}
-void dsa_port_disable(struct dsa_port *dp)
+int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
+{
+ int err;
+
+ rtnl_lock();
+ err = dsa_port_enable_rt(dp, phy);
+ rtnl_unlock();
+
+ return err;
+}
+
+void dsa_port_disable_rt(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
int port = dp->index;
+ if (dp->pl)
+ phylink_stop(dp->pl);
+
if (!dp->bridge_dev)
dsa_port_set_state_now(dp, BR_STATE_DISABLED);
@@ -93,6 +110,13 @@ void dsa_port_disable(struct dsa_port *dp)
ds->ops->port_disable(ds, port);
}
+void dsa_port_disable(struct dsa_port *dp)
+{
+ rtnl_lock();
+ dsa_port_disable_rt(dp);
+ rtnl_unlock();
+}
+
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
{
struct dsa_notifier_bridge_info info = {
@@ -614,10 +638,6 @@ static int dsa_port_phylink_register(struct dsa_port *dp)
goto err_phy_connect;
}
- rtnl_lock();
- phylink_start(dp->pl);
- rtnl_unlock();
-
return 0;
err_phy_connect:
@@ -628,9 +648,14 @@ static int dsa_port_phylink_register(struct dsa_port *dp)
int dsa_port_link_register_of(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
+ struct device_node *phy_np;
- if (!ds->ops->adjust_link)
- return dsa_port_phylink_register(dp);
+ if (!ds->ops->adjust_link) {
+ phy_np = of_parse_phandle(dp->dn, "phy-handle", 0);
+ if (of_phy_is_fixed_link(dp->dn) || phy_np)
+ return dsa_port_phylink_register(dp);
+ return 0;
+ }
dev_warn(ds->dev,
"Using legacy PHYLIB callbacks. Please migrate to PHYLINK!\n");
@@ -645,11 +670,12 @@ void dsa_port_link_unregister_of(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
- if (!ds->ops->adjust_link) {
+ if (!ds->ops->adjust_link && dp->pl) {
rtnl_lock();
phylink_disconnect_phy(dp->pl);
rtnl_unlock();
phylink_destroy(dp->pl);
+ dp->pl = NULL;
return;
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 088c886..ddc0f92 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -88,12 +88,10 @@ static int dsa_slave_open(struct net_device *dev)
goto clear_allmulti;
}
- err = dsa_port_enable(dp, dev->phydev);
+ err = dsa_port_enable_rt(dp, dev->phydev);
if (err)
goto clear_promisc;
- phylink_start(dp->pl);
-
return 0;
clear_promisc:
@@ -114,9 +112,7 @@ static int dsa_slave_close(struct net_device *dev)
struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
- phylink_stop(dp->pl);
-
- dsa_port_disable(dp);
+ dsa_port_disable_rt(dp);
dev_mc_unsync(master, dev);
dev_uc_unsync(master, dev);
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 2fb6c26..b97ad93 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -298,47 +298,4 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
}
EXPORT_SYMBOL_GPL(dsa_8021q_xmit);
-/* In the DSA packet_type handler, skb->data points in the middle of the VLAN
- * tag, after tpid and before tci. This is because so far, ETH_HLEN
- * (DMAC, SMAC, EtherType) bytes were pulled.
- * There are 2 bytes of VLAN tag left in skb->data, and upper
- * layers expect the 'real' EtherType to be consumed as well.
- * Coincidentally, a VLAN header is also of the same size as
- * the number of bytes that need to be pulled.
- *
- * skb_mac_header skb->data
- * | |
- * v v
- * | | | | | | | | | | | | | | | | | | |
- * +-----------------------+-----------------------+-------+-------+-------+
- * | Destination MAC | Source MAC | TPID | TCI | EType |
- * +-----------------------+-----------------------+-------+-------+-------+
- * ^ | |
- * |<--VLAN_HLEN-->to <---VLAN_HLEN--->
- * from |
- * >>>>>>> v
- * >>>>>>> | | | | | | | | | | | | | | |
- * >>>>>>> +-----------------------+-----------------------+-------+
- * >>>>>>> | Destination MAC | Source MAC | EType |
- * +-----------------------+-----------------------+-------+
- * ^ ^
- * (now part of | |
- * skb->head) skb_mac_header skb->data
- */
-struct sk_buff *dsa_8021q_remove_header(struct sk_buff *skb)
-{
- u8 *from = skb_mac_header(skb);
- u8 *dest = from + VLAN_HLEN;
-
- memmove(dest, from, ETH_HLEN - VLAN_HLEN);
- skb_pull(skb, VLAN_HLEN);
- skb_push(skb, ETH_HLEN);
- skb_reset_mac_header(skb);
- skb_reset_mac_len(skb);
- skb_pull_rcsum(skb, ETH_HLEN);
-
- return skb;
-}
-EXPORT_SYMBOL_GPL(dsa_8021q_remove_header);
-
MODULE_LICENSE("GPL v2");
diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c
index 466ffa9..55b0069 100644
--- a/net/dsa/tag_ar9331.c
+++ b/net/dsa/tag_ar9331.c
@@ -31,7 +31,7 @@ static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb,
__le16 *phdr;
u16 hdr;
- if (skb_cow_head(skb, 0) < 0)
+ if (skb_cow_head(skb, AR9331_HDR_LEN) < 0)
return NULL;
phdr = skb_push(skb, AR9331_HDR_LEN);
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 9c31141..9169b63 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -140,6 +140,8 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
/* Remove Broadcom tag and update checksum */
skb_pull_rcsum(skb, BRCM_TAG_LEN);
+ skb->offload_fwd_mark = 1;
+
return skb;
}
#endif
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index c8a128c..70db7c9 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -33,7 +33,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
struct dsa_port *dp = dsa_slave_to_port(dev);
u16 *phdr, hdr;
- if (skb_cow_head(skb, 0) < 0)
+ if (skb_cow_head(skb, QCA_HDR_LEN) < 0)
return NULL;
skb_push(skb, QCA_HDR_LEN);
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 5366ea4..d553bf3 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -250,14 +250,14 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
{
struct sja1105_meta meta = {0};
int source_port, switch_id;
- struct vlan_ethhdr *hdr;
+ struct ethhdr *hdr;
u16 tpid, vid, tci;
bool is_link_local;
bool is_tagged;
bool is_meta;
- hdr = vlan_eth_hdr(skb);
- tpid = ntohs(hdr->h_vlan_proto);
+ hdr = eth_hdr(skb);
+ tpid = ntohs(hdr->h_proto);
is_tagged = (tpid == ETH_P_SJA1105);
is_link_local = sja1105_is_link_local(skb);
is_meta = sja1105_is_meta_frame(skb);
@@ -266,7 +266,12 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
if (is_tagged) {
/* Normal traffic path. */
- tci = ntohs(hdr->h_vlan_TCI);
+ skb_push_rcsum(skb, ETH_HLEN);
+ __skb_vlan_pop(skb, &tci);
+ skb_pull_rcsum(skb, ETH_HLEN);
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+
vid = tci & VLAN_VID_MASK;
source_port = dsa_8021q_rx_source_port(vid);
switch_id = dsa_8021q_rx_switch_id(vid);
@@ -295,12 +300,6 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
return NULL;
}
- /* Delete/overwrite fake VLAN header, DSA expects to not find
- * it there, see dsa_switch_rcv: skb_push(skb, ETH_HLEN).
- */
- if (is_tagged)
- skb = dsa_8021q_remove_header(skb);
-
return sja1105_rcv_meta_state_machine(skb, &meta, is_link_local,
is_meta);
}
diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c
index fce45da..ef91975 100644
--- a/net/ethtool/bitset.c
+++ b/net/ethtool/bitset.c
@@ -305,7 +305,8 @@ int ethnl_put_bitset32(struct sk_buff *skb, int attrtype, const u32 *val,
static const struct nla_policy bitset_policy[ETHTOOL_A_BITSET_MAX + 1] = {
[ETHTOOL_A_BITSET_UNSPEC] = { .type = NLA_REJECT },
[ETHTOOL_A_BITSET_NOMASK] = { .type = NLA_FLAG },
- [ETHTOOL_A_BITSET_SIZE] = { .type = NLA_U32 },
+ [ETHTOOL_A_BITSET_SIZE] = NLA_POLICY_MAX(NLA_U32,
+ ETHNL_MAX_BITSET_SIZE),
[ETHTOOL_A_BITSET_BITS] = { .type = NLA_NESTED },
[ETHTOOL_A_BITSET_VALUE] = { .type = NLA_BINARY },
[ETHTOOL_A_BITSET_MASK] = { .type = NLA_BINARY },
@@ -447,7 +448,10 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits,
"mask only allowed in compact bitset");
return -EINVAL;
}
+
no_mask = tb[ETHTOOL_A_BITSET_NOMASK];
+ if (no_mask)
+ ethnl_bitmap32_clear(bitmap, 0, nbits, mod);
nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) {
bool old_val, new_val;
diff --git a/net/ethtool/bitset.h b/net/ethtool/bitset.h
index b8247e3..b849f9d 100644
--- a/net/ethtool/bitset.h
+++ b/net/ethtool/bitset.h
@@ -3,6 +3,8 @@
#ifndef _NET_ETHTOOL_BITSET_H
#define _NET_ETHTOOL_BITSET_H
+#define ETHNL_MAX_BITSET_SIZE S16_MAX
+
typedef const char (*const ethnl_string_array_t)[ETH_GSTRING_LEN];
int ethnl_bitset_is_compact(const struct nlattr *bitset, bool *compact);
diff --git a/net/ethtool/debug.c b/net/ethtool/debug.c
index aaef484..92599ad 100644
--- a/net/ethtool/debug.c
+++ b/net/ethtool/debug.c
@@ -107,8 +107,9 @@ int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
dev = req_info.dev;
+ ret = -EOPNOTSUPP;
if (!dev->ethtool_ops->get_msglevel || !dev->ethtool_ops->set_msglevel)
- return -EOPNOTSUPP;
+ goto out_dev;
rtnl_lock();
ret = ethnl_ops_begin(dev);
@@ -129,6 +130,7 @@ int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info)
ethnl_ops_complete(dev);
out_rtnl:
rtnl_unlock();
+out_dev:
dev_put(dev);
return ret;
}
diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c
index 5d16cb4..6e9e0b5 100644
--- a/net/ethtool/linkinfo.c
+++ b/net/ethtool/linkinfo.c
@@ -126,9 +126,10 @@ int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
dev = req_info.dev;
+ ret = -EOPNOTSUPP;
if (!dev->ethtool_ops->get_link_ksettings ||
!dev->ethtool_ops->set_link_ksettings)
- return -EOPNOTSUPP;
+ goto out_dev;
rtnl_lock();
ret = ethnl_ops_begin(dev);
@@ -162,6 +163,7 @@ int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info)
ethnl_ops_complete(dev);
out_rtnl:
rtnl_unlock();
+out_dev:
dev_put(dev);
return ret;
}
diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
index 96f20be..18cc37be 100644
--- a/net/ethtool/linkmodes.c
+++ b/net/ethtool/linkmodes.c
@@ -338,9 +338,10 @@ int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
dev = req_info.dev;
+ ret = -EOPNOTSUPP;
if (!dev->ethtool_ops->get_link_ksettings ||
!dev->ethtool_ops->set_link_ksettings)
- return -EOPNOTSUPP;
+ goto out_dev;
rtnl_lock();
ret = ethnl_ops_begin(dev);
@@ -370,6 +371,7 @@ int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info)
ethnl_ops_complete(dev);
out_rtnl:
rtnl_unlock();
+out_dev:
dev_put(dev);
return ret;
}
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 180c194..fc9e0b8 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -40,6 +40,7 @@ int ethnl_parse_header(struct ethnl_req_info *req_info,
struct nlattr *tb[ETHTOOL_A_HEADER_MAX + 1];
const struct nlattr *devname_attr;
struct net_device *dev = NULL;
+ u32 flags = 0;
int ret;
if (!header) {
@@ -50,8 +51,17 @@ int ethnl_parse_header(struct ethnl_req_info *req_info,
ethnl_header_policy, extack);
if (ret < 0)
return ret;
- devname_attr = tb[ETHTOOL_A_HEADER_DEV_NAME];
+ if (tb[ETHTOOL_A_HEADER_FLAGS]) {
+ flags = nla_get_u32(tb[ETHTOOL_A_HEADER_FLAGS]);
+ if (flags & ~ETHTOOL_FLAG_ALL) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_HEADER_FLAGS],
+ "unrecognized request flags");
+ nl_set_extack_cookie_u32(extack, ETHTOOL_FLAG_ALL);
+ return -EOPNOTSUPP;
+ }
+ }
+ devname_attr = tb[ETHTOOL_A_HEADER_DEV_NAME];
if (tb[ETHTOOL_A_HEADER_DEV_INDEX]) {
u32 ifindex = nla_get_u32(tb[ETHTOOL_A_HEADER_DEV_INDEX]);
@@ -90,9 +100,7 @@ int ethnl_parse_header(struct ethnl_req_info *req_info,
}
req_info->dev = dev;
- if (tb[ETHTOOL_A_HEADER_FLAGS])
- req_info->flags = nla_get_u32(tb[ETHTOOL_A_HEADER_FLAGS]);
-
+ req_info->flags = flags;
return 0;
}
diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c
index e1b8a65..55e1eca 100644
--- a/net/ethtool/wol.c
+++ b/net/ethtool/wol.c
@@ -128,8 +128,9 @@ int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
dev = req_info.dev;
+ ret = -EOPNOTSUPP;
if (!dev->ethtool_ops->get_wol || !dev->ethtool_ops->set_wol)
- return -EOPNOTSUPP;
+ goto out_dev;
rtnl_lock();
ret = ethnl_ops_begin(dev);
@@ -172,6 +173,7 @@ int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info)
ethnl_ops_complete(dev);
out_rtnl:
rtnl_unlock();
+out_dev:
dev_put(dev);
return ret;
}
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 364ea2c..a64bb64 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -155,7 +155,8 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
new_node->seq_out[i] = seq_out;
spin_lock_bh(&hsr->list_lock);
- list_for_each_entry_rcu(node, node_db, mac_list) {
+ list_for_each_entry_rcu(node, node_db, mac_list,
+ lockdep_is_held(&hsr->list_lock)) {
if (ether_addr_equal(node->macaddress_A, addr))
goto out;
if (ether_addr_equal(node->macaddress_B, addr))
@@ -481,12 +482,9 @@ int hsr_get_node_data(struct hsr_priv *hsr,
struct hsr_port *port;
unsigned long tdiff;
- rcu_read_lock();
node = find_node_by_addr_A(&hsr->node_db, addr);
- if (!node) {
- rcu_read_unlock();
- return -ENOENT; /* No such entry */
- }
+ if (!node)
+ return -ENOENT;
ether_addr_copy(addr_b, node->macaddress_B);
@@ -521,7 +519,5 @@ int hsr_get_node_data(struct hsr_priv *hsr,
*addr_b_ifindex = -1;
}
- rcu_read_unlock();
-
return 0;
}
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 8dc0547..fae21c8 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -251,15 +251,16 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
if (!na)
goto invalid;
- hsr_dev = __dev_get_by_index(genl_info_net(info),
- nla_get_u32(info->attrs[HSR_A_IFINDEX]));
+ rcu_read_lock();
+ hsr_dev = dev_get_by_index_rcu(genl_info_net(info),
+ nla_get_u32(info->attrs[HSR_A_IFINDEX]));
if (!hsr_dev)
- goto invalid;
+ goto rcu_unlock;
if (!is_hsr_master(hsr_dev))
- goto invalid;
+ goto rcu_unlock;
/* Send reply */
- skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb_out) {
res = -ENOMEM;
goto fail;
@@ -313,12 +314,10 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
res = nla_put_u16(skb_out, HSR_A_IF1_SEQ, hsr_node_if1_seq);
if (res < 0)
goto nla_put_failure;
- rcu_read_lock();
port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A);
if (port)
res = nla_put_u32(skb_out, HSR_A_IF1_IFINDEX,
port->dev->ifindex);
- rcu_read_unlock();
if (res < 0)
goto nla_put_failure;
@@ -328,20 +327,22 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
res = nla_put_u16(skb_out, HSR_A_IF2_SEQ, hsr_node_if2_seq);
if (res < 0)
goto nla_put_failure;
- rcu_read_lock();
port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B);
if (port)
res = nla_put_u32(skb_out, HSR_A_IF2_IFINDEX,
port->dev->ifindex);
- rcu_read_unlock();
if (res < 0)
goto nla_put_failure;
+ rcu_read_unlock();
+
genlmsg_end(skb_out, msg_head);
genlmsg_unicast(genl_info_net(info), skb_out, info->snd_portid);
return 0;
+rcu_unlock:
+ rcu_read_unlock();
invalid:
netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL);
return 0;
@@ -351,6 +352,7 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
/* Fall through */
fail:
+ rcu_read_unlock();
return res;
}
@@ -358,16 +360,14 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
*/
static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
{
- /* For receiving */
- struct nlattr *na;
- struct net_device *hsr_dev;
-
- /* For sending */
- struct sk_buff *skb_out;
- void *msg_head;
- struct hsr_priv *hsr;
- void *pos;
unsigned char addr[ETH_ALEN];
+ struct net_device *hsr_dev;
+ struct sk_buff *skb_out;
+ struct hsr_priv *hsr;
+ bool restart = false;
+ struct nlattr *na;
+ void *pos = NULL;
+ void *msg_head;
int res;
if (!info)
@@ -377,15 +377,17 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
if (!na)
goto invalid;
- hsr_dev = __dev_get_by_index(genl_info_net(info),
- nla_get_u32(info->attrs[HSR_A_IFINDEX]));
+ rcu_read_lock();
+ hsr_dev = dev_get_by_index_rcu(genl_info_net(info),
+ nla_get_u32(info->attrs[HSR_A_IFINDEX]));
if (!hsr_dev)
- goto invalid;
+ goto rcu_unlock;
if (!is_hsr_master(hsr_dev))
- goto invalid;
+ goto rcu_unlock;
+restart:
/* Send reply */
- skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb_out = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!skb_out) {
res = -ENOMEM;
goto fail;
@@ -399,18 +401,26 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
goto nla_put_failure;
}
- res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex);
- if (res < 0)
- goto nla_put_failure;
+ if (!restart) {
+ res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex);
+ if (res < 0)
+ goto nla_put_failure;
+ }
hsr = netdev_priv(hsr_dev);
- rcu_read_lock();
- pos = hsr_get_next_node(hsr, NULL, addr);
+ if (!pos)
+ pos = hsr_get_next_node(hsr, NULL, addr);
while (pos) {
res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN, addr);
if (res < 0) {
- rcu_read_unlock();
+ if (res == -EMSGSIZE) {
+ genlmsg_end(skb_out, msg_head);
+ genlmsg_unicast(genl_info_net(info), skb_out,
+ info->snd_portid);
+ restart = true;
+ goto restart;
+ }
goto nla_put_failure;
}
pos = hsr_get_next_node(hsr, pos, addr);
@@ -422,15 +432,18 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
return 0;
+rcu_unlock:
+ rcu_read_unlock();
invalid:
netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL);
return 0;
nla_put_failure:
- kfree_skb(skb_out);
+ nlmsg_free(skb_out);
/* Fall through */
fail:
+ rcu_read_unlock();
return res;
}
@@ -457,6 +470,7 @@ static struct genl_family hsr_genl_family __ro_after_init = {
.version = 1,
.maxattr = HSR_A_MAX,
.policy = hsr_genl_policy,
+ .netnsok = true,
.module = THIS_MODULE,
.ops = hsr_ops,
.n_ops = ARRAY_SIZE(hsr_ops),
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index fbfd0db..a9104d4 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -145,16 +145,16 @@ int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev,
if (!port)
return -ENOMEM;
+ port->hsr = hsr;
+ port->dev = dev;
+ port->type = type;
+
if (type != HSR_PT_MASTER) {
res = hsr_portdev_setup(dev, port);
if (res)
goto fail_dev_setup;
}
- port->hsr = hsr;
- port->dev = dev;
- port->type = type;
-
list_add_tail_rcu(&port->port_list, &hsr->ports);
synchronize_rcu();
diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c
index 2c7a38d..0672b2f 100644
--- a/net/ieee802154/nl_policy.c
+++ b/net/ieee802154/nl_policy.c
@@ -21,7 +21,13 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = {
[IEEE802154_ATTR_HW_ADDR] = { .type = NLA_HW_ADDR, },
[IEEE802154_ATTR_PAN_ID] = { .type = NLA_U16, },
[IEEE802154_ATTR_CHANNEL] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_BCN_ORD] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_SF_ORD] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_PAN_COORD] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_BAT_EXT] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_COORD_REALIGN] = { .type = NLA_U8, },
[IEEE802154_ATTR_PAGE] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_DEV_TYPE] = { .type = NLA_U8, },
[IEEE802154_ATTR_COORD_SHORT_ADDR] = { .type = NLA_U16, },
[IEEE802154_ATTR_COORD_HW_ADDR] = { .type = NLA_HW_ADDR, },
[IEEE802154_ATTR_COORD_PAN_ID] = { .type = NLA_U16, },
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index f96bd48..25a8888 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -129,7 +129,7 @@
If unsure, say Y. Note that if you want to use DHCP, a DHCP server
must be operating on your network. Read
- <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
+ <file:Documentation/admin-guide/nfs/nfsroot.rst> for details.
config IP_PNP_BOOTP
bool "IP: BOOTP support"
@@ -144,7 +144,7 @@
does BOOTP itself, providing all necessary information on the kernel
command line, you can say N here. If unsure, say Y. Note that if you
want to use BOOTP, a BOOTP server must be operating on your network.
- Read <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
+ Read <file:Documentation/admin-guide/nfs/nfsroot.rst> for details.
config IP_PNP_RARP
bool "IP: RARP support"
@@ -157,7 +157,7 @@
older protocol which is being obsoleted by BOOTP and DHCP), say Y
here. Note that if you want to use RARP, a RARP server must be
operating on your network. Read
- <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
+ <file:Documentation/admin-guide/nfs/nfsroot.rst> for details.
config NET_IPIP
tristate "IP: tunneling"
@@ -303,6 +303,7 @@
config NET_IPVTI
tristate "Virtual (secure) IP: tunneling"
+ depends on IPV6 || IPV6=n
select INET_TUNNEL
select NET_IP_TUNNEL
select XFRM
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 574972b..2bf3abe 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -184,7 +184,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
{
const struct tcp_congestion_ops *utcp_ca;
struct tcp_congestion_ops *tcp_ca;
- size_t tcp_ca_name_len;
int prog_fd;
u32 moff;
@@ -199,13 +198,11 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
tcp_ca->flags = utcp_ca->flags;
return 1;
case offsetof(struct tcp_congestion_ops, name):
- tcp_ca_name_len = strnlen(utcp_ca->name, sizeof(utcp_ca->name));
- if (!tcp_ca_name_len ||
- tcp_ca_name_len == sizeof(utcp_ca->name))
+ if (bpf_obj_name_cpy(tcp_ca->name, utcp_ca->name,
+ sizeof(tcp_ca->name)) <= 0)
return -EINVAL;
if (tcp_ca_find(utcp_ca->name))
return -EEXIST;
- memcpy(tcp_ca->name, utcp_ca->name, sizeof(tcp_ca->name));
return 1;
}
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 3768822..0bd10a1 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1724,6 +1724,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
{
unsigned char optbuf[sizeof(struct ip_options) + 40];
struct ip_options *opt = (struct ip_options *)optbuf;
+ int res;
if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
return;
@@ -1735,7 +1736,11 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
memset(opt, 0, sizeof(struct ip_options));
opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
- if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL))
+ rcu_read_lock();
+ res = __ip_options_compile(dev_net(skb->dev), opt, skb, NULL);
+ rcu_read_unlock();
+
+ if (res)
return;
if (gateway)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 577db1d..213be9c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -997,7 +997,9 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
return -ENOENT;
}
+ rcu_read_lock();
err = fib_table_dump(tb, skb, cb, &filter);
+ rcu_read_unlock();
return skb->len ? : err;
}
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 5fd6e8e..66fdbfe 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -56,7 +56,9 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version)
}
EXPORT_SYMBOL_GPL(gre_del_protocol);
-/* Fills in tpi and returns header length to be pulled. */
+/* Fills in tpi and returns header length to be pulled.
+ * Note that caller must use pskb_may_pull() before pulling GRE header.
+ */
int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
bool *csum_err, __be16 proto, int nhs)
{
@@ -110,8 +112,14 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
* - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
*/
if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
+ u8 _val, *val;
+
+ val = skb_header_pointer(skb, nhs + hdr_len,
+ sizeof(_val), &_val);
+ if (!val)
+ return -EINVAL;
tpi->proto = proto;
- if ((*(u8 *)options & 0xF0) != 0x40)
+ if ((*val & 0xF0) != 0x40)
hdr_len += 4;
}
tpi->hdr_len = hdr_len;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 18068ed..f369e7c 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -748,6 +748,39 @@ out:;
}
EXPORT_SYMBOL(__icmp_send);
+#if IS_ENABLED(CONFIG_NF_NAT)
+#include <net/netfilter/nf_conntrack.h>
+void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+{
+ struct sk_buff *cloned_skb = NULL;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ __be32 orig_ip;
+
+ ct = nf_ct_get(skb_in, &ctinfo);
+ if (!ct || !(ct->status & IPS_SRC_NAT)) {
+ icmp_send(skb_in, type, code, info);
+ return;
+ }
+
+ if (skb_shared(skb_in))
+ skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
+
+ if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head ||
+ (skb_network_header(skb_in) + sizeof(struct iphdr)) >
+ skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in,
+ skb_network_offset(skb_in) + sizeof(struct iphdr))))
+ goto out;
+
+ orig_ip = ip_hdr(skb_in)->saddr;
+ ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
+ icmp_send(skb_in, type, code, info);
+ ip_hdr(skb_in)->saddr = orig_ip;
+out:
+ consume_skb(cloned_skb);
+}
+EXPORT_SYMBOL(icmp_ndo_send);
+#endif
static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
{
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a4db79b..d545fb9 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -482,8 +482,28 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
}
spin_unlock_bh(&queue->fastopenq.lock);
}
+
out:
release_sock(sk);
+ if (newsk && mem_cgroup_sockets_enabled) {
+ int amt;
+
+ /* atomically get the memory usage, set and charge the
+ * newsk->sk_memcg.
+ */
+ lock_sock(newsk);
+
+ /* The socket has not been accepted yet, no need to look at
+ * newsk->sk_wmem_queued.
+ */
+ amt = sk_mem_pages(newsk->sk_forward_alloc +
+ atomic_read(&newsk->sk_rmem_alloc));
+ mem_cgroup_sk_alloc(newsk);
+ if (newsk->sk_memcg && amt)
+ mem_cgroup_charge_skmem(newsk->sk_memcg, amt);
+
+ release_sock(newsk);
+ }
if (req)
reqsk_put(req);
return newsk;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index f11e997..8c83775 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -100,13 +100,9 @@ static size_t inet_sk_attr_size(struct sock *sk,
aux = handler->idiag_get_aux_size(sk, net_admin);
return nla_total_size(sizeof(struct tcp_info))
- + nla_total_size(1) /* INET_DIAG_SHUTDOWN */
- + nla_total_size(1) /* INET_DIAG_TOS */
- + nla_total_size(1) /* INET_DIAG_TCLASS */
- + nla_total_size(4) /* INET_DIAG_MARK */
- + nla_total_size(4) /* INET_DIAG_CLASS_ID */
- + nla_total_size(sizeof(struct inet_diag_meminfo))
+ nla_total_size(sizeof(struct inet_diag_msg))
+ + inet_diag_msg_attrs_size()
+ + nla_total_size(sizeof(struct inet_diag_meminfo))
+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
+ nla_total_size(TCP_CA_NAME_MAX)
+ nla_total_size(sizeof(struct tcpvegas_info))
@@ -147,6 +143,24 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark))
goto errout;
+ if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
+ ext & (1 << (INET_DIAG_TCLASS - 1))) {
+ u32 classid = 0;
+
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ classid = sock_cgroup_classid(&sk->sk_cgrp_data);
+#endif
+ /* Fallback to socket priority if class id isn't set.
+ * Classful qdiscs use it as direct reference to class.
+ * For cgroup2 classid is always zero.
+ */
+ if (!classid)
+ classid = sk->sk_priority;
+
+ if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
+ goto errout;
+ }
+
r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
r->idiag_inode = sock_i_ino(sk);
@@ -284,24 +298,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
goto errout;
}
- if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
- ext & (1 << (INET_DIAG_TCLASS - 1))) {
- u32 classid = 0;
-
-#ifdef CONFIG_SOCK_CGROUP_DATA
- classid = sock_cgroup_classid(&sk->sk_cgrp_data);
-#endif
- /* Fallback to socket priority if class id isn't set.
- * Classful qdiscs use it as direct reference to class.
- * For cgroup2 classid is always zero.
- */
- if (!classid)
- classid = sk->sk_priority;
-
- if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
- goto errout;
- }
-
out:
nlmsg_end(skb, nlh);
return 0;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 8274f98c..029b24e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1153,6 +1153,24 @@ static int ipgre_netlink_parms(struct net_device *dev,
if (data[IFLA_GRE_FWMARK])
*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
+ return 0;
+}
+
+static int erspan_netlink_parms(struct net_device *dev,
+ struct nlattr *data[],
+ struct nlattr *tb[],
+ struct ip_tunnel_parm *parms,
+ __u32 *fwmark)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err;
+
+ err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
+ if (err)
+ return err;
+ if (!data)
+ return 0;
+
if (data[IFLA_GRE_ERSPAN_VER]) {
t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
@@ -1276,45 +1294,70 @@ static void ipgre_tap_setup(struct net_device *dev)
ip_tunnel_setup(dev, gre_tap_net_id);
}
-static int ipgre_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack *extack)
+static int
+ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
{
- struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
- __u32 fwmark = 0;
- int err;
if (ipgre_netlink_encap_parms(data, &ipencap)) {
struct ip_tunnel *t = netdev_priv(dev);
- err = ip_tunnel_encap_setup(t, &ipencap);
+ int err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0)
return err;
}
+ return 0;
+}
+
+static int ipgre_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ struct ip_tunnel_parm p;
+ __u32 fwmark = 0;
+ int err;
+
+ err = ipgre_newlink_encap_setup(dev, data);
+ if (err)
+ return err;
+
err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
if (err < 0)
return err;
return ip_tunnel_newlink(dev, tb, &p, fwmark);
}
+static int erspan_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ struct ip_tunnel_parm p;
+ __u32 fwmark = 0;
+ int err;
+
+ err = ipgre_newlink_encap_setup(dev, data);
+ if (err)
+ return err;
+
+ err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
+ if (err)
+ return err;
+ return ip_tunnel_newlink(dev, tb, &p, fwmark);
+}
+
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_encap ipencap;
__u32 fwmark = t->fwmark;
struct ip_tunnel_parm p;
int err;
- if (ipgre_netlink_encap_parms(data, &ipencap)) {
- err = ip_tunnel_encap_setup(t, &ipencap);
-
- if (err < 0)
- return err;
- }
+ err = ipgre_newlink_encap_setup(dev, data);
+ if (err)
+ return err;
err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
if (err < 0)
@@ -1327,8 +1370,34 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
t->parms.i_flags = p.i_flags;
t->parms.o_flags = p.o_flags;
- if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
- ipgre_link_update(dev, !tb[IFLA_MTU]);
+ ipgre_link_update(dev, !tb[IFLA_MTU]);
+
+ return 0;
+}
+
+static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+ __u32 fwmark = t->fwmark;
+ struct ip_tunnel_parm p;
+ int err;
+
+ err = ipgre_newlink_encap_setup(dev, data);
+ if (err)
+ return err;
+
+ err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
+ if (err < 0)
+ return err;
+
+ err = ip_tunnel_changelink(dev, tb, &p, fwmark);
+ if (err < 0)
+ return err;
+
+ t->parms.i_flags = p.i_flags;
+ t->parms.o_flags = p.o_flags;
return 0;
}
@@ -1519,8 +1588,8 @@ static struct rtnl_link_ops erspan_link_ops __read_mostly = {
.priv_size = sizeof(struct ip_tunnel),
.setup = erspan_setup,
.validate = erspan_validate,
- .newlink = ipgre_newlink,
- .changelink = ipgre_changelink,
+ .newlink = erspan_newlink,
+ .changelink = erspan_changelink,
.dellink = ip_tunnel_dellink,
.get_size = ipgre_get_size,
.fill_info = ipgre_fill_info,
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 37cddd1..1b4e6f2 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -187,17 +187,39 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
int mtu;
if (!dst) {
- struct rtable *rt;
+ switch (skb->protocol) {
+ case htons(ETH_P_IP): {
+ struct rtable *rt;
- fl->u.ip4.flowi4_oif = dev->ifindex;
- fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC;
- rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4);
- if (IS_ERR(rt)) {
+ fl->u.ip4.flowi4_oif = dev->ifindex;
+ fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC;
+ rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4);
+ if (IS_ERR(rt)) {
+ dev->stats.tx_carrier_errors++;
+ goto tx_error_icmp;
+ }
+ dst = &rt->dst;
+ skb_dst_set(skb, dst);
+ break;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ fl->u.ip6.flowi6_oif = dev->ifindex;
+ fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
+ dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6);
+ if (dst->error) {
+ dst_release(dst);
+ dst = NULL;
+ dev->stats.tx_carrier_errors++;
+ goto tx_error_icmp;
+ }
+ skb_dst_set(skb, dst);
+ break;
+#endif
+ default:
dev->stats.tx_carrier_errors++;
goto tx_error_icmp;
}
- dst = &rt->dst;
- skb_dst_set(skb, dst);
}
dst_hold(dst);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 4438f6b..561f15b 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1621,7 +1621,7 @@ late_initcall(ip_auto_config);
/*
* Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel
- * command line parameter. See Documentation/filesystems/nfs/nfsroot.txt.
+ * command line parameter. See Documentation/admin-guide/nfs/nfsroot.rst.
*/
static int __init ic_proto_name(char *name)
{
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index e35736b..a93e7d1 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -100,8 +100,9 @@ static int raw_diag_dump_one(struct sk_buff *in_skb,
if (IS_ERR(sk))
return PTR_ERR(sk);
- rep = nlmsg_new(sizeof(struct inet_diag_msg) +
- sizeof(struct inet_diag_meminfo) + 64,
+ rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) +
+ inet_diag_msg_attrs_size() +
+ nla_total_size(sizeof(struct inet_diag_meminfo)) + 64,
GFP_KERNEL);
if (!rep) {
sock_put(sk);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index eb2d805..dc77c30 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2948,8 +2948,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
err = -EPERM;
else if (tp->repair_queue == TCP_SEND_QUEUE)
WRITE_ONCE(tp->write_seq, val);
- else if (tp->repair_queue == TCP_RECV_QUEUE)
+ else if (tp->repair_queue == TCP_RECV_QUEUE) {
WRITE_ONCE(tp->rcv_nxt, val);
+ WRITE_ONCE(tp->copied_seq, val);
+ }
else
err = -EINVAL;
break;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 316ebdf..6b6b570 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6124,7 +6124,11 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
{
struct request_sock *req;
- tcp_try_undo_loss(sk, false);
+ /* If we are still handling the SYNACK RTO, see if timestamp ECR allows
+ * undo. If peer SACKs triggered fast recovery, we can't undo here.
+ */
+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
+ tcp_try_undo_loss(sk, false);
/* Reset rtx states to prevent spurious retransmits_timed_out() */
tcp_sk(sk)->retrans_stamp = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 306e25d..2f45cde 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1109,6 +1109,10 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
if (unlikely(!skb))
return -ENOBUFS;
+ /* retransmit skbs might have a non zero value in skb->dev
+ * because skb->dev is aliased with skb->rbnode.rb_left
+ */
+ skb->dev = NULL;
}
inet = inet_sk(sk);
@@ -3037,8 +3041,12 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
tcp_skb_tsorted_save(skb) {
nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
- err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
- -ENOBUFS;
+ if (nskb) {
+ nskb->dev = NULL;
+ err = tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC);
+ } else {
+ err = -ENOBUFS;
+ }
} tcp_skb_tsorted_restore(skb);
if (!err) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index db76b96..08a41f1 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1857,8 +1857,12 @@ int __udp_disconnect(struct sock *sk, int flags)
inet->inet_dport = 0;
sock_rps_reset_rxhash(sk);
sk->sk_bound_dev_if = 0;
- if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) {
inet_reset_saddr(sk);
+ if (sk->sk_prot->rehash &&
+ (sk->sk_userlocks & SOCK_BINDPORT_LOCK))
+ sk->sk_prot->rehash(sk);
+ }
if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
sk->sk_prot->unhash(sk);
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 910555a..dccd2286 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -64,8 +64,9 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
goto out;
err = -ENOMEM;
- rep = nlmsg_new(sizeof(struct inet_diag_msg) +
- sizeof(struct inet_diag_meminfo) + 64,
+ rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) +
+ inet_diag_msg_attrs_size() +
+ nla_total_size(sizeof(struct inet_diag_meminfo)) + 64,
GFP_KERNEL);
if (!rep)
goto out;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index cb493e1..46d614b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1226,11 +1226,13 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires)
}
static void
-cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt)
+cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
+ bool del_rt, bool del_peer)
{
struct fib6_info *f6i;
- f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len,
+ f6i = addrconf_get_prefix_route(del_peer ? &ifp->peer_addr : &ifp->addr,
+ ifp->prefix_len,
ifp->idev->dev, 0, RTF_DEFAULT, true);
if (f6i) {
if (del_rt)
@@ -1293,7 +1295,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
if (action != CLEANUP_PREFIX_RT_NOP) {
cleanup_prefix_route(ifp, expires,
- action == CLEANUP_PREFIX_RT_DEL);
+ action == CLEANUP_PREFIX_RT_DEL, false);
}
/* clean up prefsrc entries */
@@ -3345,6 +3347,10 @@ static void addrconf_dev_config(struct net_device *dev)
(dev->type != ARPHRD_NONE) &&
(dev->type != ARPHRD_RAWIP)) {
/* Alas, we support only Ethernet autoconfiguration. */
+ idev = __in6_dev_get(dev);
+ if (!IS_ERR_OR_NULL(idev) && dev->flags & IFF_UP &&
+ dev->flags & IFF_MULTICAST)
+ ipv6_mc_up(idev);
return;
}
@@ -4586,12 +4592,14 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
}
static int modify_prefix_route(struct inet6_ifaddr *ifp,
- unsigned long expires, u32 flags)
+ unsigned long expires, u32 flags,
+ bool modify_peer)
{
struct fib6_info *f6i;
u32 prio;
- f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len,
+ f6i = addrconf_get_prefix_route(modify_peer ? &ifp->peer_addr : &ifp->addr,
+ ifp->prefix_len,
ifp->idev->dev, 0, RTF_DEFAULT, true);
if (!f6i)
return -ENOENT;
@@ -4602,7 +4610,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
ip6_del_rt(dev_net(ifp->idev->dev), f6i);
/* add new one */
- addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
+ addrconf_prefix_route(modify_peer ? &ifp->peer_addr : &ifp->addr,
+ ifp->prefix_len,
ifp->rt_priority, ifp->idev->dev,
expires, flags, GFP_KERNEL);
} else {
@@ -4624,6 +4633,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
unsigned long timeout;
bool was_managetempaddr;
bool had_prefixroute;
+ bool new_peer = false;
ASSERT_RTNL();
@@ -4655,6 +4665,13 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
cfg->preferred_lft = timeout;
}
+ if (cfg->peer_pfx &&
+ memcmp(&ifp->peer_addr, cfg->peer_pfx, sizeof(struct in6_addr))) {
+ if (!ipv6_addr_any(&ifp->peer_addr))
+ cleanup_prefix_route(ifp, expires, true, true);
+ new_peer = true;
+ }
+
spin_lock_bh(&ifp->lock);
was_managetempaddr = ifp->flags & IFA_F_MANAGETEMPADDR;
had_prefixroute = ifp->flags & IFA_F_PERMANENT &&
@@ -4670,6 +4687,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
if (cfg->rt_priority && cfg->rt_priority != ifp->rt_priority)
ifp->rt_priority = cfg->rt_priority;
+ if (new_peer)
+ ifp->peer_addr = *cfg->peer_pfx;
+
spin_unlock_bh(&ifp->lock);
if (!(ifp->flags&IFA_F_TENTATIVE))
ipv6_ifa_notify(0, ifp);
@@ -4678,7 +4698,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
int rc = -ENOENT;
if (had_prefixroute)
- rc = modify_prefix_route(ifp, expires, flags);
+ rc = modify_prefix_route(ifp, expires, flags, false);
/* prefix route could have been deleted; if so restore it */
if (rc == -ENOENT) {
@@ -4686,6 +4706,15 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
ifp->rt_priority, ifp->idev->dev,
expires, flags, GFP_KERNEL);
}
+
+ if (had_prefixroute && !ipv6_addr_any(&ifp->peer_addr))
+ rc = modify_prefix_route(ifp, expires, flags, true);
+
+ if (rc == -ENOENT && !ipv6_addr_any(&ifp->peer_addr)) {
+ addrconf_prefix_route(&ifp->peer_addr, ifp->prefix_len,
+ ifp->rt_priority, ifp->idev->dev,
+ expires, flags, GFP_KERNEL);
+ }
} else if (had_prefixroute) {
enum cleanup_prefix_rt_t action;
unsigned long rt_expires;
@@ -4696,7 +4725,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
if (action != CLEANUP_PREFIX_RT_NOP) {
cleanup_prefix_route(ifp, rt_expires,
- action == CLEANUP_PREFIX_RT_DEL);
+ action == CLEANUP_PREFIX_RT_DEL, false);
}
}
@@ -5983,9 +6012,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
if (ifp->idev->cnf.forwarding)
addrconf_join_anycast(ifp);
if (!ipv6_addr_any(&ifp->peer_addr))
- addrconf_prefix_route(&ifp->peer_addr, 128, 0,
- ifp->idev->dev, 0, 0,
- GFP_ATOMIC);
+ addrconf_prefix_route(&ifp->peer_addr, 128,
+ ifp->rt_priority, ifp->idev->dev,
+ 0, 0, GFP_ATOMIC);
break;
case RTM_DELADDR:
if (ifp->idev->cnf.forwarding)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 58fbde2..72abf89 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1102,8 +1102,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
found++;
break;
}
- if (rt_can_ecmp)
- fallback_ins = fallback_ins ?: ins;
+ fallback_ins = fallback_ins ?: ins;
goto next_iter;
}
@@ -1146,7 +1145,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
}
if (fallback_ins && !found) {
- /* No ECMP-able route found, replace first non-ECMP one */
+ /* No matching route with same ecmp-able-ness found, replace
+ * first matching route
+ */
ins = fallback_ins;
iter = rcu_dereference_protected(*ins,
lockdep_is_held(&rt->fib6_table->tb6_lock));
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 55bfc51..781ca8c 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -437,8 +437,6 @@ static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return -ENOENT;
switch (type) {
- struct ipv6_tlv_tnl_enc_lim *tel;
- __u32 teli;
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
@@ -452,7 +450,10 @@ static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
break;
}
return 0;
- case ICMPV6_PARAMPROB:
+ case ICMPV6_PARAMPROB: {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ __u32 teli;
+
teli = 0;
if (code == ICMPV6_HDR_FIELD)
teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
@@ -468,6 +469,7 @@ static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
t->parms.name);
}
return 0;
+ }
case ICMPV6_PKT_TOOBIG:
ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
return 0;
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index 0204549..e008675 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -45,4 +45,38 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
rcu_read_unlock();
}
EXPORT_SYMBOL(icmpv6_send);
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+#include <net/netfilter/nf_conntrack.h>
+void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
+{
+ struct sk_buff *cloned_skb = NULL;
+ enum ip_conntrack_info ctinfo;
+ struct in6_addr orig_ip;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb_in, &ctinfo);
+ if (!ct || !(ct->status & IPS_SRC_NAT)) {
+ icmpv6_send(skb_in, type, code, info);
+ return;
+ }
+
+ if (skb_shared(skb_in))
+ skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
+
+ if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head ||
+ (skb_network_header(skb_in) + sizeof(struct ipv6hdr)) >
+ skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in,
+ skb_network_offset(skb_in) + sizeof(struct ipv6hdr))))
+ goto out;
+
+ orig_ip = ipv6_hdr(skb_in)->saddr;
+ ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6;
+ icmpv6_send(skb_in, type, code, info);
+ ipv6_hdr(skb_in)->saddr = orig_ip;
+out:
+ consume_skb(cloned_skb);
+}
+EXPORT_SYMBOL(icmpv6_ndo_send);
+#endif
#endif
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index b5dd20c..4703b09 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -121,6 +121,7 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
/**
* ip6_tnl_lookup - fetch tunnel matching the end-point addresses
+ * @link: ifindex of underlying interface
* @remote: the address of the tunnel exit-point
* @local: the address of the tunnel entry-point
*
@@ -134,37 +135,56 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
static struct ip6_tnl *
-ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
+ip6_tnl_lookup(struct net *net, int link,
+ const struct in6_addr *remote, const struct in6_addr *local)
{
unsigned int hash = HASH(remote, local);
- struct ip6_tnl *t;
+ struct ip6_tnl *t, *cand = NULL;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
struct in6_addr any;
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr) &&
- (t->dev->flags & IFF_UP))
+ if (!ipv6_addr_equal(local, &t->parms.laddr) ||
+ !ipv6_addr_equal(remote, &t->parms.raddr) ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (link == t->parms.link)
return t;
+ else
+ cand = t;
}
memset(&any, 0, sizeof(any));
hash = HASH(&any, local);
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_any(&t->parms.raddr) &&
- (t->dev->flags & IFF_UP))
+ if (!ipv6_addr_equal(local, &t->parms.laddr) ||
+ !ipv6_addr_any(&t->parms.raddr) ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (link == t->parms.link)
return t;
+ else if (!cand)
+ cand = t;
}
hash = HASH(remote, &any);
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(remote, &t->parms.raddr) &&
- ipv6_addr_any(&t->parms.laddr) &&
- (t->dev->flags & IFF_UP))
+ if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
+ !ipv6_addr_any(&t->parms.laddr) ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (link == t->parms.link)
return t;
+ else if (!cand)
+ cand = t;
}
+ if (cand)
+ return cand;
+
t = rcu_dereference(ip6n->collect_md_tun);
if (t && t->dev->flags & IFF_UP)
return t;
@@ -351,7 +371,8 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net,
(t = rtnl_dereference(*tp)) != NULL;
tp = &t->next) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr)) {
+ ipv6_addr_equal(remote, &t->parms.raddr) &&
+ p->link == t->parms.link) {
if (create)
return ERR_PTR(-EEXIST);
@@ -485,7 +506,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
processing of the error. */
rcu_read_lock();
- t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr);
+ t = ip6_tnl_lookup(dev_net(skb->dev), skb->dev->ifindex, &ipv6h->daddr, &ipv6h->saddr);
if (!t)
goto out;
@@ -496,8 +517,6 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
err = 0;
switch (*type) {
- struct ipv6_tlv_tnl_enc_lim *tel;
- __u32 mtu, teli;
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
@@ -510,7 +529,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
rel_msg = 1;
}
break;
- case ICMPV6_PARAMPROB:
+ case ICMPV6_PARAMPROB: {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ __u32 teli;
+
teli = 0;
if ((*code) == ICMPV6_HDR_FIELD)
teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
@@ -527,7 +549,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
t->parms.name);
}
break;
- case ICMPV6_PKT_TOOBIG:
+ }
+ case ICMPV6_PKT_TOOBIG: {
+ __u32 mtu;
+
ip6_update_pmtu(skb, net, htonl(*info), 0, 0,
sock_net_uid(net, NULL));
mtu = *info - offset;
@@ -541,6 +566,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
rel_msg = 1;
}
break;
+ }
case NDISC_REDIRECT:
ip6_redirect(skb, net, skb->dev->ifindex, 0,
sock_net_uid(net, NULL));
@@ -887,7 +913,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
int ret = -1;
rcu_read_lock();
- t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
+ t = ip6_tnl_lookup(dev_net(skb->dev), skb->dev->ifindex, &ipv6h->saddr, &ipv6h->daddr);
if (t) {
u8 tproto = READ_ONCE(t->parms.proto);
@@ -1420,8 +1446,10 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
static void ip6_tnl_link_config(struct ip6_tnl *t)
{
struct net_device *dev = t->dev;
+ struct net_device *tdev = NULL;
struct __ip6_tnl_parm *p = &t->parms;
struct flowi6 *fl6 = &t->fl.u.ip6;
+ unsigned int mtu;
int t_hlen;
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
@@ -1457,22 +1485,25 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
p->link, NULL, strict);
+ if (rt) {
+ tdev = rt->dst.dev;
+ ip6_rt_put(rt);
+ }
- if (!rt)
- return;
+ if (!tdev && p->link)
+ tdev = __dev_get_by_index(t->net, p->link);
- if (rt->dst.dev) {
- dev->hard_header_len = rt->dst.dev->hard_header_len +
- t_hlen;
+ if (tdev) {
+ dev->hard_header_len = tdev->hard_header_len + t_hlen;
+ mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU);
- dev->mtu = rt->dst.dev->mtu - t_hlen;
+ dev->mtu = mtu - t_hlen;
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
}
- ip6_rt_put(rt);
}
}
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 524006a..cc6180e 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -311,7 +311,7 @@ static int vti6_rcv(struct sk_buff *skb)
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
rcu_read_unlock();
- return 0;
+ goto discard;
}
ipv6h = ipv6_hdr(skb);
@@ -450,15 +450,33 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
int mtu;
if (!dst) {
- fl->u.ip6.flowi6_oif = dev->ifindex;
- fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
- dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6);
- if (dst->error) {
- dst_release(dst);
- dst = NULL;
+ switch (skb->protocol) {
+ case htons(ETH_P_IP): {
+ struct rtable *rt;
+
+ fl->u.ip4.flowi4_oif = dev->ifindex;
+ fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC;
+ rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4);
+ if (IS_ERR(rt))
+ goto tx_err_link_failure;
+ dst = &rt->dst;
+ skb_dst_set(skb, dst);
+ break;
+ }
+ case htons(ETH_P_IPV6):
+ fl->u.ip6.flowi6_oif = dev->ifindex;
+ fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
+ dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6);
+ if (dst->error) {
+ dst_release(dst);
+ dst = NULL;
+ goto tx_err_link_failure;
+ }
+ skb_dst_set(skb, dst);
+ break;
+ default:
goto tx_err_link_failure;
}
- skb_dst_set(skb, dst);
}
dst_hold(dst);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 79fc012..debdaeb 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -183,9 +183,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
retv = -EBUSY;
break;
}
- } else if (sk->sk_protocol != IPPROTO_TCP)
+ } else if (sk->sk_protocol == IPPROTO_TCP) {
+ if (sk->sk_prot != &tcpv6_prot) {
+ retv = -EBUSY;
+ break;
+ }
break;
-
+ } else {
+ break;
+ }
if (sk->sk_state != TCP_ESTABLISHED) {
retv = -ENOTCONN;
break;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4fbdc60..2931224 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5198,6 +5198,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
*/
cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
NLM_F_REPLACE);
+ cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
nhn++;
}
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index ab7f124..8c52efe 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -268,7 +268,7 @@ static int seg6_do_srh(struct sk_buff *skb)
skb_mac_header_rebuild(skb);
skb_push(skb, skb->mac_len);
- err = seg6_do_srh_encap(skb, tinfo->srh, NEXTHDR_NONE);
+ err = seg6_do_srh_encap(skb, tinfo->srh, IPPROTO_ETHERNET);
if (err)
return err;
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 7cbc197..8165802 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -282,7 +282,7 @@ static int input_action_end_dx2(struct sk_buff *skb,
struct net_device *odev;
struct ethhdr *eth;
- if (!decap_and_validate(skb, NEXTHDR_NONE))
+ if (!decap_and_validate(skb, IPPROTO_ETHERNET))
goto drop;
if (!pskb_may_pull(skb, ETH_HLEN))
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index e11bdb0..25b7ebd 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -78,7 +78,7 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, const
hlist_for_each_entry_rcu(x6spi,
&xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
- list_byaddr) {
+ list_byaddr, lockdep_is_held(&xfrm6_tunnel_spi_lock)) {
if (xfrm6_addr_equal(&x6spi->addr, saddr))
return x6spi;
}
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 000c742..6aee699 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3450,7 +3450,7 @@ int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb,
spin_lock_irqsave(&local->ack_status_lock, spin_flags);
id = idr_alloc(&local->ack_status_frames, ack_skb,
- 1, 0x40, GFP_ATOMIC);
+ 1, 0x2000, GFP_ATOMIC);
spin_unlock_irqrestore(&local->ack_status_lock, spin_flags);
if (id < 0) {
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index c80b1e1..3419ed6 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -5,7 +5,7 @@
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2016 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
*/
#include <linux/debugfs.h>
@@ -78,6 +78,7 @@ static const char * const sta_flag_names[] = {
FLAG(MPSP_OWNER),
FLAG(MPSP_RECIPIENT),
FLAG(PS_DELIVER),
+ FLAG(USES_ENCRYPTION),
#undef FLAG
};
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 0f889b9..efc1acc 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -6,7 +6,7 @@
* Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright 2018-2019 Intel Corporation
+ * Copyright 2018-2020 Intel Corporation
*/
#include <linux/if_ether.h>
@@ -262,22 +262,29 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
sta ? sta->sta.addr : bcast_addr, ret);
}
-int ieee80211_set_tx_key(struct ieee80211_key *key)
+static int _ieee80211_set_tx_key(struct ieee80211_key *key, bool force)
{
struct sta_info *sta = key->sta;
struct ieee80211_local *local = key->local;
assert_key_lock(local);
+ set_sta_flag(sta, WLAN_STA_USES_ENCRYPTION);
+
sta->ptk_idx = key->conf.keyidx;
- if (!ieee80211_hw_check(&local->hw, AMPDU_KEYBORDER_SUPPORT))
+ if (force || !ieee80211_hw_check(&local->hw, AMPDU_KEYBORDER_SUPPORT))
clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
ieee80211_check_fast_xmit(sta);
return 0;
}
+int ieee80211_set_tx_key(struct ieee80211_key *key)
+{
+ return _ieee80211_set_tx_key(key, false);
+}
+
static void ieee80211_pairwise_rekey(struct ieee80211_key *old,
struct ieee80211_key *new)
{
@@ -441,11 +448,8 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
if (pairwise) {
rcu_assign_pointer(sta->ptk[idx], new);
if (new &&
- !(new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX)) {
- sta->ptk_idx = idx;
- clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
- ieee80211_check_fast_xmit(sta);
- }
+ !(new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX))
+ _ieee80211_set_tx_key(new, true);
} else {
rcu_assign_pointer(sta->gtk[idx], new);
}
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index d699833..38a0383 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -1152,7 +1152,8 @@ int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata,
}
}
- if (!(mpath->flags & MESH_PATH_RESOLVING))
+ if (!(mpath->flags & MESH_PATH_RESOLVING) &&
+ mesh_path_sel_is_hwmp(sdata))
mesh_queue_preq(mpath, PREQ_Q_F_START);
if (skb_queue_len(&mpath->frame_queue) >= MESH_FRAME_QUEUE_LEN)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 5fa1317..88d7a69 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -8,7 +8,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
*/
#include <linux/delay.h>
@@ -1311,7 +1311,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
if (!res) {
ch_switch.timestamp = timestamp;
ch_switch.device_timestamp = device_timestamp;
- ch_switch.block_tx = beacon ? csa_ie.mode : 0;
+ ch_switch.block_tx = csa_ie.mode;
ch_switch.chandef = csa_ie.chandef;
ch_switch.count = csa_ie.count;
ch_switch.delay = csa_ie.max_switch_time;
@@ -1404,7 +1404,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
sdata->vif.csa_active = true;
sdata->csa_chandef = csa_ie.chandef;
- sdata->csa_block_tx = ch_switch.block_tx;
+ sdata->csa_block_tx = csa_ie.mode;
ifmgd->csa_ignored_same_chan = false;
if (sdata->csa_block_tx)
@@ -1438,7 +1438,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
* reset when the disconnection worker runs.
*/
sdata->vif.csa_active = true;
- sdata->csa_block_tx = ch_switch.block_tx;
+ sdata->csa_block_tx = csa_ie.mode;
ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work);
mutex_unlock(&local->chanctx_mtx);
@@ -2959,7 +2959,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
(auth_transaction == 2 &&
ifmgd->auth_data->expected_transaction == 2)) {
if (!ieee80211_mark_sta_auth(sdata, bssid))
- goto out_err;
+ return; /* ignore frame -- wait for timeout */
} else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE &&
auth_transaction == 2) {
sdata_info(sdata, "SAE peer confirmed\n");
@@ -2967,10 +2967,6 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
}
cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
- return;
- out_err:
- mutex_unlock(&sdata->local->sta_mtx);
- /* ignore frame -- wait for timeout */
}
#define case_WLAN(type) \
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 0e05ff0..0ba98ad 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4114,7 +4114,7 @@ void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata)
lockdep_assert_held(&local->sta_mtx);
- list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ list_for_each_entry(sta, &local->sta_list, list) {
if (sdata != sta->sdata &&
(!sta->sdata->bss || sta->sdata->bss != sdata->bss))
continue;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 0f5f406..e3572be 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -4,7 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2019 Intel Corporation
+ * Copyright (C) 2018-2020 Intel Corporation
*/
#include <linux/module.h>
@@ -1049,6 +1049,11 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
might_sleep();
lockdep_assert_held(&local->sta_mtx);
+ while (sta->sta_state == IEEE80211_STA_AUTHORIZED) {
+ ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC);
+ WARN_ON_ONCE(ret);
+ }
+
/* now keys can no longer be reached */
ieee80211_free_sta_keys(local, sta);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index c00e285..552eed3 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -98,6 +98,7 @@ enum ieee80211_sta_info_flags {
WLAN_STA_MPSP_OWNER,
WLAN_STA_MPSP_RECIPIENT,
WLAN_STA_PS_DELIVER,
+ WLAN_STA_USES_ENCRYPTION,
NUM_WLAN_STA_FLAGS,
};
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 4bd1faf..d9cca6d 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018, 2020 Intel Corporation
*
* Transmit and frame generation functions.
*/
@@ -590,10 +590,13 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
- if (unlikely(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT))
+ if (unlikely(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT)) {
tx->key = NULL;
- else if (tx->sta &&
- (key = rcu_dereference(tx->sta->ptk[tx->sta->ptk_idx])))
+ return TX_CONTINUE;
+ }
+
+ if (tx->sta &&
+ (key = rcu_dereference(tx->sta->ptk[tx->sta->ptk_idx])))
tx->key = key;
else if (ieee80211_is_group_privacy_action(tx->skb) &&
(key = rcu_dereference(tx->sdata->default_multicast_key)))
@@ -654,6 +657,9 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
if (!skip_hw && tx->key &&
tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)
info->control.hw_key = &tx->key->conf;
+ } else if (!ieee80211_is_mgmt(hdr->frame_control) && tx->sta &&
+ test_sta_flag(tx->sta, WLAN_STA_USES_ENCRYPTION)) {
+ return TX_DROP;
}
return TX_CONTINUE;
@@ -2442,7 +2448,7 @@ static int ieee80211_store_ack_skb(struct ieee80211_local *local,
spin_lock_irqsave(&local->ack_status_lock, flags);
id = idr_alloc(&local->ack_status_frames, ack_skb,
- 1, 0x40, GFP_ATOMIC);
+ 1, 0x2000, GFP_ATOMIC);
spin_unlock_irqrestore(&local->ack_status_lock, flags);
if (id >= 0) {
@@ -3598,8 +3604,25 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
tx.skb = skb;
tx.sdata = vif_to_sdata(info->control.vif);
- if (txq->sta)
+ if (txq->sta) {
tx.sta = container_of(txq->sta, struct sta_info, sta);
+ /*
+ * Drop unicast frames to unauthorised stations unless they are
+ * EAPOL frames from the local station.
+ */
+ if (unlikely(!ieee80211_vif_is_mesh(&tx.sdata->vif) &&
+ tx.sdata->vif.type != NL80211_IFTYPE_OCB &&
+ !is_multicast_ether_addr(hdr->addr1) &&
+ !test_sta_flag(tx.sta, WLAN_STA_AUTHORIZED) &&
+ (!(info->control.flags &
+ IEEE80211_TX_CTRL_PORT_CTRL_PROTO) ||
+ !ether_addr_equal(tx.sdata->vif.addr,
+ hdr->addr2)))) {
+ I802_DEBUG_INC(local->tx_handlers_drop_unauth_port);
+ ieee80211_free_txskb(&local->hw, skb);
+ goto begin;
+ }
+ }
/*
* The key can be removed while the packet was queued, so need to call
@@ -5126,6 +5149,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_local *local = sdata->local;
struct sk_buff *skb;
struct ethhdr *ehdr;
+ u32 ctrl_flags = 0;
u32 flags;
/* Only accept CONTROL_PORT_PROTOCOL configured in CONNECT/ASSOCIATE
@@ -5135,6 +5159,9 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
proto != cpu_to_be16(ETH_P_PREAUTH))
return -EINVAL;
+ if (proto == sdata->control_port_protocol)
+ ctrl_flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO;
+
if (unencrypted)
flags = IEEE80211_TX_INTFL_DONT_ENCRYPT;
else
@@ -5160,7 +5187,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
skb_reset_mac_header(skb);
local_bh_disable();
- __ieee80211_subif_start_xmit(skb, skb->dev, flags, 0);
+ __ieee80211_subif_start_xmit(skb, skb->dev, flags, ctrl_flags);
local_bh_enable();
return 0;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 32a7a53..decd46b3 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1063,16 +1063,22 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
elem_parse_failed = true;
break;
case WLAN_EID_VHT_OPERATION:
- if (elen >= sizeof(struct ieee80211_vht_operation))
+ if (elen >= sizeof(struct ieee80211_vht_operation)) {
elems->vht_operation = (void *)pos;
- else
- elem_parse_failed = true;
+ if (calc_crc)
+ crc = crc32_be(crc, pos - 2, elen + 2);
+ break;
+ }
+ elem_parse_failed = true;
break;
case WLAN_EID_OPMODE_NOTIF:
- if (elen > 0)
+ if (elen > 0) {
elems->opmode_notif = pos;
- else
- elem_parse_failed = true;
+ if (calc_crc)
+ crc = crc32_be(crc, pos - 2, elen + 2);
+ break;
+ }
+ elem_parse_failed = true;
break;
case WLAN_EID_MESH_ID:
elems->mesh_id = pos;
@@ -2987,10 +2993,22 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw,
int cf0, cf1;
int ccfs0, ccfs1, ccfs2;
int ccf0, ccf1;
+ u32 vht_cap;
+ bool support_80_80 = false;
+ bool support_160 = false;
if (!oper || !htop)
return false;
+ vht_cap = hw->wiphy->bands[chandef->chan->band]->vht_cap.cap;
+ support_160 = (vht_cap & (IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK |
+ IEEE80211_VHT_CAP_EXT_NSS_BW_MASK));
+ support_80_80 = ((vht_cap &
+ IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) ||
+ (vht_cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ &&
+ vht_cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) ||
+ ((vht_cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) >>
+ IEEE80211_VHT_CAP_EXT_NSS_BW_SHIFT > 1));
ccfs0 = oper->center_freq_seg0_idx;
ccfs1 = oper->center_freq_seg1_idx;
ccfs2 = (le16_to_cpu(htop->operation_mode) &
@@ -3018,10 +3036,10 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw,
unsigned int diff;
diff = abs(ccf1 - ccf0);
- if (diff == 8) {
+ if ((diff == 8) && support_160) {
new.width = NL80211_CHAN_WIDTH_160;
new.center_freq1 = cf1;
- } else if (diff > 8) {
+ } else if ((diff > 8) && support_80_80) {
new.width = NL80211_CHAN_WIDTH_80P80;
new.center_freq2 = cf1;
}
diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig
index 49f6054..a9ed3bf 100644
--- a/net/mptcp/Kconfig
+++ b/net/mptcp/Kconfig
@@ -4,6 +4,7 @@
depends on INET
select SKB_EXTENSIONS
select CRYPTO_LIB_SHA256
+ select CRYPTO
help
Multipath TCP (MPTCP) connections send and receive data over multiple
subflows in order to utilize multiple network paths. Each subflow
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 45acd87..fd2c3150 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -334,6 +334,8 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
struct mptcp_sock *msk;
unsigned int ack_size;
bool ret = false;
+ bool can_ack;
+ u64 ack_seq;
u8 tcp_fin;
if (skb) {
@@ -360,9 +362,22 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
ret = true;
}
+ /* passive sockets msk will set the 'can_ack' after accept(), even
+ * if the first subflow may have the already the remote key handy
+ */
+ can_ack = true;
opts->ext_copy.use_ack = 0;
msk = mptcp_sk(subflow->conn);
- if (!msk || !READ_ONCE(msk->can_ack)) {
+ if (likely(msk && READ_ONCE(msk->can_ack))) {
+ ack_seq = msk->ack_seq;
+ } else if (subflow->can_ack) {
+ mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
+ ack_seq++;
+ } else {
+ can_ack = false;
+ }
+
+ if (unlikely(!can_ack)) {
*size = ALIGN(dss_size, 4);
return ret;
}
@@ -375,7 +390,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
dss_size += ack_size;
- opts->ext_copy.data_ack = msk->ack_seq;
+ opts->ext_copy.data_ack = ack_seq;
opts->ext_copy.ack64 = 1;
opts->ext_copy.use_ack = 1;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 73780b4..3c19a8e 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -543,6 +543,11 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
}
}
+static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
+{
+ return 0;
+}
+
static int __mptcp_init_sock(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -551,6 +556,7 @@ static int __mptcp_init_sock(struct sock *sk)
__set_bit(MPTCP_SEND_SPACE, &msk->flags);
msk->first = NULL;
+ inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
return 0;
}
@@ -643,7 +649,7 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
}
#endif
-struct sock *mptcp_sk_clone_lock(const struct sock *sk)
+static struct sock *mptcp_sk_clone_lock(const struct sock *sk)
{
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
@@ -755,60 +761,50 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- int ret = -EOPNOTSUPP;
struct socket *ssock;
- struct sock *ssk;
pr_debug("msk=%p", msk);
/* @@ the meaning of setsockopt() when the socket is connected and
- * there are multiple subflows is not defined.
+ * there are multiple subflows is not yet defined. It is up to the
+ * MPTCP-level socket to configure the subflows until the subflow
+ * is in TCP fallback, when TCP socket options are passed through
+ * to the one remaining subflow.
*/
lock_sock(sk);
- ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
- if (IS_ERR(ssock)) {
- release_sock(sk);
- return ret;
- }
+ ssock = __mptcp_tcp_fallback(msk);
+ if (ssock)
+ return tcp_setsockopt(ssock->sk, level, optname, optval,
+ optlen);
- ssk = ssock->sk;
- sock_hold(ssk);
release_sock(sk);
- ret = tcp_setsockopt(ssk, level, optname, optval, optlen);
- sock_put(ssk);
-
- return ret;
+ return -EOPNOTSUPP;
}
static int mptcp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *option)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- int ret = -EOPNOTSUPP;
struct socket *ssock;
- struct sock *ssk;
pr_debug("msk=%p", msk);
- /* @@ the meaning of getsockopt() when the socket is connected and
- * there are multiple subflows is not defined.
+ /* @@ the meaning of setsockopt() when the socket is connected and
+ * there are multiple subflows is not yet defined. It is up to the
+ * MPTCP-level socket to configure the subflows until the subflow
+ * is in TCP fallback, when socket options are passed through
+ * to the one remaining subflow.
*/
lock_sock(sk);
- ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
- if (IS_ERR(ssock)) {
- release_sock(sk);
- return ret;
- }
+ ssock = __mptcp_tcp_fallback(msk);
+ if (ssock)
+ return tcp_getsockopt(ssock->sk, level, optname, optval,
+ option);
- ssk = ssock->sk;
- sock_hold(ssk);
release_sock(sk);
- ret = tcp_getsockopt(ssk, level, optname, optval, option);
- sock_put(ssk);
-
- return ret;
+ return -EOPNOTSUPP;
}
static int mptcp_get_port(struct sock *sk, unsigned short snum)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 8a99a29..9f8663b3 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -56,8 +56,8 @@
#define MPTCP_DSS_FLAG_MASK (0x1F)
/* MPTCP socket flags */
-#define MPTCP_DATA_READY BIT(0)
-#define MPTCP_SEND_SPACE BIT(1)
+#define MPTCP_DATA_READY 0
+#define MPTCP_SEND_SPACE 1
/* MPTCP connection sock */
struct mptcp_sock {
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 69c107f..8dd1758 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -723,6 +723,20 @@ ip_set_rcu_get(struct net *net, ip_set_id_t index)
return set;
}
+static inline void
+ip_set_lock(struct ip_set *set)
+{
+ if (!set->variant->region_lock)
+ spin_lock_bh(&set->lock);
+}
+
+static inline void
+ip_set_unlock(struct ip_set *set)
+{
+ if (!set->variant->region_lock)
+ spin_unlock_bh(&set->lock);
+}
+
int
ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
const struct xt_action_param *par, struct ip_set_adt_opt *opt)
@@ -744,9 +758,9 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
if (ret == -EAGAIN) {
/* Type requests element to be completed */
pr_debug("element must be completed, ADD is triggered\n");
- spin_lock_bh(&set->lock);
+ ip_set_lock(set);
set->variant->kadt(set, skb, par, IPSET_ADD, opt);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
ret = 1;
} else {
/* --return-nomatch: invert matched element */
@@ -775,9 +789,9 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return -IPSET_ERR_TYPE_MISMATCH;
- spin_lock_bh(&set->lock);
+ ip_set_lock(set);
ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
return ret;
}
@@ -797,9 +811,9 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return -IPSET_ERR_TYPE_MISMATCH;
- spin_lock_bh(&set->lock);
+ ip_set_lock(set);
ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
return ret;
}
@@ -1264,9 +1278,9 @@ ip_set_flush_set(struct ip_set *set)
{
pr_debug("set: %s\n", set->name);
- spin_lock_bh(&set->lock);
+ ip_set_lock(set);
set->variant->flush(set);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
}
static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb,
@@ -1713,9 +1727,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
do {
- spin_lock_bh(&set->lock);
+ ip_set_lock(set);
ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
retried = true;
} while (ret == -EAGAIN &&
set->variant->resize &&
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 7480ce5..e52d7b7 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -7,13 +7,21 @@
#include <linux/rcupdate.h>
#include <linux/jhash.h>
#include <linux/types.h>
+#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/ipset/ip_set.h>
-#define __ipset_dereference_protected(p, c) rcu_dereference_protected(p, c)
-#define ipset_dereference_protected(p, set) \
- __ipset_dereference_protected(p, lockdep_is_held(&(set)->lock))
-
-#define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1)
+#define __ipset_dereference(p) \
+ rcu_dereference_protected(p, 1)
+#define ipset_dereference_nfnl(p) \
+ rcu_dereference_protected(p, \
+ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
+#define ipset_dereference_set(p, set) \
+ rcu_dereference_protected(p, \
+ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \
+ lockdep_is_held(&(set)->lock))
+#define ipset_dereference_bh_nfnl(p) \
+ rcu_dereference_bh_check(p, \
+ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
/* Hashing which uses arrays to resolve clashing. The hash table is resized
* (doubled) when searching becomes too long.
@@ -72,11 +80,35 @@ struct hbucket {
__aligned(__alignof__(u64));
};
+/* Region size for locking == 2^HTABLE_REGION_BITS */
+#define HTABLE_REGION_BITS 10
+#define ahash_numof_locks(htable_bits) \
+ ((htable_bits) < HTABLE_REGION_BITS ? 1 \
+ : jhash_size((htable_bits) - HTABLE_REGION_BITS))
+#define ahash_sizeof_regions(htable_bits) \
+ (ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region))
+#define ahash_region(n, htable_bits) \
+ ((n) % ahash_numof_locks(htable_bits))
+#define ahash_bucket_start(h, htable_bits) \
+ ((htable_bits) < HTABLE_REGION_BITS ? 0 \
+ : (h) * jhash_size(HTABLE_REGION_BITS))
+#define ahash_bucket_end(h, htable_bits) \
+ ((htable_bits) < HTABLE_REGION_BITS ? jhash_size(htable_bits) \
+ : ((h) + 1) * jhash_size(HTABLE_REGION_BITS))
+
+struct htable_gc {
+ struct delayed_work dwork;
+ struct ip_set *set; /* Set the gc belongs to */
+ u32 region; /* Last gc run position */
+};
+
/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
atomic_t ref; /* References for resizing */
- atomic_t uref; /* References for dumping */
+ atomic_t uref; /* References for dumping and gc */
u8 htable_bits; /* size of hash table == 2^htable_bits */
+ u32 maxelem; /* Maxelem per region */
+ struct ip_set_region *hregion; /* Region locks and ext sizes */
struct hbucket __rcu *bucket[0]; /* hashtable buckets */
};
@@ -162,6 +194,10 @@ htable_bits(u32 hashsize)
#define NLEN 0
#endif /* IP_SET_HASH_WITH_NETS */
+#define SET_ELEM_EXPIRED(set, d) \
+ (SET_WITH_TIMEOUT(set) && \
+ ip_set_timeout_expired(ext_timeout(d, set)))
+
#endif /* _IP_SET_HASH_GEN_H */
#ifndef MTYPE
@@ -205,10 +241,12 @@ htable_bits(u32 hashsize)
#undef mtype_test_cidrs
#undef mtype_test
#undef mtype_uref
-#undef mtype_expire
#undef mtype_resize
+#undef mtype_ext_size
+#undef mtype_resize_ad
#undef mtype_head
#undef mtype_list
+#undef mtype_gc_do
#undef mtype_gc
#undef mtype_gc_init
#undef mtype_variant
@@ -247,10 +285,12 @@ htable_bits(u32 hashsize)
#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs)
#define mtype_test IPSET_TOKEN(MTYPE, _test)
#define mtype_uref IPSET_TOKEN(MTYPE, _uref)
-#define mtype_expire IPSET_TOKEN(MTYPE, _expire)
#define mtype_resize IPSET_TOKEN(MTYPE, _resize)
+#define mtype_ext_size IPSET_TOKEN(MTYPE, _ext_size)
+#define mtype_resize_ad IPSET_TOKEN(MTYPE, _resize_ad)
#define mtype_head IPSET_TOKEN(MTYPE, _head)
#define mtype_list IPSET_TOKEN(MTYPE, _list)
+#define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
@@ -275,8 +315,7 @@ htable_bits(u32 hashsize)
/* The generic hash structure */
struct htype {
struct htable __rcu *table; /* the hash table */
- struct timer_list gc; /* garbage collection when timeout enabled */
- struct ip_set *set; /* attached to this ip_set */
+ struct htable_gc gc; /* gc workqueue */
u32 maxelem; /* max elements in the hash */
u32 initval; /* random jhash init value */
#ifdef IP_SET_HASH_WITH_MARKMASK
@@ -288,21 +327,33 @@ struct htype {
#ifdef IP_SET_HASH_WITH_NETMASK
u8 netmask; /* netmask value for subnets to store */
#endif
+ struct list_head ad; /* Resize add|del backlist */
struct mtype_elem next; /* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_NETS
struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */
#endif
};
+/* ADD|DEL entries saved during resize */
+struct mtype_resize_ad {
+ struct list_head list;
+ enum ipset_adt ad; /* ADD|DEL element */
+ struct mtype_elem d; /* Element value */
+ struct ip_set_ext ext; /* Extensions for ADD */
+ struct ip_set_ext mext; /* Target extensions for ADD */
+ u32 flags; /* Flags for ADD */
+};
+
#ifdef IP_SET_HASH_WITH_NETS
/* Network cidr size book keeping when the hash stores different
* sized networks. cidr == real cidr + 1 to support /0.
*/
static void
-mtype_add_cidr(struct htype *h, u8 cidr, u8 n)
+mtype_add_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n)
{
int i, j;
+ spin_lock_bh(&set->lock);
/* Add in increasing prefix order, so larger cidr first */
for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) {
if (j != -1) {
@@ -311,7 +362,7 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 n)
j = i;
} else if (h->nets[i].cidr[n] == cidr) {
h->nets[CIDR_POS(cidr)].nets[n]++;
- return;
+ goto unlock;
}
}
if (j != -1) {
@@ -320,24 +371,29 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 n)
}
h->nets[i].cidr[n] = cidr;
h->nets[CIDR_POS(cidr)].nets[n] = 1;
+unlock:
+ spin_unlock_bh(&set->lock);
}
static void
-mtype_del_cidr(struct htype *h, u8 cidr, u8 n)
+mtype_del_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n)
{
u8 i, j, net_end = NLEN - 1;
+ spin_lock_bh(&set->lock);
for (i = 0; i < NLEN; i++) {
if (h->nets[i].cidr[n] != cidr)
continue;
h->nets[CIDR_POS(cidr)].nets[n]--;
if (h->nets[CIDR_POS(cidr)].nets[n] > 0)
- return;
+ goto unlock;
for (j = i; j < net_end && h->nets[j].cidr[n]; j++)
h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
h->nets[j].cidr[n] = 0;
- return;
+ goto unlock;
}
+unlock:
+ spin_unlock_bh(&set->lock);
}
#endif
@@ -345,7 +401,7 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 n)
static size_t
mtype_ahash_memsize(const struct htype *h, const struct htable *t)
{
- return sizeof(*h) + sizeof(*t);
+ return sizeof(*h) + sizeof(*t) + ahash_sizeof_regions(t->htable_bits);
}
/* Get the ith element from the array block n */
@@ -369,24 +425,29 @@ mtype_flush(struct ip_set *set)
struct htype *h = set->data;
struct htable *t;
struct hbucket *n;
- u32 i;
+ u32 r, i;
- t = ipset_dereference_protected(h->table, set);
- for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = __ipset_dereference_protected(hbucket(t, i), 1);
- if (!n)
- continue;
- if (set->extensions & IPSET_EXT_DESTROY)
- mtype_ext_cleanup(set, n);
- /* FIXME: use slab cache */
- rcu_assign_pointer(hbucket(t, i), NULL);
- kfree_rcu(n, rcu);
+ t = ipset_dereference_nfnl(h->table);
+ for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) {
+ spin_lock_bh(&t->hregion[r].lock);
+ for (i = ahash_bucket_start(r, t->htable_bits);
+ i < ahash_bucket_end(r, t->htable_bits); i++) {
+ n = __ipset_dereference(hbucket(t, i));
+ if (!n)
+ continue;
+ if (set->extensions & IPSET_EXT_DESTROY)
+ mtype_ext_cleanup(set, n);
+ /* FIXME: use slab cache */
+ rcu_assign_pointer(hbucket(t, i), NULL);
+ kfree_rcu(n, rcu);
+ }
+ t->hregion[r].ext_size = 0;
+ t->hregion[r].elements = 0;
+ spin_unlock_bh(&t->hregion[r].lock);
}
#ifdef IP_SET_HASH_WITH_NETS
memset(h->nets, 0, sizeof(h->nets));
#endif
- set->elements = 0;
- set->ext_size = 0;
}
/* Destroy the hashtable part of the set */
@@ -397,7 +458,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
u32 i;
for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = __ipset_dereference_protected(hbucket(t, i), 1);
+ n = __ipset_dereference(hbucket(t, i));
if (!n)
continue;
if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
@@ -406,6 +467,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
kfree(n);
}
+ ip_set_free(t->hregion);
ip_set_free(t);
}
@@ -414,28 +476,21 @@ static void
mtype_destroy(struct ip_set *set)
{
struct htype *h = set->data;
+ struct list_head *l, *lt;
if (SET_WITH_TIMEOUT(set))
- del_timer_sync(&h->gc);
+ cancel_delayed_work_sync(&h->gc.dwork);
- mtype_ahash_destroy(set,
- __ipset_dereference_protected(h->table, 1), true);
+ mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
+ list_for_each_safe(l, lt, &h->ad) {
+ list_del(l);
+ kfree(l);
+ }
kfree(h);
set->data = NULL;
}
-static void
-mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
-{
- struct htype *h = set->data;
-
- timer_setup(&h->gc, gc, 0);
- mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
- pr_debug("gc initialized, run in every %u\n",
- IPSET_GC_PERIOD(set->timeout));
-}
-
static bool
mtype_same_set(const struct ip_set *a, const struct ip_set *b)
{
@@ -454,11 +509,9 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
a->extensions == b->extensions;
}
-/* Delete expired elements from the hashtable */
static void
-mtype_expire(struct ip_set *set, struct htype *h)
+mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r)
{
- struct htable *t;
struct hbucket *n, *tmp;
struct mtype_elem *data;
u32 i, j, d;
@@ -466,10 +519,12 @@ mtype_expire(struct ip_set *set, struct htype *h)
#ifdef IP_SET_HASH_WITH_NETS
u8 k;
#endif
+ u8 htable_bits = t->htable_bits;
- t = ipset_dereference_protected(h->table, set);
- for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = __ipset_dereference_protected(hbucket(t, i), 1);
+ spin_lock_bh(&t->hregion[r].lock);
+ for (i = ahash_bucket_start(r, htable_bits);
+ i < ahash_bucket_end(r, htable_bits); i++) {
+ n = __ipset_dereference(hbucket(t, i));
if (!n)
continue;
for (j = 0, d = 0; j < n->pos; j++) {
@@ -485,58 +540,100 @@ mtype_expire(struct ip_set *set, struct htype *h)
smp_mb__after_atomic();
#ifdef IP_SET_HASH_WITH_NETS
for (k = 0; k < IPSET_NET_COUNT; k++)
- mtype_del_cidr(h,
+ mtype_del_cidr(set, h,
NCIDR_PUT(DCIDR_GET(data->cidr, k)),
k);
#endif
+ t->hregion[r].elements--;
ip_set_ext_destroy(set, data);
- set->elements--;
d++;
}
if (d >= AHASH_INIT_SIZE) {
if (d >= n->size) {
+ t->hregion[r].ext_size -=
+ ext_size(n->size, dsize);
rcu_assign_pointer(hbucket(t, i), NULL);
kfree_rcu(n, rcu);
continue;
}
tmp = kzalloc(sizeof(*tmp) +
- (n->size - AHASH_INIT_SIZE) * dsize,
- GFP_ATOMIC);
+ (n->size - AHASH_INIT_SIZE) * dsize,
+ GFP_ATOMIC);
if (!tmp)
- /* Still try to delete expired elements */
+ /* Still try to delete expired elements. */
continue;
tmp->size = n->size - AHASH_INIT_SIZE;
for (j = 0, d = 0; j < n->pos; j++) {
if (!test_bit(j, n->used))
continue;
data = ahash_data(n, j, dsize);
- memcpy(tmp->value + d * dsize, data, dsize);
+ memcpy(tmp->value + d * dsize,
+ data, dsize);
set_bit(d, tmp->used);
d++;
}
tmp->pos = d;
- set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
+ t->hregion[r].ext_size -=
+ ext_size(AHASH_INIT_SIZE, dsize);
rcu_assign_pointer(hbucket(t, i), tmp);
kfree_rcu(n, rcu);
}
}
+ spin_unlock_bh(&t->hregion[r].lock);
}
static void
-mtype_gc(struct timer_list *t)
+mtype_gc(struct work_struct *work)
{
- struct htype *h = from_timer(h, t, gc);
- struct ip_set *set = h->set;
+ struct htable_gc *gc;
+ struct ip_set *set;
+ struct htype *h;
+ struct htable *t;
+ u32 r, numof_locks;
+ unsigned int next_run;
- pr_debug("called\n");
+ gc = container_of(work, struct htable_gc, dwork.work);
+ set = gc->set;
+ h = set->data;
+
spin_lock_bh(&set->lock);
- mtype_expire(set, h);
+ t = ipset_dereference_set(h->table, set);
+ atomic_inc(&t->uref);
+ numof_locks = ahash_numof_locks(t->htable_bits);
+ r = gc->region++;
+ if (r >= numof_locks) {
+ r = gc->region = 0;
+ }
+ next_run = (IPSET_GC_PERIOD(set->timeout) * HZ) / numof_locks;
+ if (next_run < HZ/10)
+ next_run = HZ/10;
spin_unlock_bh(&set->lock);
- h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
- add_timer(&h->gc);
+ mtype_gc_do(set, h, t, r);
+
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ pr_debug("Table destroy after resize by expire: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
+
+ queue_delayed_work(system_power_efficient_wq, &gc->dwork, next_run);
+
}
+static void
+mtype_gc_init(struct htable_gc *gc)
+{
+ INIT_DEFERRABLE_WORK(&gc->dwork, mtype_gc);
+ queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
+}
+
+static int
+mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags);
+static int
+mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags);
+
/* Resize a hash: create a new hash table with doubling the hashsize
* and inserting the elements to it. Repeat until we succeed or
* fail due to memory pressures.
@@ -547,7 +644,7 @@ mtype_resize(struct ip_set *set, bool retried)
struct htype *h = set->data;
struct htable *t, *orig;
u8 htable_bits;
- size_t extsize, dsize = set->dsize;
+ size_t dsize = set->dsize;
#ifdef IP_SET_HASH_WITH_NETS
u8 flags;
struct mtype_elem *tmp;
@@ -555,7 +652,9 @@ mtype_resize(struct ip_set *set, bool retried)
struct mtype_elem *data;
struct mtype_elem *d;
struct hbucket *n, *m;
- u32 i, j, key;
+ struct list_head *l, *lt;
+ struct mtype_resize_ad *x;
+ u32 i, j, r, nr, key;
int ret;
#ifdef IP_SET_HASH_WITH_NETS
@@ -563,10 +662,8 @@ mtype_resize(struct ip_set *set, bool retried)
if (!tmp)
return -ENOMEM;
#endif
- rcu_read_lock_bh();
- orig = rcu_dereference_bh_nfnl(h->table);
+ orig = ipset_dereference_bh_nfnl(h->table);
htable_bits = orig->htable_bits;
- rcu_read_unlock_bh();
retry:
ret = 0;
@@ -583,88 +680,124 @@ mtype_resize(struct ip_set *set, bool retried)
ret = -ENOMEM;
goto out;
}
+ t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits));
+ if (!t->hregion) {
+ kfree(t);
+ ret = -ENOMEM;
+ goto out;
+ }
t->htable_bits = htable_bits;
+ t->maxelem = h->maxelem / ahash_numof_locks(htable_bits);
+ for (i = 0; i < ahash_numof_locks(htable_bits); i++)
+ spin_lock_init(&t->hregion[i].lock);
- spin_lock_bh(&set->lock);
- orig = __ipset_dereference_protected(h->table, 1);
- /* There can't be another parallel resizing, but dumping is possible */
+ /* There can't be another parallel resizing,
+ * but dumping, gc, kernel side add/del are possible
+ */
+ orig = ipset_dereference_bh_nfnl(h->table);
atomic_set(&orig->ref, 1);
atomic_inc(&orig->uref);
- extsize = 0;
pr_debug("attempt to resize set %s from %u to %u, t %p\n",
set->name, orig->htable_bits, htable_bits, orig);
- for (i = 0; i < jhash_size(orig->htable_bits); i++) {
- n = __ipset_dereference_protected(hbucket(orig, i), 1);
- if (!n)
- continue;
- for (j = 0; j < n->pos; j++) {
- if (!test_bit(j, n->used))
+ for (r = 0; r < ahash_numof_locks(orig->htable_bits); r++) {
+ /* Expire may replace a hbucket with another one */
+ rcu_read_lock_bh();
+ for (i = ahash_bucket_start(r, orig->htable_bits);
+ i < ahash_bucket_end(r, orig->htable_bits); i++) {
+ n = __ipset_dereference(hbucket(orig, i));
+ if (!n)
continue;
- data = ahash_data(n, j, dsize);
+ for (j = 0; j < n->pos; j++) {
+ if (!test_bit(j, n->used))
+ continue;
+ data = ahash_data(n, j, dsize);
+ if (SET_ELEM_EXPIRED(set, data))
+ continue;
#ifdef IP_SET_HASH_WITH_NETS
- /* We have readers running parallel with us,
- * so the live data cannot be modified.
- */
- flags = 0;
- memcpy(tmp, data, dsize);
- data = tmp;
- mtype_data_reset_flags(data, &flags);
+ /* We have readers running parallel with us,
+ * so the live data cannot be modified.
+ */
+ flags = 0;
+ memcpy(tmp, data, dsize);
+ data = tmp;
+ mtype_data_reset_flags(data, &flags);
#endif
- key = HKEY(data, h->initval, htable_bits);
- m = __ipset_dereference_protected(hbucket(t, key), 1);
- if (!m) {
- m = kzalloc(sizeof(*m) +
+ key = HKEY(data, h->initval, htable_bits);
+ m = __ipset_dereference(hbucket(t, key));
+ nr = ahash_region(key, htable_bits);
+ if (!m) {
+ m = kzalloc(sizeof(*m) +
AHASH_INIT_SIZE * dsize,
GFP_ATOMIC);
- if (!m) {
- ret = -ENOMEM;
- goto cleanup;
- }
- m->size = AHASH_INIT_SIZE;
- extsize += ext_size(AHASH_INIT_SIZE, dsize);
- RCU_INIT_POINTER(hbucket(t, key), m);
- } else if (m->pos >= m->size) {
- struct hbucket *ht;
+ if (!m) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+ m->size = AHASH_INIT_SIZE;
+ t->hregion[nr].ext_size +=
+ ext_size(AHASH_INIT_SIZE,
+ dsize);
+ RCU_INIT_POINTER(hbucket(t, key), m);
+ } else if (m->pos >= m->size) {
+ struct hbucket *ht;
- if (m->size >= AHASH_MAX(h)) {
- ret = -EAGAIN;
- } else {
- ht = kzalloc(sizeof(*ht) +
+ if (m->size >= AHASH_MAX(h)) {
+ ret = -EAGAIN;
+ } else {
+ ht = kzalloc(sizeof(*ht) +
(m->size + AHASH_INIT_SIZE)
* dsize,
GFP_ATOMIC);
- if (!ht)
- ret = -ENOMEM;
+ if (!ht)
+ ret = -ENOMEM;
+ }
+ if (ret < 0)
+ goto cleanup;
+ memcpy(ht, m, sizeof(struct hbucket) +
+ m->size * dsize);
+ ht->size = m->size + AHASH_INIT_SIZE;
+ t->hregion[nr].ext_size +=
+ ext_size(AHASH_INIT_SIZE,
+ dsize);
+ kfree(m);
+ m = ht;
+ RCU_INIT_POINTER(hbucket(t, key), ht);
}
- if (ret < 0)
- goto cleanup;
- memcpy(ht, m, sizeof(struct hbucket) +
- m->size * dsize);
- ht->size = m->size + AHASH_INIT_SIZE;
- extsize += ext_size(AHASH_INIT_SIZE, dsize);
- kfree(m);
- m = ht;
- RCU_INIT_POINTER(hbucket(t, key), ht);
- }
- d = ahash_data(m, m->pos, dsize);
- memcpy(d, data, dsize);
- set_bit(m->pos++, m->used);
+ d = ahash_data(m, m->pos, dsize);
+ memcpy(d, data, dsize);
+ set_bit(m->pos++, m->used);
+ t->hregion[nr].elements++;
#ifdef IP_SET_HASH_WITH_NETS
- mtype_data_reset_flags(d, &flags);
+ mtype_data_reset_flags(d, &flags);
#endif
+ }
}
+ rcu_read_unlock_bh();
}
- rcu_assign_pointer(h->table, t);
- set->ext_size = extsize;
- spin_unlock_bh(&set->lock);
+ /* There can't be any other writer. */
+ rcu_assign_pointer(h->table, t);
/* Give time to other readers of the set */
synchronize_rcu();
pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
orig->htable_bits, orig, t->htable_bits, t);
- /* If there's nobody else dumping the table, destroy it */
+ /* Add/delete elements processed by the SET target during resize.
+ * Kernel-side add cannot trigger a resize and userspace actions
+ * are serialized by the mutex.
+ */
+ list_for_each_safe(l, lt, &h->ad) {
+ x = list_entry(l, struct mtype_resize_ad, list);
+ if (x->ad == IPSET_ADD) {
+ mtype_add(set, &x->d, &x->ext, &x->mext, x->flags);
+ } else {
+ mtype_del(set, &x->d, NULL, NULL, 0);
+ }
+ list_del(l);
+ kfree(l);
+ }
+ /* If there's nobody else using the table, destroy it */
if (atomic_dec_and_test(&orig->uref)) {
pr_debug("Table destroy by resize %p\n", orig);
mtype_ahash_destroy(set, orig, false);
@@ -677,15 +810,44 @@ mtype_resize(struct ip_set *set, bool retried)
return ret;
cleanup:
+ rcu_read_unlock_bh();
atomic_set(&orig->ref, 0);
atomic_dec(&orig->uref);
- spin_unlock_bh(&set->lock);
mtype_ahash_destroy(set, t, false);
if (ret == -EAGAIN)
goto retry;
goto out;
}
+/* Get the current number of elements and ext_size in the set */
+static void
+mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size)
+{
+ struct htype *h = set->data;
+ const struct htable *t;
+ u32 i, j, r;
+ struct hbucket *n;
+ struct mtype_elem *data;
+
+ t = rcu_dereference_bh(h->table);
+ for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) {
+ for (i = ahash_bucket_start(r, t->htable_bits);
+ i < ahash_bucket_end(r, t->htable_bits); i++) {
+ n = rcu_dereference_bh(hbucket(t, i));
+ if (!n)
+ continue;
+ for (j = 0; j < n->pos; j++) {
+ if (!test_bit(j, n->used))
+ continue;
+ data = ahash_data(n, j, set->dsize);
+ if (!SET_ELEM_EXPIRED(set, data))
+ (*elements)++;
+ }
+ }
+ *ext_size += t->hregion[r].ext_size;
+ }
+}
+
/* Add an element to a hash and update the internal counters when succeeded,
* otherwise report the proper error code.
*/
@@ -698,32 +860,49 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
const struct mtype_elem *d = value;
struct mtype_elem *data;
struct hbucket *n, *old = ERR_PTR(-ENOENT);
- int i, j = -1;
+ int i, j = -1, ret;
bool flag_exist = flags & IPSET_FLAG_EXIST;
bool deleted = false, forceadd = false, reuse = false;
- u32 key, multi = 0;
+ u32 r, key, multi = 0, elements, maxelem;
- if (set->elements >= h->maxelem) {
- if (SET_WITH_TIMEOUT(set))
- /* FIXME: when set is full, we slow down here */
- mtype_expire(set, h);
- if (set->elements >= h->maxelem && SET_WITH_FORCEADD(set))
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh(h->table);
+ key = HKEY(value, h->initval, t->htable_bits);
+ r = ahash_region(key, t->htable_bits);
+ atomic_inc(&t->uref);
+ elements = t->hregion[r].elements;
+ maxelem = t->maxelem;
+ if (elements >= maxelem) {
+ u32 e;
+ if (SET_WITH_TIMEOUT(set)) {
+ rcu_read_unlock_bh();
+ mtype_gc_do(set, h, t, r);
+ rcu_read_lock_bh();
+ }
+ maxelem = h->maxelem;
+ elements = 0;
+ for (e = 0; e < ahash_numof_locks(t->htable_bits); e++)
+ elements += t->hregion[e].elements;
+ if (elements >= maxelem && SET_WITH_FORCEADD(set))
forceadd = true;
}
+ rcu_read_unlock_bh();
- t = ipset_dereference_protected(h->table, set);
- key = HKEY(value, h->initval, t->htable_bits);
- n = __ipset_dereference_protected(hbucket(t, key), 1);
+ spin_lock_bh(&t->hregion[r].lock);
+ n = rcu_dereference_bh(hbucket(t, key));
if (!n) {
- if (forceadd || set->elements >= h->maxelem)
+ if (forceadd || elements >= maxelem)
goto set_full;
old = NULL;
n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize,
GFP_ATOMIC);
- if (!n)
- return -ENOMEM;
+ if (!n) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
n->size = AHASH_INIT_SIZE;
- set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
+ t->hregion[r].ext_size +=
+ ext_size(AHASH_INIT_SIZE, set->dsize);
goto copy_elem;
}
for (i = 0; i < n->pos; i++) {
@@ -737,38 +916,37 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
data = ahash_data(n, i, set->dsize);
if (mtype_data_equal(data, d, &multi)) {
- if (flag_exist ||
- (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(data, set)))) {
+ if (flag_exist || SET_ELEM_EXPIRED(set, data)) {
/* Just the extensions could be overwritten */
j = i;
goto overwrite_extensions;
}
- return -IPSET_ERR_EXIST;
+ ret = -IPSET_ERR_EXIST;
+ goto unlock;
}
/* Reuse first timed out entry */
- if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(data, set)) &&
- j == -1) {
+ if (SET_ELEM_EXPIRED(set, data) && j == -1) {
j = i;
reuse = true;
}
}
if (reuse || forceadd) {
+ if (j == -1)
+ j = 0;
data = ahash_data(n, j, set->dsize);
if (!deleted) {
#ifdef IP_SET_HASH_WITH_NETS
for (i = 0; i < IPSET_NET_COUNT; i++)
- mtype_del_cidr(h,
+ mtype_del_cidr(set, h,
NCIDR_PUT(DCIDR_GET(data->cidr, i)),
i);
#endif
ip_set_ext_destroy(set, data);
- set->elements--;
+ t->hregion[r].elements--;
}
goto copy_data;
}
- if (set->elements >= h->maxelem)
+ if (elements >= maxelem)
goto set_full;
/* Create a new slot */
if (n->pos >= n->size) {
@@ -776,28 +954,32 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (n->size >= AHASH_MAX(h)) {
/* Trigger rehashing */
mtype_data_next(&h->next, d);
- return -EAGAIN;
+ ret = -EAGAIN;
+ goto resize;
}
old = n;
n = kzalloc(sizeof(*n) +
(old->size + AHASH_INIT_SIZE) * set->dsize,
GFP_ATOMIC);
- if (!n)
- return -ENOMEM;
+ if (!n) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
memcpy(n, old, sizeof(struct hbucket) +
old->size * set->dsize);
n->size = old->size + AHASH_INIT_SIZE;
- set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
+ t->hregion[r].ext_size +=
+ ext_size(AHASH_INIT_SIZE, set->dsize);
}
copy_elem:
j = n->pos++;
data = ahash_data(n, j, set->dsize);
copy_data:
- set->elements++;
+ t->hregion[r].elements++;
#ifdef IP_SET_HASH_WITH_NETS
for (i = 0; i < IPSET_NET_COUNT; i++)
- mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i);
+ mtype_add_cidr(set, h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i);
#endif
memcpy(data, d, sizeof(struct mtype_elem));
overwrite_extensions:
@@ -820,13 +1002,41 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (old)
kfree_rcu(old, rcu);
}
+ ret = 0;
+resize:
+ spin_unlock_bh(&t->hregion[r].lock);
+ if (atomic_read(&t->ref) && ext->target) {
+ /* Resize is in process and kernel side add, save values */
+ struct mtype_resize_ad *x;
- return 0;
+ x = kzalloc(sizeof(struct mtype_resize_ad), GFP_ATOMIC);
+ if (!x)
+ /* Don't bother */
+ goto out;
+ x->ad = IPSET_ADD;
+ memcpy(&x->d, value, sizeof(struct mtype_elem));
+ memcpy(&x->ext, ext, sizeof(struct ip_set_ext));
+ memcpy(&x->mext, mext, sizeof(struct ip_set_ext));
+ x->flags = flags;
+ spin_lock_bh(&set->lock);
+ list_add_tail(&x->list, &h->ad);
+ spin_unlock_bh(&set->lock);
+ }
+ goto out;
+
set_full:
if (net_ratelimit())
pr_warn("Set %s is full, maxelem %u reached\n",
- set->name, h->maxelem);
- return -IPSET_ERR_HASH_FULL;
+ set->name, maxelem);
+ ret = -IPSET_ERR_HASH_FULL;
+unlock:
+ spin_unlock_bh(&t->hregion[r].lock);
+out:
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ pr_debug("Table destroy after resize by add: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
+ return ret;
}
/* Delete an element from the hash and free up space if possible.
@@ -840,13 +1050,23 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
const struct mtype_elem *d = value;
struct mtype_elem *data;
struct hbucket *n;
- int i, j, k, ret = -IPSET_ERR_EXIST;
+ struct mtype_resize_ad *x = NULL;
+ int i, j, k, r, ret = -IPSET_ERR_EXIST;
u32 key, multi = 0;
size_t dsize = set->dsize;
- t = ipset_dereference_protected(h->table, set);
+ /* Userspace add and resize is excluded by the mutex.
+ * Kernespace add does not trigger resize.
+ */
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh(h->table);
key = HKEY(value, h->initval, t->htable_bits);
- n = __ipset_dereference_protected(hbucket(t, key), 1);
+ r = ahash_region(key, t->htable_bits);
+ atomic_inc(&t->uref);
+ rcu_read_unlock_bh();
+
+ spin_lock_bh(&t->hregion[r].lock);
+ n = rcu_dereference_bh(hbucket(t, key));
if (!n)
goto out;
for (i = 0, k = 0; i < n->pos; i++) {
@@ -857,8 +1077,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
data = ahash_data(n, i, dsize);
if (!mtype_data_equal(data, d, &multi))
continue;
- if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(data, set)))
+ if (SET_ELEM_EXPIRED(set, data))
goto out;
ret = 0;
@@ -866,20 +1085,33 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
smp_mb__after_atomic();
if (i + 1 == n->pos)
n->pos--;
- set->elements--;
+ t->hregion[r].elements--;
#ifdef IP_SET_HASH_WITH_NETS
for (j = 0; j < IPSET_NET_COUNT; j++)
- mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)),
- j);
+ mtype_del_cidr(set, h,
+ NCIDR_PUT(DCIDR_GET(d->cidr, j)), j);
#endif
ip_set_ext_destroy(set, data);
+ if (atomic_read(&t->ref) && ext->target) {
+ /* Resize is in process and kernel side del,
+ * save values
+ */
+ x = kzalloc(sizeof(struct mtype_resize_ad),
+ GFP_ATOMIC);
+ if (x) {
+ x->ad = IPSET_DEL;
+ memcpy(&x->d, value,
+ sizeof(struct mtype_elem));
+ x->flags = flags;
+ }
+ }
for (; i < n->pos; i++) {
if (!test_bit(i, n->used))
k++;
}
if (n->pos == 0 && k == 0) {
- set->ext_size -= ext_size(n->size, dsize);
+ t->hregion[r].ext_size -= ext_size(n->size, dsize);
rcu_assign_pointer(hbucket(t, key), NULL);
kfree_rcu(n, rcu);
} else if (k >= AHASH_INIT_SIZE) {
@@ -898,7 +1130,8 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
k++;
}
tmp->pos = k;
- set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
+ t->hregion[r].ext_size -=
+ ext_size(AHASH_INIT_SIZE, dsize);
rcu_assign_pointer(hbucket(t, key), tmp);
kfree_rcu(n, rcu);
}
@@ -906,6 +1139,16 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
out:
+ spin_unlock_bh(&t->hregion[r].lock);
+ if (x) {
+ spin_lock_bh(&set->lock);
+ list_add(&x->list, &h->ad);
+ spin_unlock_bh(&set->lock);
+ }
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ pr_debug("Table destroy after resize by del: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
return ret;
}
@@ -991,6 +1234,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
int i, ret = 0;
u32 key, multi = 0;
+ rcu_read_lock_bh();
t = rcu_dereference_bh(h->table);
#ifdef IP_SET_HASH_WITH_NETS
/* If we test an IP address and not a network address,
@@ -1022,6 +1266,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
goto out;
}
out:
+ rcu_read_unlock_bh();
return ret;
}
@@ -1033,23 +1278,14 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
const struct htable *t;
struct nlattr *nested;
size_t memsize;
+ u32 elements = 0;
+ size_t ext_size = 0;
u8 htable_bits;
- /* If any members have expired, set->elements will be wrong
- * mytype_expire function will update it with the right count.
- * we do not hold set->lock here, so grab it first.
- * set->elements can still be incorrect in the case of a huge set,
- * because elements might time out during the listing.
- */
- if (SET_WITH_TIMEOUT(set)) {
- spin_lock_bh(&set->lock);
- mtype_expire(set, h);
- spin_unlock_bh(&set->lock);
- }
-
rcu_read_lock_bh();
- t = rcu_dereference_bh_nfnl(h->table);
- memsize = mtype_ahash_memsize(h, t) + set->ext_size;
+ t = rcu_dereference_bh(h->table);
+ mtype_ext_size(set, &elements, &ext_size);
+ memsize = mtype_ahash_memsize(h, t) + ext_size + set->ext_size;
htable_bits = t->htable_bits;
rcu_read_unlock_bh();
@@ -1071,7 +1307,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
#endif
if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
- nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
+ nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)))
goto nla_put_failure;
if (unlikely(ip_set_put_flags(skb, set)))
goto nla_put_failure;
@@ -1091,15 +1327,15 @@ mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
if (start) {
rcu_read_lock_bh();
- t = rcu_dereference_bh_nfnl(h->table);
+ t = ipset_dereference_bh_nfnl(h->table);
atomic_inc(&t->uref);
cb->args[IPSET_CB_PRIVATE] = (unsigned long)t;
rcu_read_unlock_bh();
} else if (cb->args[IPSET_CB_PRIVATE]) {
t = (struct htable *)cb->args[IPSET_CB_PRIVATE];
if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
- /* Resizing didn't destroy the hash table */
- pr_debug("Table destroy by dump: %p\n", t);
+ pr_debug("Table destroy after resize "
+ " by dump: %p\n", t);
mtype_ahash_destroy(set, t, false);
}
cb->args[IPSET_CB_PRIVATE] = 0;
@@ -1141,8 +1377,7 @@ mtype_list(const struct ip_set *set,
if (!test_bit(i, n->used))
continue;
e = ahash_data(n, i, set->dsize);
- if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, set)))
+ if (SET_ELEM_EXPIRED(set, e))
continue;
pr_debug("list hash %lu hbucket %p i %u, data %p\n",
cb->args[IPSET_CB_ARG0], n, i, e);
@@ -1208,6 +1443,7 @@ static const struct ip_set_type_variant mtype_variant = {
.uref = mtype_uref,
.resize = mtype_resize,
.same_set = mtype_same_set,
+ .region_lock = true,
};
#ifdef IP_SET_EMIT_CREATE
@@ -1226,6 +1462,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
size_t hsize;
struct htype *h;
struct htable *t;
+ u32 i;
pr_debug("Create set %s with family %s\n",
set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
@@ -1294,6 +1531,15 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
kfree(h);
return -ENOMEM;
}
+ t->hregion = ip_set_alloc(ahash_sizeof_regions(hbits));
+ if (!t->hregion) {
+ kfree(t);
+ kfree(h);
+ return -ENOMEM;
+ }
+ h->gc.set = set;
+ for (i = 0; i < ahash_numof_locks(hbits); i++)
+ spin_lock_init(&t->hregion[i].lock);
h->maxelem = maxelem;
#ifdef IP_SET_HASH_WITH_NETMASK
h->netmask = netmask;
@@ -1304,9 +1550,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
get_random_bytes(&h->initval, sizeof(h->initval));
t->htable_bits = hbits;
+ t->maxelem = h->maxelem / ahash_numof_locks(hbits);
RCU_INIT_POINTER(h->table, t);
- h->set = set;
+ INIT_LIST_HEAD(&h->ad);
set->data = h;
#ifndef IP_SET_PROTO_UNDEF
if (set->family == NFPROTO_IPV4) {
@@ -1329,12 +1576,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
#ifndef IP_SET_PROTO_UNDEF
if (set->family == NFPROTO_IPV4)
#endif
- IPSET_TOKEN(HTYPE, 4_gc_init)(set,
- IPSET_TOKEN(HTYPE, 4_gc));
+ IPSET_TOKEN(HTYPE, 4_gc_init)(&h->gc);
#ifndef IP_SET_PROTO_UNDEF
else
- IPSET_TOKEN(HTYPE, 6_gc_init)(set,
- IPSET_TOKEN(HTYPE, 6_gc));
+ IPSET_TOKEN(HTYPE, 6_gc_init)(&h->gc);
#endif
}
pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d130542..1927fc2 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -894,32 +894,175 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
}
}
-/* Resolve race on insertion if this protocol allows this. */
+static void __nf_conntrack_insert_prepare(struct nf_conn *ct)
+{
+ struct nf_conn_tstamp *tstamp;
+
+ atomic_inc(&ct->ct_general.use);
+ ct->status |= IPS_CONFIRMED;
+
+ /* set conntrack timestamp, if enabled. */
+ tstamp = nf_conn_tstamp_find(ct);
+ if (tstamp)
+ tstamp->start = ktime_get_real_ns();
+}
+
+static int __nf_ct_resolve_clash(struct sk_buff *skb,
+ struct nf_conntrack_tuple_hash *h)
+{
+ /* This is the conntrack entry already in hashes that won race. */
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *loser_ct;
+
+ loser_ct = nf_ct_get(skb, &ctinfo);
+
+ if (nf_ct_is_dying(ct))
+ return NF_DROP;
+
+ if (!atomic_inc_not_zero(&ct->ct_general.use))
+ return NF_DROP;
+
+ if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
+ nf_ct_match(ct, loser_ct)) {
+ struct net *net = nf_ct_net(ct);
+
+ nf_ct_acct_merge(ct, ctinfo, loser_ct);
+ nf_ct_add_to_dying_list(loser_ct);
+ nf_conntrack_put(&loser_ct->ct_general);
+ nf_ct_set(skb, ct, ctinfo);
+
+ NF_CT_STAT_INC(net, insert_failed);
+ return NF_ACCEPT;
+ }
+
+ nf_ct_put(ct);
+ return NF_DROP;
+}
+
+/**
+ * nf_ct_resolve_clash_harder - attempt to insert clashing conntrack entry
+ *
+ * @skb: skb that causes the collision
+ * @repl_idx: hash slot for reply direction
+ *
+ * Called when origin or reply direction had a clash.
+ * The skb can be handled without packet drop provided the reply direction
+ * is unique or there the existing entry has the identical tuple in both
+ * directions.
+ *
+ * Caller must hold conntrack table locks to prevent concurrent updates.
+ *
+ * Returns NF_DROP if the clash could not be handled.
+ */
+static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
+{
+ struct nf_conn *loser_ct = (struct nf_conn *)skb_nfct(skb);
+ const struct nf_conntrack_zone *zone;
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_node *n;
+ struct net *net;
+
+ zone = nf_ct_zone(loser_ct);
+ net = nf_ct_net(loser_ct);
+
+ /* Reply direction must never result in a clash, unless both origin
+ * and reply tuples are identical.
+ */
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[repl_idx], hnnode) {
+ if (nf_ct_key_equal(h,
+ &loser_ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ zone, net))
+ return __nf_ct_resolve_clash(skb, h);
+ }
+
+ /* We want the clashing entry to go away real soon: 1 second timeout. */
+ loser_ct->timeout = nfct_time_stamp + HZ;
+
+ /* IPS_NAT_CLASH removes the entry automatically on the first
+ * reply. Also prevents UDP tracker from moving the entry to
+ * ASSURED state, i.e. the entry can always be evicted under
+ * pressure.
+ */
+ loser_ct->status |= IPS_FIXED_TIMEOUT | IPS_NAT_CLASH;
+
+ __nf_conntrack_insert_prepare(loser_ct);
+
+ /* fake add for ORIGINAL dir: we want lookups to only find the entry
+ * already in the table. This also hides the clashing entry from
+ * ctnetlink iteration, i.e. conntrack -L won't show them.
+ */
+ hlist_nulls_add_fake(&loser_ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+
+ hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
+ &nf_conntrack_hash[repl_idx]);
+ return NF_ACCEPT;
+}
+
+/**
+ * nf_ct_resolve_clash - attempt to handle clash without packet drop
+ *
+ * @skb: skb that causes the clash
+ * @h: tuplehash of the clashing entry already in table
+ * @hash_reply: hash slot for reply direction
+ *
+ * A conntrack entry can be inserted to the connection tracking table
+ * if there is no existing entry with an identical tuple.
+ *
+ * If there is one, @skb (and the assocated, unconfirmed conntrack) has
+ * to be dropped. In case @skb is retransmitted, next conntrack lookup
+ * will find the already-existing entry.
+ *
+ * The major problem with such packet drop is the extra delay added by
+ * the packet loss -- it will take some time for a retransmit to occur
+ * (or the sender to time out when waiting for a reply).
+ *
+ * This function attempts to handle the situation without packet drop.
+ *
+ * If @skb has no NAT transformation or if the colliding entries are
+ * exactly the same, only the to-be-confirmed conntrack entry is discarded
+ * and @skb is associated with the conntrack entry already in the table.
+ *
+ * Failing that, the new, unconfirmed conntrack is still added to the table
+ * provided that the collision only occurs in the ORIGINAL direction.
+ * The new entry will be added after the existing one in the hash list,
+ * so packets in the ORIGINAL direction will continue to match the existing
+ * entry. The new entry will also have a fixed timeout so it expires --
+ * due to the collision, it will not see bidirectional traffic.
+ *
+ * Returns NF_DROP if the clash could not be resolved.
+ */
static __cold noinline int
-nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
- enum ip_conntrack_info ctinfo,
- struct nf_conntrack_tuple_hash *h)
+nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h,
+ u32 reply_hash)
{
/* This is the conntrack entry already in hashes that won race. */
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
const struct nf_conntrack_l4proto *l4proto;
- enum ip_conntrack_info oldinfo;
- struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *loser_ct;
+ struct net *net;
+ int ret;
+
+ loser_ct = nf_ct_get(skb, &ctinfo);
+ net = nf_ct_net(loser_ct);
l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
- if (l4proto->allow_clash &&
- !nf_ct_is_dying(ct) &&
- atomic_inc_not_zero(&ct->ct_general.use)) {
- if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
- nf_ct_match(ct, loser_ct)) {
- nf_ct_acct_merge(ct, ctinfo, loser_ct);
- nf_conntrack_put(&loser_ct->ct_general);
- nf_ct_set(skb, ct, oldinfo);
- return NF_ACCEPT;
- }
- nf_ct_put(ct);
- }
+ if (!l4proto->allow_clash)
+ goto drop;
+
+ ret = __nf_ct_resolve_clash(skb, h);
+ if (ret == NF_ACCEPT)
+ return ret;
+
+ ret = nf_ct_resolve_clash_harder(skb, reply_hash);
+ if (ret == NF_ACCEPT)
+ return ret;
+
+drop:
+ nf_ct_add_to_dying_list(loser_ct);
NF_CT_STAT_INC(net, drop);
+ NF_CT_STAT_INC(net, insert_failed);
return NF_DROP;
}
@@ -932,7 +1075,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
struct nf_conn_help *help;
- struct nf_conn_tstamp *tstamp;
struct hlist_nulls_node *n;
enum ip_conntrack_info ctinfo;
struct net *net;
@@ -989,6 +1131,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
if (unlikely(nf_ct_is_dying(ct))) {
nf_ct_add_to_dying_list(ct);
+ NF_CT_STAT_INC(net, insert_failed);
goto dying;
}
@@ -1009,13 +1152,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
setting time, otherwise we'd get timer wrap in
weird delay cases. */
ct->timeout += nfct_time_stamp;
- atomic_inc(&ct->ct_general.use);
- ct->status |= IPS_CONFIRMED;
- /* set conntrack timestamp, if enabled. */
- tstamp = nf_conn_tstamp_find(ct);
- if (tstamp)
- tstamp->start = ktime_get_real_ns();
+ __nf_conntrack_insert_prepare(ct);
/* Since the lookup is lockless, hash insertion must be done after
* starting the timer and setting the CONFIRMED bit. The RCU barriers
@@ -1035,11 +1173,9 @@ __nf_conntrack_confirm(struct sk_buff *skb)
return NF_ACCEPT;
out:
- nf_ct_add_to_dying_list(ct);
- ret = nf_ct_resolve_clash(net, skb, ctinfo, h);
+ ret = nf_ct_resolve_clash(skb, h, reply_hash);
dying:
nf_conntrack_double_unlock(hash, reply_hash);
- NF_CT_STAT_INC(net, insert_failed);
local_bh_enable();
return ret;
}
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 7365b43..760ca24 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -81,6 +81,18 @@ static bool udp_error(struct sk_buff *skb,
return false;
}
+static void nf_conntrack_udp_refresh_unreplied(struct nf_conn *ct,
+ struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ u32 extra_jiffies)
+{
+ if (unlikely(ctinfo == IP_CT_ESTABLISHED_REPLY &&
+ ct->status & IPS_NAT_CLASH))
+ nf_ct_kill(ct);
+ else
+ nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies);
+}
+
/* Returns verdict for packet, and may modify conntracktype */
int nf_conntrack_udp_packet(struct nf_conn *ct,
struct sk_buff *skb,
@@ -116,8 +128,8 @@ int nf_conntrack_udp_packet(struct nf_conn *ct,
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_ASSURED, ct);
} else {
- nf_ct_refresh_acct(ct, ctinfo, skb,
- timeouts[UDP_CT_UNREPLIED]);
+ nf_conntrack_udp_refresh_unreplied(ct, skb, ctinfo,
+ timeouts[UDP_CT_UNREPLIED]);
}
return NF_ACCEPT;
}
@@ -198,8 +210,8 @@ int nf_conntrack_udplite_packet(struct nf_conn *ct,
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_ASSURED, ct);
} else {
- nf_ct_refresh_acct(ct, ctinfo, skb,
- timeouts[UDP_CT_UNREPLIED]);
+ nf_conntrack_udp_refresh_unreplied(ct, skb, ctinfo,
+ timeouts[UDP_CT_UNREPLIED]);
}
return NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 410809c..4912069 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -411,7 +411,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
*pos = cpu + 1;
return per_cpu_ptr(net->ct.stat, cpu);
}
-
+ (*pos)++;
return NULL;
}
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 8af28e1..70ebeba 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -554,6 +554,9 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
nf_flow_table_offload_flush(flow_table);
+ if (nf_flowtable_hw_offload(flow_table))
+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step,
+ flow_table);
rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 9e563fd..ba775ae 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -146,11 +146,13 @@ static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
(nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
+ nf_flow_snat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
return -1;
+
+ iph = ip_hdr(skb);
if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
(nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
+ nf_flow_dnat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
return -1;
return 0;
@@ -189,6 +191,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
return -1;
+ iph = ip_hdr(skb);
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v4.s_addr = iph->saddr;
@@ -426,11 +429,13 @@ static int nf_flow_nat_ipv6(const struct flow_offload *flow,
if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
(nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+ nf_flow_snat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
return -1;
+
+ ip6h = ipv6_hdr(skb);
if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
(nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+ nf_flow_dnat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
return -1;
return 0;
@@ -459,6 +464,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
return -1;
+ ip6h = ipv6_hdr(skb);
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v6 = ip6h->saddr;
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 83e1db3..f2c22c6 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -87,6 +87,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
default:
return -EOPNOTSUPP;
}
+ mask->control.addr_type = 0xffff;
match->dissector.used_keys |= BIT(key->control.addr_type);
mask->basic.n_proto = 0xffff;
@@ -847,9 +848,6 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
{
int err;
- if (!nf_flowtable_hw_offload(flowtable))
- return 0;
-
if (!dev->netdev_ops->ndo_setup_tc)
return -EOPNOTSUPP;
@@ -876,6 +874,9 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
struct flow_block_offload bo;
int err;
+ if (!nf_flowtable_hw_offload(flowtable))
+ return 0;
+
err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, &extack);
if (err < 0)
return err;
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index b0930d4a..b9cbe1e 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -267,7 +267,7 @@ static void *synproxy_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
*pos = cpu + 1;
return per_cpu_ptr(snet->stats, cpu);
}
-
+ (*pos)++;
return NULL;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d1318bd..d11f1a7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1405,6 +1405,11 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
lockdep_commit_lock_is_held(net));
if (nft_dump_stats(skb, stats))
goto nla_put_failure;
+
+ if ((chain->flags & NFT_CHAIN_HW_OFFLOAD) &&
+ nla_put_be32(skb, NFTA_CHAIN_FLAGS,
+ htonl(NFT_CHAIN_HW_OFFLOAD)))
+ goto nla_put_failure;
}
if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use)))
@@ -5077,6 +5082,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
err = -EBUSY;
else if (!(nlmsg_flags & NLM_F_EXCL))
err = 0;
+ } else if (err == -ENOTEMPTY) {
+ /* ENOTEMPTY reports overlapping between this element
+ * and an existing one.
+ */
+ err = -EEXIST;
}
goto err_element_clash;
}
@@ -6300,8 +6310,13 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
goto err4;
err = nft_register_flowtable_net_hooks(ctx.net, table, flowtable);
- if (err < 0)
+ if (err < 0) {
+ list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
goto err4;
+ }
err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
if (err < 0)
@@ -7378,13 +7393,8 @@ static void nf_tables_module_autoload(struct net *net)
list_splice_init(&net->nft.module_list, &module_list);
mutex_unlock(&net->nft.commit_mutex);
list_for_each_entry_safe(req, next, &module_list, list) {
- if (req->done) {
- list_del(&req->list);
- kfree(req);
- } else {
- request_module("%s", req->module);
- req->done = true;
- }
+ request_module("%s", req->module);
+ req->done = true;
}
mutex_lock(&net->nft.commit_mutex);
list_splice(&module_list, &net->nft.module_list);
@@ -8167,6 +8177,7 @@ static void __net_exit nf_tables_exit_net(struct net *net)
__nft_release_tables(net);
mutex_unlock(&net->nft.commit_mutex);
WARN_ON_ONCE(!list_empty(&net->nft.tables));
+ WARN_ON_ONCE(!list_empty(&net->nft.module_list));
}
static struct pernet_operations nf_tables_net_ops = {
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index de3a959..a5f294a 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -742,6 +742,8 @@ static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = {
[NFCTH_NAME] = { .type = NLA_NUL_STRING,
.len = NF_CT_HELPER_NAME_LEN-1 },
[NFCTH_QUEUE_NUM] = { .type = NLA_U32, },
+ [NFCTH_PRIV_DATA_LEN] = { .type = NLA_U32, },
+ [NFCTH_STATUS] = { .type = NLA_U32, },
};
static const struct nfnl_callback nfnl_cthelper_cb[NFNL_MSG_CTHELPER_MAX] = {
diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c
index ff9ac8a..eac4a90 100644
--- a/net/netfilter/nft_chain_nat.c
+++ b/net/netfilter/nft_chain_nat.c
@@ -89,6 +89,7 @@ static const struct nft_chain_type nft_chain_nat_inet = {
.name = "nat",
.type = NFT_CHAIN_T_NAT,
.family = NFPROTO_INET,
+ .owner = THIS_MODULE,
.hook_mask = (1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN) |
(1 << NF_INET_LOCAL_OUT) |
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index aba11c2..3087e23 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -28,6 +28,9 @@ static void nft_fwd_netdev_eval(const struct nft_expr *expr,
struct nft_fwd_netdev *priv = nft_expr_priv(expr);
int oif = regs->data[priv->sreg_dev];
+ /* This is used by ifb only. */
+ skb_set_redirected(pkt->skb, true);
+
nf_fwd_netdev_egress(pkt, oif);
regs->verdict.code = NF_STOLEN;
}
@@ -190,6 +193,13 @@ static int nft_fwd_neigh_dump(struct sk_buff *skb, const struct nft_expr *expr)
return -1;
}
+static int nft_fwd_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS));
+}
+
static struct nft_expr_type nft_fwd_netdev_type;
static const struct nft_expr_ops nft_fwd_neigh_netdev_ops = {
.type = &nft_fwd_netdev_type,
@@ -197,6 +207,7 @@ static const struct nft_expr_ops nft_fwd_neigh_netdev_ops = {
.eval = nft_fwd_neigh_eval,
.init = nft_fwd_neigh_init,
.dump = nft_fwd_neigh_dump,
+ .validate = nft_fwd_validate,
};
static const struct nft_expr_ops nft_fwd_netdev_ops = {
@@ -205,6 +216,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = {
.eval = nft_fwd_netdev_eval,
.init = nft_fwd_netdev_init,
.dump = nft_fwd_netdev_dump,
+ .validate = nft_fwd_validate,
.offload = nft_fwd_netdev_offload,
};
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 1993af3..a7de3a5 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -129,6 +129,7 @@ static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
[NFTA_PAYLOAD_LEN] = { .type = NLA_U32 },
[NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 },
[NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_CSUM_FLAGS] = { .type = NLA_U32 },
};
static int nft_payload_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index f0cb1e1..ef7e8ad 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -203,7 +203,7 @@
* ::
*
* rule indices in last field: 0 1
- * map to elements: 0x42 0x66
+ * map to elements: 0x66 0x42
*
*
* Matching
@@ -298,7 +298,7 @@
* ::
*
* rule indices in last field: 0 1
- * map to elements: 0x42 0x66
+ * map to elements: 0x66 0x42
*
* the matching element is at 0x42.
*
@@ -503,7 +503,7 @@ static int pipapo_refill(unsigned long *map, int len, int rules,
return -1;
}
- if (unlikely(match_only)) {
+ if (match_only) {
bitmap_clear(map, i, 1);
return i;
}
@@ -1098,21 +1098,41 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
struct nft_pipapo_field *f;
int i, bsize_max, err = 0;
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END))
+ end = (const u8 *)nft_set_ext_key_end(ext)->data;
+ else
+ end = start;
+
dup = pipapo_get(net, set, start, genmask);
- if (PTR_ERR(dup) == -ENOENT) {
- if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END)) {
- end = (const u8 *)nft_set_ext_key_end(ext)->data;
- dup = pipapo_get(net, set, end, nft_genmask_next(net));
- } else {
- end = start;
+ if (!IS_ERR(dup)) {
+ /* Check if we already have the same exact entry */
+ const struct nft_data *dup_key, *dup_end;
+
+ dup_key = nft_set_ext_key(&dup->ext);
+ if (nft_set_ext_exists(&dup->ext, NFT_SET_EXT_KEY_END))
+ dup_end = nft_set_ext_key_end(&dup->ext);
+ else
+ dup_end = dup_key;
+
+ if (!memcmp(start, dup_key->data, sizeof(*dup_key->data)) &&
+ !memcmp(end, dup_end->data, sizeof(*dup_end->data))) {
+ *ext2 = &dup->ext;
+ return -EEXIST;
}
+
+ return -ENOTEMPTY;
+ }
+
+ if (PTR_ERR(dup) == -ENOENT) {
+ /* Look for partially overlapping entries */
+ dup = pipapo_get(net, set, end, nft_genmask_next(net));
}
if (PTR_ERR(dup) != -ENOENT) {
if (IS_ERR(dup))
return PTR_ERR(dup);
*ext2 = &dup->ext;
- return -EEXIST;
+ return -ENOTEMPTY;
}
/* Validate */
@@ -1766,11 +1786,13 @@ static bool pipapo_match_field(struct nft_pipapo_field *f,
static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem)
{
- const u8 *data = (const u8 *)elem->key.val.data;
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m = priv->clone;
+ struct nft_pipapo_elem *e = elem->priv;
int rules_f0, first_rule = 0;
- struct nft_pipapo_elem *e;
+ const u8 *data;
+
+ data = (const u8 *)nft_set_ext_key(&e->ext);
e = pipapo_get(net, set, data, 0);
if (IS_ERR(e))
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 5000b93..8617fc1 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -33,6 +33,11 @@ static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
(*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
}
+static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe)
+{
+ return !nft_rbtree_interval_end(rbe);
+}
+
static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
const struct nft_rbtree_elem *interval)
{
@@ -64,7 +69,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
if (interval &&
nft_rbtree_equal(set, this, interval) &&
nft_rbtree_interval_end(rbe) &&
- !nft_rbtree_interval_end(interval))
+ nft_rbtree_interval_start(interval))
continue;
interval = rbe;
} else if (d > 0)
@@ -89,7 +94,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
- !nft_rbtree_interval_end(interval)) {
+ nft_rbtree_interval_start(interval)) {
*ext = &interval->ext;
return true;
}
@@ -208,8 +213,43 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
u8 genmask = nft_genmask_next(net);
struct nft_rbtree_elem *rbe;
struct rb_node *parent, **p;
+ bool overlap = false;
int d;
+ /* Detect overlaps as we descend the tree. Set the flag in these cases:
+ *
+ * a1. |__ _ _? >|__ _ _ (insert start after existing start)
+ * a2. _ _ __>| ?_ _ __| (insert end before existing end)
+ * a3. _ _ ___| ?_ _ _>| (insert end after existing end)
+ * a4. >|__ _ _ _ _ __| (insert start before existing end)
+ *
+ * and clear it later on, as we eventually reach the points indicated by
+ * '?' above, in the cases described below. We'll always meet these
+ * later, locally, due to tree ordering, and overlaps for the intervals
+ * that are the closest together are always evaluated last.
+ *
+ * b1. |__ _ _! >|__ _ _ (insert start after existing end)
+ * b2. _ _ __>| !_ _ __| (insert end before existing start)
+ * b3. !_____>| (insert end after existing start)
+ *
+ * Case a4. resolves to b1.:
+ * - if the inserted start element is the leftmost, because the '0'
+ * element in the tree serves as end element
+ * - otherwise, if an existing end is found. Note that end elements are
+ * always inserted after corresponding start elements.
+ *
+ * For a new, rightmost pair of elements, we'll hit cases b1. and b3.,
+ * in that order.
+ *
+ * The flag is also cleared in two special cases:
+ *
+ * b4. |__ _ _!|<_ _ _ (insert start right before existing end)
+ * b5. |__ _ >|!__ _ _ (insert end right after existing start)
+ *
+ * which always happen as last step and imply that no further
+ * overlapping is possible.
+ */
+
parent = NULL;
p = &priv->root.rb_node;
while (*p != NULL) {
@@ -218,17 +258,42 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
d = memcmp(nft_set_ext_key(&rbe->ext),
nft_set_ext_key(&new->ext),
set->klen);
- if (d < 0)
+ if (d < 0) {
p = &parent->rb_left;
- else if (d > 0)
+
+ if (nft_rbtree_interval_start(new)) {
+ overlap = nft_rbtree_interval_start(rbe) &&
+ nft_set_elem_active(&rbe->ext,
+ genmask);
+ } else {
+ overlap = nft_rbtree_interval_end(rbe) &&
+ nft_set_elem_active(&rbe->ext,
+ genmask);
+ }
+ } else if (d > 0) {
p = &parent->rb_right;
- else {
+
+ if (nft_rbtree_interval_end(new)) {
+ overlap = nft_rbtree_interval_end(rbe) &&
+ nft_set_elem_active(&rbe->ext,
+ genmask);
+ } else if (nft_rbtree_interval_end(rbe) &&
+ nft_set_elem_active(&rbe->ext, genmask)) {
+ overlap = true;
+ }
+ } else {
if (nft_rbtree_interval_end(rbe) &&
- !nft_rbtree_interval_end(new)) {
+ nft_rbtree_interval_start(new)) {
p = &parent->rb_left;
- } else if (!nft_rbtree_interval_end(rbe) &&
+
+ if (nft_set_elem_active(&rbe->ext, genmask))
+ overlap = false;
+ } else if (nft_rbtree_interval_start(rbe) &&
nft_rbtree_interval_end(new)) {
p = &parent->rb_right;
+
+ if (nft_set_elem_active(&rbe->ext, genmask))
+ overlap = false;
} else if (nft_set_elem_active(&rbe->ext, genmask)) {
*ext = &rbe->ext;
return -EEXIST;
@@ -237,6 +302,10 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
}
}
}
+
+ if (overlap)
+ return -ENOTEMPTY;
+
rb_link_node_rcu(&new->node, parent, p);
rb_insert_color(&new->node, &priv->root);
return 0;
@@ -317,10 +386,10 @@ static void *nft_rbtree_deactivate(const struct net *net,
parent = parent->rb_right;
else {
if (nft_rbtree_interval_end(rbe) &&
- !nft_rbtree_interval_end(this)) {
+ nft_rbtree_interval_start(this)) {
parent = parent->rb_left;
continue;
- } else if (!nft_rbtree_interval_end(rbe) &&
+ } else if (nft_rbtree_interval_start(rbe) &&
nft_rbtree_interval_end(this)) {
parent = parent->rb_right;
continue;
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 4c3f2e24..764e886 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -339,6 +339,8 @@ static const struct nla_policy nft_tunnel_key_policy[NFTA_TUNNEL_KEY_MAX + 1] =
[NFTA_TUNNEL_KEY_FLAGS] = { .type = NLA_U32, },
[NFTA_TUNNEL_KEY_TOS] = { .type = NLA_U8, },
[NFTA_TUNNEL_KEY_TTL] = { .type = NLA_U8, },
+ [NFTA_TUNNEL_KEY_SPORT] = { .type = NLA_U16, },
+ [NFTA_TUNNEL_KEY_DPORT] = { .type = NLA_U16, },
[NFTA_TUNNEL_KEY_OPTS] = { .type = NLA_NESTED, },
};
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index e27c6c5..cd2b034 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1551,6 +1551,9 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file));
struct nf_mttg_trav *trav = seq->private;
+ if (ppos != NULL)
+ ++(*ppos);
+
switch (trav->class) {
case MTTG_TRAV_INIT:
trav->class = MTTG_TRAV_NFP_UNSPEC;
@@ -1576,9 +1579,6 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
default:
return NULL;
}
-
- if (ppos != NULL)
- ++*ppos;
return trav;
}
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index bccd47c..8c835ad 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -36,6 +36,7 @@
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/mutex.h>
#include <linux/kernel.h>
+#include <linux/refcount.h>
#include <uapi/linux/netfilter/xt_hashlimit.h>
#define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \
@@ -114,7 +115,7 @@ struct dsthash_ent {
struct xt_hashlimit_htable {
struct hlist_node node; /* global list of all htables */
- int use;
+ refcount_t use;
u_int8_t family;
bool rnd_initialized;
@@ -315,7 +316,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
for (i = 0; i < hinfo->cfg.size; i++)
INIT_HLIST_HEAD(&hinfo->hash[i]);
- hinfo->use = 1;
+ refcount_set(&hinfo->use, 1);
hinfo->count = 0;
hinfo->family = family;
hinfo->rnd_initialized = false;
@@ -401,15 +402,6 @@ static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo)
remove_proc_entry(hinfo->name, parent);
}
-static void htable_destroy(struct xt_hashlimit_htable *hinfo)
-{
- cancel_delayed_work_sync(&hinfo->gc_work);
- htable_remove_proc_entry(hinfo);
- htable_selective_cleanup(hinfo, true);
- kfree(hinfo->name);
- vfree(hinfo);
-}
-
static struct xt_hashlimit_htable *htable_find_get(struct net *net,
const char *name,
u_int8_t family)
@@ -420,7 +412,7 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net,
hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) {
if (!strcmp(name, hinfo->name) &&
hinfo->family == family) {
- hinfo->use++;
+ refcount_inc(&hinfo->use);
return hinfo;
}
}
@@ -429,12 +421,16 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net,
static void htable_put(struct xt_hashlimit_htable *hinfo)
{
- mutex_lock(&hashlimit_mutex);
- if (--hinfo->use == 0) {
+ if (refcount_dec_and_mutex_lock(&hinfo->use, &hashlimit_mutex)) {
hlist_del(&hinfo->node);
- htable_destroy(hinfo);
+ htable_remove_proc_entry(hinfo);
+ mutex_unlock(&hashlimit_mutex);
+
+ cancel_delayed_work_sync(&hinfo->gc_work);
+ htable_selective_cleanup(hinfo, true);
+ kfree(hinfo->name);
+ vfree(hinfo);
}
- mutex_unlock(&hashlimit_mutex);
}
/* The algorithm used is the Simple Token Bucket Filter (TBF)
@@ -837,6 +833,8 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3);
}
+#define HASHLIMIT_MAX_SIZE 1048576
+
static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
struct xt_hashlimit_htable **hinfo,
struct hashlimit_cfg3 *cfg,
@@ -847,6 +845,14 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
if (cfg->gc_interval == 0 || cfg->expire == 0)
return -EINVAL;
+ if (cfg->size > HASHLIMIT_MAX_SIZE) {
+ cfg->size = HASHLIMIT_MAX_SIZE;
+ pr_info_ratelimited("size too large, truncated to %u\n", cfg->size);
+ }
+ if (cfg->max > HASHLIMIT_MAX_SIZE) {
+ cfg->max = HASHLIMIT_MAX_SIZE;
+ pr_info_ratelimited("max too large, truncated to %u\n", cfg->max);
+ }
if (par->family == NFPROTO_IPV4) {
if (cfg->srcmask > 32 || cfg->dstmask > 32)
return -EINVAL;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 0a97080..225a7ab 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -492,12 +492,12 @@ static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos)
const struct recent_entry *e = v;
const struct list_head *head = e->list.next;
+ (*pos)++;
while (head == &t->iphash[st->bucket]) {
if (++st->bucket >= ip_list_hash_size)
return NULL;
head = t->iphash[st->bucket].next;
}
- (*pos)++;
return list_entry(head, struct recent_entry, list);
}
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index f5d34da..a1f2320 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -143,7 +143,8 @@ static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain,
if (domain != NULL) {
bkt = netlbl_domhsh_hash(domain);
bkt_list = &netlbl_domhsh_rcu_deref(netlbl_domhsh)->tbl[bkt];
- list_for_each_entry_rcu(iter, bkt_list, list)
+ list_for_each_entry_rcu(iter, bkt_list, list,
+ lockdep_is_held(&netlbl_domhsh_lock))
if (iter->valid &&
netlbl_family_match(iter->family, family) &&
strcmp(iter->domain, domain) == 0)
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index d2e4ab8..77bb1bb 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -207,7 +207,8 @@ static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex)
bkt = netlbl_unlhsh_hash(ifindex);
bkt_list = &netlbl_unlhsh_rcu_deref(netlbl_unlhsh)->tbl[bkt];
- list_for_each_entry_rcu(iter, bkt_list, list)
+ list_for_each_entry_rcu(iter, bkt_list, list,
+ lockdep_is_held(&netlbl_unlhsh_lock))
if (iter->valid && iter->ifindex == ifindex)
return iter;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4e31721..2f23479 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1014,7 +1014,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
if (nlk->netlink_bind && groups) {
int group;
- for (group = 0; group < nlk->ngroups; group++) {
+ /* nl_groups is a u32, so cap the maximum groups we can bind */
+ for (group = 0; group < BITS_PER_TYPE(u32); group++) {
if (!test_bit(group, &groups))
continue;
err = nlk->netlink_bind(net, group + 1);
@@ -1033,7 +1034,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
netlink_insert(sk, nladdr->nl_pid) :
netlink_autobind(sock);
if (err) {
- netlink_undo_bind(nlk->ngroups, groups, sk);
+ netlink_undo_bind(BITS_PER_TYPE(u32), groups, sk);
goto unlock;
}
}
@@ -2391,19 +2392,14 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
if (nlk_has_extack && extack && extack->_msg)
tlvlen += nla_total_size(strlen(extack->_msg) + 1);
- if (err) {
- if (!(nlk->flags & NETLINK_F_CAP_ACK))
- payload += nlmsg_len(nlh);
- else
- flags |= NLM_F_CAPPED;
- if (nlk_has_extack && extack && extack->bad_attr)
- tlvlen += nla_total_size(sizeof(u32));
- } else {
+ if (err && !(nlk->flags & NETLINK_F_CAP_ACK))
+ payload += nlmsg_len(nlh);
+ else
flags |= NLM_F_CAPPED;
-
- if (nlk_has_extack && extack && extack->cookie_len)
- tlvlen += nla_total_size(extack->cookie_len);
- }
+ if (err && nlk_has_extack && extack && extack->bad_attr)
+ tlvlen += nla_total_size(sizeof(u32));
+ if (nlk_has_extack && extack && extack->cookie_len)
+ tlvlen += nla_total_size(extack->cookie_len);
if (tlvlen)
flags |= NLM_F_ACK_TLVS;
@@ -2426,20 +2422,16 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG,
extack->_msg));
}
- if (err) {
- if (extack->bad_attr &&
- !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
- (u8 *)extack->bad_attr >= in_skb->data +
- in_skb->len))
- WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
- (u8 *)extack->bad_attr -
- in_skb->data));
- } else {
- if (extack->cookie_len)
- WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
- extack->cookie_len,
- extack->cookie));
- }
+ if (err && extack->bad_attr &&
+ !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
+ (u8 *)extack->bad_attr >= in_skb->data +
+ in_skb->len))
+ WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
+ (u8 *)extack->bad_attr -
+ (u8 *)nlh));
+ if (extack->cookie_len)
+ WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
+ extack->cookie_len, extack->cookie));
}
nlmsg_end(skb, rep);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 0522b2b..9f357aa 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -497,8 +497,9 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family,
err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
family->policy, validate, extack);
- if (err && parallel) {
- kfree(attrbuf);
+ if (err) {
+ if (parallel)
+ kfree(attrbuf);
return ERR_PTR(err);
}
return attrbuf;
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 6f1b096..43811b5 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -181,13 +181,20 @@ void nfc_hci_resp_received(struct nfc_hci_dev *hdev, u8 result,
void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
struct sk_buff *skb)
{
- u8 gate = hdev->pipes[pipe].gate;
u8 status = NFC_HCI_ANY_OK;
struct hci_create_pipe_resp *create_info;
struct hci_delete_pipe_noti *delete_info;
struct hci_all_pipe_cleared_noti *cleared_info;
+ u8 gate;
- pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd);
+ pr_debug("from pipe %x cmd %x\n", pipe, cmd);
+
+ if (pipe >= NFC_HCI_MAX_PIPES) {
+ status = NFC_HCI_ANY_E_NOK;
+ goto exit;
+ }
+
+ gate = hdev->pipes[pipe].gate;
switch (cmd) {
case NFC_HCI_ADM_NOTIFY_PIPE_CREATED:
@@ -375,8 +382,14 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event,
struct sk_buff *skb)
{
int r = 0;
- u8 gate = hdev->pipes[pipe].gate;
+ u8 gate;
+ if (pipe >= NFC_HCI_MAX_PIPES) {
+ pr_err("Discarded event %x to invalid pipe %x\n", event, pipe);
+ goto exit;
+ }
+
+ gate = hdev->pipes[pipe].gate;
if (gate == NFC_HCI_INVALID_GATE) {
pr_err("Discarded event %x to unopened pipe %x\n", event, pipe);
goto exit;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index eee0ddd..e894254 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -32,6 +32,7 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
[NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING,
.len = NFC_DEVICE_NAME_MAXSIZE },
[NFC_ATTR_PROTOCOLS] = { .type = NLA_U32 },
+ [NFC_ATTR_TARGET_INDEX] = { .type = NLA_U32 },
[NFC_ATTR_COMM_MODE] = { .type = NLA_U8 },
[NFC_ATTR_RF_MODE] = { .type = NLA_U8 },
[NFC_ATTR_DEVICE_POWERED] = { .type = NLA_U8 },
@@ -43,7 +44,10 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
[NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED },
[NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING,
.len = NFC_FIRMWARE_NAME_MAXSIZE },
+ [NFC_ATTR_SE_INDEX] = { .type = NLA_U32 },
[NFC_ATTR_SE_APDU] = { .type = NLA_BINARY },
+ [NFC_ATTR_VENDOR_ID] = { .type = NLA_U32 },
+ [NFC_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 },
[NFC_ATTR_VENDOR_DATA] = { .type = NLA_BINARY },
};
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 659c2a7..07a7dd18 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -179,7 +179,8 @@ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
struct hlist_head *head;
head = vport_hash_bucket(dp, port_no);
- hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
+ hlist_for_each_entry_rcu(vport, head, dp_hash_node,
+ lockdep_ovsl_is_held()) {
if (vport->port_no == port_no)
return vport;
}
@@ -644,6 +645,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
+ [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
};
static const struct genl_ops dp_packet_genl_ops[] = {
@@ -2042,7 +2044,8 @@ static unsigned int ovs_get_max_headroom(struct datapath *dp)
int i;
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
- hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
+ hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
+ lockdep_ovsl_is_held()) {
dev = vport->dev;
dev_headroom = netdev_get_fwd_headroom(dev);
if (dev_headroom > max_headroom)
@@ -2061,7 +2064,8 @@ static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
dp->max_headroom = new_headroom;
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
- hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
+ hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
+ lockdep_ovsl_is_held())
netdev_set_rx_headroom(vport->dev, new_headroom);
}
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 7da4230..288122e 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2708,10 +2708,6 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
switch (key_type) {
- const struct ovs_key_ipv4 *ipv4_key;
- const struct ovs_key_ipv6 *ipv6_key;
- int err;
-
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_SKB_MARK:
case OVS_KEY_ATTR_CT_MARK:
@@ -2723,7 +2719,9 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
break;
- case OVS_KEY_ATTR_TUNNEL:
+ case OVS_KEY_ATTR_TUNNEL: {
+ int err;
+
if (masked)
return -EINVAL; /* Masked tunnel set not supported. */
@@ -2732,8 +2730,10 @@ static int validate_set(const struct nlattr *a,
if (err)
return err;
break;
+ }
+ case OVS_KEY_ATTR_IPV4: {
+ const struct ovs_key_ipv4 *ipv4_key;
- case OVS_KEY_ATTR_IPV4:
if (eth_type != htons(ETH_P_IP))
return -EINVAL;
@@ -2753,8 +2753,10 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
}
break;
+ }
+ case OVS_KEY_ATTR_IPV6: {
+ const struct ovs_key_ipv6 *ipv6_key;
- case OVS_KEY_ATTR_IPV6:
if (eth_type != htons(ETH_P_IPV6))
return -EINVAL;
@@ -2781,7 +2783,7 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
break;
-
+ }
case OVS_KEY_ATTR_TCP:
if ((eth_type != htons(ETH_P_IP) &&
eth_type != htons(ETH_P_IPV6)) ||
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 5904e93..fd8a01c 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -585,7 +585,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
head = find_bucket(ti, hash);
(*n_mask_hit)++;
- hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
+ hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver],
+ lockdep_ovsl_is_held()) {
if (flow->mask == mask && flow->flow_table.hash == hash &&
flow_cmp_masked_key(flow, &masked_key, &mask->range))
return flow;
@@ -769,7 +770,8 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
hash = ufid_hash(ufid);
head = find_bucket(ti, hash);
- hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) {
+ hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver],
+ lockdep_ovsl_is_held()) {
if (flow->ufid_table.hash == hash &&
ovs_flow_cmp_ufid(flow, ufid))
return flow;
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 3323b79..5010d1d 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -61,7 +61,8 @@ static struct dp_meter *lookup_meter(const struct datapath *dp,
struct hlist_head *head;
head = meter_hash_bucket(dp, meter_id);
- hlist_for_each_entry_rcu(meter, head, dp_hash_node) {
+ hlist_for_each_entry_rcu(meter, head, dp_hash_node,
+ lockdep_ovsl_is_held()) {
if (meter->id == meter_id)
return meter;
}
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 5da9392..47febb4 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -96,7 +96,8 @@ struct vport *ovs_vport_locate(const struct net *net, const char *name)
struct hlist_head *bucket = hash_bucket(net, name);
struct vport *vport;
- hlist_for_each_entry_rcu(vport, bucket, hash_node)
+ hlist_for_each_entry_rcu(vport, bucket, hash_node,
+ lockdep_ovsl_is_held())
if (!strcmp(name, ovs_vport_name(vport)) &&
net_eq(ovs_dp_get_net(vport->dp), net))
return vport;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 30c6879..29bd405 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2173,6 +2173,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
struct timespec64 ts;
__u32 ts_status;
bool is_drop_n_account = false;
+ unsigned int slot_id = 0;
bool do_vnet = false;
/* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
@@ -2274,6 +2275,20 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
TP_STATUS_KERNEL, (macoff+snaplen));
if (!h.raw)
goto drop_n_account;
+
+ if (po->tp_version <= TPACKET_V2) {
+ slot_id = po->rx_ring.head;
+ if (test_bit(slot_id, po->rx_ring.rx_owner_map))
+ goto drop_n_account;
+ __set_bit(slot_id, po->rx_ring.rx_owner_map);
+ }
+
+ if (do_vnet &&
+ virtio_net_hdr_from_skb(skb, h.raw + macoff -
+ sizeof(struct virtio_net_hdr),
+ vio_le(), true, 0))
+ goto drop_n_account;
+
if (po->tp_version <= TPACKET_V2) {
packet_increment_rx_head(po, &po->rx_ring);
/*
@@ -2286,12 +2301,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
status |= TP_STATUS_LOSING;
}
- if (do_vnet &&
- virtio_net_hdr_from_skb(skb, h.raw + macoff -
- sizeof(struct virtio_net_hdr),
- vio_le(), true, 0))
- goto drop_n_account;
-
po->stats.stats1.tp_packets++;
if (copy_skb) {
status |= TP_STATUS_COPY;
@@ -2379,7 +2388,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
#endif
if (po->tp_version <= TPACKET_V2) {
+ spin_lock(&sk->sk_receive_queue.lock);
__packet_set_status(po, h.raw, status);
+ __clear_bit(slot_id, po->rx_ring.rx_owner_map);
+ spin_unlock(&sk->sk_receive_queue.lock);
sk->sk_data_ready(sk);
} else {
prb_clear_blk_fill_status(&po->rx_ring);
@@ -4276,6 +4288,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
{
struct pgv *pg_vec = NULL;
struct packet_sock *po = pkt_sk(sk);
+ unsigned long *rx_owner_map = NULL;
int was_running, order = 0;
struct packet_ring_buffer *rb;
struct sk_buff_head *rb_queue;
@@ -4361,6 +4374,12 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
}
break;
default:
+ if (!tx_ring) {
+ rx_owner_map = bitmap_alloc(req->tp_frame_nr,
+ GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
+ if (!rx_owner_map)
+ goto out_free_pg_vec;
+ }
break;
}
}
@@ -4390,6 +4409,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
err = 0;
spin_lock_bh(&rb_queue->lock);
swap(rb->pg_vec, pg_vec);
+ if (po->tp_version <= TPACKET_V2)
+ swap(rb->rx_owner_map, rx_owner_map);
rb->frame_max = (req->tp_frame_nr - 1);
rb->head = 0;
rb->frame_size = req->tp_frame_size;
@@ -4421,6 +4442,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
}
out_free_pg_vec:
+ bitmap_free(rx_owner_map);
if (pg_vec)
free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 82fb2b1..907f4cd 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -70,7 +70,10 @@ struct packet_ring_buffer {
unsigned int __percpu *pending_refcnt;
- struct tpacket_kbdq_core prb_bdqc;
+ union {
+ unsigned long *rx_owner_map;
+ struct tpacket_kbdq_core prb_bdqc;
+ };
};
extern struct mutex fanout_mutex;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 3341eee..585e6b3 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -162,10 +162,9 @@ static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages,
if (write)
gup_flags |= FOLL_WRITE;
- ret = get_user_pages_fast(user_addr, nr_pages, gup_flags, pages);
+ ret = pin_user_pages_fast(user_addr, nr_pages, gup_flags, pages);
if (ret >= 0 && ret < nr_pages) {
- while (ret--)
- put_page(pages[ret]);
+ unpin_user_pages(pages, ret);
ret = -EFAULT;
}
@@ -300,8 +299,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
* to release anything.
*/
if (!need_odp) {
- for (i = 0 ; i < nents; i++)
- put_page(sg_page(&sg[i]));
+ unpin_user_pages(pages, nr_pages);
kfree(sg);
}
ret = PTR_ERR(trans_private);
@@ -325,7 +323,12 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
if (cookie_ret)
*cookie_ret = cookie;
- if (args->cookie_addr && put_user(cookie, (u64 __user *)(unsigned long) args->cookie_addr)) {
+ if (args->cookie_addr &&
+ put_user(cookie, (u64 __user *)(unsigned long)args->cookie_addr)) {
+ if (!need_odp) {
+ unpin_user_pages(pages, nr_pages);
+ kfree(sg);
+ }
ret = -EFAULT;
goto out;
}
@@ -496,9 +499,7 @@ void rds_rdma_free_op(struct rm_rdma_op *ro)
* is the case for a RDMA_READ which copies from remote
* to local memory
*/
- if (!ro->op_write)
- set_page_dirty(page);
- put_page(page);
+ unpin_user_pages_dirty_lock(&page, 1, !ro->op_write);
}
}
@@ -515,8 +516,7 @@ void rds_atomic_free_op(struct rm_atomic_op *ao)
/* Mark page dirty if it was possibly modified, which
* is the case for a RDMA_READ which copies from remote
* to local memory */
- set_page_dirty(page);
- put_page(page);
+ unpin_user_pages_dirty_lock(&page, 1, true);
kfree(ao->op_notifier);
ao->op_notifier = NULL;
@@ -944,7 +944,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
return ret;
err:
if (page)
- put_page(page);
+ unpin_user_page(page);
rm->atomic.op_active = 0;
kfree(rm->atomic.op_notifier);
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index fe42f98..15ee92d 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -285,7 +285,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
gfp_t gfp,
rxrpc_notify_rx_t notify_rx,
bool upgrade,
- bool intr,
+ enum rxrpc_interruptibility interruptibility,
unsigned int debug_id)
{
struct rxrpc_conn_parameters cp;
@@ -310,7 +310,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
memset(&p, 0, sizeof(p));
p.user_call_ID = user_call_ID;
p.tx_total_len = tx_total_len;
- p.intr = intr;
+ p.interruptibility = interruptibility;
memset(&cp, 0, sizeof(cp));
cp.local = rx->local;
@@ -371,45 +371,18 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call);
* rxrpc_kernel_check_life - Check to see whether a call is still alive
* @sock: The socket the call is on
* @call: The call to check
- * @_life: Where to store the life value
*
- * Allow a kernel service to find out whether a call is still alive - ie. we're
- * getting ACKs from the server. Passes back in *_life a number representing
- * the life state which can be compared to that returned by a previous call and
- * return true if the call is still alive.
- *
- * If the life state stalls, rxrpc_kernel_probe_life() should be called and
- * then 2RTT waited.
+ * Allow a kernel service to find out whether a call is still alive -
+ * ie. whether it has completed.
*/
bool rxrpc_kernel_check_life(const struct socket *sock,
- const struct rxrpc_call *call,
- u32 *_life)
+ const struct rxrpc_call *call)
{
- *_life = call->acks_latest;
return call->state != RXRPC_CALL_COMPLETE;
}
EXPORT_SYMBOL(rxrpc_kernel_check_life);
/**
- * rxrpc_kernel_probe_life - Poke the peer to see if it's still alive
- * @sock: The socket the call is on
- * @call: The call to check
- *
- * In conjunction with rxrpc_kernel_check_life(), allow a kernel service to
- * find out whether a call is still alive by pinging it. This should cause the
- * life state to be bumped in about 2*RTT.
- *
- * The must be called in TASK_RUNNING state on pain of might_sleep() objecting.
- */
-void rxrpc_kernel_probe_life(struct socket *sock, struct rxrpc_call *call)
-{
- rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false,
- rxrpc_propose_ack_ping_for_check_life);
- rxrpc_send_ack_packet(call, true, NULL);
-}
-EXPORT_SYMBOL(rxrpc_kernel_probe_life);
-
-/**
* rxrpc_kernel_get_epoch - Retrieve the epoch value from a call.
* @sock: The socket the call is on
* @call: The call to query
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 7d730c4..3eb1ab4 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -489,7 +489,6 @@ enum rxrpc_call_flag {
RXRPC_CALL_BEGAN_RX_TIMER, /* We began the expect_rx_by timer */
RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */
RXRPC_CALL_RX_UNDERRUN, /* Got data underrun */
- RXRPC_CALL_IS_INTR, /* The call is interruptible */
RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */
};
@@ -598,6 +597,7 @@ struct rxrpc_call {
atomic_t usage;
u16 service_id; /* service ID */
u8 security_ix; /* Security type */
+ enum rxrpc_interruptibility interruptibility; /* At what point call may be interrupted */
u32 call_id; /* call ID on connection */
u32 cid; /* connection ID plus channel index */
int debug_id; /* debug ID for printks */
@@ -675,7 +675,6 @@ struct rxrpc_call {
/* transmission-phase ACK management */
ktime_t acks_latest_ts; /* Timestamp of latest ACK received */
- rxrpc_serial_t acks_latest; /* serial number of latest ACK received */
rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */
rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */
@@ -721,7 +720,7 @@ struct rxrpc_call_params {
u32 normal; /* Max time since last call packet (msec) */
} timeouts;
u8 nr_timeouts; /* Number of timeouts specified */
- bool intr; /* The call is interruptible */
+ enum rxrpc_interruptibility interruptibility; /* How is interruptible is the call? */
};
struct rxrpc_send_params {
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index c9f34b0..f079702 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -237,8 +237,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
return call;
}
- if (p->intr)
- __set_bit(RXRPC_CALL_IS_INTR, &call->flags);
+ call->interruptibility = p->interruptibility;
call->tx_total_len = p->tx_total_len;
trace_rxrpc_call(call->debug_id, rxrpc_call_new_client,
atomic_read(&call->usage),
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index ea7d4c2..f2a1a5d 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -655,13 +655,20 @@ static int rxrpc_wait_for_channel(struct rxrpc_call *call, gfp_t gfp)
add_wait_queue_exclusive(&call->waitq, &myself);
for (;;) {
- if (test_bit(RXRPC_CALL_IS_INTR, &call->flags))
+ switch (call->interruptibility) {
+ case RXRPC_INTERRUPTIBLE:
+ case RXRPC_PREINTERRUPTIBLE:
set_current_state(TASK_INTERRUPTIBLE);
- else
+ break;
+ case RXRPC_UNINTERRUPTIBLE:
+ default:
set_current_state(TASK_UNINTERRUPTIBLE);
+ break;
+ }
if (call->call_id)
break;
- if (test_bit(RXRPC_CALL_IS_INTR, &call->flags) &&
+ if ((call->interruptibility == RXRPC_INTERRUPTIBLE ||
+ call->interruptibility == RXRPC_PREINTERRUPTIBLE) &&
signal_pending(current)) {
ret = -ERESTARTSYS;
break;
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index ef10fbf..69e09d6 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -882,7 +882,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
before(prev_pkt, call->ackr_prev_seq))
goto out;
call->acks_latest_ts = skb->tstamp;
- call->acks_latest = sp->hdr.serial;
call->ackr_first_seq = first_soft_ack;
call->ackr_prev_seq = prev_pkt;
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 813fd68..0fcf157 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -18,6 +18,21 @@
#include "ar-internal.h"
/*
+ * Return true if there's sufficient Tx queue space.
+ */
+static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win)
+{
+ unsigned int win_size =
+ min_t(unsigned int, call->tx_winsize,
+ call->cong_cwnd + call->cong_extra);
+ rxrpc_seq_t tx_win = READ_ONCE(call->tx_hard_ack);
+
+ if (_tx_win)
+ *_tx_win = tx_win;
+ return call->tx_top - tx_win < win_size;
+}
+
+/*
* Wait for space to appear in the Tx queue or a signal to occur.
*/
static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
@@ -26,9 +41,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
{
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
- if (call->tx_top - call->tx_hard_ack <
- min_t(unsigned int, call->tx_winsize,
- call->cong_cwnd + call->cong_extra))
+ if (rxrpc_check_tx_space(call, NULL))
return 0;
if (call->state >= RXRPC_CALL_COMPLETE)
@@ -49,7 +62,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
* Wait for space to appear in the Tx queue uninterruptibly, but with
* a timeout of 2*RTT if no progress was made and a signal occurred.
*/
-static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
+static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx,
struct rxrpc_call *call)
{
rxrpc_seq_t tx_start, tx_win;
@@ -58,8 +71,8 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
rtt = READ_ONCE(call->peer->rtt);
rtt2 = nsecs_to_jiffies64(rtt) * 2;
- if (rtt2 < 1)
- rtt2 = 1;
+ if (rtt2 < 2)
+ rtt2 = 2;
timeout = rtt2;
tx_start = READ_ONCE(call->tx_hard_ack);
@@ -68,16 +81,13 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
set_current_state(TASK_UNINTERRUPTIBLE);
tx_win = READ_ONCE(call->tx_hard_ack);
- if (call->tx_top - tx_win <
- min_t(unsigned int, call->tx_winsize,
- call->cong_cwnd + call->cong_extra))
+ if (rxrpc_check_tx_space(call, &tx_win))
return 0;
if (call->state >= RXRPC_CALL_COMPLETE)
return call->error;
- if (test_bit(RXRPC_CALL_IS_INTR, &call->flags) &&
- timeout == 0 &&
+ if (timeout == 0 &&
tx_win == tx_start && signal_pending(current))
return -EINTR;
@@ -92,6 +102,26 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
}
/*
+ * Wait for space to appear in the Tx queue uninterruptibly.
+ */
+static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ long *timeo)
+{
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (rxrpc_check_tx_space(call, NULL))
+ return 0;
+
+ if (call->state >= RXRPC_CALL_COMPLETE)
+ return call->error;
+
+ trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+ *timeo = schedule_timeout(*timeo);
+ }
+}
+
+/*
* wait for space to appear in the transmit/ACK window
* - caller holds the socket locked
*/
@@ -108,10 +138,19 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
add_wait_queue(&call->waitq, &myself);
- if (waitall)
- ret = rxrpc_wait_for_tx_window_nonintr(rx, call);
- else
- ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo);
+ switch (call->interruptibility) {
+ case RXRPC_INTERRUPTIBLE:
+ if (waitall)
+ ret = rxrpc_wait_for_tx_window_waitall(rx, call);
+ else
+ ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo);
+ break;
+ case RXRPC_PREINTERRUPTIBLE:
+ case RXRPC_UNINTERRUPTIBLE:
+ default:
+ ret = rxrpc_wait_for_tx_window_nonintr(rx, call, timeo);
+ break;
+ }
remove_wait_queue(&call->waitq, &myself);
set_current_state(TASK_RUNNING);
@@ -302,9 +341,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
_debug("alloc");
- if (call->tx_top - call->tx_hard_ack >=
- min_t(unsigned int, call->tx_winsize,
- call->cong_cwnd + call->cong_extra)) {
+ if (!rxrpc_check_tx_space(call, NULL)) {
ret = -EAGAIN;
if (msg->msg_flags & MSG_DONTWAIT)
goto maybe_error;
@@ -619,7 +656,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
.call.tx_total_len = -1,
.call.user_call_ID = 0,
.call.nr_timeouts = 0,
- .call.intr = true,
+ .call.interruptibility = RXRPC_INTERRUPTIBLE,
.abort_code = 0,
.command = RXRPC_CMD_SEND_DATA,
.exclusive = false,
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 90a31b15..8c466a7 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -186,6 +186,7 @@ static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
+ nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */
+ cookie_len /* TCA_ACT_COOKIE */
+ nla_total_size(0) /* TCA_ACT_STATS nested */
+ + nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_FLAGS */
/* TCA_STATS_BASIC */
+ nla_total_size_64bit(sizeof(struct gnet_stats_basic))
/* TCA_STATS_PKT64 */
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index f685c0d..41114b4 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -739,7 +739,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
if (params)
- kfree_rcu(params, rcu);
+ call_rcu(¶ms->rcu, tcf_ct_params_free);
if (res == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 1ad300e..83dd82f 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -284,10 +284,8 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
/* mirror is always swallowed */
if (is_redirect) {
- skb2->tc_redirected = 1;
- skb2->tc_from_ingress = skb2->tc_at_ingress;
- if (skb2->tc_from_ingress)
- skb2->tstamp = 0;
+ skb_set_redirected(skb2, skb2->tc_at_ingress);
+
/* let's the caller reinsert the packet, if possible */
if (use_reinsert) {
res->ingress = want_ingress;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index f9c0d1e..d32d423 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -305,6 +305,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct cls_fl_filter *f;
list_for_each_entry_rcu(mask, &head->masks, list) {
+ flow_dissector_init_keys(&skb_key.control, &skb_key.basic);
fl_clear_masked_range(&skb_key, mask);
skb_flow_dissect_meta(skb, &mask->dissector, &skb_key);
@@ -691,6 +692,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
.len = 128 / BITS_PER_BYTE },
[TCA_FLOWER_KEY_CT_LABELS_MASK] = { .type = NLA_BINARY,
.len = 128 / BITS_PER_BYTE },
+ [TCA_FLOWER_FLAGS] = { .type = NLA_U32 },
};
static const struct nla_policy
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 039cc86..610a0b7 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -157,6 +157,7 @@ static void *mall_get(struct tcf_proto *tp, u32 handle)
static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
[TCA_MATCHALL_UNSPEC] = { .type = NLA_UNSPEC },
[TCA_MATCHALL_CLASSID] = { .type = NLA_U32 },
+ [TCA_MATCHALL_FLAGS] = { .type = NLA_U32 },
};
static int mall_set_parms(struct net *net, struct tcf_proto *tp,
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 6f8786b..5efa3e7 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -534,8 +534,8 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
fp = &b->ht[h];
for (pfp = rtnl_dereference(*fp); pfp;
fp = &pfp->next, pfp = rtnl_dereference(*fp)) {
- if (pfp == f) {
- *fp = f->next;
+ if (pfp == fold) {
+ rcu_assign_pointer(*fp, fold->next);
break;
}
}
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 09b7dc5..9904299 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -261,8 +261,10 @@ static void tcindex_partial_destroy_work(struct work_struct *work)
struct tcindex_data,
rwork);
+ rtnl_lock();
kfree(p->perfect);
kfree(p);
+ rtnl_unlock();
}
static void tcindex_free_perfect_hash(struct tcindex_data *cp)
@@ -357,6 +359,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
if (tcindex_alloc_perfect_hash(net, cp) < 0)
goto errout;
+ cp->alloc_hash = cp->hash;
for (i = 0; i < min(cp->hash, p->hash); i++)
cp->perfect[i].res = p->perfect[i].res;
balloc = 1;
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index b2905b0..2eaac2f 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -181,6 +181,11 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
s64 credits;
int len;
+ /* The previous packet is still being sent */
+ if (now < q->last) {
+ qdisc_watchdog_schedule_ns(&q->watchdog, q->last);
+ return NULL;
+ }
if (q->credits < 0) {
credits = timediff_to_credits(now - q->last, q->idleslope);
@@ -212,7 +217,12 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
credits += q->credits;
q->credits = max_t(s64, credits, q->locredit);
- q->last = now;
+ /* Estimate of the transmission of the last byte of the packet in ns */
+ if (unlikely(atomic64_read(&q->port_rate) == 0))
+ q->last = now;
+ else
+ q->last = now + div64_s64(len * NSEC_PER_SEC,
+ atomic64_read(&q->port_rate));
return skb;
}
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index a5a2954..371ad84 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -744,6 +744,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 },
[TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 },
[TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 },
+ [TCA_FQ_ORPHAN_MASK] = { .type = NLA_U32 },
[TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 },
[TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 },
};
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 660fc45..b1eb12d 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -564,8 +564,10 @@ static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch)
prio = skb->priority;
tc = netdev_get_prio_tc_map(dev, prio);
- if (!(gate_mask & BIT(tc)))
+ if (!(gate_mask & BIT(tc))) {
+ skb = NULL;
continue;
+ }
len = qdisc_pkt_len(skb);
guard = ktime_add_ns(taprio_get_time(q),
@@ -575,13 +577,17 @@ static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch)
* guard band ...
*/
if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
- ktime_after(guard, entry->close_time))
+ ktime_after(guard, entry->close_time)) {
+ skb = NULL;
continue;
+ }
/* ... and no budget. */
if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
- atomic_sub_return(len, &entry->budget) < 0)
+ atomic_sub_return(len, &entry->budget) < 0) {
+ skb = NULL;
continue;
+ }
skb = child->ops->dequeue(child);
if (unlikely(!skb))
@@ -768,6 +774,7 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 },
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
[TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 },
+ [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 },
};
static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 8a15146..1069d7a 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -237,15 +237,11 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc)
addrcnt++;
return nla_total_size(sizeof(struct sctp_info))
- + nla_total_size(1) /* INET_DIAG_SHUTDOWN */
- + nla_total_size(1) /* INET_DIAG_TOS */
- + nla_total_size(1) /* INET_DIAG_TCLASS */
- + nla_total_size(4) /* INET_DIAG_MARK */
- + nla_total_size(4) /* INET_DIAG_CLASS_ID */
+ nla_total_size(addrlen * asoc->peer.transport_count)
+ nla_total_size(addrlen * addrcnt)
- + nla_total_size(sizeof(struct inet_diag_meminfo))
+ nla_total_size(sizeof(struct inet_diag_msg))
+ + inet_diag_msg_attrs_size()
+ + nla_total_size(sizeof(struct inet_diag_meminfo))
+ 64;
}
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 748e3b1..6a16af4 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -170,6 +170,16 @@ static inline bool sctp_chunk_length_valid(struct sctp_chunk *chunk,
return true;
}
+/* Check for format error in an ABORT chunk */
+static inline bool sctp_err_chunk_valid(struct sctp_chunk *chunk)
+{
+ struct sctp_errhdr *err;
+
+ sctp_walk_errors(err, chunk->chunk_hdr);
+
+ return (void *)err == (void *)chunk->chunk_end;
+}
+
/**********************************************************
* These are the state functions for handling chunk events.
**********************************************************/
@@ -2255,6 +2265,9 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort(
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ if (!sctp_err_chunk_valid(chunk))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
}
@@ -2298,6 +2311,9 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort(
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ if (!sctp_err_chunk_valid(chunk))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* Stop the T2-shutdown timer. */
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
@@ -2565,6 +2581,9 @@ enum sctp_disposition sctp_sf_do_9_1_abort(
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ if (!sctp_err_chunk_valid(chunk))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
}
@@ -2582,16 +2601,8 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort(
/* See if we have an error cause code in the chunk. */
len = ntohs(chunk->chunk_hdr->length);
- if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) {
- struct sctp_errhdr *err;
-
- sctp_walk_errors(err, chunk->chunk_hdr);
- if ((void *)err != (void *)chunk->chunk_end)
- return sctp_sf_pdiscard(net, ep, asoc, type, arg,
- commands);
-
+ if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
error = ((struct sctp_errhdr *)chunk->skb->data)->cause;
- }
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET));
/* ASSOC_FAILED will DELETE_TCB. */
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index cee5bf4..6fd44bd 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -470,6 +470,8 @@ static void smc_switch_to_fallback(struct smc_sock *smc)
if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
smc->clcsock->file = smc->sk.sk_socket->file;
smc->clcsock->file->private_data = smc->clcsock;
+ smc->clcsock->wq.fasync_list =
+ smc->sk.sk_socket->wq.fasync_list;
}
}
@@ -510,15 +512,18 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
static int smc_connect_abort(struct smc_sock *smc, int reason_code,
int local_contact)
{
+ bool is_smcd = smc->conn.lgr->is_smcd;
+
if (local_contact == SMC_FIRST_CONTACT)
- smc_lgr_forget(smc->conn.lgr);
- if (smc->conn.lgr->is_smcd)
+ smc_lgr_cleanup_early(&smc->conn);
+ else
+ smc_conn_free(&smc->conn);
+ if (is_smcd)
/* there is only one lgr role for SMC-D; use server lock */
mutex_unlock(&smc_server_lgr_pending);
else
mutex_unlock(&smc_client_lgr_pending);
- smc_conn_free(&smc->conn);
smc->connect_nonblock = 0;
return reason_code;
}
@@ -1089,7 +1094,6 @@ static void smc_listen_out_err(struct smc_sock *new_smc)
if (newsmcsk->sk_state == SMC_INIT)
sock_put(&new_smc->sk); /* passive closing */
newsmcsk->sk_state = SMC_CLOSED;
- smc_conn_free(&new_smc->conn);
smc_listen_out(new_smc);
}
@@ -1100,12 +1104,13 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
{
/* RDMA setup failed, switch back to TCP */
if (local_contact == SMC_FIRST_CONTACT)
- smc_lgr_forget(new_smc->conn.lgr);
+ smc_lgr_cleanup_early(&new_smc->conn);
+ else
+ smc_conn_free(&new_smc->conn);
if (reason_code < 0) { /* error, no fallback possible */
smc_listen_out_err(new_smc);
return;
}
- smc_conn_free(&new_smc->conn);
smc_switch_to_fallback(new_smc);
new_smc->fallback_rsn = reason_code;
if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
@@ -1168,16 +1173,18 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
new_smc->conn.lgr->vlan_id,
new_smc->conn.lgr->smcd)) {
if (ini->cln_first_contact == SMC_FIRST_CONTACT)
- smc_lgr_forget(new_smc->conn.lgr);
- smc_conn_free(&new_smc->conn);
+ smc_lgr_cleanup_early(&new_smc->conn);
+ else
+ smc_conn_free(&new_smc->conn);
return SMC_CLC_DECL_SMCDNOTALK;
}
/* Create send and receive buffers */
if (smc_buf_create(new_smc, true)) {
if (ini->cln_first_contact == SMC_FIRST_CONTACT)
- smc_lgr_forget(new_smc->conn.lgr);
- smc_conn_free(&new_smc->conn);
+ smc_lgr_cleanup_early(&new_smc->conn);
+ else
+ smc_conn_free(&new_smc->conn);
return SMC_CLC_DECL_MEM;
}
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 0879f7b..86cccc2 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -372,7 +372,9 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
dclc.hdr.version = SMC_CLC_V1;
dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
- memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
+ if (smc->conn.lgr && !smc->conn.lgr->is_smcd)
+ memcpy(dclc.id_for_peer, local_systemid,
+ sizeof(local_systemid));
dclc.peer_diagnosis = htonl(peer_diag_info);
memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2249de5..5b085ef 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -162,6 +162,18 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
conn->lgr = NULL;
}
+void smc_lgr_cleanup_early(struct smc_connection *conn)
+{
+ struct smc_link_group *lgr = conn->lgr;
+
+ if (!lgr)
+ return;
+
+ smc_conn_free(conn);
+ smc_lgr_forget(lgr);
+ smc_lgr_schedule_free_work_fast(lgr);
+}
+
/* Send delete link, either as client to request the initiation
* of the DELETE LINK sequence from server; or as server to
* initiate the delete processing. See smc_llc_rx_delete_link().
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index c472e12..234ae25 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -296,6 +296,7 @@ struct smc_clc_msg_accept_confirm;
struct smc_clc_msg_local;
void smc_lgr_forget(struct smc_link_group *lgr);
+void smc_lgr_cleanup_early(struct smc_connection *conn);
void smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
@@ -316,7 +317,6 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini);
void smc_conn_free(struct smc_connection *conn);
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini);
-void smcd_conn_free(struct smc_connection *conn);
void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
int smc_core_init(void);
void smc_core_exit(void);
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index f38727e..e1f64f4 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -39,16 +39,15 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk)
{
struct smc_sock *smc = smc_sk(sk);
+ memset(r, 0, sizeof(*r));
r->diag_family = sk->sk_family;
+ sock_diag_save_cookie(sk, r->id.idiag_cookie);
if (!smc->clcsock)
return;
r->id.idiag_sport = htons(smc->clcsock->sk->sk_num);
r->id.idiag_dport = smc->clcsock->sk->sk_dport;
r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if;
- sock_diag_save_cookie(sk, r->id.idiag_cookie);
if (sk->sk_protocol == SMCPROTO_SMC) {
- memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
- memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr;
r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr;
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 5486326..05b825b 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -573,6 +573,8 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
struct smc_ib_device *smcibdev;
smcibdev = ib_get_client_data(ibdev, &smc_ib_client);
+ if (!smcibdev || smcibdev->ibdev != ibdev)
+ return;
ib_set_client_data(ibdev, &smc_ib_client, NULL);
spin_lock(&smc_ib_devices.lock);
list_del_init(&smcibdev->list); /* remove from smc_ib_devices */
@@ -580,6 +582,7 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
smc_smcr_terminate_all(smcibdev);
smc_ib_cleanup_per_ibdev(smcibdev);
ib_unregister_event_handler(&smcibdev->event_handler);
+ cancel_work_sync(&smcibdev->port_event_work);
kfree(smcibdev);
}
diff --git a/net/socket.c b/net/socket.c
index b79a05d..2dd739f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1707,7 +1707,8 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog)
int __sys_accept4_file(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen, int flags)
+ int __user *upeer_addrlen, int flags,
+ unsigned long nofile)
{
struct socket *sock, *newsock;
struct file *newfile;
@@ -1738,7 +1739,7 @@ int __sys_accept4_file(struct file *file, unsigned file_flags,
*/
__module_get(newsock->ops->owner);
- newfd = get_unused_fd_flags(flags);
+ newfd = __get_unused_fd_flags(flags, nofile);
if (unlikely(newfd < 0)) {
err = newfd;
sock_release(newsock);
@@ -1807,7 +1808,8 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
f = fdget(fd);
if (f.file) {
ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
- upeer_addrlen, flags);
+ upeer_addrlen, flags,
+ rlimit(RLIMIT_NOFILE));
if (f.flags)
fput(f.file);
}
@@ -2226,10 +2228,10 @@ struct used_address {
unsigned int name_len;
};
-static int copy_msghdr_from_user(struct msghdr *kmsg,
- struct user_msghdr __user *umsg,
- struct sockaddr __user **save_addr,
- struct iovec **iov)
+int __copy_msghdr_from_user(struct msghdr *kmsg,
+ struct user_msghdr __user *umsg,
+ struct sockaddr __user **save_addr,
+ struct iovec __user **uiov, size_t *nsegs)
{
struct user_msghdr msg;
ssize_t err;
@@ -2271,6 +2273,23 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
return -EMSGSIZE;
kmsg->msg_iocb = NULL;
+ *uiov = msg.msg_iov;
+ *nsegs = msg.msg_iovlen;
+ return 0;
+}
+
+static int copy_msghdr_from_user(struct msghdr *kmsg,
+ struct user_msghdr __user *umsg,
+ struct sockaddr __user **save_addr,
+ struct iovec **iov)
+{
+ struct user_msghdr msg;
+ ssize_t err;
+
+ err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov,
+ &msg.msg_iovlen);
+ if (err)
+ return err;
err = import_iovec(save_addr ? READ : WRITE,
msg.msg_iov, msg.msg_iovlen,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 095be88..125297c 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -288,8 +288,8 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct ib_reg_wr *reg_wr;
+ int i, n, dma_nents;
struct ib_mr *ibmr;
- int i, n;
u8 key;
if (nsegs > ia->ri_max_frwr_depth)
@@ -313,15 +313,16 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
break;
}
mr->mr_dir = rpcrdma_data_dir(writing);
+ mr->mr_nents = i;
- mr->mr_nents =
- ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, i, mr->mr_dir);
- if (!mr->mr_nents)
+ dma_nents = ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, mr->mr_nents,
+ mr->mr_dir);
+ if (!dma_nents)
goto out_dmamap_err;
ibmr = mr->frwr.fr_mr;
- n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
- if (unlikely(n != mr->mr_nents))
+ n = ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, PAGE_SIZE);
+ if (n != dma_nents)
goto out_mapmr_err;
ibmr->iova &= 0x00000000ffffffff;
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 7c35094..bb98624 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -116,6 +116,7 @@ const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
[TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 },
[TIPC_NLA_PROP_TOL] = { .type = NLA_U32 },
[TIPC_NLA_PROP_WIN] = { .type = NLA_U32 },
+ [TIPC_NLA_PROP_MTU] = { .type = NLA_U32 },
[TIPC_NLA_PROP_BROADCAST] = { .type = NLA_U32 },
[TIPC_NLA_PROP_BROADCAST_RATIO] = { .type = NLA_U32 }
};
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 99b28b6..0c88778 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -278,7 +278,7 @@ struct tipc_crypto *tipc_node_crypto_rx_by_list(struct list_head *pos)
}
#endif
-void tipc_node_free(struct rcu_head *rp)
+static void tipc_node_free(struct rcu_head *rp)
{
struct tipc_node *n = container_of(rp, struct tipc_node, rcu);
@@ -2798,7 +2798,7 @@ static int tipc_nl_retrieve_nodeid(struct nlattr **attrs, u8 **node_id)
return 0;
}
-int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
+static int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *attrs[TIPC_NLA_NODE_MAX + 1];
struct net *net = sock_net(skb->sk);
@@ -2875,7 +2875,8 @@ int tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
return err;
}
-int __tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info)
+static int __tipc_nl_node_flush_key(struct sk_buff *skb,
+ struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
struct tipc_net *tn = tipc_net(net);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index f9b4fb9..693e890 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2441,6 +2441,8 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
return -ETIMEDOUT;
if (signal_pending(current))
return sock_intr_errno(*timeo_p);
+ if (sk->sk_state == TIPC_DISCONNECTING)
+ break;
add_wait_queue(sk_sleep(sk), &wait);
done = sk_wait_event(sk, timeo_p, tipc_sk_connected(sk),
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 1ba5a92..1c5574e 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -593,7 +593,7 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
u32 seq, u64 *p_record_sn)
{
u64 record_sn = context->hint_record_sn;
- struct tls_record_info *info;
+ struct tls_record_info *info, *last;
info = context->retransmit_hint;
if (!info ||
@@ -605,6 +605,24 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
struct tls_record_info, list);
if (!info)
return NULL;
+ /* send the start_marker record if seq number is before the
+ * tls offload start marker sequence number. This record is
+ * required to handle TCP packets which are before TLS offload
+ * started.
+ * And if it's not start marker, look if this seq number
+ * belongs to the list.
+ */
+ if (likely(!tls_record_is_start_marker(info))) {
+ /* we have the first record, get the last record to see
+ * if this seq number belongs to the list.
+ */
+ last = list_last_entry(&context->records_list,
+ struct tls_record_info, list);
+
+ if (!between(seq, tls_record_start_seq(info),
+ last->end_seq))
+ return NULL;
+ }
record_sn = context->unacked_record_sn;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 62c12cb..68debcb 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -682,6 +682,7 @@ static int unix_set_peek_off(struct sock *sk, int val)
return 0;
}
+#ifdef CONFIG_PROC_FS
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -692,6 +693,9 @@ static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
seq_printf(m, "scm_fds: %u\n", READ_ONCE(u->scm_stat.nr_fds));
}
}
+#else
+#define unix_show_fdinfo NULL
+#endif
static const struct proto_ops unix_stream_ops = {
.family = PF_UNIX,
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 9c5b2a91..a5f2870 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -451,6 +451,12 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
if (vsk->transport == new_transport)
return 0;
+ /* transport->release() must be called with sock lock acquired.
+ * This path can only be taken during vsock_stream_connect(),
+ * where we have already held the sock lock.
+ * In the other cases, this function is called on a new socket
+ * which is not assigned to any transport.
+ */
vsk->transport->release(vsk);
vsock_deassign_transport(vsk);
}
@@ -753,20 +759,18 @@ static void __vsock_release(struct sock *sk, int level)
vsk = vsock_sk(sk);
pending = NULL; /* Compiler warning. */
- /* The release call is supposed to use lock_sock_nested()
- * rather than lock_sock(), if a sock lock should be acquired.
- */
- if (vsk->transport)
- vsk->transport->release(vsk);
- else if (sk->sk_type == SOCK_STREAM)
- vsock_remove_sock(vsk);
-
/* When "level" is SINGLE_DEPTH_NESTING, use the nested
* version to avoid the warning "possible recursive locking
* detected". When "level" is 0, lock_sock_nested(sk, level)
* is the same as lock_sock(sk).
*/
lock_sock_nested(sk, level);
+
+ if (vsk->transport)
+ vsk->transport->release(vsk);
+ else if (sk->sk_type == SOCK_STREAM)
+ vsock_remove_sock(vsk);
+
sock_orphan(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 3492c02..630b851 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -526,12 +526,9 @@ static bool hvs_close_lock_held(struct vsock_sock *vsk)
static void hvs_release(struct vsock_sock *vsk)
{
- struct sock *sk = sk_vsock(vsk);
bool remove_sock;
- lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
remove_sock = hvs_close_lock_held(vsk);
- release_sock(sk);
if (remove_sock)
vsock_remove_sock(vsk);
}
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index d9f0c9c..f3c4bab 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -829,7 +829,6 @@ void virtio_transport_release(struct vsock_sock *vsk)
struct sock *sk = &vsk->sk;
bool remove_sock = true;
- lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
if (sk->sk_type == SOCK_STREAM)
remove_sock = virtio_transport_close(vsk);
@@ -837,7 +836,6 @@ void virtio_transport_release(struct vsock_sock *vsk)
list_del(&pkt->list);
virtio_transport_free_pkt(pkt);
}
- release_sock(sk);
if (remove_sock)
vsock_remove_sock(vsk);
diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c
index a9c0f36..24e1840 100644
--- a/net/wireless/ethtool.c
+++ b/net/wireless/ethtool.c
@@ -7,9 +7,13 @@
void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct device *pdev = wiphy_dev(wdev->wiphy);
- strlcpy(info->driver, wiphy_dev(wdev->wiphy)->driver->name,
- sizeof(info->driver));
+ if (pdev->driver)
+ strlcpy(info->driver, pdev->driver->name,
+ sizeof(info->driver));
+ else
+ strlcpy(info->driver, "N/A", sizeof(info->driver));
strlcpy(info->version, init_utsname()->release, sizeof(info->version));
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 123b8d7..f0af23c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -20,6 +20,7 @@
#include <linux/netlink.h>
#include <linux/nospec.h>
#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
#include <net/net_namespace.h>
#include <net/genetlink.h>
#include <net/cfg80211.h>
@@ -437,6 +438,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG },
[NL80211_ATTR_CONTROL_PORT_OVER_NL80211] = { .type = NLA_FLAG },
[NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG },
+ [NL80211_ATTR_STATUS_CODE] = { .type = NLA_U16 },
[NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 },
[NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 },
[NL80211_ATTR_PID] = { .type = NLA_U32 },
@@ -468,6 +470,8 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_WOWLAN_TRIGGERS] = { .type = NLA_NESTED },
[NL80211_ATTR_STA_PLINK_STATE] =
NLA_POLICY_MAX(NLA_U8, NUM_NL80211_PLINK_STATES - 1),
+ [NL80211_ATTR_MEASUREMENT_DURATION] = { .type = NLA_U16 },
+ [NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY] = { .type = NLA_FLAG },
[NL80211_ATTR_MESH_PEER_AID] =
NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID),
[NL80211_ATTR_SCHED_SCAN_INTERVAL] = { .type = NLA_U32 },
@@ -529,6 +533,8 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_MDID] = { .type = NLA_U16 },
[NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY,
.len = IEEE80211_MAX_DATA_LEN },
+ [NL80211_ATTR_CRIT_PROT_ID] = { .type = NLA_U16 },
+ [NL80211_ATTR_MAX_CRIT_PROT_DURATION] = { .type = NLA_U16 },
[NL80211_ATTR_PEER_AID] =
NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID),
[NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 },
@@ -559,6 +565,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
NLA_POLICY_MAX(NLA_U8, IEEE80211_NUM_UPS - 1),
[NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 },
[NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 },
+ [NL80211_ATTR_OPER_CLASS] = { .type = NLA_U8 },
[NL80211_ATTR_MAC_MASK] = {
.type = NLA_EXACT_LEN_WARN,
.len = ETH_ALEN
@@ -4799,8 +4806,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
err = nl80211_parse_he_obss_pd(
info->attrs[NL80211_ATTR_HE_OBSS_PD],
¶ms.he_obss_pd);
- if (err)
- return err;
+ goto out;
}
nl80211_calculate_ap_params(¶ms);
@@ -4822,6 +4828,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
}
wdev_unlock(wdev);
+out:
kfree(params.acl);
return err;
@@ -16409,7 +16416,7 @@ void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac,
goto nla_put_failure;
if ((sta_opmode->changed & STA_OPMODE_MAX_BW_CHANGED) &&
- nla_put_u8(msg, NL80211_ATTR_CHANNEL_WIDTH, sta_opmode->bw))
+ nla_put_u32(msg, NL80211_ATTR_CHANNEL_WIDTH, sta_opmode->bw))
goto nla_put_failure;
if ((sta_opmode->changed & STA_OPMODE_N_SS_CHANGED) &&
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index fff9a74..1a8218f 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2276,7 +2276,7 @@ static void handle_channel_custom(struct wiphy *wiphy,
break;
}
- if (IS_ERR(reg_rule)) {
+ if (IS_ERR_OR_NULL(reg_rule)) {
pr_debug("Disabling freq %d MHz as custom regd has no rule that fits it\n",
chan->center_freq);
if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) {
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index aef240f..328402a 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -2022,7 +2022,11 @@ void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev,
spin_lock_bh(&rdev->bss_lock);
- if (WARN_ON(cbss->pub.channel == chan))
+ /*
+ * Some APs use CSA also for bandwidth changes, i.e., without actually
+ * changing the control channel, so no need to update in such a case.
+ */
+ if (cbss->pub.channel == chan)
goto done;
/* use transmitting bss */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index df60048..356f90e 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -217,6 +217,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
static void xsk_flush(struct xdp_sock *xs)
{
xskq_prod_submit(xs->rx);
+ __xskq_cons_release(xs->umem->fq);
sock_def_readable(&xs->sk);
}
@@ -304,6 +305,7 @@ void xsk_umem_consume_tx_done(struct xdp_umem *umem)
rcu_read_lock();
list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+ __xskq_cons_release(xs->tx);
xs->sk.sk_write_space(&xs->sk);
}
rcu_read_unlock();
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index bec2af1..89a01ac 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -271,7 +271,8 @@ static inline void xskq_cons_release(struct xsk_queue *q)
{
/* To improve performance, only update local state here.
* Reflect this to global state when we get new entries
- * from the ring in xskq_cons_get_entries().
+ * from the ring in xskq_cons_get_entries() and whenever
+ * Rx or Tx processing are completed in the NAPI loop.
*/
q->cached_cons++;
}
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 50f567a..e2db468 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -78,8 +78,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
int err;
unsigned long flags;
struct xfrm_state *x;
- struct sk_buff *skb2, *nskb;
struct softnet_data *sd;
+ struct sk_buff *skb2, *nskb, *pskb = NULL;
netdev_features_t esp_features = features;
struct xfrm_offload *xo = xfrm_offload(skb);
struct sec_path *sp;
@@ -168,14 +168,14 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
} else {
if (skb == skb2)
skb = nskb;
-
- if (!skb)
- return NULL;
+ else
+ pskb->next = nskb;
continue;
}
skb_push(skb2, skb2->data - skb_mac_header(skb2));
+ pskb = skb2;
}
return skb;
@@ -383,6 +383,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void
return xfrm_dev_feat_change(dev);
case NETDEV_DOWN:
+ case NETDEV_UNREGISTER:
return xfrm_dev_down(dev);
}
return NOTIFY_DONE;
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index dc651a6..3361e3a 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -300,10 +300,10 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
} else {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(mtu));
+ icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
}
dst_release(dst);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index dbda08e..8a4af86 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -434,7 +434,9 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
static void xfrm_policy_kill(struct xfrm_policy *policy)
{
+ write_lock_bh(&policy->lock);
policy->walk.dead = 1;
+ write_unlock_bh(&policy->lock);
atomic_inc(&policy->genid);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index b88ba45..e6cfaa6 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -110,7 +110,8 @@ static inline int verify_sec_ctx_len(struct nlattr **attrs)
return 0;
uctx = nla_data(rt);
- if (uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len))
+ if (uctx->len > nla_len(rt) ||
+ uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len))
return -EINVAL;
return 0;
@@ -2275,6 +2276,9 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
err = verify_newpolicy_info(&ua->policy);
if (err)
goto free_state;
+ err = verify_sec_ctx_len(attrs);
+ if (err)
+ goto free_state;
/* build an XP */
xp = xfrm_policy_construct(net, &ua->policy, attrs, &err);
diff --git a/scripts/Kconfig.include b/scripts/Kconfig.include
index 85334dc..496d11c 100644
--- a/scripts/Kconfig.include
+++ b/scripts/Kconfig.include
@@ -44,3 +44,10 @@
# gcc version including patch level
gcc-version := $(shell,$(srctree)/scripts/gcc-version.sh $(CC))
+
+# machine bit flags
+# $(m32-flag): -m32 if the compiler supports it, or an empty string otherwise.
+# $(m64-flag): -m64 if the compiler supports it, or an empty string otherwise.
+cc-option-bit = $(if-success,$(CC) -Werror $(1) -E -x c /dev/null -o /dev/null,$(1))
+m32-flag := $(cc-option-bit,-m32)
+m64-flag := $(cc-option-bit,-m64)
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index ecddf83..ca08f2f 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -48,6 +48,7 @@
KBUILD_CFLAGS += -Wno-format
KBUILD_CFLAGS += -Wno-sign-compare
KBUILD_CFLAGS += -Wno-format-zero-length
+KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast)
endif
endif
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index bae6254..752ff0a2 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -300,15 +300,15 @@
DT_TMP_SCHEMA := $(objtree)/$(DT_BINDING_DIR)/processed-schema.yaml
quiet_cmd_dtb_check = CHECK $@
- cmd_dtb_check = $(DT_CHECKER) -u $(srctree)/$(DT_BINDING_DIR) -p $(DT_TMP_SCHEMA) $@ ;
+ cmd_dtb_check = $(DT_CHECKER) -u $(srctree)/$(DT_BINDING_DIR) -p $(DT_TMP_SCHEMA) $@
-define rule_dtc_dt_yaml
+define rule_dtc
$(call cmd_and_fixdep,dtc,yaml)
$(call cmd,dtb_check)
endef
$(obj)/%.dt.yaml: $(src)/%.dts $(DTC) $(DT_TMP_SCHEMA) FORCE
- $(call if_changed_rule,dtc_dt_yaml)
+ $(call if_changed_rule,dtc)
dtc-tmp = $(subst $(comma),_,$(dot-target).dts.tmp)
diff --git a/scripts/check-sysctl-docs b/scripts/check-sysctl-docs
new file mode 100755
index 0000000..8bcb9e2
--- /dev/null
+++ b/scripts/check-sysctl-docs
@@ -0,0 +1,181 @@
+#!/usr/bin/gawk -f
+# SPDX-License-Identifier: GPL-2.0
+
+# Script to check sysctl documentation against source files
+#
+# Copyright (c) 2020 Stephen Kitt
+
+# Example invocation:
+# scripts/check-sysctl-docs -vtable="kernel" \
+# Documentation/admin-guide/sysctl/kernel.rst \
+# $(git grep -l register_sysctl_)
+#
+# Specify -vdebug=1 to see debugging information
+
+BEGIN {
+ if (!table) {
+ print "Please specify the table to look for using the table variable" > "/dev/stderr"
+ exit 1
+ }
+}
+
+# The following globals are used:
+# children: maps ctl_table names and procnames to child ctl_table names
+# documented: maps documented entries (each key is an entry)
+# entries: maps ctl_table names and procnames to counts (so
+# enumerating the subkeys for a given ctl_table lists its
+# procnames)
+# files: maps procnames to source file names
+# paths: maps ctl_path names to paths
+# curpath: the name of the current ctl_path struct
+# curtable: the name of the current ctl_table struct
+# curentry: the name of the current proc entry (procname when parsing
+# a ctl_table, constructed path when parsing a ctl_path)
+
+
+# Remove punctuation from the given value
+function trimpunct(value) {
+ while (value ~ /^["&]/) {
+ value = substr(value, 2)
+ }
+ while (value ~ /[]["&,}]$/) {
+ value = substr(value, 1, length(value) - 1)
+ }
+ return value
+}
+
+# Print the information for the given entry
+function printentry(entry) {
+ seen[entry]++
+ printf "* %s from %s", entry, file[entry]
+ if (documented[entry]) {
+ printf " (documented)"
+ }
+ print ""
+}
+
+
+# Stage 1: build the list of documented entries
+FNR == NR && /^=+$/ {
+ if (prevline ~ /Documentation for/) {
+ # This is the main title
+ next
+ }
+
+ # The previous line is a section title, parse it
+ $0 = prevline
+ if (debug) print "Parsing " $0
+ inbrackets = 0
+ for (i = 1; i <= NF; i++) {
+ if (length($i) == 0) {
+ continue
+ }
+ if (!inbrackets && substr($i, 1, 1) == "(") {
+ inbrackets = 1
+ }
+ if (!inbrackets) {
+ token = trimpunct($i)
+ if (length(token) > 0 && token != "and") {
+ if (debug) print trimpunct($i)
+ documented[trimpunct($i)]++
+ }
+ }
+ if (inbrackets && substr($i, length($i), 1) == ")") {
+ inbrackets = 0
+ }
+ }
+}
+
+FNR == NR {
+ prevline = $0
+ next
+}
+
+
+# Stage 2: process each file and find all sysctl tables
+BEGINFILE {
+ delete children
+ delete entries
+ delete paths
+ curpath = ""
+ curtable = ""
+ curentry = ""
+ if (debug) print "Processing file " FILENAME
+}
+
+/^static struct ctl_path/ {
+ match($0, /static struct ctl_path ([^][]+)/, tables)
+ curpath = tables[1]
+ if (debug) print "Processing path " curpath
+}
+
+/^static struct ctl_table/ {
+ match($0, /static struct ctl_table ([^][]+)/, tables)
+ curtable = tables[1]
+ if (debug) print "Processing table " curtable
+}
+
+/^};$/ {
+ curpath = ""
+ curtable = ""
+ curentry = ""
+}
+
+curpath && /\.procname[\t ]*=[\t ]*".+"/ {
+ match($0, /.procname[\t ]*=[\t ]*"([^"]+)"/, names)
+ if (curentry) {
+ curentry = curentry "/" names[1]
+ } else {
+ curentry = names[1]
+ }
+ if (debug) print "Setting path " curpath " to " curentry
+ paths[curpath] = curentry
+}
+
+curtable && /\.procname[\t ]*=[\t ]*".+"/ {
+ match($0, /.procname[\t ]*=[\t ]*"([^"]+)"/, names)
+ curentry = names[1]
+ if (debug) print "Adding entry " curentry " to table " curtable
+ entries[curtable][curentry]++
+ file[curentry] = FILENAME
+}
+
+/\.child[\t ]*=/ {
+ child = trimpunct($NF)
+ if (debug) print "Linking child " child " to table " curtable " entry " curentry
+ children[curtable][curentry] = child
+}
+
+/register_sysctl_table\(.*\)/ {
+ match($0, /register_sysctl_table\(([^)]+)\)/, tables)
+ if (debug) print "Registering table " tables[1]
+ if (children[tables[1]][table]) {
+ for (entry in entries[children[tables[1]][table]]) {
+ printentry(entry)
+ }
+ }
+}
+
+/register_sysctl_paths\(.*\)/ {
+ match($0, /register_sysctl_paths\(([^)]+), ([^)]+)\)/, tables)
+ if (debug) print "Attaching table " tables[2] " to path " tables[1]
+ if (paths[tables[1]] == table) {
+ for (entry in entries[tables[2]]) {
+ printentry(entry)
+ }
+ }
+ split(paths[tables[1]], components, "/")
+ if (length(components) > 1 && components[1] == table) {
+ # Count the first subdirectory as seen
+ seen[components[2]]++
+ }
+}
+
+
+END {
+ for (entry in documented) {
+ if (!seen[entry]) {
+ print "No implementation for " entry
+ }
+ }
+}
diff --git a/scripts/documentation-file-ref-check b/scripts/documentation-file-ref-check
index 7784c54..997202a 100755
--- a/scripts/documentation-file-ref-check
+++ b/scripts/documentation-file-ref-check
@@ -51,7 +51,9 @@
or die "Failed to run git grep";
while (<IN>) {
next if (!m,^([^:]+):.*\:doc\:\`([^\`]+)\`,);
+ next if (m,sphinx/,);
+ my $file = $1;
my $d = $1;
my $doc_ref = $2;
@@ -60,7 +62,12 @@
$d =~ s,(.*/).*,$1,;
$f =~ s,.*\<([^\>]+)\>,$1,;
- $f ="$d$f.rst";
+ if ($f =~ m,^/,) {
+ $f = "$f.rst";
+ $f =~ s,^/,Documentation/,;
+ } else {
+ $f = "$d$f.rst";
+ }
next if (grep -e, glob("$f"));
@@ -69,7 +76,7 @@
}
$doc_fix++;
- print STDERR "$f: :doc:`$doc_ref`\n";
+ print STDERR "$file: :doc:`$doc_ref`\n";
}
close IN;
diff --git a/scripts/dtc/dtc-lexer.l b/scripts/dtc/dtc-lexer.l
index 5c6c3fd..b3b7270 100644
--- a/scripts/dtc/dtc-lexer.l
+++ b/scripts/dtc/dtc-lexer.l
@@ -23,7 +23,6 @@
#include "srcpos.h"
#include "dtc-parser.tab.h"
-YYLTYPE yylloc;
extern bool treesource_error;
/* CAUTION: this will stop working if we ever use yyless() or yyunput() */
diff --git a/scripts/export_report.pl b/scripts/export_report.pl
index 548330e..feb3d55 100755
--- a/scripts/export_report.pl
+++ b/scripts/export_report.pl
@@ -94,7 +94,7 @@
#
while ( <$module_symvers> ) {
chomp;
- my (undef, $symbol, $namespace, $module, $gpl) = split('\t');
+ my (undef, $symbol, $module, $gpl, $namespace) = split('\t');
$SYMBOL { $symbol } = [ $module , "0" , $symbol, $gpl];
}
close($module_symvers);
diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
index e356954..f8ca236 100644
--- a/scripts/gcc-plugins/Kconfig
+++ b/scripts/gcc-plugins/Kconfig
@@ -23,7 +23,7 @@
GCC plugins are loadable modules that provide extra features to the
compiler. They are useful for runtime instrumentation and static analysis.
- See Documentation/core-api/gcc-plugins.rst for details.
+ See Documentation/kbuild/gcc-plugins.rst for details.
if GCC_PLUGINS
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 34085d1..6cbcd1a 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -932,10 +932,6 @@
}
}
- foreach my $fix (@fixes) {
- vcs_add_commit_signers($fix, "blamed_fixes");
- }
-
foreach my $email (@email_to, @list_to) {
$email->[0] = deduplicate_email($email->[0]);
}
@@ -974,6 +970,10 @@
}
}
+ foreach my $fix (@fixes) {
+ vcs_add_commit_signers($fix, "blamed_fixes");
+ }
+
my @to = ();
if ($email || $email_list) {
if ($email) {
@@ -1341,35 +1341,11 @@
}
}
} elsif ($ptype eq "M") {
- my ($name, $address) = parse_email($pvalue);
- if ($name eq "") {
- if ($i > 0) {
- my $tv = $typevalue[$i - 1];
- if ($tv =~ m/^([A-Z]):\s*(.*)/) {
- if ($1 eq "P") {
- $name = $2;
- $pvalue = format_email($name, $address, $email_usename);
- }
- }
- }
- }
if ($email_maintainer) {
my $role = get_maintainer_role($i);
push_email_addresses($pvalue, $role);
}
} elsif ($ptype eq "R") {
- my ($name, $address) = parse_email($pvalue);
- if ($name eq "") {
- if ($i > 0) {
- my $tv = $typevalue[$i - 1];
- if ($tv =~ m/^([A-Z]):\s*(.*)/) {
- if ($1 eq "P") {
- $name = $2;
- $pvalue = format_email($name, $address, $email_usename);
- }
- }
- }
- }
if ($email_reviewer) {
my $subsystem = get_subsystem_name($i);
push_email_addresses($pvalue, "reviewer:$subsystem");
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index a566d82..3e8dea6 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -195,13 +195,13 @@ static struct sym_entry *read_symbol(FILE *in)
return NULL;
}
- if (is_ignored_symbol(name, type))
- return NULL;
-
- /* Ignore most absolute/undefined (?) symbols. */
if (strcmp(name, "_text") == 0)
_text = addr;
+ /* Ignore most absolute/undefined (?) symbols. */
+ if (is_ignored_symbol(name, type))
+ return NULL;
+
check_symbol_range(name, addr, text_ranges, ARRAY_SIZE(text_ranges));
check_symbol_range(name, addr, &percpu_range, 1);
@@ -210,7 +210,7 @@ static struct sym_entry *read_symbol(FILE *in)
len = strlen(name) + 1;
- sym = malloc(sizeof(*sym) + len);
+ sym = malloc(sizeof(*sym) + len + 1);
if (!sym) {
fprintf(stderr, "kallsyms failure: "
"unable to allocate required amount of memory\n");
@@ -219,7 +219,7 @@ static struct sym_entry *read_symbol(FILE *in)
sym->addr = addr;
sym->len = len;
sym->sym[0] = type;
- memcpy(sym_name(sym), name, len);
+ strcpy(sym_name(sym), name);
sym->percpu_absolute = 0;
return sym;
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 1919c31..dd484e9 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -239,7 +239,7 @@
fi;
# final build of init/
-${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init
+${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init need-builtin=1
#link vmlinux.o
info LD vmlinux.o
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
index 054405b..d3c237b 100644
--- a/scripts/mod/devicetable-offsets.c
+++ b/scripts/mod/devicetable-offsets.c
@@ -145,6 +145,13 @@ int main(void)
DEVID(i2c_device_id);
DEVID_FIELD(i2c_device_id, name);
+ DEVID(i3c_device_id);
+ DEVID_FIELD(i3c_device_id, match_flags);
+ DEVID_FIELD(i3c_device_id, dcr);
+ DEVID_FIELD(i3c_device_id, manuf_id);
+ DEVID_FIELD(i3c_device_id, part_id);
+ DEVID_FIELD(i3c_device_id, extra_info);
+
DEVID(spi_device_id);
DEVID_FIELD(spi_device_id, name);
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index c91eba7..f81cbe0 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -919,6 +919,24 @@ static int do_i2c_entry(const char *filename, void *symval,
return 1;
}
+static int do_i3c_entry(const char *filename, void *symval,
+ char *alias)
+{
+ DEF_FIELD(symval, i3c_device_id, match_flags);
+ DEF_FIELD(symval, i3c_device_id, dcr);
+ DEF_FIELD(symval, i3c_device_id, manuf_id);
+ DEF_FIELD(symval, i3c_device_id, part_id);
+ DEF_FIELD(symval, i3c_device_id, extra_info);
+
+ strcpy(alias, "i3c:");
+ ADD(alias, "dcr", match_flags & I3C_MATCH_DCR, dcr);
+ ADD(alias, "manuf", match_flags & I3C_MATCH_MANUF, manuf_id);
+ ADD(alias, "part", match_flags & I3C_MATCH_PART, part_id);
+ ADD(alias, "ext", match_flags & I3C_MATCH_EXTRA_INFO, extra_info);
+
+ return 1;
+}
+
/* Looks like: spi:S */
static int do_spi_entry(const char *filename, void *symval,
char *alias)
@@ -1386,6 +1404,7 @@ static const struct devtable devtable[] = {
{"vmbus", SIZE_hv_vmbus_device_id, do_vmbus_entry},
{"rpmsg", SIZE_rpmsg_device_id, do_rpmsg_entry},
{"i2c", SIZE_i2c_device_id, do_i2c_entry},
+ {"i3c", SIZE_i3c_device_id, do_i3c_entry},
{"spi", SIZE_spi_device_id, do_spi_entry},
{"dmi", SIZE_dmi_system_id, do_dmi_entry},
{"platform", SIZE_platform_device_id, do_platform_entry},
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 7edfdb2..55a0a2e 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -308,7 +308,8 @@ static const char *sec_name(struct elf_info *elf, int secindex)
static void *sym_get_data(const struct elf_info *info, const Elf_Sym *sym)
{
- Elf_Shdr *sechdr = &info->sechdrs[sym->st_shndx];
+ unsigned int secindex = get_secindex(info, sym);
+ Elf_Shdr *sechdr = &info->sechdrs[secindex];
unsigned long offset;
offset = sym->st_value;
@@ -2427,7 +2428,7 @@ static void write_if_changed(struct buffer *b, const char *fname)
}
/* parse Module.symvers file. line format:
- * 0x12345678<tab>symbol<tab>module[[<tab>export]<tab>something]
+ * 0x12345678<tab>symbol<tab>module<tab>export<tab>namespace
**/
static void read_dump(const char *fname, unsigned int kernel)
{
@@ -2440,7 +2441,7 @@ static void read_dump(const char *fname, unsigned int kernel)
return;
while ((line = get_next_line(&pos, file, size))) {
- char *symname, *namespace, *modname, *d, *export, *end;
+ char *symname, *namespace, *modname, *d, *export;
unsigned int crc;
struct module *mod;
struct symbol *s;
@@ -2448,16 +2449,16 @@ static void read_dump(const char *fname, unsigned int kernel)
if (!(symname = strchr(line, '\t')))
goto fail;
*symname++ = '\0';
- if (!(namespace = strchr(symname, '\t')))
- goto fail;
- *namespace++ = '\0';
- if (!(modname = strchr(namespace, '\t')))
+ if (!(modname = strchr(symname, '\t')))
goto fail;
*modname++ = '\0';
- if ((export = strchr(modname, '\t')) != NULL)
- *export++ = '\0';
- if (export && ((end = strchr(export, '\t')) != NULL))
- *end = '\0';
+ if (!(export = strchr(modname, '\t')))
+ goto fail;
+ *export++ = '\0';
+ if (!(namespace = strchr(export, '\t')))
+ goto fail;
+ *namespace++ = '\0';
+
crc = strtoul(line, &d, 16);
if (*symname == '\0' || *modname == '\0' || *d != '\0')
goto fail;
@@ -2508,9 +2509,9 @@ static void write_dump(const char *fname)
namespace = symbol->namespace;
buf_printf(&buf, "0x%08x\t%s\t%s\t%s\t%s\n",
symbol->crc, symbol->name,
- namespace ? namespace : "",
symbol->module->name,
- export_str(symbol->export));
+ export_str(symbol->export),
+ namespace ? namespace : "");
}
symbol = symbol->next;
}
diff --git a/scripts/parse-maintainers.pl b/scripts/parse-maintainers.pl
old mode 100644
new mode 100755
index 255cef1..2ca4eb3
--- a/scripts/parse-maintainers.pl
+++ b/scripts/parse-maintainers.pl
@@ -8,13 +8,14 @@
my $output_file = "MAINTAINERS.new";
my $output_section = "SECTION.new";
my $help = 0;
-
+my $order = 0;
my $P = $0;
if (!GetOptions(
'input=s' => \$input_file,
'output=s' => \$output_file,
'section=s' => \$output_section,
+ 'order!' => \$order,
'h|help|usage' => \$help,
)) {
die "$P: invalid argument - use --help if necessary\n";
@@ -32,6 +33,22 @@
--input => MAINTAINERS file to read (default: MAINTAINERS)
--output => sorted MAINTAINERS file to write (default: MAINTAINERS.new)
--section => new sorted MAINTAINERS file to write to (default: SECTION.new)
+ --order => Use the preferred section content output ordering (default: 0)
+ Preferred ordering of section output is:
+ M: Person acting as a maintainer
+ R: Person acting as a patch reviewer
+ L: Mailing list where patches should be sent
+ S: Maintenance status
+ W: URI for general information
+ Q: URI for patchwork tracking
+ B: URI for bug tracking/submission
+ C: URI for chat
+ P: URI or file for subsystem specific coding styles
+ T: SCM tree type and location
+ F: File and directory pattern
+ X: File and directory exclusion pattern
+ N: File glob
+ K: Keyword - patch content regex
If <pattern match regexes> exist, then the sections that match the
regexes are not written to the output file but are written to the
@@ -56,7 +73,7 @@
sub by_pattern($$) {
my ($a, $b) = @_;
- my $preferred_order = 'MRPLSWTQBCFXNK';
+ my $preferred_order = 'MRLSWQBCPTFXNK';
my $a1 = uc(substr($a, 0, 1));
my $b1 = uc(substr($b, 0, 1));
@@ -105,8 +122,14 @@
print $file $separator;
}
print $file $key . "\n";
- foreach my $pattern (sort by_pattern split('\n', %$hashref{$key})) {
- print $file ($pattern . "\n");
+ if ($order) {
+ foreach my $pattern (sort by_pattern split('\n', %$hashref{$key})) {
+ print $file ($pattern . "\n");
+ }
+ } else {
+ foreach my $pattern (split('\n', %$hashref{$key})) {
+ print $file ($pattern . "\n");
+ }
}
}
}
diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install
index a8f0c00..fa3fb05 100755
--- a/scripts/sphinx-pre-install
+++ b/scripts/sphinx-pre-install
@@ -701,11 +701,26 @@
} else {
my $rec_activate = "$virtenv_dir/bin/activate";
my $virtualenv = findprog("virtualenv-3");
+ my $rec_python3 = "";
$virtualenv = findprog("virtualenv-3.5") if (!$virtualenv);
$virtualenv = findprog("virtualenv") if (!$virtualenv);
$virtualenv = "virtualenv" if (!$virtualenv);
- printf "\t$virtualenv $virtenv_dir\n";
+ my $rel = "";
+ if (index($system_release, "Ubuntu") != -1) {
+ $rel = $1 if ($system_release =~ /Ubuntu\s+(\d+)[.]/);
+ if ($rel && $rel >= 16) {
+ $rec_python3 = " -p python3";
+ }
+ }
+ if (index($system_release, "Debian") != -1) {
+ $rel = $1 if ($system_release =~ /Debian\s+(\d+)/);
+ if ($rel && $rel >= 7) {
+ $rec_python3 = " -p python3";
+ }
+ }
+
+ printf "\t$virtualenv$rec_python3 $virtenv_dir\n";
printf "\t. $rec_activate\n";
printf "\tpip install -r $requirement_file\n";
deactivate_help();
diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index 711ff10..3f3ee4e 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -112,6 +112,10 @@
config IMA_DEFAULT_HASH_WP512
bool "WP512"
depends on CRYPTO_WP512=y && !IMA_TEMPLATE
+
+ config IMA_DEFAULT_HASH_SM3
+ bool "SM3"
+ depends on CRYPTO_SM3=y && !IMA_TEMPLATE
endchoice
config IMA_DEFAULT_HASH
@@ -121,6 +125,7 @@
default "sha256" if IMA_DEFAULT_HASH_SHA256
default "sha512" if IMA_DEFAULT_HASH_SHA512
default "wp512" if IMA_DEFAULT_HASH_WP512
+ default "sm3" if IMA_DEFAULT_HASH_SM3
config IMA_WRITE_POLICY
bool "Enable multiple writes to the IMA policy"
diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c
index 111898a..f0c9082 100644
--- a/security/integrity/platform_certs/load_uefi.c
+++ b/security/integrity/platform_certs/load_uefi.c
@@ -35,16 +35,18 @@ static __init bool uefi_check_ignore_db(void)
* Get a certificate list blob from the named EFI variable.
*/
static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid,
- unsigned long *size)
+ unsigned long *size, efi_status_t *status)
{
- efi_status_t status;
unsigned long lsize = 4;
unsigned long tmpdb[4];
void *db;
- status = efi.get_variable(name, guid, NULL, &lsize, &tmpdb);
- if (status != EFI_BUFFER_TOO_SMALL) {
- pr_err("Couldn't get size: 0x%lx\n", status);
+ *status = efi.get_variable(name, guid, NULL, &lsize, &tmpdb);
+ if (*status == EFI_NOT_FOUND)
+ return NULL;
+
+ if (*status != EFI_BUFFER_TOO_SMALL) {
+ pr_err("Couldn't get size: 0x%lx\n", *status);
return NULL;
}
@@ -52,10 +54,10 @@ static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid,
if (!db)
return NULL;
- status = efi.get_variable(name, guid, NULL, &lsize, db);
- if (status != EFI_SUCCESS) {
+ *status = efi.get_variable(name, guid, NULL, &lsize, db);
+ if (*status != EFI_SUCCESS) {
kfree(db);
- pr_err("Error reading db var: 0x%lx\n", status);
+ pr_err("Error reading db var: 0x%lx\n", *status);
return NULL;
}
@@ -74,6 +76,7 @@ static int __init load_uefi_certs(void)
efi_guid_t mok_var = EFI_SHIM_LOCK_GUID;
void *db = NULL, *dbx = NULL, *mok = NULL;
unsigned long dbsize = 0, dbxsize = 0, moksize = 0;
+ efi_status_t status;
int rc = 0;
if (!efi.get_variable)
@@ -83,9 +86,12 @@ static int __init load_uefi_certs(void)
* an error if we can't get them.
*/
if (!uefi_check_ignore_db()) {
- db = get_cert_list(L"db", &secure_var, &dbsize);
+ db = get_cert_list(L"db", &secure_var, &dbsize, &status);
if (!db) {
- pr_err("MODSIGN: Couldn't get UEFI db list\n");
+ if (status == EFI_NOT_FOUND)
+ pr_debug("MODSIGN: db variable wasn't found\n");
+ else
+ pr_err("MODSIGN: Couldn't get UEFI db list\n");
} else {
rc = parse_efi_signature_list("UEFI:db",
db, dbsize, get_handler_for_db);
@@ -96,9 +102,12 @@ static int __init load_uefi_certs(void)
}
}
- mok = get_cert_list(L"MokListRT", &mok_var, &moksize);
+ mok = get_cert_list(L"MokListRT", &mok_var, &moksize, &status);
if (!mok) {
- pr_info("Couldn't get UEFI MokListRT\n");
+ if (status == EFI_NOT_FOUND)
+ pr_debug("MokListRT variable wasn't found\n");
+ else
+ pr_info("Couldn't get UEFI MokListRT\n");
} else {
rc = parse_efi_signature_list("UEFI:MokListRT",
mok, moksize, get_handler_for_db);
@@ -107,9 +116,12 @@ static int __init load_uefi_certs(void)
kfree(mok);
}
- dbx = get_cert_list(L"dbx", &secure_var, &dbxsize);
+ dbx = get_cert_list(L"dbx", &secure_var, &dbxsize, &status);
if (!dbx) {
- pr_info("Couldn't get UEFI dbx list\n");
+ if (status == EFI_NOT_FOUND)
+ pr_debug("dbx variable wasn't found\n");
+ else
+ pr_info("Couldn't get UEFI dbx list\n");
} else {
rc = parse_efi_signature_list("UEFI:dbx",
dbx, dbxsize,
diff --git a/security/keys/key.c b/security/keys/key.c
index 718bf72..e959b3c 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -382,7 +382,7 @@ int key_payload_reserve(struct key *key, size_t datalen)
spin_lock(&key->user->lock);
if (delta > 0 &&
- (key->user->qnbytes + delta >= maxbytes ||
+ (key->user->qnbytes + delta > maxbytes ||
key->user->qnbytes + delta < key->user->qnbytes)) {
ret = -EDQUOT;
}
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 9b898c9..d1a3dea 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -937,8 +937,8 @@ long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group)
key_quota_root_maxbytes : key_quota_maxbytes;
spin_lock(&newowner->lock);
- if (newowner->qnkeys + 1 >= maxkeys ||
- newowner->qnbytes + key->quotalen >= maxbytes ||
+ if (newowner->qnkeys + 1 > maxkeys ||
+ newowner->qnbytes + key->quotalen > maxbytes ||
newowner->qnbytes + key->quotalen <
newowner->qnbytes)
goto quota_overrun;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 4b6991e..1659b59 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -698,7 +698,7 @@ static int selinux_set_mnt_opts(struct super_block *sb,
if (!strcmp(sb->s_type->name, "debugfs") ||
!strcmp(sb->s_type->name, "tracefs") ||
- !strcmp(sb->s_type->name, "binderfs") ||
+ !strcmp(sb->s_type->name, "binder") ||
!strcmp(sb->s_type->name, "pstore"))
sbsec->flags |= SE_SBGENFS;
diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c
index a308ce1..f511ffc 100644
--- a/security/selinux/ss/sidtab.c
+++ b/security/selinux/ss/sidtab.c
@@ -518,19 +518,13 @@ void sidtab_sid2str_put(struct sidtab *s, struct sidtab_entry *entry,
const char *str, u32 str_len)
{
struct sidtab_str_cache *cache, *victim = NULL;
+ unsigned long flags;
/* do not cache invalid contexts */
if (entry->context.len)
return;
- /*
- * Skip the put operation when in non-task context to avoid the need
- * to disable interrupts while holding s->cache_lock.
- */
- if (!in_task())
- return;
-
- spin_lock(&s->cache_lock);
+ spin_lock_irqsave(&s->cache_lock, flags);
cache = rcu_dereference_protected(entry->cache,
lockdep_is_held(&s->cache_lock));
@@ -561,7 +555,7 @@ void sidtab_sid2str_put(struct sidtab *s, struct sidtab_entry *entry,
rcu_assign_pointer(entry->cache, cache);
out_unlock:
- spin_unlock(&s->cache_lock);
+ spin_unlock_irqrestore(&s->cache_lock, flags);
kfree_rcu(victim, rcu_member);
}
diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c
index 240e470..752d078 100644
--- a/sound/core/oss/pcm_plugin.c
+++ b/sound/core/oss/pcm_plugin.c
@@ -111,7 +111,7 @@ int snd_pcm_plug_alloc(struct snd_pcm_substream *plug, snd_pcm_uframes_t frames)
while (plugin->next) {
if (plugin->dst_frames)
frames = plugin->dst_frames(plugin, frames);
- if (snd_BUG_ON((snd_pcm_sframes_t)frames <= 0))
+ if ((snd_pcm_sframes_t)frames <= 0)
return -ENXIO;
plugin = plugin->next;
err = snd_pcm_plugin_alloc(plugin, frames);
@@ -123,7 +123,7 @@ int snd_pcm_plug_alloc(struct snd_pcm_substream *plug, snd_pcm_uframes_t frames)
while (plugin->prev) {
if (plugin->src_frames)
frames = plugin->src_frames(plugin, frames);
- if (snd_BUG_ON((snd_pcm_sframes_t)frames <= 0))
+ if ((snd_pcm_sframes_t)frames <= 0)
return -ENXIO;
plugin = plugin->prev;
err = snd_pcm_plugin_alloc(plugin, frames);
@@ -209,6 +209,8 @@ snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_p
if (stream == SNDRV_PCM_STREAM_PLAYBACK) {
plugin = snd_pcm_plug_last(plug);
while (plugin && drv_frames > 0) {
+ if (drv_frames > plugin->buf_frames)
+ drv_frames = plugin->buf_frames;
plugin_prev = plugin->prev;
if (plugin->src_frames)
drv_frames = plugin->src_frames(plugin, drv_frames);
@@ -220,6 +222,8 @@ snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_p
plugin_next = plugin->next;
if (plugin->dst_frames)
drv_frames = plugin->dst_frames(plugin, drv_frames);
+ if (drv_frames > plugin->buf_frames)
+ drv_frames = plugin->buf_frames;
plugin = plugin_next;
}
} else
@@ -248,11 +252,15 @@ snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, snd_pc
if (frames < 0)
return frames;
}
+ if (frames > plugin->buf_frames)
+ frames = plugin->buf_frames;
plugin = plugin_next;
}
} else if (stream == SNDRV_PCM_STREAM_CAPTURE) {
plugin = snd_pcm_plug_last(plug);
while (plugin) {
+ if (frames > plugin->buf_frames)
+ frames = plugin->buf_frames;
plugin_prev = plugin->prev;
if (plugin->src_frames) {
frames = plugin->src_frames(plugin, frames);
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 336406b..d5443ee 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -2594,7 +2594,8 @@ void snd_pcm_release_substream(struct snd_pcm_substream *substream)
snd_pcm_drop(substream);
if (substream->hw_opened) {
- do_hw_free(substream);
+ if (substream->runtime->status->state != SNDRV_PCM_STATE_OPEN)
+ do_hw_free(substream);
substream->ops->close(substream);
substream->hw_opened = 0;
}
diff --git a/sound/core/seq/oss/seq_oss_midi.c b/sound/core/seq/oss/seq_oss_midi.c
index a88c235..2ddfe22 100644
--- a/sound/core/seq/oss/seq_oss_midi.c
+++ b/sound/core/seq/oss/seq_oss_midi.c
@@ -602,6 +602,7 @@ send_midi_event(struct seq_oss_devinfo *dp, struct snd_seq_event *ev, struct seq
len = snd_seq_oss_timer_start(dp->timer);
if (ev->type == SNDRV_SEQ_EVENT_SYSEX) {
snd_seq_oss_readq_sysex(dp->readq, mdev->seq_device, ev);
+ snd_midi_event_reset_decode(mdev->coder);
} else {
len = snd_midi_event_decode(mdev->coder, msg, sizeof(msg), ev);
if (len > 0)
diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c
index 6d9592f..cc93157 100644
--- a/sound/core/seq/seq_clientmgr.c
+++ b/sound/core/seq/seq_clientmgr.c
@@ -580,7 +580,7 @@ static int update_timestamp_of_queue(struct snd_seq_event *event,
event->queue = queue;
event->flags &= ~SNDRV_SEQ_TIME_STAMP_MASK;
if (real_time) {
- event->time.time = snd_seq_timer_get_cur_time(q->timer);
+ event->time.time = snd_seq_timer_get_cur_time(q->timer, true);
event->flags |= SNDRV_SEQ_TIME_STAMP_REAL;
} else {
event->time.tick = snd_seq_timer_get_cur_tick(q->timer);
@@ -1659,7 +1659,7 @@ static int snd_seq_ioctl_get_queue_status(struct snd_seq_client *client,
tmr = queue->timer;
status->events = queue->tickq->cells + queue->timeq->cells;
- status->time = snd_seq_timer_get_cur_time(tmr);
+ status->time = snd_seq_timer_get_cur_time(tmr, true);
status->tick = snd_seq_timer_get_cur_tick(tmr);
status->running = tmr->running;
diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c
index caf68bf..71a6ea6 100644
--- a/sound/core/seq/seq_queue.c
+++ b/sound/core/seq/seq_queue.c
@@ -238,6 +238,8 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop)
{
unsigned long flags;
struct snd_seq_event_cell *cell;
+ snd_seq_tick_time_t cur_tick;
+ snd_seq_real_time_t cur_time;
if (q == NULL)
return;
@@ -254,17 +256,18 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop)
__again:
/* Process tick queue... */
+ cur_tick = snd_seq_timer_get_cur_tick(q->timer);
for (;;) {
- cell = snd_seq_prioq_cell_out(q->tickq,
- &q->timer->tick.cur_tick);
+ cell = snd_seq_prioq_cell_out(q->tickq, &cur_tick);
if (!cell)
break;
snd_seq_dispatch_event(cell, atomic, hop);
}
/* Process time queue... */
+ cur_time = snd_seq_timer_get_cur_time(q->timer, false);
for (;;) {
- cell = snd_seq_prioq_cell_out(q->timeq, &q->timer->cur_time);
+ cell = snd_seq_prioq_cell_out(q->timeq, &cur_time);
if (!cell)
break;
snd_seq_dispatch_event(cell, atomic, hop);
@@ -392,6 +395,7 @@ int snd_seq_queue_check_access(int queueid, int client)
int snd_seq_queue_set_owner(int queueid, int client, int locked)
{
struct snd_seq_queue *q = queueptr(queueid);
+ unsigned long flags;
if (q == NULL)
return -EINVAL;
@@ -401,8 +405,10 @@ int snd_seq_queue_set_owner(int queueid, int client, int locked)
return -EPERM;
}
+ spin_lock_irqsave(&q->owner_lock, flags);
q->locked = locked ? 1 : 0;
q->owner = client;
+ spin_unlock_irqrestore(&q->owner_lock, flags);
queue_access_unlock(q);
queuefree(q);
@@ -539,15 +545,17 @@ void snd_seq_queue_client_termination(int client)
unsigned long flags;
int i;
struct snd_seq_queue *q;
+ bool matched;
for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) {
if ((q = queueptr(i)) == NULL)
continue;
spin_lock_irqsave(&q->owner_lock, flags);
- if (q->owner == client)
+ matched = (q->owner == client);
+ if (matched)
q->klocked = 1;
spin_unlock_irqrestore(&q->owner_lock, flags);
- if (q->owner == client) {
+ if (matched) {
if (q->timer->running)
snd_seq_timer_stop(q->timer);
snd_seq_timer_reset(q->timer);
@@ -739,6 +747,8 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry,
int i, bpm;
struct snd_seq_queue *q;
struct snd_seq_timer *tmr;
+ bool locked;
+ int owner;
for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) {
if ((q = queueptr(i)) == NULL)
@@ -750,9 +760,14 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry,
else
bpm = 0;
+ spin_lock_irq(&q->owner_lock);
+ locked = q->locked;
+ owner = q->owner;
+ spin_unlock_irq(&q->owner_lock);
+
snd_iprintf(buffer, "queue %d: [%s]\n", q->queue, q->name);
- snd_iprintf(buffer, "owned by client : %d\n", q->owner);
- snd_iprintf(buffer, "lock status : %s\n", q->locked ? "Locked" : "Free");
+ snd_iprintf(buffer, "owned by client : %d\n", owner);
+ snd_iprintf(buffer, "lock status : %s\n", locked ? "Locked" : "Free");
snd_iprintf(buffer, "queued time events : %d\n", snd_seq_prioq_avail(q->timeq));
snd_iprintf(buffer, "queued tick events : %d\n", snd_seq_prioq_avail(q->tickq));
snd_iprintf(buffer, "timer state : %s\n", tmr->running ? "Running" : "Stopped");
diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c
index be59b59..1645e41 100644
--- a/sound/core/seq/seq_timer.c
+++ b/sound/core/seq/seq_timer.c
@@ -428,14 +428,15 @@ int snd_seq_timer_continue(struct snd_seq_timer *tmr)
}
/* return current 'real' time. use timeofday() to get better granularity. */
-snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr)
+snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr,
+ bool adjust_ktime)
{
snd_seq_real_time_t cur_time;
unsigned long flags;
spin_lock_irqsave(&tmr->lock, flags);
cur_time = tmr->cur_time;
- if (tmr->running) {
+ if (adjust_ktime && tmr->running) {
struct timespec64 tm;
ktime_get_ts64(&tm);
@@ -452,7 +453,13 @@ snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr)
high PPQ values) */
snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr)
{
- return tmr->tick.cur_tick;
+ snd_seq_tick_time_t cur_tick;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tmr->lock, flags);
+ cur_tick = tmr->tick.cur_tick;
+ spin_unlock_irqrestore(&tmr->lock, flags);
+ return cur_tick;
}
diff --git a/sound/core/seq/seq_timer.h b/sound/core/seq/seq_timer.h
index 66c3e34..4bec57d 100644
--- a/sound/core/seq/seq_timer.h
+++ b/sound/core/seq/seq_timer.h
@@ -120,7 +120,8 @@ int snd_seq_timer_set_tempo_ppq(struct snd_seq_timer *tmr, int tempo, int ppq);
int snd_seq_timer_set_position_tick(struct snd_seq_timer *tmr, snd_seq_tick_time_t position);
int snd_seq_timer_set_position_time(struct snd_seq_timer *tmr, snd_seq_real_time_t position);
int snd_seq_timer_set_skew(struct snd_seq_timer *tmr, unsigned int skew, unsigned int base);
-snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr);
+snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr,
+ bool adjust_ktime);
snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr);
extern int seq_default_timer_class;
diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c
index 626d87c..77d7037 100644
--- a/sound/core/seq/seq_virmidi.c
+++ b/sound/core/seq/seq_virmidi.c
@@ -81,6 +81,7 @@ static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev,
if ((ev->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) != SNDRV_SEQ_EVENT_LENGTH_VARIABLE)
continue;
snd_seq_dump_var_event(ev, (snd_seq_dump_func_t)snd_rawmidi_receive, vmidi->substream);
+ snd_midi_event_reset_decode(vmidi->parser);
} else {
len = snd_midi_event_decode(vmidi->parser, msg, sizeof(msg), ev);
if (len > 0)
diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c
index a684f05..4d060d5 100644
--- a/sound/hda/ext/hdac_ext_controller.c
+++ b/sound/hda/ext/hdac_ext_controller.c
@@ -254,6 +254,7 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_link_power_down_all);
int snd_hdac_ext_bus_link_get(struct hdac_bus *bus,
struct hdac_ext_link *link)
{
+ unsigned long codec_mask;
int ret = 0;
mutex_lock(&bus->lock);
@@ -280,9 +281,11 @@ int snd_hdac_ext_bus_link_get(struct hdac_bus *bus,
* HDA spec section 4.3 - Codec Discovery
*/
udelay(521);
- bus->codec_mask = snd_hdac_chip_readw(bus, STATESTS);
- dev_dbg(bus->dev, "codec_mask = 0x%lx\n", bus->codec_mask);
- snd_hdac_chip_writew(bus, STATESTS, bus->codec_mask);
+ codec_mask = snd_hdac_chip_readw(bus, STATESTS);
+ dev_dbg(bus->dev, "codec_mask = 0x%lx\n", codec_mask);
+ snd_hdac_chip_writew(bus, STATESTS, codec_mask);
+ if (!bus->codec_mask)
+ bus->codec_mask = codec_mask;
}
mutex_unlock(&bus->lock);
diff --git a/sound/hda/hdmi_chmap.c b/sound/hda/hdmi_chmap.c
index 5fd6d57..aad5c4b 100644
--- a/sound/hda/hdmi_chmap.c
+++ b/sound/hda/hdmi_chmap.c
@@ -250,7 +250,7 @@ void snd_hdac_print_channel_allocation(int spk_alloc, char *buf, int buflen)
for (i = 0, j = 0; i < ARRAY_SIZE(cea_speaker_allocation_names); i++) {
if (spk_alloc & (1 << i))
- j += snprintf(buf + j, buflen - j, " %s",
+ j += scnprintf(buf + j, buflen - j, " %s",
cea_speaker_allocation_names[i]);
}
buf[j] = '\0'; /* necessary when j == 0 */
diff --git a/sound/mips/sgio2audio.c b/sound/mips/sgio2audio.c
index 9f60a50..5bf1ea1 100644
--- a/sound/mips/sgio2audio.c
+++ b/sound/mips/sgio2audio.c
@@ -649,8 +649,6 @@ snd_sgio2audio_pcm_pointer(struct snd_pcm_substream *substream)
static const struct snd_pcm_ops snd_sgio2audio_playback1_ops = {
.open = snd_sgio2audio_playback1_open,
.close = snd_sgio2audio_pcm_close,
- .hw_params = snd_sgio2audio_pcm_hw_params,
- .hw_free = snd_sgio2audio_pcm_hw_free,
.prepare = snd_sgio2audio_pcm_prepare,
.trigger = snd_sgio2audio_pcm_trigger,
.pointer = snd_sgio2audio_pcm_pointer,
@@ -659,8 +657,6 @@ static const struct snd_pcm_ops snd_sgio2audio_playback1_ops = {
static const struct snd_pcm_ops snd_sgio2audio_playback2_ops = {
.open = snd_sgio2audio_playback2_open,
.close = snd_sgio2audio_pcm_close,
- .hw_params = snd_sgio2audio_pcm_hw_params,
- .hw_free = snd_sgio2audio_pcm_hw_free,
.prepare = snd_sgio2audio_pcm_prepare,
.trigger = snd_sgio2audio_pcm_trigger,
.pointer = snd_sgio2audio_pcm_pointer,
@@ -669,8 +665,6 @@ static const struct snd_pcm_ops snd_sgio2audio_playback2_ops = {
static const struct snd_pcm_ops snd_sgio2audio_capture_ops = {
.open = snd_sgio2audio_capture_open,
.close = snd_sgio2audio_pcm_close,
- .hw_params = snd_sgio2audio_pcm_hw_params,
- .hw_free = snd_sgio2audio_pcm_hw_free,
.prepare = snd_sgio2audio_pcm_prepare,
.trigger = snd_sgio2audio_pcm_trigger,
.pointer = snd_sgio2audio_pcm_pointer,
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 5dc42f9..53e7732 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -4022,7 +4022,7 @@ void snd_print_pcm_bits(int pcm, char *buf, int buflen)
for (i = 0, j = 0; i < ARRAY_SIZE(bits); i++)
if (pcm & (AC_SUPPCM_BITS_8 << i))
- j += snprintf(buf + j, buflen - j, " %d", bits[i]);
+ j += scnprintf(buf + j, buflen - j, " %d", bits[i]);
buf[j] = '\0'; /* necessary when j == 0 */
}
diff --git a/sound/pci/hda/hda_eld.c b/sound/pci/hda/hda_eld.c
index bb46c89..136477ed 100644
--- a/sound/pci/hda/hda_eld.c
+++ b/sound/pci/hda/hda_eld.c
@@ -360,7 +360,7 @@ static void hdmi_print_pcm_rates(int pcm, char *buf, int buflen)
for (i = 0, j = 0; i < ARRAY_SIZE(alsa_rates); i++)
if (pcm & (1 << i))
- j += snprintf(buf + j, buflen - j, " %d",
+ j += scnprintf(buf + j, buflen - j, " %d",
alsa_rates[i]);
buf[j] = '\0'; /* necessary when j == 0 */
diff --git a/sound/pci/hda/hda_sysfs.c b/sound/pci/hda/hda_sysfs.c
index 0607ed5..eb8ec10 100644
--- a/sound/pci/hda/hda_sysfs.c
+++ b/sound/pci/hda/hda_sysfs.c
@@ -222,7 +222,7 @@ static ssize_t init_verbs_show(struct device *dev,
int i, len = 0;
mutex_lock(&codec->user_mutex);
snd_array_for_each(&codec->init_verbs, i, v) {
- len += snprintf(buf + len, PAGE_SIZE - len,
+ len += scnprintf(buf + len, PAGE_SIZE - len,
"0x%02x 0x%03x 0x%04x\n",
v->nid, v->verb, v->param);
}
@@ -272,7 +272,7 @@ static ssize_t hints_show(struct device *dev,
int i, len = 0;
mutex_lock(&codec->user_mutex);
snd_array_for_each(&codec->hints, i, hint) {
- len += snprintf(buf + len, PAGE_SIZE - len,
+ len += scnprintf(buf + len, PAGE_SIZE - len,
"%s = %s\n", hint->key, hint->val);
}
mutex_unlock(&codec->user_mutex);
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 4770fb3..63e1a56 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2447,6 +2447,10 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
SND_PCI_QUIRK(0x1071, 0x8258, "Evesham Voyaeger", ALC882_FIXUP_EAPD),
SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE),
SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS),
+ SND_PCI_QUIRK(0x1458, 0xa0cd, "Gigabyte X570 Aorus Master", ALC1220_FIXUP_CLEVO_P950),
+ SND_PCI_QUIRK(0x1462, 0x1228, "MSI-GP63", ALC1220_FIXUP_CLEVO_P950),
+ SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950),
+ SND_PCI_QUIRK(0x1462, 0x1293, "MSI-GP65", ALC1220_FIXUP_CLEVO_P950),
SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD),
SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS),
SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
@@ -5701,8 +5705,11 @@ static void alc_fixup_headset_jack(struct hda_codec *codec,
break;
case HDA_FIXUP_ACT_INIT:
switch (codec->core.vendor_id) {
+ case 0x10ec0215:
case 0x10ec0225:
+ case 0x10ec0285:
case 0x10ec0295:
+ case 0x10ec0289:
case 0x10ec0299:
alc_write_coef_idx(codec, 0x48, 0xd011);
alc_update_coef_idx(codec, 0x49, 0x007f, 0x0045);
@@ -5914,7 +5921,8 @@ enum {
ALC289_FIXUP_DUAL_SPK,
ALC294_FIXUP_SPK2_TO_DAC1,
ALC294_FIXUP_ASUS_DUAL_SPK,
-
+ ALC285_FIXUP_THINKPAD_HEADSET_JACK,
+ ALC294_FIXUP_ASUS_HPE,
};
static const struct hda_fixup alc269_fixups[] = {
@@ -6678,6 +6686,8 @@ static const struct hda_fixup alc269_fixups[] = {
[ALC285_FIXUP_SPEAKER2_TO_DAC1] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc285_fixup_speaker2_to_dac1,
+ .chained = true,
+ .chain_id = ALC269_FIXUP_THINKPAD_ACPI
},
[ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = {
.type = HDA_FIXUP_PINS,
@@ -7034,7 +7044,23 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC294_FIXUP_SPK2_TO_DAC1
},
-
+ [ALC285_FIXUP_THINKPAD_HEADSET_JACK] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc_fixup_headset_jack,
+ .chained = true,
+ .chain_id = ALC285_FIXUP_SPEAKER2_TO_DAC1
+ },
+ [ALC294_FIXUP_ASUS_HPE] = {
+ .type = HDA_FIXUP_VERBS,
+ .v.verbs = (const struct hda_verb[]) {
+ /* Set EAPD high */
+ { 0x20, AC_VERB_SET_COEF_INDEX, 0x0f },
+ { 0x20, AC_VERB_SET_PROC_COEF, 0x7774 },
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC294_FIXUP_ASUS_HEADSET_MIC
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -7109,6 +7135,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x0935, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
SND_PCI_QUIRK(0x1028, 0x097e, "Dell Precision", ALC289_FIXUP_DUAL_SPK),
SND_PCI_QUIRK(0x1028, 0x097d, "Dell Precision", ALC289_FIXUP_DUAL_SPK),
+ SND_PCI_QUIRK(0x1028, 0x098d, "Dell Precision", ALC233_FIXUP_ASUS_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1028, 0x09bf, "Dell Precision", ALC233_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -7198,6 +7226,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC),
SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_DUAL_SPK),
SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1043, 0x19ce, "ASUS B9450FA", ALC294_FIXUP_ASUS_HPE),
SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC),
@@ -7268,8 +7297,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x224c, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
SND_PCI_QUIRK(0x17aa, 0x224d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
SND_PCI_QUIRK(0x17aa, 0x225d, "Thinkpad T480", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
- SND_PCI_QUIRK(0x17aa, 0x2292, "Thinkpad X1 Yoga 7th", ALC285_FIXUP_SPEAKER2_TO_DAC1),
- SND_PCI_QUIRK(0x17aa, 0x2293, "Thinkpad X1 Carbon 7th", ALC285_FIXUP_SPEAKER2_TO_DAC1),
+ SND_PCI_QUIRK(0x17aa, 0x2292, "Thinkpad X1 Yoga 7th", ALC285_FIXUP_THINKPAD_HEADSET_JACK),
+ SND_PCI_QUIRK(0x17aa, 0x2293, "Thinkpad X1 Carbon 7th", ALC285_FIXUP_THINKPAD_HEADSET_JACK),
SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
@@ -8022,6 +8051,8 @@ static int patch_alc269(struct hda_codec *codec)
spec->gen.mixer_nid = 0;
break;
case 0x10ec0225:
+ codec->power_save_node = 1;
+ /* fall through */
case 0x10ec0295:
case 0x10ec0299:
spec->codec_variant = ALC269_TYPE_ALC225;
@@ -8581,6 +8612,8 @@ enum {
ALC669_FIXUP_ACER_ASPIRE_ETHOS,
ALC669_FIXUP_ACER_ASPIRE_ETHOS_HEADSET,
ALC671_FIXUP_HP_HEADSET_MIC2,
+ ALC662_FIXUP_ACER_X2660G_HEADSET_MODE,
+ ALC662_FIXUP_ACER_NITRO_HEADSET_MODE,
};
static const struct hda_fixup alc662_fixups[] = {
@@ -8926,6 +8959,25 @@ static const struct hda_fixup alc662_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc671_fixup_hp_headset_mic2,
},
+ [ALC662_FIXUP_ACER_X2660G_HEADSET_MODE] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x1a, 0x02a1113c }, /* use as headset mic, without its own jack detect */
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC662_FIXUP_USI_FUNC
+ },
+ [ALC662_FIXUP_ACER_NITRO_HEADSET_MODE] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x1a, 0x01a11140 }, /* use as headset mic, without its own jack detect */
+ { 0x1b, 0x0221144f },
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC662_FIXUP_USI_FUNC
+ },
};
static const struct snd_pci_quirk alc662_fixup_tbl[] = {
@@ -8937,6 +8989,8 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
SND_PCI_QUIRK(0x1025, 0x0349, "eMachines eM250", ALC662_FIXUP_INV_DMIC),
SND_PCI_QUIRK(0x1025, 0x034a, "Gateway LT27", ALC662_FIXUP_INV_DMIC),
SND_PCI_QUIRK(0x1025, 0x038b, "Acer Aspire 8943G", ALC662_FIXUP_ASPIRE),
+ SND_PCI_QUIRK(0x1025, 0x123c, "Acer Nitro N50-600", ALC662_FIXUP_ACER_NITRO_HEADSET_MODE),
+ SND_PCI_QUIRK(0x1025, 0x124e, "Acer 2660G", ALC662_FIXUP_ACER_X2660G_HEADSET_MODE),
SND_PCI_QUIRK(0x1028, 0x05d8, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x05fe, "Dell XPS 15", ALC668_FIXUP_DELL_XPS13),
diff --git a/sound/soc/amd/raven/acp3x-i2s.c b/sound/soc/amd/raven/acp3x-i2s.c
index 31cd400..91a3881 100644
--- a/sound/soc/amd/raven/acp3x-i2s.c
+++ b/sound/soc/amd/raven/acp3x-i2s.c
@@ -170,6 +170,7 @@ static int acp3x_i2s_trigger(struct snd_pcm_substream *substream,
struct snd_soc_card *card;
struct acp3x_platform_info *pinfo;
u32 ret, val, period_bytes, reg_val, ier_val, water_val;
+ u32 buf_size, buf_reg;
prtd = substream->private_data;
rtd = substream->runtime->private_data;
@@ -183,6 +184,8 @@ static int acp3x_i2s_trigger(struct snd_pcm_substream *substream,
}
period_bytes = frames_to_bytes(substream->runtime,
substream->runtime->period_size);
+ buf_size = frames_to_bytes(substream->runtime,
+ substream->runtime->buffer_size);
switch (cmd) {
case SNDRV_PCM_TRIGGER_START:
case SNDRV_PCM_TRIGGER_RESUME:
@@ -196,6 +199,7 @@ static int acp3x_i2s_trigger(struct snd_pcm_substream *substream,
mmACP_BT_TX_INTR_WATERMARK_SIZE;
reg_val = mmACP_BTTDM_ITER;
ier_val = mmACP_BTTDM_IER;
+ buf_reg = mmACP_BT_TX_RINGBUFSIZE;
break;
case I2S_SP_INSTANCE:
default:
@@ -203,6 +207,7 @@ static int acp3x_i2s_trigger(struct snd_pcm_substream *substream,
mmACP_I2S_TX_INTR_WATERMARK_SIZE;
reg_val = mmACP_I2STDM_ITER;
ier_val = mmACP_I2STDM_IER;
+ buf_reg = mmACP_I2S_TX_RINGBUFSIZE;
}
} else {
switch (rtd->i2s_instance) {
@@ -211,6 +216,7 @@ static int acp3x_i2s_trigger(struct snd_pcm_substream *substream,
mmACP_BT_RX_INTR_WATERMARK_SIZE;
reg_val = mmACP_BTTDM_IRER;
ier_val = mmACP_BTTDM_IER;
+ buf_reg = mmACP_BT_RX_RINGBUFSIZE;
break;
case I2S_SP_INSTANCE:
default:
@@ -218,9 +224,11 @@ static int acp3x_i2s_trigger(struct snd_pcm_substream *substream,
mmACP_I2S_RX_INTR_WATERMARK_SIZE;
reg_val = mmACP_I2STDM_IRER;
ier_val = mmACP_I2STDM_IER;
+ buf_reg = mmACP_I2S_RX_RINGBUFSIZE;
}
}
rv_writel(period_bytes, rtd->acp3x_base + water_val);
+ rv_writel(buf_size, rtd->acp3x_base + buf_reg);
val = rv_readl(rtd->acp3x_base + reg_val);
val = val | BIT(0);
rv_writel(val, rtd->acp3x_base + reg_val);
diff --git a/sound/soc/amd/raven/acp3x-pcm-dma.c b/sound/soc/amd/raven/acp3x-pcm-dma.c
index aecc3c0..d62c0d9 100644
--- a/sound/soc/amd/raven/acp3x-pcm-dma.c
+++ b/sound/soc/amd/raven/acp3x-pcm-dma.c
@@ -110,7 +110,7 @@ static void config_acp3x_dma(struct i2s_stream_instance *rtd, int direction)
{
u16 page_idx;
u32 low, high, val, acp_fifo_addr, reg_fifo_addr;
- u32 reg_ringbuf_size, reg_dma_size, reg_fifo_size;
+ u32 reg_dma_size, reg_fifo_size;
dma_addr_t addr;
addr = rtd->dma_addr;
@@ -157,7 +157,6 @@ static void config_acp3x_dma(struct i2s_stream_instance *rtd, int direction)
if (direction == SNDRV_PCM_STREAM_PLAYBACK) {
switch (rtd->i2s_instance) {
case I2S_BT_INSTANCE:
- reg_ringbuf_size = mmACP_BT_TX_RINGBUFSIZE;
reg_dma_size = mmACP_BT_TX_DMA_SIZE;
acp_fifo_addr = ACP_SRAM_PTE_OFFSET +
BT_PB_FIFO_ADDR_OFFSET;
@@ -169,7 +168,6 @@ static void config_acp3x_dma(struct i2s_stream_instance *rtd, int direction)
case I2S_SP_INSTANCE:
default:
- reg_ringbuf_size = mmACP_I2S_TX_RINGBUFSIZE;
reg_dma_size = mmACP_I2S_TX_DMA_SIZE;
acp_fifo_addr = ACP_SRAM_PTE_OFFSET +
SP_PB_FIFO_ADDR_OFFSET;
@@ -181,7 +179,6 @@ static void config_acp3x_dma(struct i2s_stream_instance *rtd, int direction)
} else {
switch (rtd->i2s_instance) {
case I2S_BT_INSTANCE:
- reg_ringbuf_size = mmACP_BT_RX_RINGBUFSIZE;
reg_dma_size = mmACP_BT_RX_DMA_SIZE;
acp_fifo_addr = ACP_SRAM_PTE_OFFSET +
BT_CAPT_FIFO_ADDR_OFFSET;
@@ -193,7 +190,6 @@ static void config_acp3x_dma(struct i2s_stream_instance *rtd, int direction)
case I2S_SP_INSTANCE:
default:
- reg_ringbuf_size = mmACP_I2S_RX_RINGBUFSIZE;
reg_dma_size = mmACP_I2S_RX_DMA_SIZE;
acp_fifo_addr = ACP_SRAM_PTE_OFFSET +
SP_CAPT_FIFO_ADDR_OFFSET;
@@ -203,7 +199,6 @@ static void config_acp3x_dma(struct i2s_stream_instance *rtd, int direction)
rtd->acp3x_base + mmACP_I2S_RX_RINGBUFADDR);
}
}
- rv_writel(MAX_BUFFER, rtd->acp3x_base + reg_ringbuf_size);
rv_writel(DMA_SIZE, rtd->acp3x_base + reg_dma_size);
rv_writel(acp_fifo_addr, rtd->acp3x_base + reg_fifo_addr);
rv_writel(FIFO_SIZE, rtd->acp3x_base + reg_fifo_size);
diff --git a/sound/soc/amd/raven/pci-acp3x.c b/sound/soc/amd/raven/pci-acp3x.c
index 65330bb..da60e2e 100644
--- a/sound/soc/amd/raven/pci-acp3x.c
+++ b/sound/soc/amd/raven/pci-acp3x.c
@@ -45,23 +45,6 @@ static int acp3x_power_on(void __iomem *acp3x_base)
return -ETIMEDOUT;
}
-static int acp3x_power_off(void __iomem *acp3x_base)
-{
- u32 val;
- int timeout;
-
- rv_writel(ACP_PGFSM_CNTL_POWER_OFF_MASK,
- acp3x_base + mmACP_PGFSM_CONTROL);
- timeout = 0;
- while (++timeout < 500) {
- val = rv_readl(acp3x_base + mmACP_PGFSM_STATUS);
- if ((val & ACP_PGFSM_STATUS_MASK) == ACP_POWERED_OFF)
- return 0;
- udelay(1);
- }
- return -ETIMEDOUT;
-}
-
static int acp3x_reset(void __iomem *acp3x_base)
{
u32 val;
@@ -115,12 +98,6 @@ static int acp3x_deinit(void __iomem *acp3x_base)
pr_err("ACP3x reset failed\n");
return ret;
}
- /* power off */
- ret = acp3x_power_off(acp3x_base);
- if (ret) {
- pr_err("ACP3x power off failed\n");
- return ret;
- }
return 0;
}
diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig
index d1dc8e6..71f2d42 100644
--- a/sound/soc/atmel/Kconfig
+++ b/sound/soc/atmel/Kconfig
@@ -10,11 +10,11 @@
if SND_ATMEL_SOC
config SND_ATMEL_SOC_PDC
- tristate
+ bool
depends on HAS_DMA
config SND_ATMEL_SOC_DMA
- tristate
+ bool
select SND_SOC_GENERIC_DMAENGINE_PCM
config SND_ATMEL_SOC_SSC
diff --git a/sound/soc/atmel/Makefile b/sound/soc/atmel/Makefile
index 1f6890e..c7d2989 100644
--- a/sound/soc/atmel/Makefile
+++ b/sound/soc/atmel/Makefile
@@ -6,8 +6,14 @@
snd-soc-atmel-i2s-objs := atmel-i2s.o
snd-soc-mchp-i2s-mcc-objs := mchp-i2s-mcc.o
-obj-$(CONFIG_SND_ATMEL_SOC_PDC) += snd-soc-atmel-pcm-pdc.o
-obj-$(CONFIG_SND_ATMEL_SOC_DMA) += snd-soc-atmel-pcm-dma.o
+# pdc and dma need to both be built-in if any user of
+# ssc is built-in.
+ifdef CONFIG_SND_ATMEL_SOC_PDC
+obj-$(CONFIG_SND_ATMEL_SOC_SSC) += snd-soc-atmel-pcm-pdc.o
+endif
+ifdef CONFIG_SND_ATMEL_SOC_DMA
+obj-$(CONFIG_SND_ATMEL_SOC_SSC) += snd-soc-atmel-pcm-dma.o
+endif
obj-$(CONFIG_SND_ATMEL_SOC_SSC) += snd-soc-atmel_ssc_dai.o
obj-$(CONFIG_SND_ATMEL_SOC_I2S) += snd-soc-atmel-i2s.o
obj-$(CONFIG_SND_MCHP_SOC_I2S_MCC) += snd-soc-mchp-i2s-mcc.o
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 7e90f5d..ea91243 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -1406,7 +1406,7 @@
depends on SND_SOC_I2C_AND_SPI
config SND_SOC_WM8741
- tristate "Wolfson Microelectronics WM8737 DAC"
+ tristate "Wolfson Microelectronics WM8741 DAC"
depends on SND_SOC_I2C_AND_SPI
config SND_SOC_WM8750
diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c
index 444cc4e..f005751 100644
--- a/sound/soc/codecs/hdmi-codec.c
+++ b/sound/soc/codecs/hdmi-codec.c
@@ -779,7 +779,17 @@ static int hdmi_of_xlate_dai_id(struct snd_soc_component *component,
return ret;
}
+static void hdmi_remove(struct snd_soc_component *component)
+{
+ struct hdmi_codec_priv *hcp = snd_soc_component_get_drvdata(component);
+
+ if (hcp->hcd.ops->hook_plugged_cb)
+ hcp->hcd.ops->hook_plugged_cb(component->dev->parent,
+ hcp->hcd.data, NULL, NULL);
+}
+
static const struct snd_soc_component_driver hdmi_driver = {
+ .remove = hdmi_remove,
.dapm_widgets = hdmi_widgets,
.num_dapm_widgets = ARRAY_SIZE(hdmi_widgets),
.of_xlate_dai_id = hdmi_of_xlate_dai_id,
diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c
index 5bc2c64..032adc1 100644
--- a/sound/soc/codecs/max98090.c
+++ b/sound/soc/codecs/max98090.c
@@ -5,150 +5,24 @@
* Copyright 2011-2012 Maxim Integrated Products
*/
-#include <linux/acpi.h>
-#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/i2c.h>
#include <linux/module.h>
-#include <linux/mutex.h>
#include <linux/of.h>
#include <linux/pm.h>
#include <linux/pm_runtime.h>
#include <linux/regmap.h>
#include <linux/slab.h>
+#include <linux/acpi.h>
+#include <linux/clk.h>
#include <sound/jack.h>
-#include <sound/max98090.h>
#include <sound/pcm.h>
#include <sound/pcm_params.h>
#include <sound/soc.h>
#include <sound/tlv.h>
+#include <sound/max98090.h>
#include "max98090.h"
-static void max98090_shdn_save_locked(struct max98090_priv *max98090)
-{
- int shdn = 0;
-
- /* saved_shdn, saved_count, SHDN are protected by card->dapm_mutex */
- regmap_read(max98090->regmap, M98090_REG_DEVICE_SHUTDOWN, &shdn);
- max98090->saved_shdn |= shdn;
- ++max98090->saved_count;
-
- if (shdn)
- regmap_write(max98090->regmap, M98090_REG_DEVICE_SHUTDOWN, 0x0);
-}
-
-static void max98090_shdn_restore_locked(struct max98090_priv *max98090)
-{
- /* saved_shdn, saved_count, SHDN are protected by card->dapm_mutex */
- if (--max98090->saved_count == 0) {
- if (max98090->saved_shdn) {
- regmap_write(max98090->regmap,
- M98090_REG_DEVICE_SHUTDOWN,
- M98090_SHDNN_MASK);
- max98090->saved_shdn = 0;
- }
- }
-}
-
-static void max98090_shdn_save(struct max98090_priv *max98090)
-{
- mutex_lock_nested(&max98090->component->card->dapm_mutex,
- SND_SOC_DAPM_CLASS_RUNTIME);
- max98090_shdn_save_locked(max98090);
-}
-
-static void max98090_shdn_restore(struct max98090_priv *max98090)
-{
- max98090_shdn_restore_locked(max98090);
- mutex_unlock(&max98090->component->card->dapm_mutex);
-}
-
-static int max98090_put_volsw(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct snd_soc_component *component =
- snd_soc_kcontrol_component(kcontrol);
- struct max98090_priv *max98090 =
- snd_soc_component_get_drvdata(component);
- int ret;
-
- max98090_shdn_save(max98090);
- ret = snd_soc_put_volsw(kcontrol, ucontrol);
- max98090_shdn_restore(max98090);
-
- return ret;
-}
-
-static int max98090_dapm_put_enum_double(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct snd_soc_component *component =
- snd_soc_dapm_kcontrol_component(kcontrol);
- struct max98090_priv *max98090 =
- snd_soc_component_get_drvdata(component);
- int ret;
-
- max98090_shdn_save(max98090);
- ret = snd_soc_dapm_put_enum_double_locked(kcontrol, ucontrol);
- max98090_shdn_restore(max98090);
-
- return ret;
-}
-
-static int max98090_put_enum_double(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct snd_soc_component *component =
- snd_soc_kcontrol_component(kcontrol);
- struct max98090_priv *max98090 =
- snd_soc_component_get_drvdata(component);
- int ret;
-
- max98090_shdn_save(max98090);
- ret = snd_soc_put_enum_double(kcontrol, ucontrol);
- max98090_shdn_restore(max98090);
-
- return ret;
-}
-
-static int max98090_bytes_put(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct snd_soc_component *component =
- snd_soc_kcontrol_component(kcontrol);
- struct max98090_priv *max98090 =
- snd_soc_component_get_drvdata(component);
- int ret;
-
- max98090_shdn_save(max98090);
- ret = snd_soc_bytes_put(kcontrol, ucontrol);
- max98090_shdn_restore(max98090);
-
- return ret;
-}
-
-static int max98090_dapm_event(struct snd_soc_dapm_widget *w,
- struct snd_kcontrol *kcontrol, int event)
-{
- struct snd_soc_component *component =
- snd_soc_dapm_to_component(w->dapm);
- struct max98090_priv *max98090 =
- snd_soc_component_get_drvdata(component);
-
- switch (event) {
- case SND_SOC_DAPM_PRE_PMU:
- case SND_SOC_DAPM_PRE_PMD:
- max98090_shdn_save_locked(max98090);
- break;
- case SND_SOC_DAPM_POST_PMU:
- case SND_SOC_DAPM_POST_PMD:
- max98090_shdn_restore_locked(max98090);
- break;
- }
-
- return 0;
-}
-
/* Allows for sparsely populated register maps */
static const struct reg_default max98090_reg[] = {
{ 0x00, 0x00 }, /* 00 Software Reset */
@@ -632,13 +506,10 @@ static SOC_ENUM_SINGLE_DECL(max98090_adchp_enum,
max98090_pwr_perf_text);
static const struct snd_kcontrol_new max98090_snd_controls[] = {
- SOC_ENUM_EXT("MIC Bias VCM Bandgap", max98090_vcmbandgap_enum,
- snd_soc_get_enum_double, max98090_put_enum_double),
+ SOC_ENUM("MIC Bias VCM Bandgap", max98090_vcmbandgap_enum),
- SOC_SINGLE_EXT("DMIC MIC Comp Filter Config",
- M98090_REG_DIGITAL_MIC_CONFIG,
- M98090_DMIC_COMP_SHIFT, M98090_DMIC_COMP_NUM - 1, 0,
- snd_soc_get_volsw, max98090_put_volsw),
+ SOC_SINGLE("DMIC MIC Comp Filter Config", M98090_REG_DIGITAL_MIC_CONFIG,
+ M98090_DMIC_COMP_SHIFT, M98090_DMIC_COMP_NUM - 1, 0),
SOC_SINGLE_EXT_TLV("MIC1 Boost Volume",
M98090_REG_MIC1_INPUT_LEVEL, M98090_MIC_PA1EN_SHIFT,
@@ -693,34 +564,24 @@ static const struct snd_kcontrol_new max98090_snd_controls[] = {
M98090_AVR_SHIFT, M98090_AVR_NUM - 1, 1,
max98090_av_tlv),
- SOC_ENUM_EXT("ADC Oversampling Rate", max98090_osr128_enum,
- snd_soc_get_enum_double, max98090_put_enum_double),
- SOC_SINGLE_EXT("ADC Quantizer Dither", M98090_REG_ADC_CONTROL,
- M98090_ADCDITHER_SHIFT, M98090_ADCDITHER_NUM - 1, 0,
- snd_soc_get_volsw, max98090_put_volsw),
- SOC_ENUM_EXT("ADC High Performance Mode", max98090_adchp_enum,
- snd_soc_get_enum_double, max98090_put_enum_double),
+ SOC_ENUM("ADC Oversampling Rate", max98090_osr128_enum),
+ SOC_SINGLE("ADC Quantizer Dither", M98090_REG_ADC_CONTROL,
+ M98090_ADCDITHER_SHIFT, M98090_ADCDITHER_NUM - 1, 0),
+ SOC_ENUM("ADC High Performance Mode", max98090_adchp_enum),
- SOC_SINGLE_EXT("DAC Mono Mode", M98090_REG_IO_CONFIGURATION,
- M98090_DMONO_SHIFT, M98090_DMONO_NUM - 1, 0,
- snd_soc_get_volsw, max98090_put_volsw),
- SOC_SINGLE_EXT("SDIN Mode", M98090_REG_IO_CONFIGURATION,
- M98090_SDIEN_SHIFT, M98090_SDIEN_NUM - 1, 0,
- snd_soc_get_volsw, max98090_put_volsw),
- SOC_SINGLE_EXT("SDOUT Mode", M98090_REG_IO_CONFIGURATION,
- M98090_SDOEN_SHIFT, M98090_SDOEN_NUM - 1, 0,
- snd_soc_get_volsw, max98090_put_volsw),
- SOC_SINGLE_EXT("SDOUT Hi-Z Mode", M98090_REG_IO_CONFIGURATION,
- M98090_HIZOFF_SHIFT, M98090_HIZOFF_NUM - 1, 1,
- snd_soc_get_volsw, max98090_put_volsw),
- SOC_ENUM_EXT("Filter Mode", max98090_mode_enum,
- snd_soc_get_enum_double, max98090_put_enum_double),
- SOC_SINGLE_EXT("Record Path DC Blocking", M98090_REG_FILTER_CONFIG,
- M98090_AHPF_SHIFT, M98090_AHPF_NUM - 1, 0,
- snd_soc_get_volsw, max98090_put_volsw),
- SOC_SINGLE_EXT("Playback Path DC Blocking", M98090_REG_FILTER_CONFIG,
- M98090_DHPF_SHIFT, M98090_DHPF_NUM - 1, 0,
- snd_soc_get_volsw, max98090_put_volsw),
+ SOC_SINGLE("DAC Mono Mode", M98090_REG_IO_CONFIGURATION,
+ M98090_DMONO_SHIFT, M98090_DMONO_NUM - 1, 0),
+ SOC_SINGLE("SDIN Mode", M98090_REG_IO_CONFIGURATION,
+ M98090_SDIEN_SHIFT, M98090_SDIEN_NUM - 1, 0),
+ SOC_SINGLE("SDOUT Mode", M98090_REG_IO_CONFIGURATION,
+ M98090_SDOEN_SHIFT, M98090_SDOEN_NUM - 1, 0),
+ SOC_SINGLE("SDOUT Hi-Z Mode", M98090_REG_IO_CONFIGURATION,
+ M98090_HIZOFF_SHIFT, M98090_HIZOFF_NUM - 1, 1),
+ SOC_ENUM("Filter Mode", max98090_mode_enum),
+ SOC_SINGLE("Record Path DC Blocking", M98090_REG_FILTER_CONFIG,
+ M98090_AHPF_SHIFT, M98090_AHPF_NUM - 1, 0),
+ SOC_SINGLE("Playback Path DC Blocking", M98090_REG_FILTER_CONFIG,
+ M98090_DHPF_SHIFT, M98090_DHPF_NUM - 1, 0),
SOC_SINGLE_TLV("Digital BQ Volume", M98090_REG_ADC_BIQUAD_LEVEL,
M98090_AVBQ_SHIFT, M98090_AVBQ_NUM - 1, 1, max98090_dv_tlv),
SOC_SINGLE_EXT_TLV("Digital Sidetone Volume",
@@ -733,17 +594,13 @@ static const struct snd_kcontrol_new max98090_snd_controls[] = {
SOC_SINGLE_TLV("Digital Volume", M98090_REG_DAI_PLAYBACK_LEVEL,
M98090_DV_SHIFT, M98090_DV_NUM - 1, 1,
max98090_dv_tlv),
- SND_SOC_BYTES_E("EQ Coefficients", M98090_REG_EQUALIZER_BASE, 105,
- snd_soc_bytes_get, max98090_bytes_put),
- SOC_SINGLE_EXT("Digital EQ 3 Band Switch", M98090_REG_DSP_FILTER_ENABLE,
- M98090_EQ3BANDEN_SHIFT, M98090_EQ3BANDEN_NUM - 1, 0,
- snd_soc_get_volsw, max98090_put_volsw),
- SOC_SINGLE_EXT("Digital EQ 5 Band Switch", M98090_REG_DSP_FILTER_ENABLE,
- M98090_EQ5BANDEN_SHIFT, M98090_EQ5BANDEN_NUM - 1, 0,
- snd_soc_get_volsw, max98090_put_volsw),
- SOC_SINGLE_EXT("Digital EQ 7 Band Switch", M98090_REG_DSP_FILTER_ENABLE,
- M98090_EQ7BANDEN_SHIFT, M98090_EQ7BANDEN_NUM - 1, 0,
- snd_soc_get_volsw, max98090_put_volsw),
+ SND_SOC_BYTES("EQ Coefficients", M98090_REG_EQUALIZER_BASE, 105),
+ SOC_SINGLE("Digital EQ 3 Band Switch", M98090_REG_DSP_FILTER_ENABLE,
+ M98090_EQ3BANDEN_SHIFT, M98090_EQ3BANDEN_NUM - 1, 0),
+ SOC_SINGLE("Digital EQ 5 Band Switch", M98090_REG_DSP_FILTER_ENABLE,
+ M98090_EQ5BANDEN_SHIFT, M98090_EQ5BANDEN_NUM - 1, 0),
+ SOC_SINGLE("Digital EQ 7 Band Switch", M98090_REG_DSP_FILTER_ENABLE,
+ M98090_EQ7BANDEN_SHIFT, M98090_EQ7BANDEN_NUM - 1, 0),
SOC_SINGLE("Digital EQ Clipping Detection", M98090_REG_DAI_PLAYBACK_LEVEL_EQ,
M98090_EQCLPN_SHIFT, M98090_EQCLPN_NUM - 1,
1),
@@ -751,34 +608,25 @@ static const struct snd_kcontrol_new max98090_snd_controls[] = {
M98090_DVEQ_SHIFT, M98090_DVEQ_NUM - 1, 1,
max98090_dv_tlv),
- SOC_SINGLE_EXT("ALC Enable", M98090_REG_DRC_TIMING,
- M98090_DRCEN_SHIFT, M98090_DRCEN_NUM - 1, 0,
- snd_soc_get_volsw, max98090_put_volsw),
- SOC_ENUM_EXT("ALC Attack Time", max98090_drcatk_enum,
- snd_soc_get_enum_double, max98090_put_enum_double),
- SOC_ENUM_EXT("ALC Release Time", max98090_drcrls_enum,
- snd_soc_get_enum_double, max98090_put_enum_double),
+ SOC_SINGLE("ALC Enable", M98090_REG_DRC_TIMING,
+ M98090_DRCEN_SHIFT, M98090_DRCEN_NUM - 1, 0),
+ SOC_ENUM("ALC Attack Time", max98090_drcatk_enum),
+ SOC_ENUM("ALC Release Time", max98090_drcrls_enum),
SOC_SINGLE_TLV("ALC Make Up Volume", M98090_REG_DRC_GAIN,
M98090_DRCG_SHIFT, M98090_DRCG_NUM - 1, 0,
max98090_alcmakeup_tlv),
- SOC_ENUM_EXT("ALC Compression Ratio", max98090_alccmp_enum,
- snd_soc_get_enum_double, max98090_put_enum_double),
- SOC_ENUM_EXT("ALC Expansion Ratio", max98090_drcexp_enum,
- snd_soc_get_enum_double, max98090_put_enum_double),
- SOC_SINGLE_EXT_TLV("ALC Compression Threshold Volume",
+ SOC_ENUM("ALC Compression Ratio", max98090_alccmp_enum),
+ SOC_ENUM("ALC Expansion Ratio", max98090_drcexp_enum),
+ SOC_SINGLE_TLV("ALC Compression Threshold Volume",
M98090_REG_DRC_COMPRESSOR, M98090_DRCTHC_SHIFT,
- M98090_DRCTHC_NUM - 1, 1,
- snd_soc_get_volsw, max98090_put_volsw, max98090_alccomp_tlv),
- SOC_SINGLE_EXT_TLV("ALC Expansion Threshold Volume",
+ M98090_DRCTHC_NUM - 1, 1, max98090_alccomp_tlv),
+ SOC_SINGLE_TLV("ALC Expansion Threshold Volume",
M98090_REG_DRC_EXPANDER, M98090_DRCTHE_SHIFT,
- M98090_DRCTHE_NUM - 1, 1,
- snd_soc_get_volsw, max98090_put_volsw, max98090_drcexp_tlv),
+ M98090_DRCTHE_NUM - 1, 1, max98090_drcexp_tlv),
- SOC_ENUM_EXT("DAC HP Playback Performance Mode",
- max98090_dac_perfmode_enum,
- snd_soc_get_enum_double, max98090_put_enum_double),
- SOC_ENUM_EXT("DAC High Performance Mode", max98090_dachp_enum,
- snd_soc_get_enum_double, max98090_put_enum_double),
+ SOC_ENUM("DAC HP Playback Performance Mode",
+ max98090_dac_perfmode_enum),
+ SOC_ENUM("DAC High Performance Mode", max98090_dachp_enum),
SOC_SINGLE_TLV("Headphone Left Mixer Volume",
M98090_REG_HP_CONTROL, M98090_MIXHPLG_SHIFT,
@@ -836,12 +684,9 @@ static const struct snd_kcontrol_new max98090_snd_controls[] = {
SOC_SINGLE("Volume Adjustment Smoothing", M98090_REG_LEVEL_CONTROL,
M98090_VSENN_SHIFT, M98090_VSENN_NUM - 1, 1),
- SND_SOC_BYTES_E("Biquad Coefficients",
- M98090_REG_RECORD_BIQUAD_BASE, 15,
- snd_soc_bytes_get, max98090_bytes_put),
- SOC_SINGLE_EXT("Biquad Switch", M98090_REG_DSP_FILTER_ENABLE,
- M98090_ADCBQEN_SHIFT, M98090_ADCBQEN_NUM - 1, 0,
- snd_soc_get_volsw, max98090_put_volsw),
+ SND_SOC_BYTES("Biquad Coefficients", M98090_REG_RECORD_BIQUAD_BASE, 15),
+ SOC_SINGLE("Biquad Switch", M98090_REG_DSP_FILTER_ENABLE,
+ M98090_ADCBQEN_SHIFT, M98090_ADCBQEN_NUM - 1, 0),
};
static const struct snd_kcontrol_new max98091_snd_controls[] = {
@@ -850,12 +695,10 @@ static const struct snd_kcontrol_new max98091_snd_controls[] = {
M98090_DMIC34_ZEROPAD_SHIFT,
M98090_DMIC34_ZEROPAD_NUM - 1, 0),
- SOC_ENUM_EXT("Filter DMIC34 Mode", max98090_filter_dmic34mode_enum,
- snd_soc_get_enum_double, max98090_put_enum_double),
- SOC_SINGLE_EXT("DMIC34 DC Blocking", M98090_REG_FILTER_CONFIG,
+ SOC_ENUM("Filter DMIC34 Mode", max98090_filter_dmic34mode_enum),
+ SOC_SINGLE("DMIC34 DC Blocking", M98090_REG_FILTER_CONFIG,
M98090_FLT_DMIC34HPF_SHIFT,
- M98090_FLT_DMIC34HPF_NUM - 1, 0,
- snd_soc_get_volsw, max98090_put_volsw),
+ M98090_FLT_DMIC34HPF_NUM - 1, 0),
SOC_SINGLE_TLV("DMIC3 Boost Volume", M98090_REG_DMIC3_VOLUME,
M98090_DMIC_AV3G_SHIFT, M98090_DMIC_AV3G_NUM - 1, 0,
@@ -873,9 +716,8 @@ static const struct snd_kcontrol_new max98091_snd_controls[] = {
SND_SOC_BYTES("DMIC34 Biquad Coefficients",
M98090_REG_DMIC34_BIQUAD_BASE, 15),
- SOC_SINGLE_EXT("DMIC34 Biquad Switch", M98090_REG_DSP_FILTER_ENABLE,
- M98090_DMIC34BQEN_SHIFT, M98090_DMIC34BQEN_NUM - 1, 0,
- snd_soc_get_volsw, max98090_put_volsw),
+ SOC_SINGLE("DMIC34 Biquad Switch", M98090_REG_DSP_FILTER_ENABLE,
+ M98090_DMIC34BQEN_SHIFT, M98090_DMIC34BQEN_NUM - 1, 0),
SOC_SINGLE_TLV("DMIC34 BQ PreAttenuation Volume",
M98090_REG_DMIC34_BQ_PREATTEN, M98090_AV34BQ_SHIFT,
@@ -929,6 +771,19 @@ static int max98090_micinput_event(struct snd_soc_dapm_widget *w,
return 0;
}
+static int max98090_shdn_event(struct snd_soc_dapm_widget *w,
+ struct snd_kcontrol *kcontrol, int event)
+{
+ struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
+ struct max98090_priv *max98090 = snd_soc_component_get_drvdata(component);
+
+ if (event & SND_SOC_DAPM_POST_PMU)
+ max98090->shdn_pending = true;
+
+ return 0;
+
+}
+
static const char *mic1_mux_text[] = { "IN12", "IN56" };
static SOC_ENUM_SINGLE_DECL(mic1_mux_enum,
@@ -1029,14 +884,10 @@ static SOC_ENUM_SINGLE_DECL(ltenr_mux_enum,
lten_mux_text);
static const struct snd_kcontrol_new max98090_ltenl_mux =
- SOC_DAPM_ENUM_EXT("LTENL Mux", ltenl_mux_enum,
- snd_soc_dapm_get_enum_double,
- max98090_dapm_put_enum_double);
+ SOC_DAPM_ENUM("LTENL Mux", ltenl_mux_enum);
static const struct snd_kcontrol_new max98090_ltenr_mux =
- SOC_DAPM_ENUM_EXT("LTENR Mux", ltenr_mux_enum,
- snd_soc_dapm_get_enum_double,
- max98090_dapm_put_enum_double);
+ SOC_DAPM_ENUM("LTENR Mux", ltenr_mux_enum);
static const char *lben_mux_text[] = { "Normal", "Loopback" };
@@ -1051,14 +902,10 @@ static SOC_ENUM_SINGLE_DECL(lbenr_mux_enum,
lben_mux_text);
static const struct snd_kcontrol_new max98090_lbenl_mux =
- SOC_DAPM_ENUM_EXT("LBENL Mux", lbenl_mux_enum,
- snd_soc_dapm_get_enum_double,
- max98090_dapm_put_enum_double);
+ SOC_DAPM_ENUM("LBENL Mux", lbenl_mux_enum);
static const struct snd_kcontrol_new max98090_lbenr_mux =
- SOC_DAPM_ENUM_EXT("LBENR Mux", lbenr_mux_enum,
- snd_soc_dapm_get_enum_double,
- max98090_dapm_put_enum_double);
+ SOC_DAPM_ENUM("LBENR Mux", lbenr_mux_enum);
static const char *stenl_mux_text[] = { "Normal", "Sidetone Left" };
@@ -1225,25 +1072,21 @@ static const struct snd_soc_dapm_widget max98090_dapm_widgets[] = {
SND_SOC_DAPM_INPUT("IN56"),
SND_SOC_DAPM_SUPPLY("MICBIAS", M98090_REG_INPUT_ENABLE,
- M98090_MBEN_SHIFT, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
+ M98090_MBEN_SHIFT, 0, NULL, 0),
SND_SOC_DAPM_SUPPLY("SHDN", M98090_REG_DEVICE_SHUTDOWN,
M98090_SHDNN_SHIFT, 0, NULL, 0),
SND_SOC_DAPM_SUPPLY("SDIEN", M98090_REG_IO_CONFIGURATION,
- M98090_SDIEN_SHIFT, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
+ M98090_SDIEN_SHIFT, 0, NULL, 0),
SND_SOC_DAPM_SUPPLY("SDOEN", M98090_REG_IO_CONFIGURATION,
- M98090_SDOEN_SHIFT, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
+ M98090_SDOEN_SHIFT, 0, NULL, 0),
SND_SOC_DAPM_SUPPLY("DMICL_ENA", M98090_REG_DIGITAL_MIC_ENABLE,
- M98090_DIGMICL_SHIFT, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
+ M98090_DIGMICL_SHIFT, 0, max98090_shdn_event,
+ SND_SOC_DAPM_POST_PMU),
SND_SOC_DAPM_SUPPLY("DMICR_ENA", M98090_REG_DIGITAL_MIC_ENABLE,
- M98090_DIGMICR_SHIFT, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
+ M98090_DIGMICR_SHIFT, 0, max98090_shdn_event,
+ SND_SOC_DAPM_POST_PMU),
SND_SOC_DAPM_SUPPLY("AHPF", M98090_REG_FILTER_CONFIG,
- M98090_AHPF_SHIFT, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
+ M98090_AHPF_SHIFT, 0, NULL, 0),
/*
* Note: Sysclk and misc power supplies are taken care of by SHDN
@@ -1273,12 +1116,10 @@ static const struct snd_soc_dapm_widget max98090_dapm_widgets[] = {
&max98090_lineb_mixer_controls[0],
ARRAY_SIZE(max98090_lineb_mixer_controls)),
- SND_SOC_DAPM_PGA_E("LINEA Input", M98090_REG_INPUT_ENABLE,
- M98090_LINEAEN_SHIFT, 0, NULL, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
- SND_SOC_DAPM_PGA_E("LINEB Input", M98090_REG_INPUT_ENABLE,
- M98090_LINEBEN_SHIFT, 0, NULL, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
+ SND_SOC_DAPM_PGA("LINEA Input", M98090_REG_INPUT_ENABLE,
+ M98090_LINEAEN_SHIFT, 0, NULL, 0),
+ SND_SOC_DAPM_PGA("LINEB Input", M98090_REG_INPUT_ENABLE,
+ M98090_LINEBEN_SHIFT, 0, NULL, 0),
SND_SOC_DAPM_MIXER("Left ADC Mixer", SND_SOC_NOPM, 0, 0,
&max98090_left_adc_mixer_controls[0],
@@ -1289,11 +1130,11 @@ static const struct snd_soc_dapm_widget max98090_dapm_widgets[] = {
ARRAY_SIZE(max98090_right_adc_mixer_controls)),
SND_SOC_DAPM_ADC_E("ADCL", NULL, M98090_REG_INPUT_ENABLE,
- M98090_ADLEN_SHIFT, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
+ M98090_ADLEN_SHIFT, 0, max98090_shdn_event,
+ SND_SOC_DAPM_POST_PMU),
SND_SOC_DAPM_ADC_E("ADCR", NULL, M98090_REG_INPUT_ENABLE,
- M98090_ADREN_SHIFT, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
+ M98090_ADREN_SHIFT, 0, max98090_shdn_event,
+ SND_SOC_DAPM_POST_PMU),
SND_SOC_DAPM_AIF_OUT("AIFOUTL", "HiFi Capture", 0,
SND_SOC_NOPM, 0, 0),
@@ -1321,12 +1162,10 @@ static const struct snd_soc_dapm_widget max98090_dapm_widgets[] = {
SND_SOC_DAPM_AIF_IN("AIFINL", "HiFi Playback", 0, SND_SOC_NOPM, 0, 0),
SND_SOC_DAPM_AIF_IN("AIFINR", "HiFi Playback", 1, SND_SOC_NOPM, 0, 0),
- SND_SOC_DAPM_DAC_E("DACL", NULL, M98090_REG_OUTPUT_ENABLE,
- M98090_DALEN_SHIFT, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
- SND_SOC_DAPM_DAC_E("DACR", NULL, M98090_REG_OUTPUT_ENABLE,
- M98090_DAREN_SHIFT, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
+ SND_SOC_DAPM_DAC("DACL", NULL, M98090_REG_OUTPUT_ENABLE,
+ M98090_DALEN_SHIFT, 0),
+ SND_SOC_DAPM_DAC("DACR", NULL, M98090_REG_OUTPUT_ENABLE,
+ M98090_DAREN_SHIFT, 0),
SND_SOC_DAPM_MIXER("Left Headphone Mixer", SND_SOC_NOPM, 0, 0,
&max98090_left_hp_mixer_controls[0],
@@ -1361,26 +1200,20 @@ static const struct snd_soc_dapm_widget max98090_dapm_widgets[] = {
SND_SOC_DAPM_MUX("MIXHPRSEL Mux", SND_SOC_NOPM, 0, 0,
&max98090_mixhprsel_mux),
- SND_SOC_DAPM_PGA_E("HP Left Out", M98090_REG_OUTPUT_ENABLE,
- M98090_HPLEN_SHIFT, 0, NULL, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
- SND_SOC_DAPM_PGA_E("HP Right Out", M98090_REG_OUTPUT_ENABLE,
- M98090_HPREN_SHIFT, 0, NULL, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
+ SND_SOC_DAPM_PGA("HP Left Out", M98090_REG_OUTPUT_ENABLE,
+ M98090_HPLEN_SHIFT, 0, NULL, 0),
+ SND_SOC_DAPM_PGA("HP Right Out", M98090_REG_OUTPUT_ENABLE,
+ M98090_HPREN_SHIFT, 0, NULL, 0),
- SND_SOC_DAPM_PGA_E("SPK Left Out", M98090_REG_OUTPUT_ENABLE,
- M98090_SPLEN_SHIFT, 0, NULL, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
- SND_SOC_DAPM_PGA_E("SPK Right Out", M98090_REG_OUTPUT_ENABLE,
- M98090_SPREN_SHIFT, 0, NULL, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
+ SND_SOC_DAPM_PGA("SPK Left Out", M98090_REG_OUTPUT_ENABLE,
+ M98090_SPLEN_SHIFT, 0, NULL, 0),
+ SND_SOC_DAPM_PGA("SPK Right Out", M98090_REG_OUTPUT_ENABLE,
+ M98090_SPREN_SHIFT, 0, NULL, 0),
- SND_SOC_DAPM_PGA_E("RCV Left Out", M98090_REG_OUTPUT_ENABLE,
- M98090_RCVLEN_SHIFT, 0, NULL, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
- SND_SOC_DAPM_PGA_E("RCV Right Out", M98090_REG_OUTPUT_ENABLE,
- M98090_RCVREN_SHIFT, 0, NULL, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
+ SND_SOC_DAPM_PGA("RCV Left Out", M98090_REG_OUTPUT_ENABLE,
+ M98090_RCVLEN_SHIFT, 0, NULL, 0),
+ SND_SOC_DAPM_PGA("RCV Right Out", M98090_REG_OUTPUT_ENABLE,
+ M98090_RCVREN_SHIFT, 0, NULL, 0),
SND_SOC_DAPM_OUTPUT("HPL"),
SND_SOC_DAPM_OUTPUT("HPR"),
@@ -1395,11 +1228,9 @@ static const struct snd_soc_dapm_widget max98091_dapm_widgets[] = {
SND_SOC_DAPM_INPUT("DMIC4"),
SND_SOC_DAPM_SUPPLY("DMIC3_ENA", M98090_REG_DIGITAL_MIC_ENABLE,
- M98090_DIGMIC3_SHIFT, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
+ M98090_DIGMIC3_SHIFT, 0, NULL, 0),
SND_SOC_DAPM_SUPPLY("DMIC4_ENA", M98090_REG_DIGITAL_MIC_ENABLE,
- M98090_DIGMIC4_SHIFT, 0, max98090_dapm_event,
- SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD),
+ M98090_DIGMIC4_SHIFT, 0, NULL, 0),
};
static const struct snd_soc_dapm_route max98090_dapm_routes[] = {
@@ -1670,11 +1501,6 @@ static void max98090_configure_bclk(struct snd_soc_component *component)
return;
}
- /*
- * Master mode: no need to save and restore SHDN for the following
- * sensitive registers.
- */
-
/* Check for supported PCLK to LRCLK ratios */
for (i = 0; i < ARRAY_SIZE(pclk_rates); i++) {
if ((pclk_rates[i] == max98090->sysclk) &&
@@ -1761,14 +1587,12 @@ static int max98090_dai_set_fmt(struct snd_soc_dai *codec_dai,
switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
case SND_SOC_DAIFMT_CBS_CFS:
/* Set to slave mode PLL - MAS mode off */
- max98090_shdn_save(max98090);
snd_soc_component_write(component,
M98090_REG_CLOCK_RATIO_NI_MSB, 0x00);
snd_soc_component_write(component,
M98090_REG_CLOCK_RATIO_NI_LSB, 0x00);
snd_soc_component_update_bits(component, M98090_REG_CLOCK_MODE,
M98090_USE_M1_MASK, 0);
- max98090_shdn_restore(max98090);
max98090->master = false;
break;
case SND_SOC_DAIFMT_CBM_CFM:
@@ -1794,9 +1618,7 @@ static int max98090_dai_set_fmt(struct snd_soc_dai *codec_dai,
dev_err(component->dev, "DAI clock mode unsupported");
return -EINVAL;
}
- max98090_shdn_save(max98090);
snd_soc_component_write(component, M98090_REG_MASTER_MODE, regval);
- max98090_shdn_restore(max98090);
regval = 0;
switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
@@ -1841,10 +1663,8 @@ static int max98090_dai_set_fmt(struct snd_soc_dai *codec_dai,
if (max98090->tdm_slots > 1)
regval ^= M98090_BCI_MASK;
- max98090_shdn_save(max98090);
snd_soc_component_write(component,
M98090_REG_INTERFACE_FORMAT, regval);
- max98090_shdn_restore(max98090);
}
return 0;
@@ -1856,7 +1676,6 @@ static int max98090_set_tdm_slot(struct snd_soc_dai *codec_dai,
struct snd_soc_component *component = codec_dai->component;
struct max98090_priv *max98090 = snd_soc_component_get_drvdata(component);
struct max98090_cdata *cdata;
-
cdata = &max98090->dai[0];
if (slots < 0 || slots > 4)
@@ -1866,7 +1685,6 @@ static int max98090_set_tdm_slot(struct snd_soc_dai *codec_dai,
max98090->tdm_width = slot_width;
if (max98090->tdm_slots > 1) {
- max98090_shdn_save(max98090);
/* SLOTL SLOTR SLOTDLY */
snd_soc_component_write(component, M98090_REG_TDM_FORMAT,
0 << M98090_TDM_SLOTL_SHIFT |
@@ -1877,7 +1695,6 @@ static int max98090_set_tdm_slot(struct snd_soc_dai *codec_dai,
snd_soc_component_update_bits(component, M98090_REG_TDM_CONTROL,
M98090_TDM_MASK,
M98090_TDM_MASK);
- max98090_shdn_restore(max98090);
}
/*
@@ -2077,7 +1894,6 @@ static int max98090_configure_dmic(struct max98090_priv *max98090,
dmic_freq = dmic_table[pclk_index].settings[micclk_index].freq;
dmic_comp = dmic_table[pclk_index].settings[micclk_index].comp[i];
- max98090_shdn_save(max98090);
regmap_update_bits(max98090->regmap, M98090_REG_DIGITAL_MIC_ENABLE,
M98090_MICCLK_MASK,
micclk_index << M98090_MICCLK_SHIFT);
@@ -2086,7 +1902,6 @@ static int max98090_configure_dmic(struct max98090_priv *max98090,
M98090_DMIC_COMP_MASK | M98090_DMIC_FREQ_MASK,
dmic_comp << M98090_DMIC_COMP_SHIFT |
dmic_freq << M98090_DMIC_FREQ_SHIFT);
- max98090_shdn_restore(max98090);
return 0;
}
@@ -2123,10 +1938,8 @@ static int max98090_dai_hw_params(struct snd_pcm_substream *substream,
switch (params_width(params)) {
case 16:
- max98090_shdn_save(max98090);
snd_soc_component_update_bits(component, M98090_REG_INTERFACE_FORMAT,
M98090_WS_MASK, 0);
- max98090_shdn_restore(max98090);
break;
default:
return -EINVAL;
@@ -2137,7 +1950,6 @@ static int max98090_dai_hw_params(struct snd_pcm_substream *substream,
cdata->rate = max98090->lrclk;
- max98090_shdn_save(max98090);
/* Update filter mode */
if (max98090->lrclk < 24000)
snd_soc_component_update_bits(component, M98090_REG_FILTER_CONFIG,
@@ -2153,7 +1965,6 @@ static int max98090_dai_hw_params(struct snd_pcm_substream *substream,
else
snd_soc_component_update_bits(component, M98090_REG_FILTER_CONFIG,
M98090_DHF_MASK, M98090_DHF_MASK);
- max98090_shdn_restore(max98090);
max98090_configure_dmic(max98090, max98090->dmic_freq, max98090->pclk,
max98090->lrclk);
@@ -2184,7 +1995,6 @@ static int max98090_dai_set_sysclk(struct snd_soc_dai *dai,
* 0x02 (when master clk is 20MHz to 40MHz)..
* 0x03 (when master clk is 40MHz to 60MHz)..
*/
- max98090_shdn_save(max98090);
if ((freq >= 10000000) && (freq <= 20000000)) {
snd_soc_component_write(component, M98090_REG_SYSTEM_CLOCK,
M98090_PSCLK_DIV1);
@@ -2199,10 +2009,8 @@ static int max98090_dai_set_sysclk(struct snd_soc_dai *dai,
max98090->pclk = freq >> 2;
} else {
dev_err(component->dev, "Invalid master clock frequency\n");
- max98090_shdn_restore(max98090);
return -EINVAL;
}
- max98090_shdn_restore(max98090);
max98090->sysclk = freq;
@@ -2314,12 +2122,10 @@ static void max98090_pll_work(struct max98090_priv *max98090)
*/
/* Toggle shutdown OFF then ON */
- mutex_lock(&component->card->dapm_mutex);
snd_soc_component_update_bits(component, M98090_REG_DEVICE_SHUTDOWN,
M98090_SHDNN_MASK, 0);
snd_soc_component_update_bits(component, M98090_REG_DEVICE_SHUTDOWN,
M98090_SHDNN_MASK, M98090_SHDNN_MASK);
- mutex_unlock(&component->card->dapm_mutex);
for (i = 0; i < 10; ++i) {
/* Give PLL time to lock */
@@ -2642,12 +2448,7 @@ static int max98090_probe(struct snd_soc_component *component)
*/
snd_soc_component_read32(component, M98090_REG_DEVICE_STATUS);
- /*
- * SHDN should be 0 at the point, no need to save/restore for the
- * following registers.
- *
- * High Performance is default
- */
+ /* High Performance is default */
snd_soc_component_update_bits(component, M98090_REG_DAC_CONTROL,
M98090_DACHP_MASK,
1 << M98090_DACHP_SHIFT);
@@ -2658,12 +2459,7 @@ static int max98090_probe(struct snd_soc_component *component)
M98090_ADCHP_MASK,
1 << M98090_ADCHP_SHIFT);
- /*
- * SHDN should be 0 at the point, no need to save/restore for the
- * following registers.
- *
- * Turn on VCM bandgap reference
- */
+ /* Turn on VCM bandgap reference */
snd_soc_component_write(component, M98090_REG_BIAS_CONTROL,
M98090_VCM_MODE_MASK);
@@ -2695,9 +2491,25 @@ static void max98090_remove(struct snd_soc_component *component)
max98090->component = NULL;
}
+static void max98090_seq_notifier(struct snd_soc_component *component,
+ enum snd_soc_dapm_type event, int subseq)
+{
+ struct max98090_priv *max98090 = snd_soc_component_get_drvdata(component);
+
+ if (max98090->shdn_pending) {
+ snd_soc_component_update_bits(component, M98090_REG_DEVICE_SHUTDOWN,
+ M98090_SHDNN_MASK, 0);
+ msleep(40);
+ snd_soc_component_update_bits(component, M98090_REG_DEVICE_SHUTDOWN,
+ M98090_SHDNN_MASK, M98090_SHDNN_MASK);
+ max98090->shdn_pending = false;
+ }
+}
+
static const struct snd_soc_component_driver soc_component_dev_max98090 = {
.probe = max98090_probe,
.remove = max98090_remove,
+ .seq_notifier = max98090_seq_notifier,
.set_bias_level = max98090_set_bias_level,
.idle_bias_on = 1,
.use_pmdown_time = 1,
diff --git a/sound/soc/codecs/max98090.h b/sound/soc/codecs/max98090.h
index 0a31708..a197114 100644
--- a/sound/soc/codecs/max98090.h
+++ b/sound/soc/codecs/max98090.h
@@ -1539,8 +1539,7 @@ struct max98090_priv {
unsigned int pa2en;
unsigned int sidetone;
bool master;
- int saved_count;
- int saved_shdn;
+ bool shdn_pending;
};
int max98090_mic_detect(struct snd_soc_component *component,
diff --git a/sound/soc/codecs/pcm512x.c b/sound/soc/codecs/pcm512x.c
index 861210f..4cbef9a 100644
--- a/sound/soc/codecs/pcm512x.c
+++ b/sound/soc/codecs/pcm512x.c
@@ -1564,13 +1564,15 @@ int pcm512x_probe(struct device *dev, struct regmap *regmap)
}
pcm512x->sclk = devm_clk_get(dev, NULL);
- if (PTR_ERR(pcm512x->sclk) == -EPROBE_DEFER)
- return -EPROBE_DEFER;
+ if (PTR_ERR(pcm512x->sclk) == -EPROBE_DEFER) {
+ ret = -EPROBE_DEFER;
+ goto err;
+ }
if (!IS_ERR(pcm512x->sclk)) {
ret = clk_prepare_enable(pcm512x->sclk);
if (ret != 0) {
dev_err(dev, "Failed to enable SCLK: %d\n", ret);
- return ret;
+ goto err;
}
}
diff --git a/sound/soc/codecs/rt1015.c b/sound/soc/codecs/rt1015.c
index 6d490e2..66eb55b 100644
--- a/sound/soc/codecs/rt1015.c
+++ b/sound/soc/codecs/rt1015.c
@@ -664,7 +664,7 @@ static int rt1015_hw_params(struct snd_pcm_substream *substream,
snd_soc_component_update_bits(component, RT1015_TDM_MASTER,
RT1015_I2S_DL_MASK, val_len);
snd_soc_component_update_bits(component, RT1015_CLK2,
- RT1015_FS_PD_MASK, pre_div);
+ RT1015_FS_PD_MASK, pre_div << RT1015_FS_PD_SFT);
return 0;
}
@@ -857,6 +857,7 @@ struct snd_soc_dai_driver rt1015_dai[] = {
.rates = RT1015_STEREO_RATES,
.formats = RT1015_FORMATS,
},
+ .ops = &rt1015_aif_dai_ops,
}
};
diff --git a/sound/soc/codecs/tas2562.c b/sound/soc/codecs/tas2562.c
index 729acd8..be52886 100644
--- a/sound/soc/codecs/tas2562.c
+++ b/sound/soc/codecs/tas2562.c
@@ -215,7 +215,8 @@ static int tas2562_set_bitwidth(struct tas2562_data *tas2562, int bitwidth)
break;
default:
- dev_info(tas2562->dev, "Not supported params format\n");
+ dev_info(tas2562->dev, "Unsupported bitwidth format\n");
+ return -EINVAL;
}
ret = snd_soc_component_update_bits(tas2562->component,
@@ -251,7 +252,7 @@ static int tas2562_hw_params(struct snd_pcm_substream *substream,
ret = tas2562_set_samplerate(tas2562, params_rate(params));
if (ret)
- dev_err(tas2562->dev, "set bitwidth failed, %d\n", ret);
+ dev_err(tas2562->dev, "set sample rate failed, %d\n", ret);
return ret;
}
diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
index 8c3ea73..9d436b0 100644
--- a/sound/soc/fsl/fsl_sai.c
+++ b/sound/soc/fsl/fsl_sai.c
@@ -1020,12 +1020,24 @@ static int fsl_sai_probe(struct platform_device *pdev)
ret = devm_snd_soc_register_component(&pdev->dev, &fsl_component,
&fsl_sai_dai, 1);
if (ret)
- return ret;
+ goto err_pm_disable;
- if (sai->soc_data->use_imx_pcm)
- return imx_pcm_dma_init(pdev, IMX_SAI_DMABUF_SIZE);
- else
- return devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0);
+ if (sai->soc_data->use_imx_pcm) {
+ ret = imx_pcm_dma_init(pdev, IMX_SAI_DMABUF_SIZE);
+ if (ret)
+ goto err_pm_disable;
+ } else {
+ ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0);
+ if (ret)
+ goto err_pm_disable;
+ }
+
+ return ret;
+
+err_pm_disable:
+ pm_runtime_disable(&pdev->dev);
+
+ return ret;
}
static int fsl_sai_remove(struct platform_device *pdev)
diff --git a/sound/soc/intel/skylake/skl-debug.c b/sound/soc/intel/skylake/skl-debug.c
index 3466675..a15aa2f 100644
--- a/sound/soc/intel/skylake/skl-debug.c
+++ b/sound/soc/intel/skylake/skl-debug.c
@@ -34,8 +34,8 @@ static ssize_t skl_print_pins(struct skl_module_pin *m_pin, char *buf,
int i;
ssize_t ret = 0;
- for (i = 0; i < max_pin; i++)
- ret += snprintf(buf + size, MOD_BUF - size,
+ for (i = 0; i < max_pin; i++) {
+ ret += scnprintf(buf + size, MOD_BUF - size,
"%s %d\n\tModule %d\n\tInstance %d\n\t"
"In-used %s\n\tType %s\n"
"\tState %d\n\tIndex %d\n",
@@ -45,13 +45,15 @@ static ssize_t skl_print_pins(struct skl_module_pin *m_pin, char *buf,
m_pin[i].in_use ? "Used" : "Unused",
m_pin[i].is_dynamic ? "Dynamic" : "Static",
m_pin[i].pin_state, i);
+ size += ret;
+ }
return ret;
}
static ssize_t skl_print_fmt(struct skl_module_fmt *fmt, char *buf,
ssize_t size, bool direction)
{
- return snprintf(buf + size, MOD_BUF - size,
+ return scnprintf(buf + size, MOD_BUF - size,
"%s\n\tCh %d\n\tFreq %d\n\tBit depth %d\n\t"
"Valid bit depth %d\n\tCh config %#x\n\tInterleaving %d\n\t"
"Sample Type %d\n\tCh Map %#x\n",
@@ -75,16 +77,16 @@ static ssize_t module_read(struct file *file, char __user *user_buf,
if (!buf)
return -ENOMEM;
- ret = snprintf(buf, MOD_BUF, "Module:\n\tUUID %pUL\n\tModule id %d\n"
+ ret = scnprintf(buf, MOD_BUF, "Module:\n\tUUID %pUL\n\tModule id %d\n"
"\tInstance id %d\n\tPvt_id %d\n", mconfig->guid,
mconfig->id.module_id, mconfig->id.instance_id,
mconfig->id.pvt_id);
- ret += snprintf(buf + ret, MOD_BUF - ret,
+ ret += scnprintf(buf + ret, MOD_BUF - ret,
"Resources:\n\tCPC %#x\n\tIBS %#x\n\tOBS %#x\t\n",
res->cpc, res->ibs, res->obs);
- ret += snprintf(buf + ret, MOD_BUF - ret,
+ ret += scnprintf(buf + ret, MOD_BUF - ret,
"Module data:\n\tCore %d\n\tIn queue %d\n\t"
"Out queue %d\n\tType %s\n",
mconfig->core_id, mconfig->max_in_queue,
@@ -94,38 +96,38 @@ static ssize_t module_read(struct file *file, char __user *user_buf,
ret += skl_print_fmt(mconfig->in_fmt, buf, ret, true);
ret += skl_print_fmt(mconfig->out_fmt, buf, ret, false);
- ret += snprintf(buf + ret, MOD_BUF - ret,
+ ret += scnprintf(buf + ret, MOD_BUF - ret,
"Fixup:\n\tParams %#x\n\tConverter %#x\n",
mconfig->params_fixup, mconfig->converter);
- ret += snprintf(buf + ret, MOD_BUF - ret,
+ ret += scnprintf(buf + ret, MOD_BUF - ret,
"Module Gateway:\n\tType %#x\n\tVbus %#x\n\tHW conn %#x\n\tSlot %#x\n",
mconfig->dev_type, mconfig->vbus_id,
mconfig->hw_conn_type, mconfig->time_slot);
- ret += snprintf(buf + ret, MOD_BUF - ret,
+ ret += scnprintf(buf + ret, MOD_BUF - ret,
"Pipeline:\n\tID %d\n\tPriority %d\n\tConn Type %d\n\t"
"Pages %#x\n", mconfig->pipe->ppl_id,
mconfig->pipe->pipe_priority, mconfig->pipe->conn_type,
mconfig->pipe->memory_pages);
- ret += snprintf(buf + ret, MOD_BUF - ret,
+ ret += scnprintf(buf + ret, MOD_BUF - ret,
"\tParams:\n\t\tHost DMA %d\n\t\tLink DMA %d\n",
mconfig->pipe->p_params->host_dma_id,
mconfig->pipe->p_params->link_dma_id);
- ret += snprintf(buf + ret, MOD_BUF - ret,
+ ret += scnprintf(buf + ret, MOD_BUF - ret,
"\tPCM params:\n\t\tCh %d\n\t\tFreq %d\n\t\tFormat %d\n",
mconfig->pipe->p_params->ch,
mconfig->pipe->p_params->s_freq,
mconfig->pipe->p_params->s_fmt);
- ret += snprintf(buf + ret, MOD_BUF - ret,
+ ret += scnprintf(buf + ret, MOD_BUF - ret,
"\tLink %#x\n\tStream %#x\n",
mconfig->pipe->p_params->linktype,
mconfig->pipe->p_params->stream);
- ret += snprintf(buf + ret, MOD_BUF - ret,
+ ret += scnprintf(buf + ret, MOD_BUF - ret,
"\tState %d\n\tPassthru %s\n",
mconfig->pipe->state,
mconfig->pipe->passthru ? "true" : "false");
@@ -135,7 +137,7 @@ static ssize_t module_read(struct file *file, char __user *user_buf,
ret += skl_print_pins(mconfig->m_out_pin, buf,
mconfig->max_out_queue, ret, false);
- ret += snprintf(buf + ret, MOD_BUF - ret,
+ ret += scnprintf(buf + ret, MOD_BUF - ret,
"Other:\n\tDomain %d\n\tHomogeneous Input %s\n\t"
"Homogeneous Output %s\n\tIn Queue Mask %d\n\t"
"Out Queue Mask %d\n\tDMA ID %d\n\tMem Pages %d\n\t"
@@ -191,7 +193,7 @@ static ssize_t fw_softreg_read(struct file *file, char __user *user_buf,
__ioread32_copy(d->fw_read_buff, fw_reg_addr, w0_stat_sz >> 2);
for (offset = 0; offset < FW_REG_SIZE; offset += 16) {
- ret += snprintf(tmp + ret, FW_REG_BUF - ret, "%#.4x: ", offset);
+ ret += scnprintf(tmp + ret, FW_REG_BUF - ret, "%#.4x: ", offset);
hex_dump_to_buffer(d->fw_read_buff + offset, 16, 16, 4,
tmp + ret, FW_REG_BUF - ret, 0);
ret += strlen(tmp + ret);
diff --git a/sound/soc/intel/skylake/skl-ssp-clk.c b/sound/soc/intel/skylake/skl-ssp-clk.c
index 1c0e522..bd43885 100644
--- a/sound/soc/intel/skylake/skl-ssp-clk.c
+++ b/sound/soc/intel/skylake/skl-ssp-clk.c
@@ -384,9 +384,11 @@ static int skl_clk_dev_probe(struct platform_device *pdev)
&clks[i], clk_pdata, i);
if (IS_ERR(data->clk[data->avail_clk_cnt])) {
- ret = PTR_ERR(data->clk[data->avail_clk_cnt++]);
+ ret = PTR_ERR(data->clk[data->avail_clk_cnt]);
goto err_unreg_skl_clk;
}
+
+ data->avail_clk_cnt++;
}
platform_set_drvdata(pdev, data);
diff --git a/sound/soc/meson/g12a-tohdmitx.c b/sound/soc/meson/g12a-tohdmitx.c
index 9cfbd34..8a0db28 100644
--- a/sound/soc/meson/g12a-tohdmitx.c
+++ b/sound/soc/meson/g12a-tohdmitx.c
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <sound/pcm_params.h>
#include <linux/regmap.h>
+#include <linux/reset.h>
#include <sound/soc.h>
#include <sound/soc-dai.h>
@@ -378,6 +379,11 @@ static int g12a_tohdmitx_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
void __iomem *regs;
struct regmap *map;
+ int ret;
+
+ ret = device_reset(dev);
+ if (ret)
+ return ret;
regs = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(regs))
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index 14e175c..785a038 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -451,7 +451,7 @@ int snd_soc_pcm_component_sync_stop(struct snd_pcm_substream *substream)
int i, ret;
for_each_rtd_components(rtd, i, component) {
- if (component->driver->ioctl) {
+ if (component->driver->sync_stop) {
ret = component->driver->sync_stop(component,
substream);
if (ret < 0)
diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c
index 223cd04..392a1c5 100644
--- a/sound/soc/soc-compress.c
+++ b/sound/soc/soc-compress.c
@@ -299,7 +299,7 @@ static int soc_compr_free_fe(struct snd_compr_stream *cstream)
for_each_dpcm_be(fe, stream, dpcm)
dpcm->state = SND_SOC_DPCM_LINK_STATE_FREE;
- snd_soc_dapm_stream_stop(fe, stream);
+ dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_STOP);
fe->dpcm[stream].state = SND_SOC_DPCM_STATE_CLOSE;
fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO;
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index bc20ad9..9fb54e6fe 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -3441,8 +3441,17 @@ int snd_soc_dapm_get_enum_double(struct snd_kcontrol *kcontrol,
}
EXPORT_SYMBOL_GPL(snd_soc_dapm_get_enum_double);
-static int __snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol, int locked)
+/**
+ * snd_soc_dapm_put_enum_double - dapm enumerated double mixer set callback
+ * @kcontrol: mixer control
+ * @ucontrol: control element information
+ *
+ * Callback to set the value of a dapm enumerated double mixer control.
+ *
+ * Returns 0 for success.
+ */
+int snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
{
struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kcontrol);
struct snd_soc_card *card = dapm->card;
@@ -3465,9 +3474,7 @@ static int __snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol,
mask |= e->mask << e->shift_r;
}
- if (!locked)
- mutex_lock_nested(&card->dapm_mutex,
- SND_SOC_DAPM_CLASS_RUNTIME);
+ mutex_lock_nested(&card->dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME);
change = dapm_kcontrol_set_value(kcontrol, val);
@@ -3489,51 +3496,16 @@ static int __snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol,
card->update = NULL;
}
- if (!locked)
- mutex_unlock(&card->dapm_mutex);
+ mutex_unlock(&card->dapm_mutex);
if (ret > 0)
soc_dpcm_runtime_update(card);
return change;
}
-
-/**
- * snd_soc_dapm_put_enum_double - dapm enumerated double mixer set callback
- * @kcontrol: mixer control
- * @ucontrol: control element information
- *
- * Callback to set the value of a dapm enumerated double mixer control.
- *
- * Returns 0 for success.
- */
-int snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- return __snd_soc_dapm_put_enum_double(kcontrol, ucontrol, 0);
-}
EXPORT_SYMBOL_GPL(snd_soc_dapm_put_enum_double);
/**
- * snd_soc_dapm_put_enum_double_locked - dapm enumerated double mixer set
- * callback
- * @kcontrol: mixer control
- * @ucontrol: control element information
- *
- * Callback to set the value of a dapm enumerated double mixer control.
- * Must acquire dapm_mutex before calling the function.
- *
- * Returns 0 for success.
- */
-int snd_soc_dapm_put_enum_double_locked(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- dapm_assert_locked(snd_soc_dapm_kcontrol_dapm(kcontrol));
- return __snd_soc_dapm_put_enum_double(kcontrol, ucontrol, 1);
-}
-EXPORT_SYMBOL_GPL(snd_soc_dapm_put_enum_double_locked);
-
-/**
* snd_soc_dapm_info_pin_switch - Info for a pin switch
*
* @kcontrol: mixer control
@@ -3916,9 +3888,6 @@ snd_soc_dai_link_event_pre_pmu(struct snd_soc_dapm_widget *w,
runtime->rate = params_rate(params);
out:
- if (ret < 0)
- kfree(runtime);
-
kfree(params);
return ret;
}
@@ -4803,7 +4772,7 @@ static void soc_dapm_shutdown_dapm(struct snd_soc_dapm_context *dapm)
continue;
if (w->power) {
dapm_seq_insert(w, &down_list, false);
- w->power = 0;
+ w->new_power = 0;
powerdown = 1;
}
}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index ff1b7c7..2c59b36 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -2006,7 +2006,7 @@ static int dpcm_fe_dai_shutdown(struct snd_pcm_substream *substream)
soc_pcm_close(substream);
/* run the stream event for each BE */
- snd_soc_dapm_stream_stop(fe, stream);
+ dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_STOP);
fe->dpcm[stream].state = SND_SOC_DPCM_STATE_CLOSE;
dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO);
@@ -3171,16 +3171,16 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe,
unsigned long flags;
/* FE state */
- offset += snprintf(buf + offset, size - offset,
+ offset += scnprintf(buf + offset, size - offset,
"[%s - %s]\n", fe->dai_link->name,
stream ? "Capture" : "Playback");
- offset += snprintf(buf + offset, size - offset, "State: %s\n",
+ offset += scnprintf(buf + offset, size - offset, "State: %s\n",
dpcm_state_string(fe->dpcm[stream].state));
if ((fe->dpcm[stream].state >= SND_SOC_DPCM_STATE_HW_PARAMS) &&
(fe->dpcm[stream].state <= SND_SOC_DPCM_STATE_STOP))
- offset += snprintf(buf + offset, size - offset,
+ offset += scnprintf(buf + offset, size - offset,
"Hardware Params: "
"Format = %s, Channels = %d, Rate = %d\n",
snd_pcm_format_name(params_format(params)),
@@ -3188,10 +3188,10 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe,
params_rate(params));
/* BEs state */
- offset += snprintf(buf + offset, size - offset, "Backends:\n");
+ offset += scnprintf(buf + offset, size - offset, "Backends:\n");
if (list_empty(&fe->dpcm[stream].be_clients)) {
- offset += snprintf(buf + offset, size - offset,
+ offset += scnprintf(buf + offset, size - offset,
" No active DSP links\n");
goto out;
}
@@ -3201,16 +3201,16 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe,
struct snd_soc_pcm_runtime *be = dpcm->be;
params = &dpcm->hw_params;
- offset += snprintf(buf + offset, size - offset,
+ offset += scnprintf(buf + offset, size - offset,
"- %s\n", be->dai_link->name);
- offset += snprintf(buf + offset, size - offset,
+ offset += scnprintf(buf + offset, size - offset,
" State: %s\n",
dpcm_state_string(be->dpcm[stream].state));
if ((be->dpcm[stream].state >= SND_SOC_DPCM_STATE_HW_PARAMS) &&
(be->dpcm[stream].state <= SND_SOC_DPCM_STATE_STOP))
- offset += snprintf(buf + offset, size - offset,
+ offset += scnprintf(buf + offset, size - offset,
" Hardware Params: "
"Format = %s, Channels = %d, Rate = %d\n",
snd_pcm_format_name(params_format(params)),
diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index d2ee6ad..575da6a 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -2377,8 +2377,11 @@ static int soc_tplg_link_elems_load(struct soc_tplg *tplg,
}
ret = soc_tplg_link_config(tplg, _link);
- if (ret < 0)
+ if (ret < 0) {
+ if (!abi_match)
+ kfree(_link);
return ret;
+ }
/* offset by version-specific struct size and
* real priv data size
@@ -2542,7 +2545,7 @@ static int soc_tplg_manifest_load(struct soc_tplg *tplg,
{
struct snd_soc_tplg_manifest *manifest, *_manifest;
bool abi_match;
- int err;
+ int ret = 0;
if (tplg->pass != SOC_TPLG_PASS_MANIFEST)
return 0;
@@ -2555,19 +2558,19 @@ static int soc_tplg_manifest_load(struct soc_tplg *tplg,
_manifest = manifest;
} else {
abi_match = false;
- err = manifest_new_ver(tplg, manifest, &_manifest);
- if (err < 0)
- return err;
+ ret = manifest_new_ver(tplg, manifest, &_manifest);
+ if (ret < 0)
+ return ret;
}
/* pass control to component driver for optional further init */
if (tplg->comp && tplg->ops && tplg->ops->manifest)
- return tplg->ops->manifest(tplg->comp, tplg->index, _manifest);
+ ret = tplg->ops->manifest(tplg->comp, tplg->index, _manifest);
if (!abi_match) /* free the duplicated one */
kfree(_manifest);
- return 0;
+ return ret;
}
/* validate header magic, size and type */
diff --git a/sound/soc/sof/intel/hda-codec.c b/sound/soc/sof/intel/hda-codec.c
index 9106ab8..ff45075 100644
--- a/sound/soc/sof/intel/hda-codec.c
+++ b/sound/soc/sof/intel/hda-codec.c
@@ -174,8 +174,10 @@ void hda_codec_i915_display_power(struct snd_sof_dev *sdev, bool enable)
{
struct hdac_bus *bus = sof_to_bus(sdev);
- dev_dbg(bus->dev, "Turning i915 HDAC power %d\n", enable);
- snd_hdac_display_power(bus, HDA_CODEC_IDX_CONTROLLER, enable);
+ if (HDA_IDISP_CODEC(bus->codec_mask)) {
+ dev_dbg(bus->dev, "Turning i915 HDAC power %d\n", enable);
+ snd_hdac_display_power(bus, HDA_CODEC_IDX_CONTROLLER, enable);
+ }
}
EXPORT_SYMBOL_NS(hda_codec_i915_display_power, SND_SOC_SOF_HDA_AUDIO_CODEC_I915);
@@ -189,7 +191,8 @@ int hda_codec_i915_init(struct snd_sof_dev *sdev)
if (ret < 0)
return ret;
- hda_codec_i915_display_power(sdev, true);
+ /* codec_mask not yet known, power up for probe */
+ snd_hdac_display_power(bus, HDA_CODEC_IDX_CONTROLLER, true);
return 0;
}
@@ -200,7 +203,8 @@ int hda_codec_i915_exit(struct snd_sof_dev *sdev)
struct hdac_bus *bus = sof_to_bus(sdev);
int ret;
- hda_codec_i915_display_power(sdev, false);
+ /* power down unconditionally */
+ snd_hdac_display_power(bus, HDA_CODEC_IDX_CONTROLLER, false);
ret = snd_hdac_i915_exit(bus);
diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c
index 4a4d318..0848b79 100644
--- a/sound/soc/sof/intel/hda-dsp.c
+++ b/sound/soc/sof/intel/hda-dsp.c
@@ -428,6 +428,9 @@ static int hda_suspend(struct snd_sof_dev *sdev, bool runtime_suspend)
return ret;
}
+ /* display codec can powered off after link reset */
+ hda_codec_i915_display_power(sdev, false);
+
return 0;
}
@@ -439,6 +442,9 @@ static int hda_resume(struct snd_sof_dev *sdev, bool runtime_resume)
#endif
int ret;
+ /* display codec must be powered before link reset */
+ hda_codec_i915_display_power(sdev, true);
+
/*
* clear TCSEL to clear playback on some HD Audio
* codecs. PCI TCSEL is defined in the Intel manuals.
@@ -482,6 +488,8 @@ int hda_dsp_resume(struct snd_sof_dev *sdev)
struct pci_dev *pci = to_pci_dev(sdev->dev);
if (sdev->s0_suspend) {
+ hda_codec_i915_display_power(sdev, true);
+
/* restore L1SEN bit */
if (hda->l1_support_changed)
snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR,
@@ -531,6 +539,9 @@ int hda_dsp_suspend(struct snd_sof_dev *sdev)
int ret;
if (sdev->s0_suspend) {
+ /* we can't keep a wakeref to display driver at suspend */
+ hda_codec_i915_display_power(sdev, false);
+
/* enable L1SEN to make sure the system can enter S0Ix */
hda->l1_support_changed =
snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR,
diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c
index 65b86dd..25946a1 100644
--- a/sound/soc/sof/intel/hda.c
+++ b/sound/soc/sof/intel/hda.c
@@ -286,6 +286,13 @@ static int hda_init(struct snd_sof_dev *sdev)
/* HDA base */
sdev->bar[HDA_DSP_HDA_BAR] = bus->remap_addr;
+ /* init i915 and HDMI codecs */
+ ret = hda_codec_i915_init(sdev);
+ if (ret < 0) {
+ dev_err(sdev->dev, "error: init i915 and HDMI codec failed\n");
+ return ret;
+ }
+
/* get controller capabilities */
ret = hda_dsp_ctrl_get_caps(sdev);
if (ret < 0)
@@ -353,15 +360,6 @@ static int hda_init_caps(struct snd_sof_dev *sdev)
if (bus->ppcap)
dev_dbg(sdev->dev, "PP capability, will probe DSP later.\n");
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
- /* init i915 and HDMI codecs */
- ret = hda_codec_i915_init(sdev);
- if (ret < 0) {
- dev_err(sdev->dev, "error: init i915 and HDMI codec failed\n");
- return ret;
- }
-#endif
-
/* Init HDA controller after i915 init */
ret = hda_dsp_ctrl_init_chip(sdev, true);
if (ret < 0) {
@@ -381,7 +379,7 @@ static int hda_init_caps(struct snd_sof_dev *sdev)
hda_codec_probe_bus(sdev, hda_codec_use_common_hdmi);
if (!HDA_IDISP_CODEC(bus->codec_mask))
- hda_codec_i915_display_power(sdev, false);
+ hda_codec_i915_exit(sdev);
/*
* we are done probing so decrement link counts
@@ -611,6 +609,7 @@ int hda_dsp_probe(struct snd_sof_dev *sdev)
iounmap(sdev->bar[HDA_DSP_BAR]);
hdac_bus_unmap:
iounmap(bus->remap_addr);
+ hda_codec_i915_exit(sdev);
err:
return ret;
}
diff --git a/sound/soc/sof/ipc.c b/sound/soc/sof/ipc.c
index b63fc52..78aa1da 100644
--- a/sound/soc/sof/ipc.c
+++ b/sound/soc/sof/ipc.c
@@ -499,7 +499,7 @@ int snd_sof_ipc_stream_posn(struct snd_soc_component *scomp,
/* send IPC to the DSP */
err = sof_ipc_tx_message(sdev->ipc,
- stream.hdr.cmd, &stream, sizeof(stream), &posn,
+ stream.hdr.cmd, &stream, sizeof(stream), posn,
sizeof(*posn));
if (err < 0) {
dev_err(sdev->dev, "error: failed to get stream %d position\n",
diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c
index 30bcd5d..10eb4b8 100644
--- a/sound/soc/stm/stm32_sai_sub.c
+++ b/sound/soc/stm/stm32_sai_sub.c
@@ -1543,20 +1543,20 @@ static int stm32_sai_sub_probe(struct platform_device *pdev)
return ret;
}
- ret = devm_snd_soc_register_component(&pdev->dev, &stm32_component,
- &sai->cpu_dai_drv, 1);
+ ret = snd_dmaengine_pcm_register(&pdev->dev, conf, 0);
+ if (ret) {
+ dev_err(&pdev->dev, "Could not register pcm dma\n");
+ return ret;
+ }
+
+ ret = snd_soc_register_component(&pdev->dev, &stm32_component,
+ &sai->cpu_dai_drv, 1);
if (ret)
return ret;
if (STM_SAI_PROTOCOL_IS_SPDIF(sai))
conf = &stm32_sai_pcm_config_spdif;
- ret = devm_snd_dmaengine_pcm_register(&pdev->dev, conf, 0);
- if (ret) {
- dev_err(&pdev->dev, "Could not register pcm dma\n");
- return ret;
- }
-
return 0;
}
@@ -1565,6 +1565,8 @@ static int stm32_sai_sub_remove(struct platform_device *pdev)
struct stm32_sai_sub_data *sai = dev_get_drvdata(&pdev->dev);
clk_unprepare(sai->pdata->pclk);
+ snd_dmaengine_pcm_unregister(&pdev->dev);
+ snd_soc_unregister_component(&pdev->dev);
return 0;
}
diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c
index 55798bc..686561d 100644
--- a/sound/soc/sunxi/sun8i-codec.c
+++ b/sound/soc/sunxi/sun8i-codec.c
@@ -80,6 +80,7 @@
#define SUN8I_SYS_SR_CTRL_AIF1_FS_MASK GENMASK(15, 12)
#define SUN8I_SYS_SR_CTRL_AIF2_FS_MASK GENMASK(11, 8)
+#define SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT_MASK GENMASK(3, 2)
#define SUN8I_AIF1CLK_CTRL_AIF1_WORD_SIZ_MASK GENMASK(5, 4)
#define SUN8I_AIF1CLK_CTRL_AIF1_LRCK_DIV_MASK GENMASK(8, 6)
#define SUN8I_AIF1CLK_CTRL_AIF1_BCLK_DIV_MASK GENMASK(12, 9)
@@ -241,7 +242,7 @@ static int sun8i_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
return -EINVAL;
}
regmap_update_bits(scodec->regmap, SUN8I_AIF1CLK_CTRL,
- BIT(SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT),
+ SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT_MASK,
value << SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT);
return 0;
diff --git a/sound/usb/clock.c b/sound/usb/clock.c
index 018b1ec..a48313d 100644
--- a/sound/usb/clock.c
+++ b/sound/usb/clock.c
@@ -151,8 +151,34 @@ static int uac_clock_selector_set_val(struct snd_usb_audio *chip, int selector_i
return ret;
}
+/*
+ * Assume the clock is valid if clock source supports only one single sample
+ * rate, the terminal is connected directly to it (there is no clock selector)
+ * and clock type is internal. This is to deal with some Denon DJ controllers
+ * that always reports that clock is invalid.
+ */
+static bool uac_clock_source_is_valid_quirk(struct snd_usb_audio *chip,
+ struct audioformat *fmt,
+ int source_id)
+{
+ if (fmt->protocol == UAC_VERSION_2) {
+ struct uac_clock_source_descriptor *cs_desc =
+ snd_usb_find_clock_source(chip->ctrl_intf, source_id);
+
+ if (!cs_desc)
+ return false;
+
+ return (fmt->nr_rates == 1 &&
+ (fmt->clock & 0xff) == cs_desc->bClockID &&
+ (cs_desc->bmAttributes & 0x3) !=
+ UAC_CLOCK_SOURCE_TYPE_EXT);
+ }
+
+ return false;
+}
+
static bool uac_clock_source_is_valid(struct snd_usb_audio *chip,
- int protocol,
+ struct audioformat *fmt,
int source_id)
{
int err;
@@ -160,7 +186,7 @@ static bool uac_clock_source_is_valid(struct snd_usb_audio *chip,
struct usb_device *dev = chip->dev;
u32 bmControls;
- if (protocol == UAC_VERSION_3) {
+ if (fmt->protocol == UAC_VERSION_3) {
struct uac3_clock_source_descriptor *cs_desc =
snd_usb_find_clock_source_v3(chip->ctrl_intf, source_id);
@@ -194,10 +220,14 @@ static bool uac_clock_source_is_valid(struct snd_usb_audio *chip,
return false;
}
- return data ? true : false;
+ if (data)
+ return true;
+ else
+ return uac_clock_source_is_valid_quirk(chip, fmt, source_id);
}
-static int __uac_clock_find_source(struct snd_usb_audio *chip, int entity_id,
+static int __uac_clock_find_source(struct snd_usb_audio *chip,
+ struct audioformat *fmt, int entity_id,
unsigned long *visited, bool validate)
{
struct uac_clock_source_descriptor *source;
@@ -217,7 +247,7 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, int entity_id,
source = snd_usb_find_clock_source(chip->ctrl_intf, entity_id);
if (source) {
entity_id = source->bClockID;
- if (validate && !uac_clock_source_is_valid(chip, UAC_VERSION_2,
+ if (validate && !uac_clock_source_is_valid(chip, fmt,
entity_id)) {
usb_audio_err(chip,
"clock source %d is not valid, cannot use\n",
@@ -248,8 +278,9 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, int entity_id,
}
cur = ret;
- ret = __uac_clock_find_source(chip, selector->baCSourceID[ret - 1],
- visited, validate);
+ ret = __uac_clock_find_source(chip, fmt,
+ selector->baCSourceID[ret - 1],
+ visited, validate);
if (!validate || ret > 0 || !chip->autoclock)
return ret;
@@ -260,8 +291,9 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, int entity_id,
if (i == cur)
continue;
- ret = __uac_clock_find_source(chip, selector->baCSourceID[i - 1],
- visited, true);
+ ret = __uac_clock_find_source(chip, fmt,
+ selector->baCSourceID[i - 1],
+ visited, true);
if (ret < 0)
continue;
@@ -281,14 +313,16 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, int entity_id,
/* FIXME: multipliers only act as pass-thru element for now */
multiplier = snd_usb_find_clock_multiplier(chip->ctrl_intf, entity_id);
if (multiplier)
- return __uac_clock_find_source(chip, multiplier->bCSourceID,
- visited, validate);
+ return __uac_clock_find_source(chip, fmt,
+ multiplier->bCSourceID,
+ visited, validate);
return -EINVAL;
}
-static int __uac3_clock_find_source(struct snd_usb_audio *chip, int entity_id,
- unsigned long *visited, bool validate)
+static int __uac3_clock_find_source(struct snd_usb_audio *chip,
+ struct audioformat *fmt, int entity_id,
+ unsigned long *visited, bool validate)
{
struct uac3_clock_source_descriptor *source;
struct uac3_clock_selector_descriptor *selector;
@@ -307,7 +341,7 @@ static int __uac3_clock_find_source(struct snd_usb_audio *chip, int entity_id,
source = snd_usb_find_clock_source_v3(chip->ctrl_intf, entity_id);
if (source) {
entity_id = source->bClockID;
- if (validate && !uac_clock_source_is_valid(chip, UAC_VERSION_3,
+ if (validate && !uac_clock_source_is_valid(chip, fmt,
entity_id)) {
usb_audio_err(chip,
"clock source %d is not valid, cannot use\n",
@@ -338,7 +372,8 @@ static int __uac3_clock_find_source(struct snd_usb_audio *chip, int entity_id,
}
cur = ret;
- ret = __uac3_clock_find_source(chip, selector->baCSourceID[ret - 1],
+ ret = __uac3_clock_find_source(chip, fmt,
+ selector->baCSourceID[ret - 1],
visited, validate);
if (!validate || ret > 0 || !chip->autoclock)
return ret;
@@ -350,8 +385,9 @@ static int __uac3_clock_find_source(struct snd_usb_audio *chip, int entity_id,
if (i == cur)
continue;
- ret = __uac3_clock_find_source(chip, selector->baCSourceID[i - 1],
- visited, true);
+ ret = __uac3_clock_find_source(chip, fmt,
+ selector->baCSourceID[i - 1],
+ visited, true);
if (ret < 0)
continue;
@@ -372,7 +408,8 @@ static int __uac3_clock_find_source(struct snd_usb_audio *chip, int entity_id,
multiplier = snd_usb_find_clock_multiplier_v3(chip->ctrl_intf,
entity_id);
if (multiplier)
- return __uac3_clock_find_source(chip, multiplier->bCSourceID,
+ return __uac3_clock_find_source(chip, fmt,
+ multiplier->bCSourceID,
visited, validate);
return -EINVAL;
@@ -389,18 +426,18 @@ static int __uac3_clock_find_source(struct snd_usb_audio *chip, int entity_id,
*
* Returns the clock source UnitID (>=0) on success, or an error.
*/
-int snd_usb_clock_find_source(struct snd_usb_audio *chip, int protocol,
- int entity_id, bool validate)
+int snd_usb_clock_find_source(struct snd_usb_audio *chip,
+ struct audioformat *fmt, bool validate)
{
DECLARE_BITMAP(visited, 256);
memset(visited, 0, sizeof(visited));
- switch (protocol) {
+ switch (fmt->protocol) {
case UAC_VERSION_2:
- return __uac_clock_find_source(chip, entity_id, visited,
+ return __uac_clock_find_source(chip, fmt, fmt->clock, visited,
validate);
case UAC_VERSION_3:
- return __uac3_clock_find_source(chip, entity_id, visited,
+ return __uac3_clock_find_source(chip, fmt, fmt->clock, visited,
validate);
default:
return -EINVAL;
@@ -501,8 +538,7 @@ static int set_sample_rate_v2v3(struct snd_usb_audio *chip, int iface,
* automatic clock selection if the current clock is not
* valid.
*/
- clock = snd_usb_clock_find_source(chip, fmt->protocol,
- fmt->clock, true);
+ clock = snd_usb_clock_find_source(chip, fmt, true);
if (clock < 0) {
/* We did not find a valid clock, but that might be
* because the current sample rate does not match an
@@ -510,8 +546,7 @@ static int set_sample_rate_v2v3(struct snd_usb_audio *chip, int iface,
* and we will do another validation after setting the
* rate.
*/
- clock = snd_usb_clock_find_source(chip, fmt->protocol,
- fmt->clock, false);
+ clock = snd_usb_clock_find_source(chip, fmt, false);
if (clock < 0)
return clock;
}
@@ -577,7 +612,7 @@ static int set_sample_rate_v2v3(struct snd_usb_audio *chip, int iface,
validation:
/* validate clock after rate change */
- if (!uac_clock_source_is_valid(chip, fmt->protocol, clock))
+ if (!uac_clock_source_is_valid(chip, fmt, clock))
return -ENXIO;
return 0;
}
diff --git a/sound/usb/clock.h b/sound/usb/clock.h
index 076e31b..68df0fb 100644
--- a/sound/usb/clock.h
+++ b/sound/usb/clock.h
@@ -6,7 +6,7 @@ int snd_usb_init_sample_rate(struct snd_usb_audio *chip, int iface,
struct usb_host_interface *alts,
struct audioformat *fmt, int rate);
-int snd_usb_clock_find_source(struct snd_usb_audio *chip, int protocol,
- int entity_id, bool validate);
+int snd_usb_clock_find_source(struct snd_usb_audio *chip,
+ struct audioformat *fmt, bool validate);
#endif /* __USBAUDIO_CLOCK_H */
diff --git a/sound/usb/format.c b/sound/usb/format.c
index 9260136..9f5cb4e 100644
--- a/sound/usb/format.c
+++ b/sound/usb/format.c
@@ -151,6 +151,19 @@ static u64 parse_audio_format_i_type(struct snd_usb_audio *chip,
return pcm_formats;
}
+static int set_fixed_rate(struct audioformat *fp, int rate, int rate_bits)
+{
+ kfree(fp->rate_table);
+ fp->rate_table = kmalloc(sizeof(int), GFP_KERNEL);
+ if (!fp->rate_table)
+ return -ENOMEM;
+ fp->nr_rates = 1;
+ fp->rate_min = rate;
+ fp->rate_max = rate;
+ fp->rates = rate_bits;
+ fp->rate_table[0] = rate;
+ return 0;
+}
/*
* parse the format descriptor and stores the possible sample rates
@@ -223,6 +236,14 @@ static int parse_audio_format_rates_v1(struct snd_usb_audio *chip, struct audiof
fp->rate_min = combine_triple(&fmt[offset + 1]);
fp->rate_max = combine_triple(&fmt[offset + 4]);
}
+
+ /* Jabra Evolve 65 headset */
+ if (chip->usb_id == USB_ID(0x0b0e, 0x030b)) {
+ /* only 48kHz for playback while keeping 16kHz for capture */
+ if (fp->nr_rates != 1)
+ return set_fixed_rate(fp, 48000, SNDRV_PCM_RATE_48000);
+ }
+
return 0;
}
@@ -299,17 +320,7 @@ static int line6_parse_audio_format_rates_quirk(struct snd_usb_audio *chip,
case USB_ID(0x0e41, 0x4248): /* Line6 Helix >= fw 2.82 */
case USB_ID(0x0e41, 0x4249): /* Line6 Helix Rack >= fw 2.82 */
case USB_ID(0x0e41, 0x424a): /* Line6 Helix LT >= fw 2.82 */
- /* supported rates: 48Khz */
- kfree(fp->rate_table);
- fp->rate_table = kmalloc(sizeof(int), GFP_KERNEL);
- if (!fp->rate_table)
- return -ENOMEM;
- fp->nr_rates = 1;
- fp->rate_min = 48000;
- fp->rate_max = 48000;
- fp->rates = SNDRV_PCM_RATE_48000;
- fp->rate_table[0] = 48000;
- return 0;
+ return set_fixed_rate(fp, 48000, SNDRV_PCM_RATE_48000);
}
return -ENODEV;
@@ -325,8 +336,7 @@ static int parse_audio_format_rates_v2v3(struct snd_usb_audio *chip,
struct usb_device *dev = chip->dev;
unsigned char tmp[2], *data;
int nr_triplets, data_size, ret = 0, ret_l6;
- int clock = snd_usb_clock_find_source(chip, fp->protocol,
- fp->clock, false);
+ int clock = snd_usb_clock_find_source(chip, fp, false);
if (clock < 0) {
dev_err(&dev->dev,
diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c
index b5a3f75..4f09668 100644
--- a/sound/usb/line6/driver.c
+++ b/sound/usb/line6/driver.c
@@ -305,7 +305,7 @@ static void line6_data_received(struct urb *urb)
line6_midibuf_read(mb, line6->buffer_message,
LINE6_MIDI_MESSAGE_MAXLEN);
- if (done == 0)
+ if (done <= 0)
break;
line6->message_length = done;
diff --git a/sound/usb/line6/midibuf.c b/sound/usb/line6/midibuf.c
index 8d6eefa..6a70463 100644
--- a/sound/usb/line6/midibuf.c
+++ b/sound/usb/line6/midibuf.c
@@ -159,7 +159,7 @@ int line6_midibuf_read(struct midi_buffer *this, unsigned char *data,
int midi_length_prev =
midibuf_message_length(this->command_prev);
- if (midi_length_prev > 0) {
+ if (midi_length_prev > 1) {
midi_length = midi_length_prev - 1;
repeat = 1;
} else
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index d659fdb..81b2db0 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -897,6 +897,15 @@ static int parse_term_proc_unit(struct mixer_build *state,
return 0;
}
+static int parse_term_effect_unit(struct mixer_build *state,
+ struct usb_audio_term *term,
+ void *p1, int id)
+{
+ term->type = UAC3_EFFECT_UNIT << 16; /* virtual type */
+ term->id = id;
+ return 0;
+}
+
static int parse_term_uac2_clock_source(struct mixer_build *state,
struct usb_audio_term *term,
void *p1, int id)
@@ -981,8 +990,7 @@ static int __check_input_term(struct mixer_build *state, int id,
UAC3_PROCESSING_UNIT);
case PTYPE(UAC_VERSION_2, UAC2_EFFECT_UNIT):
case PTYPE(UAC_VERSION_3, UAC3_EFFECT_UNIT):
- return parse_term_proc_unit(state, term, p1, id,
- UAC3_EFFECT_UNIT);
+ return parse_term_effect_unit(state, term, p1, id);
case PTYPE(UAC_VERSION_1, UAC1_EXTENSION_UNIT):
case PTYPE(UAC_VERSION_2, UAC2_EXTENSION_UNIT_V2):
case PTYPE(UAC_VERSION_3, UAC3_EXTENSION_UNIT):
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 3a5242e3..7f558f4 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1440,6 +1440,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
case USB_ID(0x1395, 0x740a): /* Sennheiser DECT */
case USB_ID(0x1901, 0x0191): /* GE B850V3 CP2114 audio interface */
case USB_ID(0x21b4, 0x0081): /* AudioQuest DragonFly */
+ case USB_ID(0x2912, 0x30c8): /* Audioengine D1 */
return true;
}
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 820e575..ba85bb2 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -220,10 +220,18 @@ struct kvm_vcpu_events {
#define KVM_REG_ARM_PTIMER_CVAL ARM64_SYS_REG(3, 3, 14, 2, 2)
#define KVM_REG_ARM_PTIMER_CNT ARM64_SYS_REG(3, 3, 14, 0, 1)
-/* EL0 Virtual Timer Registers */
+/*
+ * EL0 Virtual Timer Registers
+ *
+ * WARNING:
+ * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined
+ * with the appropriate register encodings. Their values have been
+ * accidentally swapped. As this is set API, the definitions here
+ * must be used, rather than ones derived from the encodings.
+ */
#define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1)
-#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2)
#define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2)
+#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2)
/* KVM-as-firmware specific pseudo-registers */
#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT)
diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h
index 4703d21..f83a70e 100644
--- a/tools/arch/arm64/include/uapi/asm/unistd.h
+++ b/tools/arch/arm64/include/uapi/asm/unistd.h
@@ -19,5 +19,6 @@
#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_SET_GET_RLIMIT
#define __ARCH_WANT_TIME32_SYSCALLS
+#define __ARCH_WANT_SYS_CLONE3
#include <asm-generic/unistd.h>
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index e9b6249..f3327cb 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -220,6 +220,7 @@
#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
+#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -357,6 +358,7 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */
#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 8e1d0bb..4ea8584 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -10,12 +10,6 @@
* cpu_feature_enabled().
*/
-#ifdef CONFIG_X86_INTEL_MPX
-# define DISABLE_MPX 0
-#else
-# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31))
-#endif
-
#ifdef CONFIG_X86_SMAP
# define DISABLE_SMAP 0
#else
@@ -74,7 +68,7 @@
#define DISABLED_MASK6 0
#define DISABLED_MASK7 (DISABLE_PTI)
#define DISABLED_MASK8 0
-#define DISABLED_MASK9 (DISABLE_MPX|DISABLE_SMAP)
+#define DISABLED_MASK9 (DISABLE_SMAP)
#define DISABLED_MASK10 0
#define DISABLED_MASK11 0
#define DISABLED_MASK12 0
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index ebe1685..d5e517d 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -512,6 +512,8 @@
#define MSR_K7_HWCR 0xc0010015
#define MSR_K7_HWCR_SMMLOCK_BIT 0
#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
+#define MSR_K7_HWCR_IRPERF_EN_BIT 30
+#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 503d3f4..3f3f780 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -390,6 +390,7 @@ struct kvm_sync_regs {
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
#define KVM_STATE_NESTED_EVMCS 0x00000004
+#define KVM_STATE_NESTED_MTF_PENDING 0x00000008
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
diff --git a/tools/bootconfig/include/linux/memblock.h b/tools/bootconfig/include/linux/memblock.h
new file mode 100644
index 0000000..7862f21
--- /dev/null
+++ b/tools/bootconfig/include/linux/memblock.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _XBC_LINUX_MEMBLOCK_H
+#define _XBC_LINUX_MEMBLOCK_H
+
+#include <stdlib.h>
+
+#define __pa(addr) (addr)
+#define SMP_CACHE_BYTES 0
+#define memblock_alloc(size, align) malloc(size)
+#define memblock_free(paddr, size) free(paddr)
+
+#endif
diff --git a/tools/bootconfig/include/linux/printk.h b/tools/bootconfig/include/linux/printk.h
index 017bcd69..036e667 100644
--- a/tools/bootconfig/include/linux/printk.h
+++ b/tools/bootconfig/include/linux/printk.h
@@ -4,10 +4,7 @@
#include <stdio.h>
-/* controllable printf */
-extern int pr_output;
-#define printk(fmt, ...) \
- (pr_output ? printf(fmt, __VA_ARGS__) : 0)
+#define printk(fmt, ...) printf(fmt, ##__VA_ARGS__)
#define pr_err printk
#define pr_warn printk
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 47f4884..a9b9781 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -14,8 +14,6 @@
#include <linux/kernel.h>
#include <linux/bootconfig.h>
-int pr_output = 1;
-
static int xbc_show_array(struct xbc_node *node)
{
const char *val;
@@ -131,16 +129,27 @@ int load_xbc_from_initrd(int fd, char **buf)
struct stat stat;
int ret;
u32 size = 0, csum = 0, rcsum;
+ char magic[BOOTCONFIG_MAGIC_LEN];
ret = fstat(fd, &stat);
if (ret < 0)
return -errno;
- if (stat.st_size < 8)
+ if (stat.st_size < 8 + BOOTCONFIG_MAGIC_LEN)
return 0;
- if (lseek(fd, -8, SEEK_END) < 0) {
- printf("Failed to lseek: %d\n", -errno);
+ if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) {
+ pr_err("Failed to lseek: %d\n", -errno);
+ return -errno;
+ }
+ if (read(fd, magic, BOOTCONFIG_MAGIC_LEN) < 0)
+ return -errno;
+ /* Check the bootconfig magic bytes */
+ if (memcmp(magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN) != 0)
+ return 0;
+
+ if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0) {
+ pr_err("Failed to lseek: %d\n", -errno);
return -errno;
}
@@ -150,12 +159,15 @@ int load_xbc_from_initrd(int fd, char **buf)
if (read(fd, &csum, sizeof(u32)) < 0)
return -errno;
- /* Wrong size, maybe no boot config here */
- if (stat.st_size < size + 8)
- return 0;
+ /* Wrong size error */
+ if (stat.st_size < size + 8 + BOOTCONFIG_MAGIC_LEN) {
+ pr_err("bootconfig size is too big\n");
+ return -E2BIG;
+ }
- if (lseek(fd, stat.st_size - 8 - size, SEEK_SET) < 0) {
- printf("Failed to lseek: %d\n", -errno);
+ if (lseek(fd, stat.st_size - (size + 8 + BOOTCONFIG_MAGIC_LEN),
+ SEEK_SET) < 0) {
+ pr_err("Failed to lseek: %d\n", -errno);
return -errno;
}
@@ -163,17 +175,17 @@ int load_xbc_from_initrd(int fd, char **buf)
if (ret < 0)
return ret;
- /* Wrong Checksum, maybe no boot config here */
+ /* Wrong Checksum */
rcsum = checksum((unsigned char *)*buf, size);
if (csum != rcsum) {
- printf("checksum error: %d != %d\n", csum, rcsum);
- return 0;
+ pr_err("checksum error: %d != %d\n", csum, rcsum);
+ return -EINVAL;
}
ret = xbc_init(*buf);
- /* Wrong data, maybe no boot config here */
+ /* Wrong data */
if (ret < 0)
- return 0;
+ return ret;
return size;
}
@@ -185,13 +197,13 @@ int show_xbc(const char *path)
fd = open(path, O_RDONLY);
if (fd < 0) {
- printf("Failed to open initrd %s: %d\n", path, fd);
+ pr_err("Failed to open initrd %s: %d\n", path, fd);
return -errno;
}
ret = load_xbc_from_initrd(fd, &buf);
if (ret < 0)
- printf("Failed to load a boot config from initrd: %d\n", ret);
+ pr_err("Failed to load a boot config from initrd: %d\n", ret);
else
xbc_show_compact_tree();
@@ -209,24 +221,19 @@ int delete_xbc(const char *path)
fd = open(path, O_RDWR);
if (fd < 0) {
- printf("Failed to open initrd %s: %d\n", path, fd);
+ pr_err("Failed to open initrd %s: %d\n", path, fd);
return -errno;
}
- /*
- * Suppress error messages in xbc_init() because it can be just a
- * data which concidentally matches the size and checksum footer.
- */
- pr_output = 0;
size = load_xbc_from_initrd(fd, &buf);
- pr_output = 1;
if (size < 0) {
ret = size;
- printf("Failed to load a boot config from initrd: %d\n", ret);
+ pr_err("Failed to load a boot config from initrd: %d\n", ret);
} else if (size > 0) {
ret = fstat(fd, &stat);
if (!ret)
- ret = ftruncate(fd, stat.st_size - size - 8);
+ ret = ftruncate(fd, stat.st_size
+ - size - 8 - BOOTCONFIG_MAGIC_LEN);
if (ret)
ret = -errno;
} /* Ignore if there is no boot config in initrd */
@@ -245,7 +252,7 @@ int apply_xbc(const char *path, const char *xbc_path)
ret = load_xbc_file(xbc_path, &buf);
if (ret < 0) {
- printf("Failed to load %s : %d\n", xbc_path, ret);
+ pr_err("Failed to load %s : %d\n", xbc_path, ret);
return ret;
}
size = strlen(buf) + 1;
@@ -262,7 +269,7 @@ int apply_xbc(const char *path, const char *xbc_path)
/* Check the data format */
ret = xbc_init(buf);
if (ret < 0) {
- printf("Failed to parse %s: %d\n", xbc_path, ret);
+ pr_err("Failed to parse %s: %d\n", xbc_path, ret);
free(data);
free(buf);
return ret;
@@ -279,20 +286,26 @@ int apply_xbc(const char *path, const char *xbc_path)
/* Remove old boot config if exists */
ret = delete_xbc(path);
if (ret < 0) {
- printf("Failed to delete previous boot config: %d\n", ret);
+ pr_err("Failed to delete previous boot config: %d\n", ret);
return ret;
}
/* Apply new one */
fd = open(path, O_RDWR | O_APPEND);
if (fd < 0) {
- printf("Failed to open %s: %d\n", path, fd);
+ pr_err("Failed to open %s: %d\n", path, fd);
return fd;
}
/* TODO: Ensure the @path is initramfs/initrd image */
ret = write(fd, data, size + 8);
if (ret < 0) {
- printf("Failed to apply a boot config: %d\n", ret);
+ pr_err("Failed to apply a boot config: %d\n", ret);
+ return ret;
+ }
+ /* Write a magic word of the bootconfig */
+ ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN);
+ if (ret < 0) {
+ pr_err("Failed to apply a boot config magic: %d\n", ret);
return ret;
}
close(fd);
@@ -334,12 +347,12 @@ int main(int argc, char **argv)
}
if (apply && delete) {
- printf("Error: You can not specify both -a and -d at once.\n");
+ pr_err("Error: You can not specify both -a and -d at once.\n");
return usage();
}
if (optind >= argc) {
- printf("Error: No initrd is specified.\n");
+ pr_err("Error: No initrd is specified.\n");
return usage();
}
diff --git a/tools/bootconfig/samples/bad-mixed-kv1.bconf b/tools/bootconfig/samples/bad-mixed-kv1.bconf
new file mode 100644
index 0000000..1761547
--- /dev/null
+++ b/tools/bootconfig/samples/bad-mixed-kv1.bconf
@@ -0,0 +1,3 @@
+# value -> subkey pattern
+key = value
+key.subkey = another-value
diff --git a/tools/bootconfig/samples/bad-mixed-kv2.bconf b/tools/bootconfig/samples/bad-mixed-kv2.bconf
new file mode 100644
index 0000000..6b32e0c
--- /dev/null
+++ b/tools/bootconfig/samples/bad-mixed-kv2.bconf
@@ -0,0 +1,3 @@
+# subkey -> value pattern
+key.subkey = value
+key = another-value
diff --git a/tools/bootconfig/samples/bad-samekey.bconf b/tools/bootconfig/samples/bad-samekey.bconf
new file mode 100644
index 0000000..e8d983a
--- /dev/null
+++ b/tools/bootconfig/samples/bad-samekey.bconf
@@ -0,0 +1,6 @@
+# Same key value is not allowed
+key {
+ foo = value
+ bar = value2
+}
+key.foo = value
diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh
index 87725e8..1411f4c 100755
--- a/tools/bootconfig/test-bootconfig.sh
+++ b/tools/bootconfig/test-bootconfig.sh
@@ -9,7 +9,7 @@
NG=0
cleanup() {
- rm -f $INITRD $TEMPCONF
+ rm -f $INITRD $TEMPCONF $OUTFILE
exit $NG
}
@@ -49,7 +49,7 @@
new_size=$(stat -c %s $INITRD)
echo "File size check"
-xpass test $new_size -eq $(expr $bconf_size + $initrd_size + 9)
+xpass test $new_size -eq $(expr $bconf_size + $initrd_size + 9 + 12)
echo "Apply command repeat test"
xpass $BOOTCONF -a $TEMPCONF $INITRD
@@ -64,6 +64,14 @@
new_size=$(stat -c %s $INITRD)
xpass test $new_size -eq $initrd_size
+echo "No error messge while applying"
+OUTFILE=`mktemp tempout-XXXX`
+dd if=/dev/zero of=$INITRD bs=4096 count=1
+printf " \0\0\0 \0\0\0" >> $INITRD
+$BOOTCONF -a $TEMPCONF $INITRD > $OUTFILE 2>&1
+xfail grep -i "failed" $OUTFILE
+xfail grep -i "error" $OUTFILE
+
echo "Max node number check"
echo -n > $TEMPCONF
@@ -87,6 +95,19 @@
echo "\"" >> $TEMPCONF # add 2 bytes + terminal ('\"\n\0')
xpass $BOOTCONF -a $TEMPCONF $INITRD
+echo "Adding same-key values"
+cat > $TEMPCONF << EOF
+key = bar, baz
+key += qux
+EOF
+echo > $INITRD
+
+xpass $BOOTCONF -a $TEMPCONF $INITRD
+$BOOTCONF $INITRD > $OUTFILE
+xpass grep -q "bar" $OUTFILE
+xpass grep -q "baz" $OUTFILE
+xpass grep -q "qux" $OUTFILE
+
echo "=== expected failure cases ==="
for i in samples/bad-* ; do
xfail $BOOTCONF -a $i $INITRD
diff --git a/Documentation/EDID/1024x768.S b/tools/edid/1024x768.S
similarity index 100%
rename from Documentation/EDID/1024x768.S
rename to tools/edid/1024x768.S
diff --git a/Documentation/EDID/1280x1024.S b/tools/edid/1280x1024.S
similarity index 100%
rename from Documentation/EDID/1280x1024.S
rename to tools/edid/1280x1024.S
diff --git a/Documentation/EDID/1600x1200.S b/tools/edid/1600x1200.S
similarity index 100%
rename from Documentation/EDID/1600x1200.S
rename to tools/edid/1600x1200.S
diff --git a/Documentation/EDID/1680x1050.S b/tools/edid/1680x1050.S
similarity index 100%
rename from Documentation/EDID/1680x1050.S
rename to tools/edid/1680x1050.S
diff --git a/Documentation/EDID/1920x1080.S b/tools/edid/1920x1080.S
similarity index 100%
rename from Documentation/EDID/1920x1080.S
rename to tools/edid/1920x1080.S
diff --git a/Documentation/EDID/800x600.S b/tools/edid/800x600.S
similarity index 100%
rename from Documentation/EDID/800x600.S
rename to tools/edid/800x600.S
diff --git a/Documentation/EDID/Makefile b/tools/edid/Makefile
similarity index 100%
rename from Documentation/EDID/Makefile
rename to tools/edid/Makefile
diff --git a/Documentation/EDID/edid.S b/tools/edid/edid.S
similarity index 100%
rename from Documentation/EDID/edid.S
rename to tools/edid/edid.S
diff --git a/Documentation/EDID/hex b/tools/edid/hex
similarity index 100%
rename from Documentation/EDID/hex
rename to tools/edid/hex
diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h
index c160a53..f94f65d 100644
--- a/tools/include/uapi/asm-generic/mman-common.h
+++ b/tools/include/uapi/asm-generic/mman-common.h
@@ -11,6 +11,8 @@
#define PROT_WRITE 0x2 /* page can be written */
#define PROT_EXEC 0x4 /* page can be executed */
#define PROT_SEM 0x8 /* page may be used for atomic ops */
+/* 0x10 reserved for arch-specific use */
+/* 0x20 reserved for arch-specific use */
#define PROT_NONE 0x0 /* page can not be accessed */
#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */
#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 1fc8faa..3a3201e 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -851,8 +851,13 @@ __SYSCALL(__NR_pidfd_open, sys_pidfd_open)
__SYSCALL(__NR_clone3, sys_clone3)
#endif
+#define __NR_openat2 437
+__SYSCALL(__NR_openat2, sys_openat2)
+#define __NR_pidfd_getfd 438
+__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
+
#undef __NR_syscalls
-#define __NR_syscalls 436
+#define __NR_syscalls 439
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h
index ce3c594..637189e 100644
--- a/tools/include/uapi/asm/errno.h
+++ b/tools/include/uapi/asm/errno.h
@@ -1,18 +1,18 @@
/* SPDX-License-Identifier: GPL-2.0 */
#if defined(__i386__) || defined(__x86_64__)
-#include "../../arch/x86/include/uapi/asm/errno.h"
+#include "../../../arch/x86/include/uapi/asm/errno.h"
#elif defined(__powerpc__)
-#include "../../arch/powerpc/include/uapi/asm/errno.h"
+#include "../../../arch/powerpc/include/uapi/asm/errno.h"
#elif defined(__sparc__)
-#include "../../arch/sparc/include/uapi/asm/errno.h"
+#include "../../../arch/sparc/include/uapi/asm/errno.h"
#elif defined(__alpha__)
-#include "../../arch/alpha/include/uapi/asm/errno.h"
+#include "../../../arch/alpha/include/uapi/asm/errno.h"
#elif defined(__mips__)
-#include "../../arch/mips/include/uapi/asm/errno.h"
+#include "../../../arch/mips/include/uapi/asm/errno.h"
#elif defined(__ia64__)
-#include "../../arch/ia64/include/uapi/asm/errno.h"
+#include "../../../arch/ia64/include/uapi/asm/errno.h"
#elif defined(__xtensa__)
-#include "../../arch/xtensa/include/uapi/asm/errno.h"
+#include "../../../arch/xtensa/include/uapi/asm/errno.h"
#else
#include <asm-generic/errno.h>
#endif
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 5400d7e..829c0a4 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -395,6 +395,7 @@ typedef struct _drm_i915_sarea {
#define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite)
#define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap)
#define DRM_IOCTL_I915_GEM_MMAP_GTT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_gtt)
+#define DRM_IOCTL_I915_GEM_MMAP_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_offset)
#define DRM_IOCTL_I915_GEM_SET_DOMAIN DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain)
#define DRM_IOCTL_I915_GEM_SW_FINISH DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SW_FINISH, struct drm_i915_gem_sw_finish)
#define DRM_IOCTL_I915_GEM_SET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling)
@@ -793,6 +794,37 @@ struct drm_i915_gem_mmap_gtt {
__u64 offset;
};
+struct drm_i915_gem_mmap_offset {
+ /** Handle for the object being mapped. */
+ __u32 handle;
+ __u32 pad;
+ /**
+ * Fake offset to use for subsequent mmap call
+ *
+ * This is a fixed-size type for 32/64 compatibility.
+ */
+ __u64 offset;
+
+ /**
+ * Flags for extended behaviour.
+ *
+ * It is mandatory that one of the MMAP_OFFSET types
+ * (GTT, WC, WB, UC, etc) should be included.
+ */
+ __u64 flags;
+#define I915_MMAP_OFFSET_GTT 0
+#define I915_MMAP_OFFSET_WC 1
+#define I915_MMAP_OFFSET_WB 2
+#define I915_MMAP_OFFSET_UC 3
+
+ /*
+ * Zero-terminated chain of extensions.
+ *
+ * No current extensions defined; mbz.
+ */
+ __u64 extensions;
+};
+
struct drm_i915_gem_set_domain {
/** Handle for the object */
__u32 handle;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f1d74a2..22f2352 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1045,9 +1045,9 @@ union bpf_attr {
* supports redirection to the egress interface, and accepts no
* flag at all.
*
- * The same effect can be attained with the more generic
- * **bpf_redirect_map**\ (), which requires specific maps to be
- * used but offers better performance.
+ * The same effect can also be attained with the more generic
+ * **bpf_redirect_map**\ (), which uses a BPF map to store the
+ * redirect target instead of providing it directly to the helper.
* Return
* For XDP, the helper returns **XDP_REDIRECT** on success or
* **XDP_ABORTED** on error. For other program types, the values
@@ -1611,13 +1611,11 @@ union bpf_attr {
* the caller. Any higher bits in the *flags* argument must be
* unset.
*
- * When used to redirect packets to net devices, this helper
- * provides a high performance increase over **bpf_redirect**\ ().
- * This is due to various implementation details of the underlying
- * mechanisms, one of which is the fact that **bpf_redirect_map**\
- * () tries to send packet as a "bulk" to the device.
+ * See also bpf_redirect(), which only supports redirecting to an
+ * ifindex, but doesn't require a map to do so.
* Return
- * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ * **XDP_REDIRECT** on success, or the value of the two lower bits
+ * of the **flags* argument on error.
*
* int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
* Description
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h
index 1f97b33..ca88b7b 100644
--- a/tools/include/uapi/linux/fcntl.h
+++ b/tools/include/uapi/linux/fcntl.h
@@ -3,6 +3,7 @@
#define _UAPI_LINUX_FCNTL_H
#include <asm/fcntl.h>
+#include <linux/openat2.h>
#define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0)
#define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1)
@@ -100,5 +101,4 @@
#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
-
#endif /* _UAPI_LINUX_FCNTL_H */
diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h
index 1beb174..0d8a6f4 100644
--- a/tools/include/uapi/linux/fscrypt.h
+++ b/tools/include/uapi/linux/fscrypt.h
@@ -8,6 +8,7 @@
#ifndef _UAPI_LINUX_FSCRYPT_H
#define _UAPI_LINUX_FSCRYPT_H
+#include <linux/ioctl.h>
#include <linux/types.h>
/* Encryption policy flags */
@@ -109,11 +110,22 @@ struct fscrypt_key_specifier {
} u;
};
+/*
+ * Payload of Linux keyring key of type "fscrypt-provisioning", referenced by
+ * fscrypt_add_key_arg::key_id as an alternative to fscrypt_add_key_arg::raw.
+ */
+struct fscrypt_provisioning_key_payload {
+ __u32 type;
+ __u32 __reserved;
+ __u8 raw[];
+};
+
/* Struct passed to FS_IOC_ADD_ENCRYPTION_KEY */
struct fscrypt_add_key_arg {
struct fscrypt_key_specifier key_spec;
__u32 raw_size;
- __u32 __reserved[9];
+ __u32 key_id;
+ __u32 __reserved[8];
__u8 raw[];
};
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index 1521073..8533bf07 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -74,6 +74,8 @@ enum {
#define IPPROTO_UDPLITE IPPROTO_UDPLITE
IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */
#define IPPROTO_MPLS IPPROTO_MPLS
+ IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */
+#define IPPROTO_ETHERNET IPPROTO_ETHERNET
IPPROTO_RAW = 255, /* Raw IP packets */
#define IPPROTO_RAW IPPROTO_RAW
IPPROTO_MPTCP = 262, /* Multipath TCP connection */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index f0a16b4..4b95f9a 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1009,6 +1009,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_PPC_GUEST_DEBUG_SSTEP 176
#define KVM_CAP_ARM_NISV_TO_USER 177
#define KVM_CAP_ARM_INJECT_EXT_DABT 178
+#define KVM_CAP_S390_VCPU_RESETS 179
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1473,6 +1474,10 @@ struct kvm_enc_region {
/* Available with KVM_CAP_ARM_SVE */
#define KVM_ARM_VCPU_FINALIZE _IOW(KVMIO, 0xc2, int)
+/* Available with KVM_CAP_S390_VCPU_RESETS */
+#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3)
+#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4)
+
/* Secure Encrypted Virtualization command */
enum sev_cmd_id {
/* Guest initialization commands */
diff --git a/tools/include/uapi/linux/openat2.h b/tools/include/uapi/linux/openat2.h
new file mode 100644
index 0000000..58b1eb7
--- /dev/null
+++ b/tools/include/uapi/linux/openat2.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_OPENAT2_H
+#define _UAPI_LINUX_OPENAT2_H
+
+#include <linux/types.h>
+
+/*
+ * Arguments for how openat2(2) should open the target path. If only @flags and
+ * @mode are non-zero, then openat2(2) operates very similarly to openat(2).
+ *
+ * However, unlike openat(2), unknown or invalid bits in @flags result in
+ * -EINVAL rather than being silently ignored. @mode must be zero unless one of
+ * {O_CREAT, O_TMPFILE} are set.
+ *
+ * @flags: O_* flags.
+ * @mode: O_CREAT/O_TMPFILE file mode.
+ * @resolve: RESOLVE_* flags.
+ */
+struct open_how {
+ __u64 flags;
+ __u64 mode;
+ __u64 resolve;
+};
+
+/* how->resolve flags for openat2(2). */
+#define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings
+ (includes bind-mounts). */
+#define RESOLVE_NO_MAGICLINKS 0x02 /* Block traversal through procfs-style
+ "magic-links". */
+#define RESOLVE_NO_SYMLINKS 0x04 /* Block traversal through all symlinks
+ (implies OEXT_NO_MAGICLINKS) */
+#define RESOLVE_BENEATH 0x08 /* Block "lexical" trickery like
+ "..", symlinks, and absolute
+ paths which escape the dirfd. */
+#define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".."
+ be scoped inside the dirfd
+ (similar to chroot(2)). */
+
+#endif /* _UAPI_LINUX_OPENAT2_H */
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 7da1b37..07b4f81 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -234,4 +234,8 @@ struct prctl_mm_map {
#define PR_GET_TAGGED_ADDR_CTRL 56
# define PR_TAGGED_ADDR_ENABLE (1UL << 0)
+/* Control reclaim behavior when allocating memory */
+#define PR_SET_IO_FLUSHER 57
+#define PR_GET_IO_FLUSHER 58
+
#endif /* _LINUX_PRCTL_H */
diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h
index 4a02178..2e3bc22 100644
--- a/tools/include/uapi/linux/sched.h
+++ b/tools/include/uapi/linux/sched.h
@@ -36,6 +36,12 @@
/* Flags for the clone3() syscall. */
#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */
+/*
+ * cloning flags intersect with CSIGNAL so can be used with unshare and clone3
+ * syscalls only:
+ */
+#define CLONE_NEWTIME 0x00000080 /* New time namespace */
+
#ifndef __ASSEMBLY__
/**
* struct clone_args - arguments for the clone3 syscall
diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h
index df1153c..535a722 100644
--- a/tools/include/uapi/sound/asound.h
+++ b/tools/include/uapi/sound/asound.h
@@ -26,7 +26,9 @@
#if defined(__KERNEL__) || defined(__linux__)
#include <linux/types.h>
+#include <asm/byteorder.h>
#else
+#include <endian.h>
#include <sys/ioctl.h>
#endif
@@ -154,7 +156,7 @@ struct snd_hwdep_dsp_image {
* *
*****************************************************************************/
-#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 14)
+#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 15)
typedef unsigned long snd_pcm_uframes_t;
typedef signed long snd_pcm_sframes_t;
@@ -301,7 +303,9 @@ typedef int __bitwise snd_pcm_subformat_t;
#define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */
#define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */
-
+#if (__BITS_PER_LONG == 32 && defined(__USE_TIME_BITS64)) || defined __KERNEL__
+#define __SND_STRUCT_TIME64
+#endif
typedef int __bitwise snd_pcm_state_t;
#define SNDRV_PCM_STATE_OPEN ((__force snd_pcm_state_t) 0) /* stream is open */
@@ -317,8 +321,17 @@ typedef int __bitwise snd_pcm_state_t;
enum {
SNDRV_PCM_MMAP_OFFSET_DATA = 0x00000000,
- SNDRV_PCM_MMAP_OFFSET_STATUS = 0x80000000,
- SNDRV_PCM_MMAP_OFFSET_CONTROL = 0x81000000,
+ SNDRV_PCM_MMAP_OFFSET_STATUS_OLD = 0x80000000,
+ SNDRV_PCM_MMAP_OFFSET_CONTROL_OLD = 0x81000000,
+ SNDRV_PCM_MMAP_OFFSET_STATUS_NEW = 0x82000000,
+ SNDRV_PCM_MMAP_OFFSET_CONTROL_NEW = 0x83000000,
+#ifdef __SND_STRUCT_TIME64
+ SNDRV_PCM_MMAP_OFFSET_STATUS = SNDRV_PCM_MMAP_OFFSET_STATUS_NEW,
+ SNDRV_PCM_MMAP_OFFSET_CONTROL = SNDRV_PCM_MMAP_OFFSET_CONTROL_NEW,
+#else
+ SNDRV_PCM_MMAP_OFFSET_STATUS = SNDRV_PCM_MMAP_OFFSET_STATUS_OLD,
+ SNDRV_PCM_MMAP_OFFSET_CONTROL = SNDRV_PCM_MMAP_OFFSET_CONTROL_OLD,
+#endif
};
union snd_pcm_sync_id {
@@ -456,8 +469,13 @@ enum {
SNDRV_PCM_AUDIO_TSTAMP_TYPE_LAST = SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK_SYNCHRONIZED
};
+#ifndef __KERNEL__
+/* explicit padding avoids incompatibility between i386 and x86-64 */
+typedef struct { unsigned char pad[sizeof(time_t) - sizeof(int)]; } __time_pad;
+
struct snd_pcm_status {
snd_pcm_state_t state; /* stream state */
+ __time_pad pad1; /* align to timespec */
struct timespec trigger_tstamp; /* time when stream was started/stopped/paused */
struct timespec tstamp; /* reference timestamp */
snd_pcm_uframes_t appl_ptr; /* appl ptr */
@@ -473,17 +491,48 @@ struct snd_pcm_status {
__u32 audio_tstamp_accuracy; /* in ns units, only valid if indicated in audio_tstamp_data */
unsigned char reserved[52-2*sizeof(struct timespec)]; /* must be filled with zero */
};
+#endif
-struct snd_pcm_mmap_status {
+/*
+ * For mmap operations, we need the 64-bit layout, both for compat mode,
+ * and for y2038 compatibility. For 64-bit applications, the two definitions
+ * are identical, so we keep the traditional version.
+ */
+#ifdef __SND_STRUCT_TIME64
+#define __snd_pcm_mmap_status64 snd_pcm_mmap_status
+#define __snd_pcm_mmap_control64 snd_pcm_mmap_control
+#define __snd_pcm_sync_ptr64 snd_pcm_sync_ptr
+#ifdef __KERNEL__
+#define __snd_timespec64 __kernel_timespec
+#else
+#define __snd_timespec64 timespec
+#endif
+struct __snd_timespec {
+ __s32 tv_sec;
+ __s32 tv_nsec;
+};
+#else
+#define __snd_pcm_mmap_status snd_pcm_mmap_status
+#define __snd_pcm_mmap_control snd_pcm_mmap_control
+#define __snd_pcm_sync_ptr snd_pcm_sync_ptr
+#define __snd_timespec timespec
+struct __snd_timespec64 {
+ __s64 tv_sec;
+ __s64 tv_nsec;
+};
+
+#endif
+
+struct __snd_pcm_mmap_status {
snd_pcm_state_t state; /* RO: state - SNDRV_PCM_STATE_XXXX */
int pad1; /* Needed for 64 bit alignment */
snd_pcm_uframes_t hw_ptr; /* RO: hw ptr (0...boundary-1) */
- struct timespec tstamp; /* Timestamp */
+ struct __snd_timespec tstamp; /* Timestamp */
snd_pcm_state_t suspended_state; /* RO: suspended stream state */
- struct timespec audio_tstamp; /* from sample counter or wall clock */
+ struct __snd_timespec audio_tstamp; /* from sample counter or wall clock */
};
-struct snd_pcm_mmap_control {
+struct __snd_pcm_mmap_control {
snd_pcm_uframes_t appl_ptr; /* RW: appl ptr (0...boundary-1) */
snd_pcm_uframes_t avail_min; /* RW: min available frames for wakeup */
};
@@ -492,14 +541,59 @@ struct snd_pcm_mmap_control {
#define SNDRV_PCM_SYNC_PTR_APPL (1<<1) /* get appl_ptr from driver (r/w op) */
#define SNDRV_PCM_SYNC_PTR_AVAIL_MIN (1<<2) /* get avail_min from driver */
-struct snd_pcm_sync_ptr {
+struct __snd_pcm_sync_ptr {
unsigned int flags;
union {
- struct snd_pcm_mmap_status status;
+ struct __snd_pcm_mmap_status status;
unsigned char reserved[64];
} s;
union {
- struct snd_pcm_mmap_control control;
+ struct __snd_pcm_mmap_control control;
+ unsigned char reserved[64];
+ } c;
+};
+
+#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN)
+typedef char __pad_before_uframe[sizeof(__u64) - sizeof(snd_pcm_uframes_t)];
+typedef char __pad_after_uframe[0];
+#endif
+
+#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
+typedef char __pad_before_uframe[0];
+typedef char __pad_after_uframe[sizeof(__u64) - sizeof(snd_pcm_uframes_t)];
+#endif
+
+struct __snd_pcm_mmap_status64 {
+ snd_pcm_state_t state; /* RO: state - SNDRV_PCM_STATE_XXXX */
+ __u32 pad1; /* Needed for 64 bit alignment */
+ __pad_before_uframe __pad1;
+ snd_pcm_uframes_t hw_ptr; /* RO: hw ptr (0...boundary-1) */
+ __pad_after_uframe __pad2;
+ struct __snd_timespec64 tstamp; /* Timestamp */
+ snd_pcm_state_t suspended_state;/* RO: suspended stream state */
+ __u32 pad3; /* Needed for 64 bit alignment */
+ struct __snd_timespec64 audio_tstamp; /* sample counter or wall clock */
+};
+
+struct __snd_pcm_mmap_control64 {
+ __pad_before_uframe __pad1;
+ snd_pcm_uframes_t appl_ptr; /* RW: appl ptr (0...boundary-1) */
+ __pad_before_uframe __pad2;
+
+ __pad_before_uframe __pad3;
+ snd_pcm_uframes_t avail_min; /* RW: min available frames for wakeup */
+ __pad_after_uframe __pad4;
+};
+
+struct __snd_pcm_sync_ptr64 {
+ __u32 flags;
+ __u32 pad1;
+ union {
+ struct __snd_pcm_mmap_status64 status;
+ unsigned char reserved[64];
+ } s;
+ union {
+ struct __snd_pcm_mmap_control64 control;
unsigned char reserved[64];
} c;
};
@@ -584,6 +678,8 @@ enum {
#define SNDRV_PCM_IOCTL_STATUS _IOR('A', 0x20, struct snd_pcm_status)
#define SNDRV_PCM_IOCTL_DELAY _IOR('A', 0x21, snd_pcm_sframes_t)
#define SNDRV_PCM_IOCTL_HWSYNC _IO('A', 0x22)
+#define __SNDRV_PCM_IOCTL_SYNC_PTR _IOWR('A', 0x23, struct __snd_pcm_sync_ptr)
+#define __SNDRV_PCM_IOCTL_SYNC_PTR64 _IOWR('A', 0x23, struct __snd_pcm_sync_ptr64)
#define SNDRV_PCM_IOCTL_SYNC_PTR _IOWR('A', 0x23, struct snd_pcm_sync_ptr)
#define SNDRV_PCM_IOCTL_STATUS_EXT _IOWR('A', 0x24, struct snd_pcm_status)
#define SNDRV_PCM_IOCTL_CHANNEL_INFO _IOR('A', 0x32, struct snd_pcm_channel_info)
@@ -614,7 +710,7 @@ enum {
* Raw MIDI section - /dev/snd/midi??
*/
-#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 0)
+#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 1)
enum {
SNDRV_RAWMIDI_STREAM_OUTPUT = 0,
@@ -648,13 +744,16 @@ struct snd_rawmidi_params {
unsigned char reserved[16]; /* reserved for future use */
};
+#ifndef __KERNEL__
struct snd_rawmidi_status {
int stream;
+ __time_pad pad1;
struct timespec tstamp; /* Timestamp */
size_t avail; /* available bytes */
size_t xruns; /* count of overruns since last status (in bytes) */
unsigned char reserved[16]; /* reserved for future use */
};
+#endif
#define SNDRV_RAWMIDI_IOCTL_PVERSION _IOR('W', 0x00, int)
#define SNDRV_RAWMIDI_IOCTL_INFO _IOR('W', 0x01, struct snd_rawmidi_info)
@@ -667,7 +766,7 @@ struct snd_rawmidi_status {
* Timer section - /dev/snd/timer
*/
-#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 6)
+#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 7)
enum {
SNDRV_TIMER_CLASS_NONE = -1,
@@ -761,6 +860,7 @@ struct snd_timer_params {
unsigned char reserved[60]; /* reserved */
};
+#ifndef __KERNEL__
struct snd_timer_status {
struct timespec tstamp; /* Timestamp - last update */
unsigned int resolution; /* current period resolution in ns */
@@ -769,10 +869,11 @@ struct snd_timer_status {
unsigned int queue; /* used queue size */
unsigned char reserved[64]; /* reserved */
};
+#endif
#define SNDRV_TIMER_IOCTL_PVERSION _IOR('T', 0x00, int)
#define SNDRV_TIMER_IOCTL_NEXT_DEVICE _IOWR('T', 0x01, struct snd_timer_id)
-#define SNDRV_TIMER_IOCTL_TREAD _IOW('T', 0x02, int)
+#define SNDRV_TIMER_IOCTL_TREAD_OLD _IOW('T', 0x02, int)
#define SNDRV_TIMER_IOCTL_GINFO _IOWR('T', 0x03, struct snd_timer_ginfo)
#define SNDRV_TIMER_IOCTL_GPARAMS _IOW('T', 0x04, struct snd_timer_gparams)
#define SNDRV_TIMER_IOCTL_GSTATUS _IOWR('T', 0x05, struct snd_timer_gstatus)
@@ -785,6 +886,15 @@ struct snd_timer_status {
#define SNDRV_TIMER_IOCTL_STOP _IO('T', 0xa1)
#define SNDRV_TIMER_IOCTL_CONTINUE _IO('T', 0xa2)
#define SNDRV_TIMER_IOCTL_PAUSE _IO('T', 0xa3)
+#define SNDRV_TIMER_IOCTL_TREAD64 _IOW('T', 0xa4, int)
+
+#if __BITS_PER_LONG == 64
+#define SNDRV_TIMER_IOCTL_TREAD SNDRV_TIMER_IOCTL_TREAD_OLD
+#else
+#define SNDRV_TIMER_IOCTL_TREAD ((sizeof(__kernel_long_t) >= sizeof(time_t)) ? \
+ SNDRV_TIMER_IOCTL_TREAD_OLD : \
+ SNDRV_TIMER_IOCTL_TREAD64)
+#endif
struct snd_timer_read {
unsigned int resolution;
@@ -810,11 +920,15 @@ enum {
SNDRV_TIMER_EVENT_MRESUME = SNDRV_TIMER_EVENT_RESUME + 10,
};
+#ifndef __KERNEL__
struct snd_timer_tread {
int event;
+ __time_pad pad1;
struct timespec tstamp;
unsigned int val;
+ __time_pad pad2;
};
+#endif
/****************************************************************************
* *
@@ -822,7 +936,7 @@ struct snd_timer_tread {
* *
****************************************************************************/
-#define SNDRV_CTL_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 7)
+#define SNDRV_CTL_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 8)
struct snd_ctl_card_info {
int card; /* card number */
@@ -860,7 +974,7 @@ typedef int __bitwise snd_ctl_elem_iface_t;
#define SNDRV_CTL_ELEM_ACCESS_WRITE (1<<1)
#define SNDRV_CTL_ELEM_ACCESS_READWRITE (SNDRV_CTL_ELEM_ACCESS_READ|SNDRV_CTL_ELEM_ACCESS_WRITE)
#define SNDRV_CTL_ELEM_ACCESS_VOLATILE (1<<2) /* control value may be changed without a notification */
-#define SNDRV_CTL_ELEM_ACCESS_TIMESTAMP (1<<3) /* when was control changed */
+// (1 << 3) is unused.
#define SNDRV_CTL_ELEM_ACCESS_TLV_READ (1<<4) /* TLV read is possible */
#define SNDRV_CTL_ELEM_ACCESS_TLV_WRITE (1<<5) /* TLV write is possible */
#define SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE (SNDRV_CTL_ELEM_ACCESS_TLV_READ|SNDRV_CTL_ELEM_ACCESS_TLV_WRITE)
@@ -926,11 +1040,7 @@ struct snd_ctl_elem_info {
} enumerated;
unsigned char reserved[128];
} value;
- union {
- unsigned short d[4]; /* dimensions */
- unsigned short *d_ptr; /* indirect - obsoleted */
- } dimen;
- unsigned char reserved[64-4*sizeof(unsigned short)];
+ unsigned char reserved[64];
};
struct snd_ctl_elem_value {
@@ -955,8 +1065,7 @@ struct snd_ctl_elem_value {
} bytes;
struct snd_aes_iec958 iec958;
} value; /* RO */
- struct timespec tstamp;
- unsigned char reserved[128-sizeof(struct timespec)];
+ unsigned char reserved[128];
};
struct snd_ctl_tlv {
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 514b1a5..7469c7d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -24,6 +24,7 @@
#include <endian.h>
#include <fcntl.h>
#include <errno.h>
+#include <ctype.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
@@ -1283,7 +1284,7 @@ static size_t bpf_map_mmap_sz(const struct bpf_map *map)
static char *internal_map_name(struct bpf_object *obj,
enum libbpf_map_type type)
{
- char map_name[BPF_OBJ_NAME_LEN];
+ char map_name[BPF_OBJ_NAME_LEN], *p;
const char *sfx = libbpf_type_to_btf_name[type];
int sfx_len = max((size_t)7, strlen(sfx));
int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1,
@@ -1292,6 +1293,11 @@ static char *internal_map_name(struct bpf_object *obj,
snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
sfx_len, libbpf_type_to_btf_name[type]);
+ /* sanitise map name to characters allowed by kernel */
+ for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
+ if (!isalnum(*p) && *p != '_' && *p != '.')
+ *p = '_';
+
return strdup(map_name);
}
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index c4dd23c..8ead555 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -239,7 +239,6 @@
set buildid.dir to /dev/null. The default is $HOME/.debug
annotate.*::
- These options work only for TUI.
These are in control of addresses, jump function, source code
in lines of assembly code from a specific program.
@@ -269,6 +268,8 @@
│ mov (%rdi),%rdx
│ return n;
+ This option works with tui, stdio2 browsers.
+
annotate.use_offset::
Basing on a first address of a loaded function, offset can be used.
Instead of using original addresses of assembly code,
@@ -287,6 +288,8 @@
368:│ mov 0x8(%r14),%rdi
+ This option works with tui, stdio2 browsers.
+
annotate.jump_arrows::
There can be jump instruction among assembly code.
Depending on a boolean value of jump_arrows,
@@ -306,6 +309,8 @@
│1330: mov %r15,%r10
│1333: cmp %r15,%r14
+ This option works with tui browser.
+
annotate.show_linenr::
When showing source code if this option is 'true',
line numbers are printed as below.
@@ -325,6 +330,8 @@
│ array++;
│ }
+ This option works with tui, stdio2 browsers.
+
annotate.show_nr_jumps::
Let's see a part of assembly code.
@@ -335,6 +342,8 @@
│1 1382: movb $0x1,-0x270(%rbp)
+ This option works with tui, stdio2 browsers.
+
annotate.show_total_period::
To compare two records on an instruction base, with this option
provided, display total number of samples that belong to a line
@@ -348,11 +357,30 @@
99.93 │ mov %eax,%eax
+ This option works with tui, stdio2, stdio browsers.
+
+ annotate.show_nr_samples::
+ By default perf annotate shows percentage of samples. This option
+ can be used to print absolute number of samples. Ex, when set as
+ false:
+
+ Percent│
+ 74.03 │ mov %fs:0x28,%rax
+
+ When set as true:
+
+ Samples│
+ 6 │ mov %fs:0x28,%rax
+
+ This option works with tui, stdio2, stdio browsers.
+
annotate.offset_level::
Default is '1', meaning just jump targets will have offsets show right beside
the instruction. When set to '2' 'call' instructions will also have its offsets
shown, 3 or higher will show offsets for all instructions.
+ This option works with tui, stdio2 browsers.
+
hist.*::
hist.percentage::
This option control the way to calculate overhead of filtered entries -
@@ -490,6 +518,12 @@
column by default.
The default is 'true'.
+ top.call-graph::
+ This is identical to 'call-graph.record-mode', except it is
+ applicable only for 'top' subcommand. This option ONLY setup
+ the unwind method. To enable 'perf top' to actually use it,
+ the command line option -g must be specified.
+
man.*::
man.viewer::
This option can assign a tool to view manual pages when 'help'
@@ -517,6 +551,16 @@
But if this option is 'no-cache', it will not update the build-id cache.
'skip' skips post-processing and does not update the cache.
+ record.call-graph::
+ This is identical to 'call-graph.record-mode', except it is
+ applicable only for 'record' subcommand. This option ONLY setup
+ the unwind method. To enable 'perf record' to actually use it,
+ the command line option -g must be specified.
+
+ record.aio::
+ Use 'n' control blocks in asynchronous (Posix AIO) trace writing
+ mode ('n' default: 1, max: 4).
+
diff.*::
diff.order::
This option sets the number of columns to sort the result.
@@ -566,6 +610,11 @@
"libbeauty", the default, to use the same argument beautifiers used in the
strace-like sys_enter+sys_exit lines.
+ftrace.*::
+ ftrace.tracer::
+ Can be used to select the default tracer. Possible values are
+ 'function' and 'function_graph'.
+
llvm.*::
llvm.clang-path::
Path to clang. If omit, search it from $PATH.
@@ -610,6 +659,29 @@
The script gets the same options passed as a full perf script,
in particular -i perfdata file, --cpu, --tid
+convert.*::
+
+ convert.queue-size::
+ Limit the size of ordered_events queue, so we could control
+ allocation size of perf data files without proper finished
+ round events.
+
+intel-pt.*::
+
+ intel-pt.cache-divisor::
+
+ intel-pt.mispred-all::
+ If set, Intel PT decoder will set the mispred flag on all
+ branches.
+
+auxtrace.*::
+
+ auxtrace.dumpdir::
+ s390 only. The directory to save the auxiliary trace buffer
+ can be changed using this option. Ex, auxtrace.dumpdir=/tmp.
+ If the directory does not exist or has the wrong file type,
+ the current directory is used.
+
SEE ALSO
--------
linkperf:perf[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 7902a56..b8fc7d9 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -35,7 +35,7 @@
# Only pass canonical directory names as the output directory:
#
ifneq ($(O),)
- FULL_O := $(shell readlink -f $(O) || echo $(O))
+ FULL_O := $(shell cd $(PWD); readlink -f $(O) || echo $(O))
endif
#
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 2898cfd..941f814 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -858,21 +858,6 @@ static void cs_etm_recording_free(struct auxtrace_record *itr)
free(ptr);
}
-static int cs_etm_read_finish(struct auxtrace_record *itr, int idx)
-{
- struct cs_etm_recording *ptr =
- container_of(itr, struct cs_etm_recording, itr);
- struct evsel *evsel;
-
- evlist__for_each_entry(ptr->evlist, evsel) {
- if (evsel->core.attr.type == ptr->cs_etm_pmu->type)
- return perf_evlist__enable_event_idx(ptr->evlist,
- evsel, idx);
- }
-
- return -EINVAL;
-}
-
struct auxtrace_record *cs_etm_record_init(int *err)
{
struct perf_pmu *cs_etm_pmu;
@@ -892,6 +877,7 @@ struct auxtrace_record *cs_etm_record_init(int *err)
}
ptr->cs_etm_pmu = cs_etm_pmu;
+ ptr->itr.pmu = cs_etm_pmu;
ptr->itr.parse_snapshot_options = cs_etm_parse_snapshot_options;
ptr->itr.recording_options = cs_etm_recording_options;
ptr->itr.info_priv_size = cs_etm_info_priv_size;
@@ -901,7 +887,7 @@ struct auxtrace_record *cs_etm_record_init(int *err)
ptr->itr.snapshot_finish = cs_etm_snapshot_finish;
ptr->itr.reference = cs_etm_reference;
ptr->itr.free = cs_etm_recording_free;
- ptr->itr.read_finish = cs_etm_read_finish;
+ ptr->itr.read_finish = auxtrace_record__read_finish;
*err = 0;
return &ptr->itr;
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index eba6541..27653be 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -11,17 +11,17 @@
#include <linux/zalloc.h>
#include <time.h>
-#include "../../util/cpumap.h"
-#include "../../util/event.h"
-#include "../../util/evsel.h"
-#include "../../util/evlist.h"
-#include "../../util/session.h"
+#include "../../../util/cpumap.h"
+#include "../../../util/event.h"
+#include "../../../util/evsel.h"
+#include "../../../util/evlist.h"
+#include "../../../util/session.h"
#include <internal/lib.h> // page_size
-#include "../../util/pmu.h"
-#include "../../util/debug.h"
-#include "../../util/auxtrace.h"
-#include "../../util/record.h"
-#include "../../util/arm-spe.h"
+#include "../../../util/pmu.h"
+#include "../../../util/debug.h"
+#include "../../../util/auxtrace.h"
+#include "../../../util/record.h"
+#include "../../../util/arm-spe.h"
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
@@ -158,20 +158,6 @@ static void arm_spe_recording_free(struct auxtrace_record *itr)
free(sper);
}
-static int arm_spe_read_finish(struct auxtrace_record *itr, int idx)
-{
- struct arm_spe_recording *sper =
- container_of(itr, struct arm_spe_recording, itr);
- struct evsel *evsel;
-
- evlist__for_each_entry(sper->evlist, evsel) {
- if (evsel->core.attr.type == sper->arm_spe_pmu->type)
- return perf_evlist__enable_event_idx(sper->evlist,
- evsel, idx);
- }
- return -EINVAL;
-}
-
struct auxtrace_record *arm_spe_recording_init(int *err,
struct perf_pmu *arm_spe_pmu)
{
@@ -189,12 +175,13 @@ struct auxtrace_record *arm_spe_recording_init(int *err,
}
sper->arm_spe_pmu = arm_spe_pmu;
+ sper->itr.pmu = arm_spe_pmu;
sper->itr.recording_options = arm_spe_recording_options;
sper->itr.info_priv_size = arm_spe_info_priv_size;
sper->itr.info_fill = arm_spe_info_fill;
sper->itr.free = arm_spe_recording_free;
sper->itr.reference = arm_spe_reference;
- sper->itr.read_finish = arm_spe_read_finish;
+ sper->itr.read_finish = auxtrace_record__read_finish;
sper->itr.alignment = 0;
*err = 0;
diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c
index a32e4b7..d730666 100644
--- a/tools/perf/arch/arm64/util/header.c
+++ b/tools/perf/arch/arm64/util/header.c
@@ -1,8 +1,10 @@
#include <stdio.h>
#include <stdlib.h>
#include <perf/cpumap.h>
+#include <util/cpumap.h>
#include <internal/cpumap.h>
#include <api/fs/fs.h>
+#include <errno.h>
#include "debug.h"
#include "header.h"
@@ -12,26 +14,21 @@
#define MIDR_VARIANT_SHIFT 20
#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT)
-char *get_cpuid_str(struct perf_pmu *pmu)
+static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
{
- char *buf = NULL;
- char path[PATH_MAX];
const char *sysfs = sysfs__mountpoint();
- int cpu;
u64 midr = 0;
- struct perf_cpu_map *cpus;
- FILE *file;
+ int cpu;
- if (!sysfs || !pmu || !pmu->cpus)
- return NULL;
+ if (!sysfs || sz < MIDR_SIZE)
+ return EINVAL;
- buf = malloc(MIDR_SIZE);
- if (!buf)
- return NULL;
+ cpus = perf_cpu_map__get(cpus);
- /* read midr from list of cpus mapped to this pmu */
- cpus = perf_cpu_map__get(pmu->cpus);
for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
+ char path[PATH_MAX];
+ FILE *file;
+
scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR,
sysfs, cpus->map[cpu]);
@@ -57,12 +54,48 @@ char *get_cpuid_str(struct perf_pmu *pmu)
break;
}
- if (!midr) {
+ perf_cpu_map__put(cpus);
+
+ if (!midr)
+ return EINVAL;
+
+ return 0;
+}
+
+int get_cpuid(char *buf, size_t sz)
+{
+ struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
+ int ret;
+
+ if (!cpus)
+ return EINVAL;
+
+ ret = _get_cpuid(buf, sz, cpus);
+
+ perf_cpu_map__put(cpus);
+
+ return ret;
+}
+
+char *get_cpuid_str(struct perf_pmu *pmu)
+{
+ char *buf = NULL;
+ int res;
+
+ if (!pmu || !pmu->cpus)
+ return NULL;
+
+ buf = malloc(MIDR_SIZE);
+ if (!buf)
+ return NULL;
+
+ /* read midr from list of cpus mapped to this pmu */
+ res = _get_cpuid(buf, MIDR_SIZE, pmu->cpus);
+ if (res) {
pr_err("failed to get cpuid string for PMU %s\n", pmu->name);
free(buf);
buf = NULL;
}
- perf_cpu_map__put(cpus);
return buf;
}
diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c
index 2864e2e..2833e10 100644
--- a/tools/perf/arch/arm64/util/perf_regs.c
+++ b/tools/perf/arch/arm64/util/perf_regs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#include "../../util/perf_regs.h"
+#include "../../../util/perf_regs.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG_END
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 43f736e..35b61bf 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -517,3 +517,5 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 nospu clone3 ppc_clone3
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c
index e9c436e..0a52429 100644
--- a/tools/perf/arch/powerpc/util/perf_regs.c
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -4,8 +4,8 @@
#include <regex.h>
#include <linux/zalloc.h>
-#include "../../util/perf_regs.h"
-#include "../../util/debug.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/debug.h"
#include <linux/kernel.h>
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index c29976e..44d510b 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -357,6 +357,8 @@
433 common fspick __x64_sys_fspick
434 common pidfd_open __x64_sys_pidfd_open
435 common clone3 __x64_sys_clone3/ptregs
+437 common openat2 __x64_sys_openat2
+438 common pidfd_getfd __x64_sys_pidfd_getfd
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c
index 7abc9fd..3da506e 100644
--- a/tools/perf/arch/x86/util/auxtrace.c
+++ b/tools/perf/arch/x86/util/auxtrace.c
@@ -7,13 +7,13 @@
#include <errno.h>
#include <stdbool.h>
-#include "../../util/header.h"
-#include "../../util/debug.h"
-#include "../../util/pmu.h"
-#include "../../util/auxtrace.h"
-#include "../../util/intel-pt.h"
-#include "../../util/intel-bts.h"
-#include "../../util/evlist.h"
+#include "../../../util/header.h"
+#include "../../../util/debug.h"
+#include "../../../util/pmu.h"
+#include "../../../util/auxtrace.h"
+#include "../../../util/intel-pt.h"
+#include "../../../util/intel-bts.h"
+#include "../../../util/evlist.h"
static
struct auxtrace_record *auxtrace_record__init_intel(struct evlist *evlist,
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
index ac45015..047dc00 100644
--- a/tools/perf/arch/x86/util/event.c
+++ b/tools/perf/arch/x86/util/event.c
@@ -3,12 +3,12 @@
#include <linux/string.h>
#include <linux/zalloc.h>
-#include "../../util/event.h"
-#include "../../util/synthetic-events.h"
-#include "../../util/machine.h"
-#include "../../util/tool.h"
-#include "../../util/map.h"
-#include "../../util/debug.h"
+#include "../../../util/event.h"
+#include "../../../util/synthetic-events.h"
+#include "../../../util/machine.h"
+#include "../../../util/tool.h"
+#include "../../../util/map.h"
+#include "../../../util/debug.h"
#if defined(__x86_64__)
diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c
index aa6deb4..578c8c5 100644
--- a/tools/perf/arch/x86/util/header.c
+++ b/tools/perf/arch/x86/util/header.c
@@ -7,8 +7,8 @@
#include <string.h>
#include <regex.h>
-#include "../../util/debug.h"
-#include "../../util/header.h"
+#include "../../../util/debug.h"
+#include "../../../util/header.h"
static inline void
cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c,
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 27d9e21..09f9380 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -11,18 +11,18 @@
#include <linux/log2.h>
#include <linux/zalloc.h>
-#include "../../util/cpumap.h"
-#include "../../util/event.h"
-#include "../../util/evsel.h"
-#include "../../util/evlist.h"
-#include "../../util/mmap.h"
-#include "../../util/session.h"
-#include "../../util/pmu.h"
-#include "../../util/debug.h"
-#include "../../util/record.h"
-#include "../../util/tsc.h"
-#include "../../util/auxtrace.h"
-#include "../../util/intel-bts.h"
+#include "../../../util/cpumap.h"
+#include "../../../util/event.h"
+#include "../../../util/evsel.h"
+#include "../../../util/evlist.h"
+#include "../../../util/mmap.h"
+#include "../../../util/session.h"
+#include "../../../util/pmu.h"
+#include "../../../util/debug.h"
+#include "../../../util/record.h"
+#include "../../../util/tsc.h"
+#include "../../../util/auxtrace.h"
+#include "../../../util/intel-bts.h"
#include <internal/lib.h> // page_size
#define KiB(x) ((x) * 1024)
@@ -413,20 +413,6 @@ static int intel_bts_find_snapshot(struct auxtrace_record *itr, int idx,
return err;
}
-static int intel_bts_read_finish(struct auxtrace_record *itr, int idx)
-{
- struct intel_bts_recording *btsr =
- container_of(itr, struct intel_bts_recording, itr);
- struct evsel *evsel;
-
- evlist__for_each_entry(btsr->evlist, evsel) {
- if (evsel->core.attr.type == btsr->intel_bts_pmu->type)
- return perf_evlist__enable_event_idx(btsr->evlist,
- evsel, idx);
- }
- return -EINVAL;
-}
-
struct auxtrace_record *intel_bts_recording_init(int *err)
{
struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
@@ -447,6 +433,7 @@ struct auxtrace_record *intel_bts_recording_init(int *err)
}
btsr->intel_bts_pmu = intel_bts_pmu;
+ btsr->itr.pmu = intel_bts_pmu;
btsr->itr.recording_options = intel_bts_recording_options;
btsr->itr.info_priv_size = intel_bts_info_priv_size;
btsr->itr.info_fill = intel_bts_info_fill;
@@ -456,7 +443,7 @@ struct auxtrace_record *intel_bts_recording_init(int *err)
btsr->itr.find_snapshot = intel_bts_find_snapshot;
btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options;
btsr->itr.reference = intel_bts_reference;
- btsr->itr.read_finish = intel_bts_read_finish;
+ btsr->itr.read_finish = auxtrace_record__read_finish;
btsr->itr.alignment = sizeof(struct branch);
return &btsr->itr;
}
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 20df442..1643aed 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -13,23 +13,23 @@
#include <linux/zalloc.h>
#include <cpuid.h>
-#include "../../util/session.h"
-#include "../../util/event.h"
-#include "../../util/evlist.h"
-#include "../../util/evsel.h"
-#include "../../util/evsel_config.h"
-#include "../../util/cpumap.h"
-#include "../../util/mmap.h"
+#include "../../../util/session.h"
+#include "../../../util/event.h"
+#include "../../../util/evlist.h"
+#include "../../../util/evsel.h"
+#include "../../../util/evsel_config.h"
+#include "../../../util/cpumap.h"
+#include "../../../util/mmap.h"
#include <subcmd/parse-options.h>
-#include "../../util/parse-events.h"
-#include "../../util/pmu.h"
-#include "../../util/debug.h"
-#include "../../util/auxtrace.h"
-#include "../../util/record.h"
-#include "../../util/target.h"
-#include "../../util/tsc.h"
+#include "../../../util/parse-events.h"
+#include "../../../util/pmu.h"
+#include "../../../util/debug.h"
+#include "../../../util/auxtrace.h"
+#include "../../../util/record.h"
+#include "../../../util/target.h"
+#include "../../../util/tsc.h"
#include <internal/lib.h> // page_size
-#include "../../util/intel-pt.h"
+#include "../../../util/intel-pt.h"
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
@@ -1166,20 +1166,6 @@ static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
return rdtsc();
}
-static int intel_pt_read_finish(struct auxtrace_record *itr, int idx)
-{
- struct intel_pt_recording *ptr =
- container_of(itr, struct intel_pt_recording, itr);
- struct evsel *evsel;
-
- evlist__for_each_entry(ptr->evlist, evsel) {
- if (evsel->core.attr.type == ptr->intel_pt_pmu->type)
- return perf_evlist__enable_event_idx(ptr->evlist, evsel,
- idx);
- }
- return -EINVAL;
-}
-
struct auxtrace_record *intel_pt_recording_init(int *err)
{
struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
@@ -1200,6 +1186,7 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
}
ptr->intel_pt_pmu = intel_pt_pmu;
+ ptr->itr.pmu = intel_pt_pmu;
ptr->itr.recording_options = intel_pt_recording_options;
ptr->itr.info_priv_size = intel_pt_info_priv_size;
ptr->itr.info_fill = intel_pt_info_fill;
@@ -1209,7 +1196,7 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
ptr->itr.find_snapshot = intel_pt_find_snapshot;
ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
ptr->itr.reference = intel_pt_reference;
- ptr->itr.read_finish = intel_pt_read_finish;
+ ptr->itr.read_finish = auxtrace_record__read_finish;
/*
* Decoding starts at a PSB packet. Minimum PSB period is 2K so 4K
* should give at least 1 PSB per sample.
diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c
index e17e080..31679c3 100644
--- a/tools/perf/arch/x86/util/machine.c
+++ b/tools/perf/arch/x86/util/machine.c
@@ -5,9 +5,9 @@
#include <stdlib.h>
#include <internal/lib.h> // page_size
-#include "../../util/machine.h"
-#include "../../util/map.h"
-#include "../../util/symbol.h"
+#include "../../../util/machine.h"
+#include "../../../util/map.h"
+#include "../../../util/symbol.h"
#include <linux/ctype.h>
#include <symbol/kallsyms.h>
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index c218b83..fca81b3 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -5,10 +5,10 @@
#include <linux/kernel.h>
#include <linux/zalloc.h>
-#include "../../perf-sys.h"
-#include "../../util/perf_regs.h"
-#include "../../util/debug.h"
-#include "../../util/event.h"
+#include "../../../perf-sys.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/debug.h"
+#include "../../../util/event.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG(AX, PERF_REG_X86_AX),
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index e33ef5b..d48d608 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -4,9 +4,9 @@
#include <linux/stddef.h>
#include <linux/perf_event.h>
-#include "../../util/intel-pt.h"
-#include "../../util/intel-bts.h"
-#include "../../util/pmu.h"
+#include "../../../util/intel-pt.h"
+#include "../../../util/intel-bts.h"
+#include "../../../util/pmu.h"
struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
{
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index fddb3ce..4aa6de1 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -2,6 +2,10 @@
#ifndef BENCH_H
#define BENCH_H
+#include <sys/time.h>
+
+extern struct timeval bench__start, bench__end, bench__runtime;
+
/*
* The madvise transparent hugepage constants were added in glibc
* 2.13. For compatibility with older versions of glibc, define these
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index bb617e5..cadc18d 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -35,7 +35,6 @@
static unsigned int nthreads = 0;
static unsigned int nsecs = 8;
-struct timeval start, end, runtime;
static bool done, __verbose, randomize;
/*
@@ -94,8 +93,8 @@ static void toggle_done(int sig __maybe_unused,
{
/* inform all threads that we're done for the day */
done = true;
- gettimeofday(&end, NULL);
- timersub(&end, &start, &runtime);
+ gettimeofday(&bench__end, NULL);
+ timersub(&bench__end, &bench__start, &bench__runtime);
}
static void nest_epollfd(void)
@@ -313,6 +312,7 @@ int bench_epoll_ctl(int argc, const char **argv)
exit(EXIT_FAILURE);
}
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
@@ -361,7 +361,7 @@ int bench_epoll_ctl(int argc, const char **argv)
threads_starting = nthreads;
- gettimeofday(&start, NULL);
+ gettimeofday(&bench__start, NULL);
do_threads(worker, cpu);
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index 7af6944..f938c58 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -90,7 +90,6 @@
static unsigned int nthreads = 0;
static unsigned int nsecs = 8;
-struct timeval start, end, runtime;
static bool wdone, done, __verbose, randomize, nonblocking;
/*
@@ -276,8 +275,8 @@ static void toggle_done(int sig __maybe_unused,
{
/* inform all threads that we're done for the day */
done = true;
- gettimeofday(&end, NULL);
- timersub(&end, &start, &runtime);
+ gettimeofday(&bench__end, NULL);
+ timersub(&bench__end, &bench__start, &bench__runtime);
}
static void print_summary(void)
@@ -287,7 +286,7 @@ static void print_summary(void)
printf("\nAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
avg, rel_stddev_stats(stddev, avg),
- (int) runtime.tv_sec);
+ (int)bench__runtime.tv_sec);
}
static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
@@ -427,6 +426,7 @@ int bench_epoll_wait(int argc, const char **argv)
exit(EXIT_FAILURE);
}
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
@@ -479,7 +479,7 @@ int bench_epoll_wait(int argc, const char **argv)
threads_starting = nthreads;
- gettimeofday(&start, NULL);
+ gettimeofday(&bench__start, NULL);
do_threads(worker, cpu);
@@ -519,7 +519,7 @@ int bench_epoll_wait(int argc, const char **argv)
qsort(worker, nthreads, sizeof(struct worker), cmpworker);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops/runtime.tv_sec;
+ unsigned long t = worker[i].ops / bench__runtime.tv_sec;
update_stats(&throughput_stats, t);
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 8ba0c33..65eebe0 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -37,7 +37,7 @@ static unsigned int nfutexes = 1024;
static bool fshared = false, done = false, silent = false;
static int futex_flag = 0;
-struct timeval start, end, runtime;
+struct timeval bench__start, bench__end, bench__runtime;
static pthread_mutex_t thread_lock;
static unsigned int threads_starting;
static struct stats throughput_stats;
@@ -103,8 +103,8 @@ static void toggle_done(int sig __maybe_unused,
{
/* inform all threads that we're done for the day */
done = true;
- gettimeofday(&end, NULL);
- timersub(&end, &start, &runtime);
+ gettimeofday(&bench__end, NULL);
+ timersub(&bench__end, &bench__start, &bench__runtime);
}
static void print_summary(void)
@@ -114,7 +114,7 @@ static void print_summary(void)
printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
!silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
- (int) runtime.tv_sec);
+ (int)bench__runtime.tv_sec);
}
int bench_futex_hash(int argc, const char **argv)
@@ -137,6 +137,7 @@ int bench_futex_hash(int argc, const char **argv)
if (!cpu)
goto errmem;
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
@@ -161,7 +162,7 @@ int bench_futex_hash(int argc, const char **argv)
threads_starting = nthreads;
pthread_attr_init(&thread_attr);
- gettimeofday(&start, NULL);
+ gettimeofday(&bench__start, NULL);
for (i = 0; i < nthreads; i++) {
worker[i].tid = i;
worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex));
@@ -204,7 +205,7 @@ int bench_futex_hash(int argc, const char **argv)
pthread_mutex_destroy(&thread_lock);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops/runtime.tv_sec;
+ unsigned long t = worker[i].ops / bench__runtime.tv_sec;
update_stats(&throughput_stats, t);
if (!silent) {
if (nfutexes == 1)
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index d0cae81..89fd8f3 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -37,7 +37,6 @@ static bool silent = false, multi = false;
static bool done = false, fshared = false;
static unsigned int nthreads = 0;
static int futex_flag = 0;
-struct timeval start, end, runtime;
static pthread_mutex_t thread_lock;
static unsigned int threads_starting;
static struct stats throughput_stats;
@@ -64,7 +63,7 @@ static void print_summary(void)
printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
!silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
- (int) runtime.tv_sec);
+ (int)bench__runtime.tv_sec);
}
static void toggle_done(int sig __maybe_unused,
@@ -73,8 +72,8 @@ static void toggle_done(int sig __maybe_unused,
{
/* inform all threads that we're done for the day */
done = true;
- gettimeofday(&end, NULL);
- timersub(&end, &start, &runtime);
+ gettimeofday(&bench__end, NULL);
+ timersub(&bench__end, &bench__start, &bench__runtime);
}
static void *workerfn(void *arg)
@@ -161,6 +160,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
if (!cpu)
err(EXIT_FAILURE, "calloc");
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
@@ -185,7 +185,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
threads_starting = nthreads;
pthread_attr_init(&thread_attr);
- gettimeofday(&start, NULL);
+ gettimeofday(&bench__start, NULL);
create_threads(worker, thread_attr, cpu);
pthread_attr_destroy(&thread_attr);
@@ -211,7 +211,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
pthread_mutex_destroy(&thread_lock);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops/runtime.tv_sec;
+ unsigned long t = worker[i].ops / bench__runtime.tv_sec;
update_stats(&throughput_stats, t);
if (!silent)
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index a00a689..7a15c2e 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -128,6 +128,7 @@ int bench_futex_requeue(int argc, const char **argv)
if (!cpu)
err(EXIT_FAILURE, "cpu_map__new");
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index a053cf2..cd2b81a 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -234,6 +234,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
exit(EXIT_FAILURE);
}
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index df81009..2dfcef3 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -43,7 +43,7 @@ static bool done = false, silent = false, fshared = false;
static pthread_mutex_t thread_lock;
static pthread_cond_t thread_parent, thread_worker;
static struct stats waketime_stats, wakeup_stats;
-static unsigned int ncpus, threads_starting, nthreads = 0;
+static unsigned int threads_starting, nthreads = 0;
static int futex_flag = 0;
static const struct option options[] = {
@@ -136,12 +136,13 @@ int bench_futex_wake(int argc, const char **argv)
if (!cpu)
err(EXIT_FAILURE, "calloc");
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
if (!nthreads)
- nthreads = ncpus;
+ nthreads = cpu->nr;
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index ff61795..6c0a041 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -566,6 +566,8 @@ int cmd_annotate(int argc, const char **argv)
if (ret < 0)
return ret;
+ annotation_config__init(&annotate.opts);
+
argc = parse_options(argc, argv, options, annotate_usage, 0);
if (argc) {
/*
@@ -605,8 +607,6 @@ int cmd_annotate(int argc, const char **argv)
if (ret < 0)
goto out_delete;
- annotation_config__init();
-
symbol_conf.try_vmlinux_path = true;
ret = symbol__init(&annotate.session->header.env);
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index f8b6ae5..c03c36f 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -1312,7 +1312,8 @@ static int cycles_printf(struct hist_entry *he, struct hist_entry *pair,
end_line = map__srcline(he->ms.map, bi->sym->start + bi->end,
he->ms.sym);
- if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) {
+ if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) &&
+ (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) {
scnprintf(buf, sizeof(buf), "[%s -> %s] %4ld",
start_line, end_line, block_he->diff.cycles);
} else {
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 26bc592..70548df 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -449,7 +449,8 @@ static int perf_del_probe_events(struct strfilter *filter)
ret = probe_file__del_strlist(kfd, klist);
if (ret < 0)
goto error;
- }
+ } else if (ret == -ENOMEM)
+ goto error;
ret2 = probe_file__get_events(ufd, filter, ulist);
if (ret2 == 0) {
@@ -459,7 +460,8 @@ static int perf_del_probe_events(struct strfilter *filter)
ret2 = probe_file__del_strlist(ufd, ulist);
if (ret2 < 0)
goto error;
- }
+ } else if (ret2 == -ENOMEM)
+ goto error;
if (ret == -ENOENT && ret2 == -ENOENT)
pr_warning("\"%s\" does not hit any event.\n", str);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 9483b3f..72a12b6 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1507,7 +1507,7 @@ int cmd_report(int argc, const char **argv)
symbol_conf.priv_size += sizeof(u32);
symbol_conf.sort_by_name = true;
}
- annotation_config__init();
+ annotation_config__init(&report.annotation_opts);
}
if (symbol__init(&session->header.env) < 0)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 8affcab..d2539b7 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -143,7 +143,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
return err;
}
- err = symbol__annotate(&he->ms, evsel, 0, &top->annotation_opts, NULL);
+ err = symbol__annotate(&he->ms, evsel, &top->annotation_opts, NULL);
if (err == 0) {
top->sym_filter_entry = he;
} else {
@@ -684,7 +684,9 @@ static void *display_thread(void *arg)
delay_msecs = top->delay_secs * MSEC_PER_SEC;
set_term_quiet_input(&save);
/* trash return*/
- getc(stdin);
+ clearerr(stdin);
+ if (poll(&stdin_poll, 1, 0) > 0)
+ getc(stdin);
while (!done) {
perf_top__print_sym_table(top);
@@ -1683,7 +1685,7 @@ int cmd_top(int argc, const char **argv)
if (status < 0)
goto out_delete_evlist;
- annotation_config__init();
+ annotation_config__init(&top.annotation_opts);
symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
status = symbol__init(NULL);
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 46a72ec..01d5420 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1065,7 +1065,9 @@ static struct syscall_fmt syscall_fmts[] = {
{ .name = "poll", .timeout = true, },
{ .name = "ppoll", .timeout = true, },
{ .name = "prctl",
- .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ },
+ .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */
+ .strtoul = STUL_STRARRAY,
+ .parm = &strarray__prctl_options, },
[1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
[2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
{ .name = "pread", .alias = "pread64", },
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 68039a96..bfb21d0 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -13,6 +13,7 @@
include/uapi/linux/kvm.h
include/uapi/linux/in.h
include/uapi/linux/mount.h
+include/uapi/linux/openat2.h
include/uapi/linux/perf_event.h
include/uapi/linux/prctl.h
include/uapi/linux/sched.h
diff --git a/tools/perf/include/bpf/pid_filter.h b/tools/perf/include/bpf/pid_filter.h
index 607189a..6e61c4b 100644
--- a/tools/perf/include/bpf/pid_filter.h
+++ b/tools/perf/include/bpf/pid_filter.h
@@ -3,7 +3,7 @@
#ifndef _PERF_BPF_PID_FILTER_
#define _PERF_BPF_PID_FILTER_
-#include <bpf/bpf.h>
+#include <bpf.h>
#define pid_filter(name) pid_map(name, bool)
diff --git a/tools/perf/include/bpf/stdio.h b/tools/perf/include/bpf/stdio.h
index 7ca6fa5..316af5b 100644
--- a/tools/perf/include/bpf/stdio.h
+++ b/tools/perf/include/bpf/stdio.h
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-#include <bpf/bpf.h>
+#include <bpf.h>
struct bpf_map SEC("maps") __bpf_stdout__ = {
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
diff --git a/tools/perf/include/bpf/unistd.h b/tools/perf/include/bpf/unistd.h
index d1a35b6..ca7877f 100644
--- a/tools/perf/include/bpf/unistd.h
+++ b/tools/perf/include/bpf/unistd.h
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: LGPL-2.1
-#include <bpf/bpf.h>
+#include <bpf.h>
static int (*bpf_get_current_pid_tgid)(void) = (void *)BPF_FUNC_get_current_pid_tgid;
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 079c77b..27b4da8 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -1082,10 +1082,9 @@ static int process_one_file(const char *fpath, const struct stat *sb,
*/
int main(int argc, char *argv[])
{
- int rc;
+ int rc, ret = 0;
int maxfds;
char ldirname[PATH_MAX];
-
const char *arch;
const char *output_file;
const char *start_dirname;
@@ -1156,7 +1155,8 @@ int main(int argc, char *argv[])
/* Make build fail */
fclose(eventsfp);
free_arch_std_events();
- return 1;
+ ret = 1;
+ goto out_free_mapfile;
} else if (rc) {
goto empty_map;
}
@@ -1174,14 +1174,17 @@ int main(int argc, char *argv[])
/* Make build fail */
fclose(eventsfp);
free_arch_std_events();
- return 1;
+ ret = 1;
}
- return 0;
+
+ goto out_free_mapfile;
empty_map:
fclose(eventsfp);
create_empty_mapping(output_file);
free_arch_std_events();
- return 0;
+out_free_mapfile:
+ free(mapfile);
+ return ret;
}
diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c
index d0b9353..489b5060 100644
--- a/tools/perf/tests/bp_account.c
+++ b/tools/perf/tests/bp_account.c
@@ -19,7 +19,7 @@
#include "../perf-sys.h"
#include "cloexec.h"
-volatile long the_var;
+static volatile long the_var;
static noinline int test_function(void)
{
diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
index 7cb99b4..c2cc42d 100644
--- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
@@ -14,7 +14,7 @@
if [ $had_vfs_getname -eq 1 ] ; then
line=$(perf probe -L getname_flags 2>&1 | egrep 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/')
perf probe -q "vfs_getname=getname_flags:${line} pathname=result->name:string" || \
- perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:string"
+ perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:ustring"
fi
}
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index 5a61043..d6dfe68 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -213,6 +213,8 @@ size_t syscall_arg__scnprintf_x86_arch_prctl_code(char *bf, size_t size, struct
size_t syscall_arg__scnprintf_prctl_option(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_PRCTL_OPTION syscall_arg__scnprintf_prctl_option
+extern struct strarray strarray__prctl_options;
+
size_t syscall_arg__scnprintf_prctl_arg2(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_PRCTL_ARG2 syscall_arg__scnprintf_prctl_arg2
diff --git a/tools/perf/trace/beauty/prctl.c b/tools/perf/trace/beauty/prctl.c
index ba2179a..6fe5ad5 100644
--- a/tools/perf/trace/beauty/prctl.c
+++ b/tools/perf/trace/beauty/prctl.c
@@ -11,9 +11,10 @@
#include "trace/beauty/generated/prctl_option_array.c"
+DEFINE_STRARRAY(prctl_options, "PR_");
+
static size_t prctl__scnprintf_option(int option, char *bf, size_t size, bool show_prefix)
{
- static DEFINE_STRARRAY(prctl_options, "PR_");
return strarray__scnprintf(&strarray__prctl_options, bf, size, "%d", show_prefix, option);
}
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index badbddb..9023267 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -754,10 +754,9 @@ static int annotate_browser__run(struct annotate_browser *browser,
"? Search string backwards\n");
continue;
case 'r':
- {
- script_browse(NULL, NULL);
- continue;
- }
+ script_browse(NULL, NULL);
+ annotate_browser__show(&browser->b, title, help);
+ continue;
case 'k':
notes->options->show_linenr = !notes->options->show_linenr;
break;
@@ -834,13 +833,13 @@ static int annotate_browser__run(struct annotate_browser *browser,
map_symbol__annotation_dump(ms, evsel, browser->opts);
continue;
case 't':
- if (notes->options->show_total_period) {
- notes->options->show_total_period = false;
- notes->options->show_nr_samples = true;
- } else if (notes->options->show_nr_samples)
- notes->options->show_nr_samples = false;
+ if (symbol_conf.show_total_period) {
+ symbol_conf.show_total_period = false;
+ symbol_conf.show_nr_samples = true;
+ } else if (symbol_conf.show_nr_samples)
+ symbol_conf.show_nr_samples = false;
else
- notes->options->show_total_period = true;
+ symbol_conf.show_total_period = true;
annotation__update_column_widths(notes);
continue;
case 'c':
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 22cc240..35f9641 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -174,7 +174,7 @@ static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel,
if (ms->map->dso->annotate_warned)
return -1;
- err = symbol__annotate(ms, evsel, 0, &annotation__default_options, NULL);
+ err = symbol__annotate(ms, evsel, &annotation__default_options, NULL);
if (err) {
char msg[BUFSIZ];
symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index ca73fb7..0ea95be 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1143,93 +1143,70 @@ static int disasm_line__parse(char *line, const char **namep, char **rawp)
}
struct annotate_args {
- size_t privsize;
- struct arch *arch;
- struct map_symbol ms;
- struct evsel *evsel;
+ struct arch *arch;
+ struct map_symbol ms;
+ struct evsel *evsel;
struct annotation_options *options;
- s64 offset;
- char *line;
- int line_nr;
+ s64 offset;
+ char *line;
+ int line_nr;
};
-static void annotation_line__delete(struct annotation_line *al)
+static void annotation_line__init(struct annotation_line *al,
+ struct annotate_args *args,
+ int nr)
{
- void *ptr = (void *) al - al->privsize;
-
- free_srcline(al->path);
- zfree(&al->line);
- free(ptr);
+ al->offset = args->offset;
+ al->line = strdup(args->line);
+ al->line_nr = args->line_nr;
+ al->data_nr = nr;
}
-/*
- * Allocating the annotation line data with following
- * structure:
- *
- * --------------------------------------
- * private space | struct annotation_line
- * --------------------------------------
- *
- * Size of the private space is stored in 'struct annotation_line'.
- *
- */
-static struct annotation_line *
-annotation_line__new(struct annotate_args *args, size_t privsize)
+static void annotation_line__exit(struct annotation_line *al)
+{
+ free_srcline(al->path);
+ zfree(&al->line);
+}
+
+static size_t disasm_line_size(int nr)
{
struct annotation_line *al;
- struct evsel *evsel = args->evsel;
- size_t size = privsize + sizeof(*al);
- int nr = 1;
- if (perf_evsel__is_group_event(evsel))
- nr = evsel->core.nr_members;
-
- size += sizeof(al->data[0]) * nr;
-
- al = zalloc(size);
- if (al) {
- al = (void *) al + privsize;
- al->privsize = privsize;
- al->offset = args->offset;
- al->line = strdup(args->line);
- al->line_nr = args->line_nr;
- al->data_nr = nr;
- }
-
- return al;
+ return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr));
}
/*
* Allocating the disasm annotation line data with
* following structure:
*
- * ------------------------------------------------------------
- * privsize space | struct disasm_line | struct annotation_line
- * ------------------------------------------------------------
+ * -------------------------------------------
+ * struct disasm_line | struct annotation_line
+ * -------------------------------------------
*
* We have 'struct annotation_line' member as last member
* of 'struct disasm_line' to have an easy access.
- *
*/
static struct disasm_line *disasm_line__new(struct annotate_args *args)
{
struct disasm_line *dl = NULL;
- struct annotation_line *al;
- size_t privsize = args->privsize + offsetof(struct disasm_line, al);
+ int nr = 1;
- al = annotation_line__new(args, privsize);
- if (al != NULL) {
- dl = disasm_line(al);
+ if (perf_evsel__is_group_event(args->evsel))
+ nr = args->evsel->core.nr_members;
- if (dl->al.line == NULL)
- goto out_delete;
+ dl = zalloc(disasm_line_size(nr));
+ if (!dl)
+ return NULL;
- if (args->offset != -1) {
- if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
- goto out_free_line;
+ annotation_line__init(&dl->al, args, nr);
+ if (dl->al.line == NULL)
+ goto out_delete;
- disasm_line__init_ins(dl, args->arch, &args->ms);
- }
+ if (args->offset != -1) {
+ if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
+ goto out_free_line;
+
+ disasm_line__init_ins(dl, args->arch, &args->ms);
}
return dl;
@@ -1248,7 +1225,8 @@ void disasm_line__free(struct disasm_line *dl)
else
ins__delete(&dl->ops);
zfree(&dl->ins.name);
- annotation_line__delete(&dl->al);
+ annotation_line__exit(&dl->al);
+ free(dl);
}
int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name)
@@ -2149,13 +2127,12 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel)
annotation__calc_percent(notes, evsel, symbol__size(sym));
}
-int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, size_t privsize,
+int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
struct annotation_options *options, struct arch **parch)
{
struct symbol *sym = ms->sym;
struct annotation *notes = symbol__annotation(sym);
struct annotate_args args = {
- .privsize = privsize,
.evsel = evsel,
.options = options,
};
@@ -2644,6 +2621,8 @@ void annotation__set_offsets(struct annotation *notes, s64 size)
struct annotation_line *al;
notes->max_line_len = 0;
+ notes->nr_entries = 0;
+ notes->nr_asm_entries = 0;
list_for_each_entry(al, ¬es->src->source, node) {
size_t line_len = strlen(al->line);
@@ -2790,7 +2769,7 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel,
struct symbol *sym = ms->sym;
struct rb_root source_line = RB_ROOT;
- if (symbol__annotate(ms, evsel, 0, opts, NULL) < 0)
+ if (symbol__annotate(ms, evsel, opts, NULL) < 0)
return -1;
symbol__calc_percent(sym, evsel);
@@ -2915,9 +2894,9 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
percent = annotation_data__percent(&al->data[i], percent_type);
obj__set_percent_color(obj, percent, current_entry);
- if (notes->options->show_total_period) {
+ if (symbol_conf.show_total_period) {
obj__printf(obj, "%11" PRIu64 " ", al->data[i].he.period);
- } else if (notes->options->show_nr_samples) {
+ } else if (symbol_conf.show_nr_samples) {
obj__printf(obj, "%6" PRIu64 " ",
al->data[i].he.nr_samples);
} else {
@@ -2931,8 +2910,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
obj__printf(obj, "%-*s", pcnt_width, " ");
else {
obj__printf(obj, "%-*s", pcnt_width,
- notes->options->show_total_period ? "Period" :
- notes->options->show_nr_samples ? "Samples" : "Percent");
+ symbol_conf.show_total_period ? "Period" :
+ symbol_conf.show_nr_samples ? "Samples" : "Percent");
}
}
@@ -3070,7 +3049,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
if (perf_evsel__is_group_event(evsel))
nr_pcnt = evsel->core.nr_members;
- err = symbol__annotate(ms, evsel, 0, options, parch);
+ err = symbol__annotate(ms, evsel, options, parch);
if (err)
goto out_free_offsets;
@@ -3094,69 +3073,46 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
return err;
}
-#define ANNOTATION__CFG(n) \
- { .name = #n, .value = &annotation__default_options.n, }
-
-/*
- * Keep the entries sorted, they are bsearch'ed
- */
-static struct annotation_config {
- const char *name;
- void *value;
-} annotation__configs[] = {
- ANNOTATION__CFG(hide_src_code),
- ANNOTATION__CFG(jump_arrows),
- ANNOTATION__CFG(offset_level),
- ANNOTATION__CFG(show_linenr),
- ANNOTATION__CFG(show_nr_jumps),
- ANNOTATION__CFG(show_nr_samples),
- ANNOTATION__CFG(show_total_period),
- ANNOTATION__CFG(use_offset),
-};
-
-#undef ANNOTATION__CFG
-
-static int annotation_config__cmp(const void *name, const void *cfgp)
+static int annotation__config(const char *var, const char *value, void *data)
{
- const struct annotation_config *cfg = cfgp;
-
- return strcmp(name, cfg->name);
-}
-
-static int annotation__config(const char *var, const char *value,
- void *data __maybe_unused)
-{
- struct annotation_config *cfg;
- const char *name;
+ struct annotation_options *opt = data;
if (!strstarts(var, "annotate."))
return 0;
- name = var + 9;
- cfg = bsearch(name, annotation__configs, ARRAY_SIZE(annotation__configs),
- sizeof(struct annotation_config), annotation_config__cmp);
+ if (!strcmp(var, "annotate.offset_level")) {
+ perf_config_u8(&opt->offset_level, "offset_level", value);
- if (cfg == NULL)
- pr_debug("%s variable unknown, ignoring...", var);
- else if (strcmp(var, "annotate.offset_level") == 0) {
- perf_config_int(cfg->value, name, value);
-
- if (*(int *)cfg->value > ANNOTATION__MAX_OFFSET_LEVEL)
- *(int *)cfg->value = ANNOTATION__MAX_OFFSET_LEVEL;
- else if (*(int *)cfg->value < ANNOTATION__MIN_OFFSET_LEVEL)
- *(int *)cfg->value = ANNOTATION__MIN_OFFSET_LEVEL;
+ if (opt->offset_level > ANNOTATION__MAX_OFFSET_LEVEL)
+ opt->offset_level = ANNOTATION__MAX_OFFSET_LEVEL;
+ else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL)
+ opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL;
+ } else if (!strcmp(var, "annotate.hide_src_code")) {
+ opt->hide_src_code = perf_config_bool("hide_src_code", value);
+ } else if (!strcmp(var, "annotate.jump_arrows")) {
+ opt->jump_arrows = perf_config_bool("jump_arrows", value);
+ } else if (!strcmp(var, "annotate.show_linenr")) {
+ opt->show_linenr = perf_config_bool("show_linenr", value);
+ } else if (!strcmp(var, "annotate.show_nr_jumps")) {
+ opt->show_nr_jumps = perf_config_bool("show_nr_jumps", value);
+ } else if (!strcmp(var, "annotate.show_nr_samples")) {
+ symbol_conf.show_nr_samples = perf_config_bool("show_nr_samples",
+ value);
+ } else if (!strcmp(var, "annotate.show_total_period")) {
+ symbol_conf.show_total_period = perf_config_bool("show_total_period",
+ value);
+ } else if (!strcmp(var, "annotate.use_offset")) {
+ opt->use_offset = perf_config_bool("use_offset", value);
} else {
- *(bool *)cfg->value = perf_config_bool(name, value);
+ pr_debug("%s variable unknown, ignoring...", var);
}
+
return 0;
}
-void annotation_config__init(void)
+void annotation_config__init(struct annotation_options *opt)
{
- perf_config(annotation__config, NULL);
-
- annotation__default_options.show_total_period = symbol_conf.show_total_period;
- annotation__default_options.show_nr_samples = symbol_conf.show_nr_samples;
+ perf_config(annotation__config, opt);
}
static unsigned int parse_percent_type(char *str1, char *str2)
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 455403e..0012586 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -83,8 +83,6 @@ struct annotation_options {
full_path,
show_linenr,
show_nr_jumps,
- show_nr_samples,
- show_total_period,
show_minmax_cycle,
show_asm_raw,
annotate_src;
@@ -141,7 +139,6 @@ struct annotation_line {
u64 cycles;
u64 cycles_max;
u64 cycles_min;
- size_t privsize;
char *path;
u32 idx;
int idx_asm;
@@ -309,7 +306,7 @@ static inline int annotation__cycles_width(struct annotation *notes)
static inline int annotation__pcnt_width(struct annotation *notes)
{
- return (notes->options->show_total_period ? 12 : 7) * notes->nr_events;
+ return (symbol_conf.show_total_period ? 12 : 7) * notes->nr_events;
}
static inline bool annotation_line__filter(struct annotation_line *al, struct annotation *notes)
@@ -352,7 +349,7 @@ struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists);
void symbol__annotate_zero_histograms(struct symbol *sym);
int symbol__annotate(struct map_symbol *ms,
- struct evsel *evsel, size_t privsize,
+ struct evsel *evsel,
struct annotation_options *options,
struct arch **parch);
int symbol__annotate2(struct map_symbol *ms,
@@ -413,7 +410,7 @@ static inline int symbol__tui_annotate(struct map_symbol *ms __maybe_unused,
}
#endif
-void annotation_config__init(void);
+void annotation_config__init(struct annotation_options *opt);
int annotate_parse_percent_type(const struct option *opt, const char *_str,
int unset);
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index eb087e7..3571ce7 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -629,8 +629,10 @@ int auxtrace_record__options(struct auxtrace_record *itr,
struct evlist *evlist,
struct record_opts *opts)
{
- if (itr)
+ if (itr) {
+ itr->evlist = evlist;
return itr->recording_options(itr, evlist, opts);
+ }
return 0;
}
@@ -664,6 +666,24 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
return -EINVAL;
}
+int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx)
+{
+ struct evsel *evsel;
+
+ if (!itr->evlist || !itr->pmu)
+ return -EINVAL;
+
+ evlist__for_each_entry(itr->evlist, evsel) {
+ if (evsel->core.attr.type == itr->pmu->type) {
+ if (evsel->disabled)
+ return 0;
+ return perf_evlist__enable_event_idx(itr->evlist, evsel,
+ idx);
+ }
+ }
+ return -EINVAL;
+}
+
/*
* Event record size is 16-bit which results in a maximum size of about 64KiB.
* Allow about 4KiB for the rest of the sample record, to give a maximum
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 749d72c..e58ef16 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -29,6 +29,7 @@ struct record_opts;
struct perf_record_auxtrace_error;
struct perf_record_auxtrace_info;
struct events_stats;
+struct perf_pmu;
enum auxtrace_error_type {
PERF_AUXTRACE_ERROR_ITRACE = 1,
@@ -322,6 +323,8 @@ struct auxtrace_mmap_params {
* @read_finish: called after reading from an auxtrace mmap
* @alignment: alignment (if any) for AUX area data
* @default_aux_sample_size: default sample size for --aux sample option
+ * @pmu: associated pmu
+ * @evlist: selected events list
*/
struct auxtrace_record {
int (*recording_options)(struct auxtrace_record *itr,
@@ -346,6 +349,8 @@ struct auxtrace_record {
int (*read_finish)(struct auxtrace_record *itr, int idx);
unsigned int alignment;
unsigned int default_aux_sample_size;
+ struct perf_pmu *pmu;
+ struct evlist *evlist;
};
/**
@@ -537,6 +542,7 @@ int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
struct auxtrace_mmap *mm,
unsigned char *data, u64 *head, u64 *old);
u64 auxtrace_record__reference(struct auxtrace_record *itr);
+int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx);
int auxtrace_index__auxtrace_event(struct list_head *head, union perf_event *event,
off_t file_offset);
diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c
index c4b030b..fbbb6d6 100644
--- a/tools/perf/util/block-info.c
+++ b/tools/perf/util/block-info.c
@@ -295,7 +295,8 @@ static int block_range_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
end_line = map__srcline(he->ms.map, bi->sym->start + bi->end,
he->ms.sym);
- if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) {
+ if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) &&
+ (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) {
scnprintf(buf, sizeof(buf), "[%s -> %s]",
start_line, end_line);
} else {
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 0bc9c4d..ef38eba 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -374,6 +374,18 @@ int perf_config_int(int *dest, const char *name, const char *value)
return 0;
}
+int perf_config_u8(u8 *dest, const char *name, const char *value)
+{
+ long ret = 0;
+
+ if (!perf_parse_long(value, &ret)) {
+ bad_config(name);
+ return -1;
+ }
+ *dest = ret;
+ return 0;
+}
+
static int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
{
int ret;
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index bd0a589..c10b66d 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -29,6 +29,7 @@ typedef int (*config_fn_t)(const char *, const char *, void *);
int perf_default_config(const char *, const char *, void *);
int perf_config(config_fn_t fn, void *);
int perf_config_int(int *dest, const char *, const char *);
+int perf_config_u8(u8 *dest, const char *name, const char *value);
int perf_config_u64(u64 *dest, const char *, const char *);
int perf_config_bool(const char *, const char *);
int config_error_nonbool(const char *);
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 6242a92..4154f94 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -343,11 +343,11 @@ static const char *normalize_arch(char *arch)
const char *perf_env__arch(struct perf_env *env)
{
- struct utsname uts;
char *arch_name;
if (!env || !env->arch) { /* Assume local operation */
- if (uname(&uts) < 0)
+ static struct utsname uts = { .machine[0] = '\0', };
+ if (uts.machine[0] == '\0' && uname(&uts) < 0)
return NULL;
arch_name = uts.machine;
} else
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index eae47c2..b5af680 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -288,6 +288,7 @@ static const char *kinc_fetch_script =
"obj-y := dummy.o\n"
"\\$(obj)/%.o: \\$(src)/%.c\n"
"\t@echo -n \"\\$(NOSTDINC_FLAGS) \\$(LINUXINCLUDE) \\$(EXTRA_CFLAGS)\"\n"
+"\t\\$(CC) -c -o \\$@ \\$<\n"
"EOF\n"
"touch $TMPDIR/dummy.c\n"
"make -s -C $KBUILD_DIR M=$TMPDIR $KBUILD_OPTS dummy.o 2>/dev/null\n"
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index c8c5410..fb5c2cd 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -686,6 +686,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
dso__set_module_info(dso, m, machine);
dso__set_long_name(dso, strdup(filename), true);
+ dso->kernel = DSO_TYPE_KERNEL;
}
dso__get(dso);
@@ -726,9 +727,17 @@ static int machine__process_ksymbol_register(struct machine *machine,
struct map *map = maps__find(&machine->kmaps, event->ksymbol.addr);
if (!map) {
- map = dso__new_map(event->ksymbol.name);
- if (!map)
+ struct dso *dso = dso__new(event->ksymbol.name);
+
+ if (dso) {
+ dso->kernel = DSO_TYPE_KERNEL;
+ map = map__new2(0, dso);
+ }
+
+ if (!dso || !map) {
+ dso__put(dso);
return -ENOMEM;
+ }
map->start = event->ksymbol.addr;
map->end = map->start + event->ksymbol.len;
@@ -972,7 +981,6 @@ int machine__create_extra_kernel_map(struct machine *machine,
kmap = map__kmap(map);
- kmap->kmaps = &machine->kmaps;
strlcpy(kmap->name, xm->name, KMAP_NAME_LEN);
maps__insert(&machine->kmaps, map);
@@ -1082,9 +1090,6 @@ int __weak machine__create_extra_kernel_maps(struct machine *machine __maybe_unu
static int
__machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
{
- struct kmap *kmap;
- struct map *map;
-
/* In case of renewal the kernel map, destroy previous one */
machine__destroy_kernel_maps(machine);
@@ -1093,14 +1098,7 @@ __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
return -1;
machine->vmlinux_map->map_ip = machine->vmlinux_map->unmap_ip = identity__map_ip;
- map = machine__kernel_map(machine);
- kmap = map__kmap(map);
- if (!kmap)
- return -1;
-
- kmap->kmaps = &machine->kmaps;
- maps__insert(&machine->kmaps, map);
-
+ maps__insert(&machine->kmaps, machine->vmlinux_map);
return 0;
}
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index f67960b..b342f74 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -89,7 +89,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
return true;
}
- if (!strncmp(filename, "/system/lib/", 11)) {
+ if (!strncmp(filename, "/system/lib/", 12)) {
char *ndk, *app;
const char *arch;
size_t ndk_length;
@@ -375,8 +375,13 @@ struct symbol *map__find_symbol_by_name(struct map *map, const char *name)
struct map *map__clone(struct map *from)
{
- struct map *map = memdup(from, sizeof(*map));
+ size_t size = sizeof(struct map);
+ struct map *map;
+ if (from->dso && from->dso->kernel)
+ size += sizeof(struct kmap);
+
+ map = memdup(from, size);
if (map != NULL) {
refcount_set(&map->refcnt, 1);
RB_CLEAR_NODE(&map->rb_node);
@@ -426,7 +431,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
if (map && map->dso) {
char *srcline = map__srcline(map, addr, NULL);
- if (srcline != SRCLINE_UNKNOWN)
+ if (strncmp(srcline, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)
ret = fprintf(fp, "%s%s", prefix, srcline);
free_srcline(srcline);
}
@@ -538,6 +543,16 @@ void maps__insert(struct maps *maps, struct map *map)
__maps__insert(maps, map);
++maps->nr_maps;
+ if (map->dso && map->dso->kernel) {
+ struct kmap *kmap = map__kmap(map);
+
+ if (kmap)
+ kmap->kmaps = maps;
+ else
+ pr_err("Internal error: kernel dso with non kernel map\n");
+ }
+
+
/*
* If we already performed some search by name, then we need to add the just
* inserted map and resort.
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index c01ba6f..a7dc0b0 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -257,21 +257,15 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
path = zalloc(sizeof(*path));
if (!path)
return NULL;
- path->system = malloc(MAX_EVENT_LENGTH);
- if (!path->system) {
+ if (asprintf(&path->system, "%.*s", MAX_EVENT_LENGTH, sys_dirent->d_name) < 0) {
free(path);
return NULL;
}
- path->name = malloc(MAX_EVENT_LENGTH);
- if (!path->name) {
+ if (asprintf(&path->name, "%.*s", MAX_EVENT_LENGTH, evt_dirent->d_name) < 0) {
zfree(&path->system);
free(path);
return NULL;
}
- strncpy(path->system, sys_dirent->d_name,
- MAX_EVENT_LENGTH);
- strncpy(path->name, evt_dirent->d_name,
- MAX_EVENT_LENGTH);
return path;
}
}
@@ -1219,7 +1213,7 @@ static int config_attr(struct perf_event_attr *attr,
static int get_config_terms(struct list_head *head_config,
struct list_head *head_terms __maybe_unused)
{
-#define ADD_CONFIG_TERM(__type) \
+#define ADD_CONFIG_TERM(__type, __weak) \
struct perf_evsel_config_term *__t; \
\
__t = zalloc(sizeof(*__t)); \
@@ -1228,18 +1222,18 @@ static int get_config_terms(struct list_head *head_config,
\
INIT_LIST_HEAD(&__t->list); \
__t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \
- __t->weak = term->weak; \
+ __t->weak = __weak; \
list_add_tail(&__t->list, head_terms)
-#define ADD_CONFIG_TERM_VAL(__type, __name, __val) \
+#define ADD_CONFIG_TERM_VAL(__type, __name, __val, __weak) \
do { \
- ADD_CONFIG_TERM(__type); \
+ ADD_CONFIG_TERM(__type, __weak); \
__t->val.__name = __val; \
} while (0)
-#define ADD_CONFIG_TERM_STR(__type, __val) \
+#define ADD_CONFIG_TERM_STR(__type, __val, __weak) \
do { \
- ADD_CONFIG_TERM(__type); \
+ ADD_CONFIG_TERM(__type, __weak); \
__t->val.str = strdup(__val); \
if (!__t->val.str) { \
zfree(&__t); \
@@ -1253,62 +1247,62 @@ do { \
list_for_each_entry(term, head_config, list) {
switch (term->type_term) {
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
- ADD_CONFIG_TERM_VAL(PERIOD, period, term->val.num);
+ ADD_CONFIG_TERM_VAL(PERIOD, period, term->val.num, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
- ADD_CONFIG_TERM_VAL(FREQ, freq, term->val.num);
+ ADD_CONFIG_TERM_VAL(FREQ, freq, term->val.num, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_TIME:
- ADD_CONFIG_TERM_VAL(TIME, time, term->val.num);
+ ADD_CONFIG_TERM_VAL(TIME, time, term->val.num, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
- ADD_CONFIG_TERM_STR(CALLGRAPH, term->val.str);
+ ADD_CONFIG_TERM_STR(CALLGRAPH, term->val.str, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
- ADD_CONFIG_TERM_STR(BRANCH, term->val.str);
+ ADD_CONFIG_TERM_STR(BRANCH, term->val.str, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
ADD_CONFIG_TERM_VAL(STACK_USER, stack_user,
- term->val.num);
+ term->val.num, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_INHERIT:
ADD_CONFIG_TERM_VAL(INHERIT, inherit,
- term->val.num ? 1 : 0);
+ term->val.num ? 1 : 0, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
ADD_CONFIG_TERM_VAL(INHERIT, inherit,
- term->val.num ? 0 : 1);
+ term->val.num ? 0 : 1, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
ADD_CONFIG_TERM_VAL(MAX_STACK, max_stack,
- term->val.num);
+ term->val.num, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
ADD_CONFIG_TERM_VAL(MAX_EVENTS, max_events,
- term->val.num);
+ term->val.num, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite,
- term->val.num ? 1 : 0);
+ term->val.num ? 1 : 0, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite,
- term->val.num ? 0 : 1);
+ term->val.num ? 0 : 1, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
- ADD_CONFIG_TERM_STR(DRV_CFG, term->val.str);
+ ADD_CONFIG_TERM_STR(DRV_CFG, term->val.str, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_PERCORE:
ADD_CONFIG_TERM_VAL(PERCORE, percore,
- term->val.num ? true : false);
+ term->val.num ? true : false, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
ADD_CONFIG_TERM_VAL(AUX_OUTPUT, aux_output,
- term->val.num ? 1 : 0);
+ term->val.num ? 1 : 0, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
ADD_CONFIG_TERM_VAL(AUX_SAMPLE_SIZE, aux_sample_size,
- term->val.num);
+ term->val.num, term->weak);
break;
default:
break;
@@ -1345,7 +1339,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config,
}
if (bits)
- ADD_CONFIG_TERM_VAL(CFG_CHG, cfg_chg, bits);
+ ADD_CONFIG_TERM_VAL(CFG_CHG, cfg_chg, bits, false);
#undef ADD_CONFIG_TERM
return 0;
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 5003ba4..8c85294 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -206,6 +206,9 @@ static struct strlist *__probe_file__get_namelist(int fd, bool include_group)
} else
ret = strlist__add(sl, tev.event);
clear_probe_trace_event(&tev);
+ /* Skip if there is same name multi-probe event in the list */
+ if (ret == -EEXIST)
+ ret = 0;
if (ret < 0)
break;
}
@@ -301,10 +304,15 @@ int probe_file__get_events(int fd, struct strfilter *filter,
p = strchr(ent->s, ':');
if ((p && strfilter__compare(filter, p + 1)) ||
strfilter__compare(filter, ent->s)) {
- strlist__add(plist, ent->s);
+ ret = strlist__add(plist, ent->s);
+ if (ret == -ENOMEM) {
+ pr_err("strlist__add failed with -ENOMEM\n");
+ goto out;
+ }
ret = 0;
}
}
+out:
strlist__delete(namelist);
return ret;
@@ -511,7 +519,11 @@ static int probe_cache__load(struct probe_cache *pcache)
ret = -EINVAL;
goto out;
}
- strlist__add(entry->tevlist, buf);
+ ret = strlist__add(entry->tevlist, buf);
+ if (ret == -ENOMEM) {
+ pr_err("strlist__add failed with -ENOMEM\n");
+ goto out;
+ }
}
}
out:
@@ -672,7 +684,12 @@ int probe_cache__add_entry(struct probe_cache *pcache,
command = synthesize_probe_trace_command(&tevs[i]);
if (!command)
goto out_err;
- strlist__add(entry->tevlist, command);
+ ret = strlist__add(entry->tevlist, command);
+ if (ret == -ENOMEM) {
+ pr_err("strlist__add failed with -ENOMEM\n");
+ goto out_err;
+ }
+
free(command);
}
list_add_tail(&entry->node, &pcache->entries);
@@ -853,9 +870,15 @@ int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname)
break;
}
- strlist__add(entry->tevlist, buf);
+ ret = strlist__add(entry->tevlist, buf);
+
free(buf);
entry = NULL;
+
+ if (ret == -ENOMEM) {
+ pr_err("strlist__add failed with -ENOMEM\n");
+ break;
+ }
}
if (entry) {
list_del_init(&entry->node);
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 1c817ad..e4cff49 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -637,14 +637,19 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod,
return -EINVAL;
}
- /* Try to get actual symbol name from symtab */
- symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL);
+ if (dwarf_entrypc(sp_die, &eaddr) == 0) {
+ /* If the DIE has entrypc, use it. */
+ symbol = dwarf_diename(sp_die);
+ } else {
+ /* Try to get actual symbol name and address from symtab */
+ symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL);
+ eaddr = sym.st_value;
+ }
if (!symbol) {
pr_warning("Failed to find symbol at 0x%lx\n",
(unsigned long)paddr);
return -ENOENT;
}
- eaddr = sym.st_value;
tp->offset = (unsigned long)(paddr - eaddr);
tp->address = (unsigned long)paddr;
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index aa344a1..8a065a6 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -2,11 +2,13 @@
from subprocess import Popen, PIPE
from re import sub
-def clang_has_option(option):
- return [o for o in Popen(['clang', option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ]
-
cc = getenv("CC")
-if cc == "clang":
+cc_is_clang = b"clang version" in Popen([cc, "-v"], stderr=PIPE).stderr.readline()
+
+def clang_has_option(option):
+ return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ]
+
+if cc_is_clang:
from distutils.sysconfig import get_config_vars
vars = get_config_vars()
for var in ('CFLAGS', 'OPT'):
@@ -40,7 +42,7 @@
cflags = getenv('CFLAGS', '').split()
# switch off several checks (need to be at the end of cflags list)
cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ]
-if cc != "clang":
+if not cc_is_clang:
cflags += ['-Wno-cast-function-type' ]
src_perf = getenv('srctree') + '/tools/perf'
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 2c41d47..90d23cc 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -18,7 +18,6 @@
* AGGR_NONE: Use matching CPU
* AGGR_THREAD: Not supported?
*/
-static bool have_frontend_stalled;
struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;
@@ -144,7 +143,6 @@ void runtime_stat__exit(struct runtime_stat *st)
void perf_stat__init_shadow_stats(void)
{
- have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
runtime_stat__init(&rt_stat);
}
@@ -853,10 +851,6 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, "%7.2f ",
"stalled cycles per insn",
ratio);
- } else if (have_frontend_stalled) {
- out->new_line(config, ctxp);
- print_metric(config, ctxp, NULL, "%7.2f ",
- "stalled cycles per insn", 0);
}
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 3b379b1..26bc6a0 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -635,9 +635,12 @@ int modules__parse(const char *filename, void *arg,
static bool symbol__is_idle(const char *name)
{
const char * const idle_symbols[] = {
+ "acpi_idle_do_entry",
+ "acpi_processor_ffh_cstate_enter",
"arch_cpu_idle",
"cpu_idle",
"cpu_startup_entry",
+ "idle_cpu",
"intel_idle",
"default_idle",
"native_safe_halt",
@@ -651,13 +654,17 @@ static bool symbol__is_idle(const char *name)
NULL
};
int i;
+ static struct strlist *idle_symbols_list;
- for (i = 0; idle_symbols[i]; i++) {
- if (!strcmp(idle_symbols[i], name))
- return true;
- }
+ if (idle_symbols_list)
+ return strlist__has_entry(idle_symbols_list, name);
- return false;
+ idle_symbols_list = strlist__new(NULL, NULL);
+
+ for (i = 0; idle_symbols[i]; i++)
+ strlist__add(idle_symbols_list, idle_symbols[i]);
+
+ return strlist__has_entry(idle_symbols_list, name);
}
static int map__process_kallsym_symbol(void *arg, const char *name,
@@ -1615,7 +1622,12 @@ int dso__load(struct dso *dso, struct map *map)
goto out;
}
- if (dso->kernel) {
+ kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
+ dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
+ dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE ||
+ dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
+
+ if (dso->kernel && !kmod) {
if (dso->kernel == DSO_TYPE_KERNEL)
ret = dso__load_kernel_sym(dso, map);
else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
@@ -1643,12 +1655,6 @@ int dso__load(struct dso *dso, struct map *map)
if (!name)
goto out;
- kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
- dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
- dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE ||
- dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
-
-
/*
* Read the build id if possible. This is required for
* DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
index 33dc34d..20f4634 100644
--- a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
@@ -82,7 +82,7 @@ static struct pci_access *pci_acc;
static struct pci_dev *amd_fam14h_pci_dev;
static int nbp1_entered;
-struct timespec start_time;
+static struct timespec start_time;
static unsigned long long timediff;
#ifdef DEBUG
diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
index 3c4cee1..a65f7d0 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -19,7 +19,7 @@ struct cpuidle_monitor cpuidle_sysfs_monitor;
static unsigned long long **previous_count;
static unsigned long long **current_count;
-struct timespec start_time;
+static struct timespec start_time;
static unsigned long long timediff;
static int cpuidle_get_count_percent(unsigned int id, double *percent,
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
index 6d44fec..7c77045 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
@@ -27,6 +27,8 @@ struct cpuidle_monitor *all_monitors[] = {
0
};
+int cpu_count;
+
static struct cpuidle_monitor *monitors[MONITORS_MAX];
static unsigned int avail_monitors;
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
index 5b5eb1d..c559d31 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
@@ -25,7 +25,7 @@
#endif
#define CSTATE_DESC_LEN 60
-int cpu_count;
+extern int cpu_count;
/* Hard to define the right names ...: */
enum power_range_e {
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index 13f1e8b..2b65512 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -16,7 +16,7 @@
%: %.c
@mkdir -p $(BUILD_OUTPUT)
- $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS)
+ $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap
.PHONY : clean
clean :
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 31c1ca0..33b3708 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -30,7 +30,7 @@
#include <sched.h>
#include <time.h>
#include <cpuid.h>
-#include <linux/capability.h>
+#include <sys/capability.h>
#include <errno.h>
#include <math.h>
@@ -304,6 +304,10 @@ int *irqs_per_cpu; /* indexed by cpu_num */
void setup_all_buffers(void);
+char *sys_lpi_file;
+char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
+char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
+
int cpu_is_not_present(int cpu)
{
return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
@@ -2916,8 +2920,6 @@ int snapshot_gfx_mhz(void)
*
* record snapshot of
* /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
- *
- * return 1 if config change requires a restart, else return 0
*/
int snapshot_cpu_lpi_us(void)
{
@@ -2941,17 +2943,14 @@ int snapshot_cpu_lpi_us(void)
/*
* snapshot_sys_lpi()
*
- * record snapshot of
- * /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us
- *
- * return 1 if config change requires a restart, else return 0
+ * record snapshot of sys_lpi_file
*/
int snapshot_sys_lpi_us(void)
{
FILE *fp;
int retval;
- fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r");
+ fp = fopen_or_die(sys_lpi_file, "r");
retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
if (retval != 1) {
@@ -3151,27 +3150,41 @@ void check_dev_msr()
err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
}
-void check_permissions()
+/*
+ * check for CAP_SYS_RAWIO
+ * return 0 on success
+ * return 1 on fail
+ */
+int check_for_cap_sys_rawio(void)
{
- struct __user_cap_header_struct cap_header_data;
- cap_user_header_t cap_header = &cap_header_data;
- struct __user_cap_data_struct cap_data_data;
- cap_user_data_t cap_data = &cap_data_data;
- extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
+ cap_t caps;
+ cap_flag_value_t cap_flag_value;
+
+ caps = cap_get_proc();
+ if (caps == NULL)
+ err(-6, "cap_get_proc\n");
+
+ if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
+ err(-6, "cap_get\n");
+
+ if (cap_flag_value != CAP_SET) {
+ warnx("capget(CAP_SYS_RAWIO) failed,"
+ " try \"# setcap cap_sys_rawio=ep %s\"", progname);
+ return 1;
+ }
+
+ if (cap_free(caps) == -1)
+ err(-6, "cap_free\n");
+
+ return 0;
+}
+void check_permissions(void)
+{
int do_exit = 0;
char pathname[32];
/* check for CAP_SYS_RAWIO */
- cap_header->pid = getpid();
- cap_header->version = _LINUX_CAPABILITY_VERSION;
- if (capget(cap_header, cap_data) < 0)
- err(-6, "capget(2) failed");
-
- if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
- do_exit++;
- warnx("capget(CAP_SYS_RAWIO) failed,"
- " try \"# setcap cap_sys_rawio=ep %s\"", progname);
- }
+ do_exit += check_for_cap_sys_rawio();
/* test file permissions */
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
@@ -3265,6 +3278,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
+ case INTEL_FAM6_ATOM_TREMONT: /* EHL */
pkg_cstate_limits = glm_pkg_cstate_limits;
break;
default:
@@ -3336,6 +3350,17 @@ int is_skx(unsigned int family, unsigned int model)
}
return 0;
}
+int is_ehl(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_ATOM_TREMONT:
+ return 1;
+ }
+ return 0;
+}
int has_turbo_ratio_limit(unsigned int family, unsigned int model)
{
@@ -3478,6 +3503,23 @@ dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
dump_nhm_cst_cfg();
}
+static void dump_sysfs_file(char *path)
+{
+ FILE *input;
+ char cpuidle_buf[64];
+
+ input = fopen(path, "r");
+ if (input == NULL) {
+ if (debug)
+ fprintf(outf, "NSFOD %s\n", path);
+ return;
+ }
+ if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
+ err(1, "%s: failed to read file", path);
+ fclose(input);
+
+ fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
+}
static void
dump_sysfs_cstate_config(void)
{
@@ -3491,6 +3533,15 @@ dump_sysfs_cstate_config(void)
if (!DO_BIC(BIC_sysfs))
return;
+ if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
+ fprintf(outf, "cpuidle not loaded\n");
+ return;
+ }
+
+ dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver");
+ dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor");
+ dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro");
+
for (state = 0; state < 10; ++state) {
sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
@@ -3894,6 +3945,20 @@ void rapl_probe_intel(unsigned int family, unsigned int model)
else
BIC_PRESENT(BIC_PkgWatt);
break;
+ case INTEL_FAM6_ATOM_TREMONT: /* EHL */
+ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
+ if (rapl_joules) {
+ BIC_PRESENT(BIC_Pkg_J);
+ BIC_PRESENT(BIC_Cor_J);
+ BIC_PRESENT(BIC_RAM_J);
+ BIC_PRESENT(BIC_GFX_J);
+ } else {
+ BIC_PRESENT(BIC_PkgWatt);
+ BIC_PRESENT(BIC_CorWatt);
+ BIC_PRESENT(BIC_RAMWatt);
+ BIC_PRESENT(BIC_GFXWatt);
+ }
+ break;
case INTEL_FAM6_SKYLAKE_L: /* SKL */
case INTEL_FAM6_CANNONLAKE_L: /* CNL */
do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
@@ -4295,6 +4360,7 @@ int has_snb_msrs(unsigned int family, unsigned int model)
case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
+ case INTEL_FAM6_ATOM_TREMONT: /* EHL */
return 1;
}
return 0;
@@ -4324,6 +4390,7 @@ int has_c8910_msrs(unsigned int family, unsigned int model)
case INTEL_FAM6_CANNONLAKE_L: /* CNL */
case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+ case INTEL_FAM6_ATOM_TREMONT: /* EHL */
return 1;
}
return 0;
@@ -4610,14 +4677,24 @@ unsigned int intel_model_duplicates(unsigned int model)
case INTEL_FAM6_SKYLAKE:
case INTEL_FAM6_KABYLAKE_L:
case INTEL_FAM6_KABYLAKE:
+ case INTEL_FAM6_COMETLAKE_L:
+ case INTEL_FAM6_COMETLAKE:
return INTEL_FAM6_SKYLAKE_L;
case INTEL_FAM6_ICELAKE_L:
case INTEL_FAM6_ICELAKE_NNPI:
+ case INTEL_FAM6_TIGERLAKE_L:
+ case INTEL_FAM6_TIGERLAKE:
return INTEL_FAM6_CANNONLAKE_L;
case INTEL_FAM6_ATOM_TREMONT_D:
return INTEL_FAM6_ATOM_GOLDMONT_D;
+
+ case INTEL_FAM6_ATOM_TREMONT_L:
+ return INTEL_FAM6_ATOM_TREMONT;
+
+ case INTEL_FAM6_ICELAKE_X:
+ return INTEL_FAM6_SKYLAKE_X;
}
return model;
}
@@ -4872,7 +4949,8 @@ void process_cpuid()
do_slm_cstates = is_slm(family, model);
do_knl_cstates = is_knl(family, model);
- if (do_slm_cstates || do_knl_cstates || is_cnl(family, model))
+ if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) ||
+ is_ehl(family, model))
BIC_NOT_PRESENT(BIC_CPU_c3);
if (!quiet)
@@ -4907,10 +4985,16 @@ void process_cpuid()
else
BIC_NOT_PRESENT(BIC_CPU_LPI);
- if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK))
+ if (!access(sys_lpi_file_sysfs, R_OK)) {
+ sys_lpi_file = sys_lpi_file_sysfs;
BIC_PRESENT(BIC_SYS_LPI);
- else
+ } else if (!access(sys_lpi_file_debugfs, R_OK)) {
+ sys_lpi_file = sys_lpi_file_debugfs;
+ BIC_PRESENT(BIC_SYS_LPI);
+ } else {
+ sys_lpi_file_sysfs = NULL;
BIC_NOT_PRESENT(BIC_SYS_LPI);
+ }
if (!quiet)
decode_misc_feature_control();
@@ -5306,7 +5390,7 @@ int get_and_dump_counters(void)
}
void print_version() {
- fprintf(outf, "turbostat version 19.08.31"
+ fprintf(outf, "turbostat version 20.03.20"
" - Len Brown <lenb@kernel.org>\n");
}
@@ -5323,9 +5407,9 @@ int add_counter(unsigned int msr_num, char *path, char *name,
}
msrp->msr_num = msr_num;
- strncpy(msrp->name, name, NAME_BYTES);
+ strncpy(msrp->name, name, NAME_BYTES - 1);
if (path)
- strncpy(msrp->path, path, PATH_BYTES);
+ strncpy(msrp->path, path, PATH_BYTES - 1);
msrp->width = width;
msrp->type = type;
msrp->format = format;
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index ded7a95..6d2f3a1 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
ifneq ($(O),)
ifeq ($(origin O), command line)
- dummy := $(if $(shell test -d $(O) || echo $(O)),$(error O=$(O) does not exist),)
- ABSOLUTE_O := $(shell cd $(O) ; pwd)
+ dummy := $(if $(shell cd $(PWD); test -d $(O) || echo $(O)),$(error O=$(O) does not exist),)
+ ABSOLUTE_O := $(shell cd $(PWD); cd $(O) ; pwd)
OUTPUT := $(ABSOLUTE_O)/$(if $(subdir),$(subdir)/)
COMMAND_O := O=$(ABSOLUTE_O)
ifeq ($(objtree),)
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 220d04f..7570e36 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -30,7 +30,7 @@
"EMAIL_WHEN_STARTED" => 0,
"NUM_TESTS" => 1,
"TEST_TYPE" => "build",
- "BUILD_TYPE" => "randconfig",
+ "BUILD_TYPE" => "oldconfig",
"MAKE_CMD" => "make",
"CLOSE_CONSOLE_SIGNAL" => "INT",
"TIMEOUT" => 120,
@@ -1030,7 +1030,7 @@
}
if (!$skip && $rest !~ /^\s*$/) {
- die "$name: $.: Gargbage found after $type\n$_";
+ die "$name: $.: Garbage found after $type\n$_";
}
if ($skip && $type eq "TEST_START") {
@@ -1063,7 +1063,7 @@
}
if ($rest !~ /^\s*$/) {
- die "$name: $.: Gargbage found after DEFAULTS\n$_";
+ die "$name: $.: Garbage found after DEFAULTS\n$_";
}
} elsif (/^\s*INCLUDE\s+(\S+)/) {
@@ -1154,7 +1154,7 @@
# on of these sections that have SKIP defined.
# The save variable can be
# defined multiple times and the new one simply overrides
- # the prevous one.
+ # the previous one.
set_variable($lvalue, $rvalue);
} else {
@@ -1234,7 +1234,7 @@
foreach my $option (keys %not_used) {
print "$option\n";
}
- print "Set IGRNORE_UNUSED = 1 to have ktest ignore unused variables\n";
+ print "Set IGNORE_UNUSED = 1 to have ktest ignore unused variables\n";
if (!read_yn "Do you want to continue?") {
exit -1;
}
@@ -1345,7 +1345,7 @@
# Check for recursive evaluations.
# 100 deep should be more than enough.
if ($r++ > 100) {
- die "Over 100 evaluations accurred with $option\n" .
+ die "Over 100 evaluations occurred with $option\n" .
"Check for recursive variables\n";
}
$prev = $option;
@@ -1383,7 +1383,7 @@
} else {
# Make sure everything has been written to disk
- run_ssh("sync");
+ run_ssh("sync", 10);
if (defined($time)) {
start_monitor;
@@ -1461,7 +1461,7 @@
sub dodie {
- # avoid recusion
+ # avoid recursion
return if ($in_die);
$in_die = 1;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
index c3bc933..27666b8 100644
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
@@ -10,7 +10,7 @@
#
# Options set in the beginning of the file are considered to be
-# default options. These options can be overriden by test specific
+# default options. These options can be overridden by test specific
# options, with the following exceptions:
#
# LOG_FILE
@@ -204,7 +204,7 @@
#
# This config file can also contain "config variables".
# These are assigned with ":=" instead of the ktest option
-# assigment "=".
+# assignment "=".
#
# The difference between ktest options and config variables
# is that config variables can be used multiple times,
@@ -263,7 +263,7 @@
#### Using options in other options ####
#
# Options that are defined in the config file may also be used
-# by other options. All options are evaulated at time of
+# by other options. All options are evaluated at time of
# use (except that config variables are evaluated at config
# processing time).
#
@@ -505,7 +505,7 @@
#TEST = ssh user@machine /root/run_test
# The build type is any make config type or special command
-# (default randconfig)
+# (default oldconfig)
# nobuild - skip the clean and build step
# useconfig:/path/to/config - use the given config and run
# oldconfig on it.
@@ -707,7 +707,7 @@
# Line to define a successful boot up in console output.
# This is what the line contains, not the entire line. If you need
-# the entire line to match, then use regural expression syntax like:
+# the entire line to match, then use regular expression syntax like:
# (do not add any quotes around it)
#
# SUCCESS_LINE = ^MyBox Login:$
@@ -839,7 +839,7 @@
# (ignored if POWEROFF_ON_SUCCESS is set)
#REBOOT_ON_SUCCESS = 1
-# In case there are isses with rebooting, you can specify this
+# In case there are issues with rebooting, you can specify this
# to always powercycle after this amount of time after calling
# reboot.
# Note, POWERCYCLE_AFTER_REBOOT = 0 does NOT disable it. It just
@@ -848,7 +848,7 @@
# (default undefined)
#POWERCYCLE_AFTER_REBOOT = 5
-# In case there's isses with halting, you can specify this
+# In case there's issues with halting, you can specify this
# to always poweroff after this amount of time after calling
# halt.
# Note, POWEROFF_AFTER_HALT = 0 does NOT disable it. It just
@@ -972,7 +972,7 @@
#
# PATCHCHECK_START is required and is the first patch to
# test (the SHA1 of the commit). You may also specify anything
-# that git checkout allows (branch name, tage, HEAD~3).
+# that git checkout allows (branch name, tag, HEAD~3).
#
# PATCHCHECK_END is the last patch to check (default HEAD)
#
@@ -994,7 +994,7 @@
# IGNORE_WARNINGS is set for the given commit's sha1
#
# IGNORE_WARNINGS can be used to disable the failure of patchcheck
-# on a particuler commit (SHA1). You can add more than one commit
+# on a particular commit (SHA1). You can add more than one commit
# by adding a list of SHA1s that are space delimited.
#
# If BUILD_NOCLEAN is set, then make mrproper will not be run on
@@ -1093,7 +1093,7 @@
# whatever reason. (Can't reboot, want to inspect each iteration)
# Doing a BISECT_MANUAL will have the test wait for you to
# tell it if the test passed or failed after each iteration.
-# This is basicall the same as running git bisect yourself
+# This is basically the same as running git bisect yourself
# but ktest will rebuild and install the kernel for you.
#
# BISECT_CHECK = 1 (optional, default 0)
@@ -1239,7 +1239,7 @@
#
# CONFIG_BISECT_EXEC (optional)
# The config bisect is a separate program that comes with ktest.pl.
-# By befault, it will look for:
+# By default, it will look for:
# `pwd`/config-bisect.pl # the location ktest.pl was executed from.
# If it does not find it there, it will look for:
# `dirname <ktest.pl>`/config-bisect.pl # The directory that holds ktest.pl
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index e59eb9e..180ad1e 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -24,6 +24,8 @@
KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs', 'build_dir', 'defconfig'])
+KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0]
+
class KunitStatus(Enum):
SUCCESS = auto()
CONFIG_FAILURE = auto()
@@ -35,6 +37,13 @@
shutil.copyfile('arch/um/configs/kunit_defconfig',
kunit_kernel.kunitconfig_path)
+def get_kernel_root_path():
+ parts = sys.argv[0] if not __file__ else __file__
+ parts = os.path.realpath(parts).split('tools/testing/kunit')
+ if len(parts) != 2:
+ sys.exit(1)
+ return parts[0]
+
def run_tests(linux: kunit_kernel.LinuxSourceTree,
request: KunitRequest) -> KunitResult:
config_start = time.time()
@@ -114,6 +123,9 @@
cli_args = parser.parse_args(argv)
if cli_args.subcommand == 'run':
+ if get_kernel_root_path():
+ os.chdir(get_kernel_root_path())
+
if cli_args.build_dir:
if not os.path.exists(cli_args.build_dir):
os.mkdir(cli_args.build_dir)
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index cc5d844..d99ae75 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -93,6 +93,20 @@
return False
return True
+ def validate_config(self, build_dir):
+ kconfig_path = get_kconfig_path(build_dir)
+ validated_kconfig = kunit_config.Kconfig()
+ validated_kconfig.read_from_file(kconfig_path)
+ if not self._kconfig.is_subset_of(validated_kconfig):
+ invalid = self._kconfig.entries() - validated_kconfig.entries()
+ message = 'Provided Kconfig is not contained in validated .config. Following fields found in kunitconfig, ' \
+ 'but not in .config: %s' % (
+ ', '.join([str(e) for e in invalid])
+ )
+ logging.error(message)
+ return False
+ return True
+
def build_config(self, build_dir):
kconfig_path = get_kconfig_path(build_dir)
if build_dir and not os.path.exists(build_dir):
@@ -103,12 +117,7 @@
except ConfigError as e:
logging.error(e)
return False
- validated_kconfig = kunit_config.Kconfig()
- validated_kconfig.read_from_file(kconfig_path)
- if not self._kconfig.is_subset_of(validated_kconfig):
- logging.error('Provided Kconfig is not contained in validated .config!')
- return False
- return True
+ return self.validate_config(build_dir)
def build_reconfig(self, build_dir):
"""Creates a new .config if it is not a subset of the .kunitconfig."""
@@ -133,12 +142,7 @@
except (ConfigError, BuildError) as e:
logging.error(e)
return False
- used_kconfig = kunit_config.Kconfig()
- used_kconfig.read_from_file(get_kconfig_path(build_dir))
- if not self._kconfig.is_subset_of(used_kconfig):
- logging.error('Provided Kconfig is not contained in final config!')
- return False
- return True
+ return self.validate_config(build_dir)
def run_kernel(self, args=[], timeout=None, build_dir=''):
args.extend(['mem=256M'])
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 63430e2..b93fa64 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -33,6 +33,7 @@
TARGETS += mount
TARGETS += mqueue
TARGETS += net
+TARGETS += net/forwarding
TARGETS += net/mptcp
TARGETS += netfilter
TARGETS += networking/timestamping
@@ -77,6 +78,12 @@
override TARGETS := $(TMP)
endif
+# User can set FORCE_TARGETS to 1 to require all targets to be successfully
+# built; make will fail if any of the targets cannot be built. If
+# FORCE_TARGETS is not set (the default), make will succeed if at least one
+# of the targets gets built.
+FORCE_TARGETS ?=
+
# Clear LDFLAGS and MAKEFLAGS if called from main
# Makefile to avoid test build failures when test
# Makefile doesn't have explicit build rules.
@@ -151,7 +158,8 @@
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
mkdir $$BUILD_TARGET -p; \
- $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET \
+ $(if $(FORCE_TARGETS),|| exit); \
ret=$$((ret * $$?)); \
done; exit $$ret;
@@ -205,7 +213,8 @@
@ret=1; \
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
- $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install \
+ $(if $(FORCE_TARGETS),|| exit); \
ret=$$((ret * $$?)); \
done; exit $$ret;
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 098bcae..0800036 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -506,8 +506,10 @@ static void test_syncookie(int type, sa_family_t family)
.pass_on_failure = 0,
};
- if (type != SOCK_STREAM)
+ if (type != SOCK_STREAM) {
+ test__skip();
return;
+ }
/*
* +1 for TCP-SYN and
@@ -822,8 +824,10 @@ void test_select_reuseport(void)
goto out;
saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
+ if (saved_tcp_fo < 0)
+ goto out;
saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL);
- if (saved_tcp_syncookie < 0 || saved_tcp_syncookie < 0)
+ if (saved_tcp_syncookie < 0)
goto out;
if (enable_fastopen())
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c
new file mode 100644
index 0000000..189a34a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "test_send_signal_kern.skel.h"
+
+static void sigusr1_handler(int signum)
+{
+}
+
+#define THREAD_COUNT 100
+
+static void *worker(void *p)
+{
+ int i;
+
+ for ( i = 0; i < 1000; i++)
+ usleep(1);
+
+ return NULL;
+}
+
+void test_send_signal_sched_switch(void)
+{
+ struct test_send_signal_kern *skel;
+ pthread_t threads[THREAD_COUNT];
+ u32 duration = 0;
+ int i, err;
+
+ signal(SIGUSR1, sigusr1_handler);
+
+ skel = test_send_signal_kern__open_and_load();
+ if (CHECK(!skel, "skel_open_and_load", "skeleton open_and_load failed\n"))
+ return;
+
+ skel->bss->pid = getpid();
+ skel->bss->sig = SIGUSR1;
+
+ err = test_send_signal_kern__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed\n"))
+ goto destroy_skel;
+
+ for (i = 0; i < THREAD_COUNT; i++) {
+ err = pthread_create(threads + i, NULL, worker, NULL);
+ if (CHECK(err, "pthread_create", "Error creating thread, %s\n",
+ strerror(errno)))
+ goto destroy_skel;
+ }
+
+ for (i = 0; i < THREAD_COUNT; i++)
+ pthread_join(threads[i], NULL);
+
+destroy_skel:
+ test_send_signal_kern__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 07f5b46..aa43e0b 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -3,6 +3,11 @@
#include "test_progs.h"
+#define TCP_REPAIR 19 /* TCP sock is under repair right now */
+
+#define TCP_REPAIR_ON 1
+#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */
+
static int connected_socket_v4(void)
{
struct sockaddr_in addr = {
diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
index 1acc91e..b4233d3 100644
--- a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
@@ -31,6 +31,12 @@ int send_signal_tp(void *ctx)
return bpf_send_signal_test(ctx);
}
+SEC("tracepoint/sched/sched_switch")
+int send_signal_tp_sched(void *ctx)
+{
+ return bpf_send_signal_test(ctx);
+}
+
SEC("perf_event")
int send_signal_perf(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 93040ca..8da77cd 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -1062,6 +1062,48 @@ static struct btf_raw_test raw_tests[] = {
.err_str = "Member exceeds struct_size",
},
+/* Test member unexceeds the size of struct
+ *
+ * enum E {
+ * E0,
+ * E1,
+ * };
+ *
+ * struct A {
+ * char m;
+ * enum E __attribute__((packed)) n;
+ * };
+ */
+{
+ .descr = "size check test #5",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+ /* char */ /* [2] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1),
+ /* enum E { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), 1),
+ BTF_ENUM_ENC(NAME_TBD, 0),
+ BTF_ENUM_ENC(NAME_TBD, 1),
+ /* } */
+ /* struct A { */ /* [4] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 2),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* char m; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 8),/* enum E __attribute__((packed)) n; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0E\0E0\0E1\0A\0m\0n",
+ .str_sec_size = sizeof("\0E\0E0\0E1\0A\0m\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "size_check5_map",
+ .key_size = sizeof(int),
+ .value_size = 2,
+ .key_type_id = 1,
+ .value_type_id = 4,
+ .max_entries = 4,
+},
+
/* typedef const void * const_void_ptr;
* struct A {
* const_void_ptr m;
diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c
index bf0322e..bd5cae4 100644
--- a/tools/testing/selftests/bpf/verifier/jmp32.c
+++ b/tools/testing/selftests/bpf/verifier/jmp32.c
@@ -62,6 +62,21 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
+ "jset32: ignores upper bits",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_7, 0x8000000000000000),
+ BPF_LD_IMM64(BPF_REG_8, 0x8000000000000000),
+ BPF_JMP_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP32_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+},
+{
"jset32: min/max deduction",
.insns = {
BPF_RAND_UEXT_R7,
diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile
index cd1f5b3..d6e106f 100644
--- a/tools/testing/selftests/ftrace/Makefile
+++ b/tools/testing/selftests/ftrace/Makefile
@@ -2,7 +2,7 @@
all:
TEST_PROGS := ftracetest
-TEST_FILES := test.d
+TEST_FILES := test.d settings
EXTRA_CLEAN := $(OUTPUT)/logs/*
include ../lib.mk
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
index 64cfcc7..f2ee1e8 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: ftrace - function pid filters
+# flags: instance
# Make sure that function pid matching filter works.
# Also test it on an instance directory
@@ -96,13 +97,6 @@
}
do_test
-
-mkdir instances/foo
-cd instances/foo
-do_test
-cd ../../
-rmdir instances/foo
-
do_reset
exit 0
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index 30996306..2320782 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
INCLUDES := -I../include -I../../
CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES)
-LDFLAGS := $(LDFLAGS) -pthread -lrt
+LDLIBS := -lpthread -lrt
HEADERS := \
../include/futextest.h \
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 67abc1d..d91c53b7 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -8,7 +8,7 @@
UNAME_M := $(shell uname -m)
LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c
-LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/ucall.c
+LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c
LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c
@@ -26,6 +26,7 @@
TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
+TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index aa6451b..7428513 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -36,24 +36,24 @@
#define X86_CR4_SMAP (1ul << 21)
#define X86_CR4_PKE (1ul << 22)
-/* The enum values match the intruction encoding of each register */
-enum x86_register {
- RAX = 0,
- RCX,
- RDX,
- RBX,
- RSP,
- RBP,
- RSI,
- RDI,
- R8,
- R9,
- R10,
- R11,
- R12,
- R13,
- R14,
- R15,
+/* General Registers in 64-Bit Mode */
+struct gpr64_regs {
+ u64 rax;
+ u64 rcx;
+ u64 rdx;
+ u64 rbx;
+ u64 rsp;
+ u64 rbp;
+ u64 rsi;
+ u64 rdi;
+ u64 r8;
+ u64 r9;
+ u64 r10;
+ u64 r11;
+ u64 r12;
+ u64 r13;
+ u64 r14;
+ u64 r15;
};
struct desc64 {
@@ -220,20 +220,20 @@ static inline void set_cr4(uint64_t val)
__asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
}
-static inline uint64_t get_gdt_base(void)
+static inline struct desc_ptr get_gdt(void)
{
struct desc_ptr gdt;
__asm__ __volatile__("sgdt %[gdt]"
: /* output */ [gdt]"=m"(gdt));
- return gdt.address;
+ return gdt;
}
-static inline uint64_t get_idt_base(void)
+static inline struct desc_ptr get_idt(void)
{
struct desc_ptr idt;
__asm__ __volatile__("sidt %[idt]"
: /* output */ [idt]"=m"(idt));
- return idt.address;
+ return idt;
}
#define SET_XMM(__var, __xmm) \
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h
new file mode 100644
index 0000000..f4ea235
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/svm.h
@@ -0,0 +1,297 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/svm.h
+ * This is a copy of arch/x86/include/asm/svm.h
+ *
+ */
+
+#ifndef SELFTEST_KVM_SVM_H
+#define SELFTEST_KVM_SVM_H
+
+enum {
+ INTERCEPT_INTR,
+ INTERCEPT_NMI,
+ INTERCEPT_SMI,
+ INTERCEPT_INIT,
+ INTERCEPT_VINTR,
+ INTERCEPT_SELECTIVE_CR0,
+ INTERCEPT_STORE_IDTR,
+ INTERCEPT_STORE_GDTR,
+ INTERCEPT_STORE_LDTR,
+ INTERCEPT_STORE_TR,
+ INTERCEPT_LOAD_IDTR,
+ INTERCEPT_LOAD_GDTR,
+ INTERCEPT_LOAD_LDTR,
+ INTERCEPT_LOAD_TR,
+ INTERCEPT_RDTSC,
+ INTERCEPT_RDPMC,
+ INTERCEPT_PUSHF,
+ INTERCEPT_POPF,
+ INTERCEPT_CPUID,
+ INTERCEPT_RSM,
+ INTERCEPT_IRET,
+ INTERCEPT_INTn,
+ INTERCEPT_INVD,
+ INTERCEPT_PAUSE,
+ INTERCEPT_HLT,
+ INTERCEPT_INVLPG,
+ INTERCEPT_INVLPGA,
+ INTERCEPT_IOIO_PROT,
+ INTERCEPT_MSR_PROT,
+ INTERCEPT_TASK_SWITCH,
+ INTERCEPT_FERR_FREEZE,
+ INTERCEPT_SHUTDOWN,
+ INTERCEPT_VMRUN,
+ INTERCEPT_VMMCALL,
+ INTERCEPT_VMLOAD,
+ INTERCEPT_VMSAVE,
+ INTERCEPT_STGI,
+ INTERCEPT_CLGI,
+ INTERCEPT_SKINIT,
+ INTERCEPT_RDTSCP,
+ INTERCEPT_ICEBP,
+ INTERCEPT_WBINVD,
+ INTERCEPT_MONITOR,
+ INTERCEPT_MWAIT,
+ INTERCEPT_MWAIT_COND,
+ INTERCEPT_XSETBV,
+ INTERCEPT_RDPRU,
+};
+
+
+struct __attribute__ ((__packed__)) vmcb_control_area {
+ u32 intercept_cr;
+ u32 intercept_dr;
+ u32 intercept_exceptions;
+ u64 intercept;
+ u8 reserved_1[40];
+ u16 pause_filter_thresh;
+ u16 pause_filter_count;
+ u64 iopm_base_pa;
+ u64 msrpm_base_pa;
+ u64 tsc_offset;
+ u32 asid;
+ u8 tlb_ctl;
+ u8 reserved_2[3];
+ u32 int_ctl;
+ u32 int_vector;
+ u32 int_state;
+ u8 reserved_3[4];
+ u32 exit_code;
+ u32 exit_code_hi;
+ u64 exit_info_1;
+ u64 exit_info_2;
+ u32 exit_int_info;
+ u32 exit_int_info_err;
+ u64 nested_ctl;
+ u64 avic_vapic_bar;
+ u8 reserved_4[8];
+ u32 event_inj;
+ u32 event_inj_err;
+ u64 nested_cr3;
+ u64 virt_ext;
+ u32 clean;
+ u32 reserved_5;
+ u64 next_rip;
+ u8 insn_len;
+ u8 insn_bytes[15];
+ u64 avic_backing_page; /* Offset 0xe0 */
+ u8 reserved_6[8]; /* Offset 0xe8 */
+ u64 avic_logical_id; /* Offset 0xf0 */
+ u64 avic_physical_id; /* Offset 0xf8 */
+ u8 reserved_7[768];
+};
+
+
+#define TLB_CONTROL_DO_NOTHING 0
+#define TLB_CONTROL_FLUSH_ALL_ASID 1
+#define TLB_CONTROL_FLUSH_ASID 3
+#define TLB_CONTROL_FLUSH_ASID_LOCAL 7
+
+#define V_TPR_MASK 0x0f
+
+#define V_IRQ_SHIFT 8
+#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
+
+#define V_GIF_SHIFT 9
+#define V_GIF_MASK (1 << V_GIF_SHIFT)
+
+#define V_INTR_PRIO_SHIFT 16
+#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
+
+#define V_IGN_TPR_SHIFT 20
+#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
+
+#define V_INTR_MASKING_SHIFT 24
+#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
+
+#define V_GIF_ENABLE_SHIFT 25
+#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT)
+
+#define AVIC_ENABLE_SHIFT 31
+#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
+
+#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
+#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
+
+#define SVM_INTERRUPT_SHADOW_MASK 1
+
+#define SVM_IOIO_STR_SHIFT 2
+#define SVM_IOIO_REP_SHIFT 3
+#define SVM_IOIO_SIZE_SHIFT 4
+#define SVM_IOIO_ASIZE_SHIFT 7
+
+#define SVM_IOIO_TYPE_MASK 1
+#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
+#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
+#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
+#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
+
+#define SVM_VM_CR_VALID_MASK 0x001fULL
+#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
+#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL
+
+#define SVM_NESTED_CTL_NP_ENABLE BIT(0)
+#define SVM_NESTED_CTL_SEV_ENABLE BIT(1)
+
+struct __attribute__ ((__packed__)) vmcb_seg {
+ u16 selector;
+ u16 attrib;
+ u32 limit;
+ u64 base;
+};
+
+struct __attribute__ ((__packed__)) vmcb_save_area {
+ struct vmcb_seg es;
+ struct vmcb_seg cs;
+ struct vmcb_seg ss;
+ struct vmcb_seg ds;
+ struct vmcb_seg fs;
+ struct vmcb_seg gs;
+ struct vmcb_seg gdtr;
+ struct vmcb_seg ldtr;
+ struct vmcb_seg idtr;
+ struct vmcb_seg tr;
+ u8 reserved_1[43];
+ u8 cpl;
+ u8 reserved_2[4];
+ u64 efer;
+ u8 reserved_3[112];
+ u64 cr4;
+ u64 cr3;
+ u64 cr0;
+ u64 dr7;
+ u64 dr6;
+ u64 rflags;
+ u64 rip;
+ u8 reserved_4[88];
+ u64 rsp;
+ u8 reserved_5[24];
+ u64 rax;
+ u64 star;
+ u64 lstar;
+ u64 cstar;
+ u64 sfmask;
+ u64 kernel_gs_base;
+ u64 sysenter_cs;
+ u64 sysenter_esp;
+ u64 sysenter_eip;
+ u64 cr2;
+ u8 reserved_6[32];
+ u64 g_pat;
+ u64 dbgctl;
+ u64 br_from;
+ u64 br_to;
+ u64 last_excp_from;
+ u64 last_excp_to;
+};
+
+struct __attribute__ ((__packed__)) vmcb {
+ struct vmcb_control_area control;
+ struct vmcb_save_area save;
+};
+
+#define SVM_CPUID_FUNC 0x8000000a
+
+#define SVM_VM_CR_SVM_DISABLE 4
+
+#define SVM_SELECTOR_S_SHIFT 4
+#define SVM_SELECTOR_DPL_SHIFT 5
+#define SVM_SELECTOR_P_SHIFT 7
+#define SVM_SELECTOR_AVL_SHIFT 8
+#define SVM_SELECTOR_L_SHIFT 9
+#define SVM_SELECTOR_DB_SHIFT 10
+#define SVM_SELECTOR_G_SHIFT 11
+
+#define SVM_SELECTOR_TYPE_MASK (0xf)
+#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT)
+#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT)
+#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT)
+#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT)
+#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT)
+#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT)
+#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT)
+
+#define SVM_SELECTOR_WRITE_MASK (1 << 1)
+#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
+#define SVM_SELECTOR_CODE_MASK (1 << 3)
+
+#define INTERCEPT_CR0_READ 0
+#define INTERCEPT_CR3_READ 3
+#define INTERCEPT_CR4_READ 4
+#define INTERCEPT_CR8_READ 8
+#define INTERCEPT_CR0_WRITE (16 + 0)
+#define INTERCEPT_CR3_WRITE (16 + 3)
+#define INTERCEPT_CR4_WRITE (16 + 4)
+#define INTERCEPT_CR8_WRITE (16 + 8)
+
+#define INTERCEPT_DR0_READ 0
+#define INTERCEPT_DR1_READ 1
+#define INTERCEPT_DR2_READ 2
+#define INTERCEPT_DR3_READ 3
+#define INTERCEPT_DR4_READ 4
+#define INTERCEPT_DR5_READ 5
+#define INTERCEPT_DR6_READ 6
+#define INTERCEPT_DR7_READ 7
+#define INTERCEPT_DR0_WRITE (16 + 0)
+#define INTERCEPT_DR1_WRITE (16 + 1)
+#define INTERCEPT_DR2_WRITE (16 + 2)
+#define INTERCEPT_DR3_WRITE (16 + 3)
+#define INTERCEPT_DR4_WRITE (16 + 4)
+#define INTERCEPT_DR5_WRITE (16 + 5)
+#define INTERCEPT_DR6_WRITE (16 + 6)
+#define INTERCEPT_DR7_WRITE (16 + 7)
+
+#define SVM_EVTINJ_VEC_MASK 0xff
+
+#define SVM_EVTINJ_TYPE_SHIFT 8
+#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_VALID (1 << 31)
+#define SVM_EVTINJ_VALID_ERR (1 << 11)
+
+#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
+#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
+
+#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
+#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
+#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
+#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
+
+#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
+#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
+
+#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
+#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
+#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44
+
+#define SVM_EXITINFO_REG_MASK 0x0F
+
+#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
+
+#endif /* SELFTEST_KVM_SVM_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
new file mode 100644
index 0000000..cd03791
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/svm_utils.h
+ * Header for nested SVM testing
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+
+#ifndef SELFTEST_KVM_SVM_UTILS_H
+#define SELFTEST_KVM_SVM_UTILS_H
+
+#include <stdint.h>
+#include "svm.h"
+#include "processor.h"
+
+#define CPUID_SVM_BIT 2
+#define CPUID_SVM BIT_ULL(CPUID_SVM_BIT)
+
+#define SVM_EXIT_VMMCALL 0x081
+
+struct svm_test_data {
+ /* VMCB */
+ struct vmcb *vmcb; /* gva */
+ void *vmcb_hva;
+ uint64_t vmcb_gpa;
+
+ /* host state-save area */
+ struct vmcb_save_area *save_area; /* gva */
+ void *save_area_hva;
+ uint64_t save_area_gpa;
+};
+
+struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
+void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp);
+void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
+void nested_svm_check_supported(void);
+
+#endif /* SELFTEST_KVM_SVM_UTILS_H */
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
new file mode 100644
index 0000000..6e05a8f
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tools/testing/selftests/kvm/lib/x86_64/svm.c
+ * Helpers used for nested SVM testing
+ * Largely inspired from KVM unit test svm.c
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+#include "svm_util.h"
+
+struct gpr64_regs guest_regs;
+u64 rflags;
+
+/* Allocate memory regions for nested SVM tests.
+ *
+ * Input Args:
+ * vm - The VM to allocate guest-virtual addresses in.
+ *
+ * Output Args:
+ * p_svm_gva - The guest virtual address for the struct svm_test_data.
+ *
+ * Return:
+ * Pointer to structure with the addresses of the SVM areas.
+ */
+struct svm_test_data *
+vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
+{
+ vm_vaddr_t svm_gva = vm_vaddr_alloc(vm, getpagesize(),
+ 0x10000, 0, 0);
+ struct svm_test_data *svm = addr_gva2hva(vm, svm_gva);
+
+ svm->vmcb = (void *)vm_vaddr_alloc(vm, getpagesize(),
+ 0x10000, 0, 0);
+ svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb);
+ svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb);
+
+ svm->save_area = (void *)vm_vaddr_alloc(vm, getpagesize(),
+ 0x10000, 0, 0);
+ svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area);
+ svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area);
+
+ *p_svm_gva = svm_gva;
+ return svm;
+}
+
+static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
+ u64 base, u32 limit, u32 attr)
+{
+ seg->selector = selector;
+ seg->attrib = attr;
+ seg->limit = limit;
+ seg->base = base;
+}
+
+void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp)
+{
+ struct vmcb *vmcb = svm->vmcb;
+ uint64_t vmcb_gpa = svm->vmcb_gpa;
+ struct vmcb_save_area *save = &vmcb->save;
+ struct vmcb_control_area *ctrl = &vmcb->control;
+ u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
+ | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK;
+ u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
+ | SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK;
+ uint64_t efer;
+
+ efer = rdmsr(MSR_EFER);
+ wrmsr(MSR_EFER, efer | EFER_SVME);
+ wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa);
+
+ memset(vmcb, 0, sizeof(*vmcb));
+ asm volatile ("vmsave\n\t" : : "a" (vmcb_gpa) : "memory");
+ vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr);
+ vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr);
+ vmcb_set_seg(&save->ss, get_ss(), 0, -1U, data_seg_attr);
+ vmcb_set_seg(&save->ds, get_ds(), 0, -1U, data_seg_attr);
+ vmcb_set_seg(&save->gdtr, 0, get_gdt().address, get_gdt().size, 0);
+ vmcb_set_seg(&save->idtr, 0, get_idt().address, get_idt().size, 0);
+
+ ctrl->asid = 1;
+ save->cpl = 0;
+ save->efer = rdmsr(MSR_EFER);
+ asm volatile ("mov %%cr4, %0" : "=r"(save->cr4) : : "memory");
+ asm volatile ("mov %%cr3, %0" : "=r"(save->cr3) : : "memory");
+ asm volatile ("mov %%cr0, %0" : "=r"(save->cr0) : : "memory");
+ asm volatile ("mov %%dr7, %0" : "=r"(save->dr7) : : "memory");
+ asm volatile ("mov %%dr6, %0" : "=r"(save->dr6) : : "memory");
+ asm volatile ("mov %%cr2, %0" : "=r"(save->cr2) : : "memory");
+ save->g_pat = rdmsr(MSR_IA32_CR_PAT);
+ save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
+ ctrl->intercept = (1ULL << INTERCEPT_VMRUN) |
+ (1ULL << INTERCEPT_VMMCALL);
+
+ vmcb->save.rip = (u64)guest_rip;
+ vmcb->save.rsp = (u64)guest_rsp;
+ guest_regs.rdi = (u64)svm;
+}
+
+/*
+ * save/restore 64-bit general registers except rax, rip, rsp
+ * which are directly handed through the VMCB guest processor state
+ */
+#define SAVE_GPR_C \
+ "xchg %%rbx, guest_regs+0x20\n\t" \
+ "xchg %%rcx, guest_regs+0x10\n\t" \
+ "xchg %%rdx, guest_regs+0x18\n\t" \
+ "xchg %%rbp, guest_regs+0x30\n\t" \
+ "xchg %%rsi, guest_regs+0x38\n\t" \
+ "xchg %%rdi, guest_regs+0x40\n\t" \
+ "xchg %%r8, guest_regs+0x48\n\t" \
+ "xchg %%r9, guest_regs+0x50\n\t" \
+ "xchg %%r10, guest_regs+0x58\n\t" \
+ "xchg %%r11, guest_regs+0x60\n\t" \
+ "xchg %%r12, guest_regs+0x68\n\t" \
+ "xchg %%r13, guest_regs+0x70\n\t" \
+ "xchg %%r14, guest_regs+0x78\n\t" \
+ "xchg %%r15, guest_regs+0x80\n\t"
+
+#define LOAD_GPR_C SAVE_GPR_C
+
+/*
+ * selftests do not use interrupts so we dropped clgi/sti/cli/stgi
+ * for now. registers involved in LOAD/SAVE_GPR_C are eventually
+ * unmodified so they do not need to be in the clobber list.
+ */
+void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa)
+{
+ asm volatile (
+ "vmload\n\t"
+ "mov rflags, %%r15\n\t" // rflags
+ "mov %%r15, 0x170(%[vmcb])\n\t"
+ "mov guest_regs, %%r15\n\t" // rax
+ "mov %%r15, 0x1f8(%[vmcb])\n\t"
+ LOAD_GPR_C
+ "vmrun\n\t"
+ SAVE_GPR_C
+ "mov 0x170(%[vmcb]), %%r15\n\t" // rflags
+ "mov %%r15, rflags\n\t"
+ "mov 0x1f8(%[vmcb]), %%r15\n\t" // rax
+ "mov %%r15, guest_regs\n\t"
+ "vmsave\n\t"
+ : : [vmcb] "r" (vmcb), [vmcb_gpa] "a" (vmcb_gpa)
+ : "r15", "memory");
+}
+
+void nested_svm_check_supported(void)
+{
+ struct kvm_cpuid_entry2 *entry =
+ kvm_get_supported_cpuid_entry(0x80000001);
+
+ if (!(entry->ecx & CPUID_SVM)) {
+ fprintf(stderr, "nested SVM not enabled, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+}
+
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index 85064ba..7aaa99c 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -288,9 +288,9 @@ static inline void init_vmcs_host_state(void)
vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE));
vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE));
vmwrite(HOST_TR_BASE,
- get_desc64_base((struct desc64 *)(get_gdt_base() + get_tr())));
- vmwrite(HOST_GDTR_BASE, get_gdt_base());
- vmwrite(HOST_IDTR_BASE, get_idt_base());
+ get_desc64_base((struct desc64 *)(get_gdt().address + get_tr())));
+ vmwrite(HOST_GDTR_BASE, get_gdt().address);
+ vmwrite(HOST_IDTR_BASE, get_idt().address);
vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP));
vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP));
}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
new file mode 100644
index 0000000..e280f68
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_vmcall_test
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ *
+ * Nested SVM testing: VMCALL
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+#define VCPU_ID 5
+
+static struct kvm_vm *vm;
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+ __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t svm_gva;
+
+ nested_svm_check_supported();
+
+ vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+ vcpu_alloc_svm(vm, &svm_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, svm_gva);
+
+ for (;;) {
+ volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_ASSERT(false, "%s",
+ (const char *)uc.args[0]);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_ASSERT(false,
+ "Unknown ucall 0x%x.", uc.cmd);
+ }
+ }
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 1c8a196..3ed0134 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -83,17 +83,20 @@
$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
endif
+define INSTALL_SINGLE_RULE
+ $(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH))
+ $(if $(INSTALL_LIST),@echo rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/)
+ $(if $(INSTALL_LIST),@rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/)
+endef
+
define INSTALL_RULE
- @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \
- mkdir -p ${INSTALL_PATH}; \
- echo "rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/"; \
- rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/; \
- fi
- @if [ "X$(TEST_GEN_PROGS)$(TEST_CUSTOM_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then \
- mkdir -p ${INSTALL_PATH}; \
- echo "rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/"; \
- rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/; \
- fi
+ $(eval INSTALL_LIST = $(TEST_PROGS)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(TEST_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(TEST_FILES)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(TEST_GEN_PROGS)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(TEST_CUSTOM_PROGS)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(TEST_GEN_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(TEST_GEN_FILES)) $(INSTALL_SINGLE_RULE)
endef
install: all
diff --git a/tools/testing/selftests/livepatch/Makefile b/tools/testing/selftests/livepatch/Makefile
index 3876d8d..1acc9e1 100644
--- a/tools/testing/selftests/livepatch/Makefile
+++ b/tools/testing/selftests/livepatch/Makefile
@@ -8,4 +8,6 @@
test-state.sh \
test-ftrace.sh
+TEST_FILES := settings
+
include ../lib.mk
diff --git a/tools/testing/selftests/lkdtm/.gitignore b/tools/testing/selftests/lkdtm/.gitignore
new file mode 100644
index 0000000..f262126
--- /dev/null
+++ b/tools/testing/selftests/lkdtm/.gitignore
@@ -0,0 +1,2 @@
+*.sh
+!run.sh
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index b569419..4c1bd03 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -11,7 +11,9 @@
TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh
-TEST_PROGS += fin_ack_lat.sh
+TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh
+TEST_PROGS += altnames.sh icmp_redirect.sh ip6_gre_headroom.sh
+TEST_PROGS += route_localnet.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -27,5 +29,5 @@
include ../lib.mk
$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
-$(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread
-$(OUTPUT)/tcp_inq: LDFLAGS += -lpthread
+$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
+$(OUTPUT)/tcp_inq: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 6dd4031..b761670 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -910,6 +910,12 @@
check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
log_test $? 0 "Multipath with single path via multipath attribute"
+ # multipath with dev-only
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 dev veth1"
+ check_route6 "2001:db8:104::/64 dev veth1 metric 1024"
+ log_test $? 0 "Multipath with dev-only"
+
# route replace fails - invalid nexthop 1
add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3"
@@ -1035,6 +1041,27 @@
fi
log_test $rc 0 "Prefix route with metric on link up"
+ # verify peer metric added correctly
+ set -e
+ run_cmd "$IP -6 addr flush dev dummy2"
+ run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::1 peer 2001:db8:104::2 metric 260"
+ set +e
+
+ check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 260"
+ log_test $? 0 "Set metric with peer route on local side"
+ log_test $? 0 "User specified metric on local address"
+ check_route6 "2001:db8:104::2 dev dummy2 proto kernel metric 260"
+ log_test $? 0 "Set metric with peer route on peer side"
+
+ set -e
+ run_cmd "$IP -6 addr change dev dummy2 2001:db8:104::1 peer 2001:db8:104::3 metric 261"
+ set +e
+
+ check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 261"
+ log_test $? 0 "Modify metric and peer address on local side"
+ check_route6 "2001:db8:104::3 dev dummy2 proto kernel metric 261"
+ log_test $? 0 "Modify metric and peer address on peer side"
+
$IP li del dummy1
$IP li del dummy2
cleanup
@@ -1451,13 +1478,20 @@
run_cmd "$IP addr flush dev dummy2"
run_cmd "$IP addr add dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 260"
- run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 261"
rc=$?
if [ $rc -eq 0 ]; then
- check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261"
+ check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 260"
rc=$?
fi
- log_test $rc 0 "Modify metric of address with peer route"
+ log_test $rc 0 "Set metric of address with peer route"
+
+ run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.3 metric 261"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.3 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261"
+ rc=$?
+ fi
+ log_test $rc 0 "Modify metric and peer address for peer route"
$IP li del dummy1
$IP li del dummy2
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
new file mode 100644
index 0000000..250fbb2
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+TEST_PROGS = bridge_igmp.sh \
+ bridge_port_isolation.sh \
+ bridge_sticky_fdb.sh \
+ bridge_vlan_aware.sh \
+ bridge_vlan_unaware.sh \
+ ethtool.sh \
+ gre_inner_v4_multipath.sh \
+ gre_inner_v6_multipath.sh \
+ gre_multipath.sh \
+ ip6gre_inner_v4_multipath.sh \
+ ip6gre_inner_v6_multipath.sh \
+ ipip_flat_gre_key.sh \
+ ipip_flat_gre_keys.sh \
+ ipip_flat_gre.sh \
+ ipip_hier_gre_key.sh \
+ ipip_hier_gre_keys.sh \
+ ipip_hier_gre.sh \
+ loopback.sh \
+ mirror_gre_bound.sh \
+ mirror_gre_bridge_1d.sh \
+ mirror_gre_bridge_1d_vlan.sh \
+ mirror_gre_bridge_1q_lag.sh \
+ mirror_gre_bridge_1q.sh \
+ mirror_gre_changes.sh \
+ mirror_gre_flower.sh \
+ mirror_gre_lag_lacp.sh \
+ mirror_gre_neigh.sh \
+ mirror_gre_nh.sh \
+ mirror_gre.sh \
+ mirror_gre_vlan_bridge_1q.sh \
+ mirror_gre_vlan.sh \
+ mirror_vlan.sh \
+ router_bridge.sh \
+ router_bridge_vlan.sh \
+ router_broadcast.sh \
+ router_mpath_nh.sh \
+ router_multicast.sh \
+ router_multipath.sh \
+ router.sh \
+ router_vid_1.sh \
+ sch_ets.sh \
+ sch_tbf_ets.sh \
+ sch_tbf_prio.sh \
+ sch_tbf_root.sh \
+ tc_actions.sh \
+ tc_chains.sh \
+ tc_flower_router.sh \
+ tc_flower.sh \
+ tc_shblocks.sh \
+ tc_vlan_modify.sh \
+ vxlan_asymmetric.sh \
+ vxlan_bridge_1d_port_8472.sh \
+ vxlan_bridge_1d.sh \
+ vxlan_bridge_1q_port_8472.sh \
+ vxlan_bridge_1q.sh \
+ vxlan_symmetric.sh
+
+TEST_PROGS_EXTENDED := devlink_lib.sh \
+ ethtool_lib.sh \
+ fib_offload_lib.sh \
+ forwarding.config.sample \
+ ipip_lib.sh \
+ lib.sh \
+ mirror_gre_lib.sh \
+ mirror_gre_topo_lib.sh \
+ mirror_lib.sh \
+ mirror_topo_lib.sh \
+ sch_ets_core.sh \
+ sch_ets_tests.sh \
+ sch_tbf_core.sh \
+ sch_tbf_etsprio.sh \
+ tc_common.sh
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
old mode 100755
new mode 100644
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh
index e6fd7a1..0266443 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh
@@ -63,22 +63,23 @@
{
local tundev=$1; shift
local direction=$1; shift
- local prot=$1; shift
local what=$1; shift
- local swp3mac=$(mac_get $swp3)
- local h3mac=$(mac_get $h3)
+ case "$direction" in
+ ingress) local src_mac=$(mac_get $h1); local dst_mac=$(mac_get $h2)
+ ;;
+ egress) local src_mac=$(mac_get $h2); local dst_mac=$(mac_get $h1)
+ ;;
+ esac
RET=0
mirror_install $swp1 $direction $tundev "matchall $tcflags"
- tc filter add dev $h3 ingress pref 77 prot $prot \
- flower ip_proto 0x2f src_mac $swp3mac dst_mac $h3mac \
- action pass
+ icmp_capture_install h3-${tundev} "src_mac $src_mac dst_mac $dst_mac"
- mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
+ mirror_test v$h1 192.0.2.1 192.0.2.2 h3-${tundev} 100 10
- tc filter del dev $h3 ingress pref 77
+ icmp_capture_uninstall h3-${tundev}
mirror_uninstall $swp1 $direction
log_test "$direction $what: envelope MAC ($tcflags)"
@@ -120,14 +121,14 @@
test_gretap_mac()
{
- test_span_gre_mac gt4 ingress ip "mirror to gretap"
- test_span_gre_mac gt4 egress ip "mirror to gretap"
+ test_span_gre_mac gt4 ingress "mirror to gretap"
+ test_span_gre_mac gt4 egress "mirror to gretap"
}
test_ip6gretap_mac()
{
- test_span_gre_mac gt6 ingress ipv6 "mirror to ip6gretap"
- test_span_gre_mac gt6 egress ipv6 "mirror to ip6gretap"
+ test_span_gre_mac gt6 ingress "mirror to ip6gretap"
+ test_span_gre_mac gt6 egress "mirror to ip6gretap"
}
test_all()
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index bb10e33..ce6bea967 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -516,9 +516,9 @@
RET=0
tc filter add dev v1 egress pref 77 prot ip \
- flower ip_tos 0x40 action pass
- vxlan_ping_test $h1 192.0.2.3 "-Q 0x40" v1 egress 77 10
- vxlan_ping_test $h1 192.0.2.3 "-Q 0x30" v1 egress 77 0
+ flower ip_tos 0x14 action pass
+ vxlan_ping_test $h1 192.0.2.3 "-Q 0x14" v1 egress 77 10
+ vxlan_ping_test $h1 192.0.2.3 "-Q 0x18" v1 egress 77 0
tc filter del dev v1 egress pref 77 prot ip
log_test "VXLAN: envelope TOS inheritance"
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 93de520..ba450e62 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -8,6 +8,8 @@
TEST_GEN_FILES = mptcp_connect
+TEST_FILES := settings
+
EXTRA_CLEAN := *.pcap
include ../../lib.mk
diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c
index c623393..b8475cb2 100644
--- a/tools/testing/selftests/net/reuseport_addr_any.c
+++ b/tools/testing/selftests/net/reuseport_addr_any.c
@@ -21,6 +21,10 @@
#include <sys/socket.h>
#include <unistd.h>
+#ifndef SOL_DCCP
+#define SOL_DCCP 269
+#endif
+
static const char *IP4_ADDR = "127.0.0.1";
static const char *IP6_ADDR = "::1";
static const char *IP4_MAPPED6 = "::ffff:127.0.0.1";
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index 08194aa..9c0f758 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -3,6 +3,10 @@
TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
- nft_concat_range.sh
+ nft_concat_range.sh \
+ nft_queue.sh
+
+LDLIBS = -lmnl
+TEST_GEN_FILES = nf-queue
include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config
index 59caa8f..4faf2ce 100644
--- a/tools/testing/selftests/netfilter/config
+++ b/tools/testing/selftests/netfilter/config
@@ -1,2 +1,8 @@
CONFIG_NET_NS=y
CONFIG_NF_TABLES_INET=y
+CONFIG_NFT_QUEUE=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_REDIR=m
+CONFIG_NFT_MASQ=m
+CONFIG_NFT_FLOW_OFFLOAD=m
+CONFIG_NF_CT_NETLINK=m
diff --git a/tools/testing/selftests/netfilter/nf-queue.c b/tools/testing/selftests/netfilter/nf-queue.c
new file mode 100644
index 0000000..29c73bc
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nf-queue.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <time.h>
+#include <arpa/inet.h>
+
+#include <libmnl/libmnl.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_queue.h>
+
+struct options {
+ bool count_packets;
+ int verbose;
+ unsigned int queue_num;
+ unsigned int timeout;
+};
+
+static unsigned int queue_stats[5];
+static struct options opts;
+
+static void help(const char *p)
+{
+ printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num]\n", p);
+}
+
+static int parse_attr_cb(const struct nlattr *attr, void *data)
+{
+ const struct nlattr **tb = data;
+ int type = mnl_attr_get_type(attr);
+
+ /* skip unsupported attribute in user-space */
+ if (mnl_attr_type_valid(attr, NFQA_MAX) < 0)
+ return MNL_CB_OK;
+
+ switch (type) {
+ case NFQA_MARK:
+ case NFQA_IFINDEX_INDEV:
+ case NFQA_IFINDEX_OUTDEV:
+ case NFQA_IFINDEX_PHYSINDEV:
+ case NFQA_IFINDEX_PHYSOUTDEV:
+ if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) {
+ perror("mnl_attr_validate");
+ return MNL_CB_ERROR;
+ }
+ break;
+ case NFQA_TIMESTAMP:
+ if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
+ sizeof(struct nfqnl_msg_packet_timestamp)) < 0) {
+ perror("mnl_attr_validate2");
+ return MNL_CB_ERROR;
+ }
+ break;
+ case NFQA_HWADDR:
+ if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
+ sizeof(struct nfqnl_msg_packet_hw)) < 0) {
+ perror("mnl_attr_validate2");
+ return MNL_CB_ERROR;
+ }
+ break;
+ case NFQA_PAYLOAD:
+ break;
+ }
+ tb[type] = attr;
+ return MNL_CB_OK;
+}
+
+static int queue_cb(const struct nlmsghdr *nlh, void *data)
+{
+ struct nlattr *tb[NFQA_MAX+1] = { 0 };
+ struct nfqnl_msg_packet_hdr *ph = NULL;
+ uint32_t id = 0;
+
+ (void)data;
+
+ mnl_attr_parse(nlh, sizeof(struct nfgenmsg), parse_attr_cb, tb);
+ if (tb[NFQA_PACKET_HDR]) {
+ ph = mnl_attr_get_payload(tb[NFQA_PACKET_HDR]);
+ id = ntohl(ph->packet_id);
+
+ if (opts.verbose > 0)
+ printf("packet hook=%u, hwproto 0x%x",
+ ntohs(ph->hw_protocol), ph->hook);
+
+ if (ph->hook >= 5) {
+ fprintf(stderr, "Unknown hook %d\n", ph->hook);
+ return MNL_CB_ERROR;
+ }
+
+ if (opts.verbose > 0) {
+ uint32_t skbinfo = 0;
+
+ if (tb[NFQA_SKB_INFO])
+ skbinfo = ntohl(mnl_attr_get_u32(tb[NFQA_SKB_INFO]));
+ if (skbinfo & NFQA_SKB_CSUMNOTREADY)
+ printf(" csumnotready");
+ if (skbinfo & NFQA_SKB_GSO)
+ printf(" gso");
+ if (skbinfo & NFQA_SKB_CSUM_NOTVERIFIED)
+ printf(" csumnotverified");
+ puts("");
+ }
+
+ if (opts.count_packets)
+ queue_stats[ph->hook]++;
+ }
+
+ return MNL_CB_OK + id;
+}
+
+static struct nlmsghdr *
+nfq_build_cfg_request(char *buf, uint8_t command, int queue_num)
+{
+ struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
+ struct nfqnl_msg_config_cmd cmd = {
+ .command = command,
+ .pf = htons(AF_INET),
+ };
+ struct nfgenmsg *nfg;
+
+ nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG;
+ nlh->nlmsg_flags = NLM_F_REQUEST;
+
+ nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
+
+ nfg->nfgen_family = AF_UNSPEC;
+ nfg->version = NFNETLINK_V0;
+ nfg->res_id = htons(queue_num);
+
+ mnl_attr_put(nlh, NFQA_CFG_CMD, sizeof(cmd), &cmd);
+
+ return nlh;
+}
+
+static struct nlmsghdr *
+nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num)
+{
+ struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
+ struct nfqnl_msg_config_params params = {
+ .copy_range = htonl(range),
+ .copy_mode = mode,
+ };
+ struct nfgenmsg *nfg;
+
+ nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG;
+ nlh->nlmsg_flags = NLM_F_REQUEST;
+
+ nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
+ nfg->nfgen_family = AF_UNSPEC;
+ nfg->version = NFNETLINK_V0;
+ nfg->res_id = htons(queue_num);
+
+ mnl_attr_put(nlh, NFQA_CFG_PARAMS, sizeof(params), ¶ms);
+
+ return nlh;
+}
+
+static struct nlmsghdr *
+nfq_build_verdict(char *buf, int id, int queue_num, int verd)
+{
+ struct nfqnl_msg_verdict_hdr vh = {
+ .verdict = htonl(verd),
+ .id = htonl(id),
+ };
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfg;
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_VERDICT;
+ nlh->nlmsg_flags = NLM_F_REQUEST;
+ nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
+ nfg->nfgen_family = AF_UNSPEC;
+ nfg->version = NFNETLINK_V0;
+ nfg->res_id = htons(queue_num);
+
+ mnl_attr_put(nlh, NFQA_VERDICT_HDR, sizeof(vh), &vh);
+
+ return nlh;
+}
+
+static void print_stats(void)
+{
+ unsigned int last, total;
+ int i;
+
+ if (!opts.count_packets)
+ return;
+
+ total = 0;
+ last = queue_stats[0];
+
+ for (i = 0; i < 5; i++) {
+ printf("hook %d packets %08u\n", i, queue_stats[i]);
+ last = queue_stats[i];
+ total += last;
+ }
+
+ printf("%u packets total\n", total);
+}
+
+struct mnl_socket *open_queue(void)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ unsigned int queue_num;
+ struct mnl_socket *nl;
+ struct nlmsghdr *nlh;
+ struct timeval tv;
+ uint32_t flags;
+
+ nl = mnl_socket_open(NETLINK_NETFILTER);
+ if (nl == NULL) {
+ perror("mnl_socket_open");
+ exit(EXIT_FAILURE);
+ }
+
+ if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) {
+ perror("mnl_socket_bind");
+ exit(EXIT_FAILURE);
+ }
+
+ queue_num = opts.queue_num;
+ nlh = nfq_build_cfg_request(buf, NFQNL_CFG_CMD_BIND, queue_num);
+
+ if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+ perror("mnl_socket_sendto");
+ exit(EXIT_FAILURE);
+ }
+
+ nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num);
+
+ flags = NFQA_CFG_F_GSO | NFQA_CFG_F_UID_GID;
+ mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
+ mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
+
+ if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+ perror("mnl_socket_sendto");
+ exit(EXIT_FAILURE);
+ }
+
+ memset(&tv, 0, sizeof(tv));
+ tv.tv_sec = opts.timeout;
+ if (opts.timeout && setsockopt(mnl_socket_get_fd(nl),
+ SOL_SOCKET, SO_RCVTIMEO,
+ &tv, sizeof(tv))) {
+ perror("setsockopt(SO_RCVTIMEO)");
+ exit(EXIT_FAILURE);
+ }
+
+ return nl;
+}
+
+static int mainloop(void)
+{
+ unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE;
+ struct mnl_socket *nl;
+ struct nlmsghdr *nlh;
+ unsigned int portid;
+ char *buf;
+ int ret;
+
+ buf = malloc(buflen);
+ if (!buf) {
+ perror("malloc");
+ exit(EXIT_FAILURE);
+ }
+
+ nl = open_queue();
+ portid = mnl_socket_get_portid(nl);
+
+ for (;;) {
+ uint32_t id;
+
+ ret = mnl_socket_recvfrom(nl, buf, buflen);
+ if (ret == -1) {
+ if (errno == ENOBUFS)
+ continue;
+
+ if (errno == EAGAIN) {
+ errno = 0;
+ ret = 0;
+ break;
+ }
+
+ perror("mnl_socket_recvfrom");
+ exit(EXIT_FAILURE);
+ }
+
+ ret = mnl_cb_run(buf, ret, 0, portid, queue_cb, NULL);
+ if (ret < 0) {
+ perror("mnl_cb_run");
+ exit(EXIT_FAILURE);
+ }
+
+ id = ret - MNL_CB_OK;
+ nlh = nfq_build_verdict(buf, id, opts.queue_num, NF_ACCEPT);
+ if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+ perror("mnl_socket_sendto");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ mnl_socket_close(nl);
+
+ return ret;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "chvt:q:")) != -1) {
+ switch (c) {
+ case 'c':
+ opts.count_packets = true;
+ break;
+ case 'h':
+ help(argv[0]);
+ exit(0);
+ break;
+ case 'q':
+ opts.queue_num = atoi(optarg);
+ if (opts.queue_num > 0xffff)
+ opts.queue_num = 0;
+ break;
+ case 't':
+ opts.timeout = atoi(optarg);
+ break;
+ case 'v':
+ opts.verbose++;
+ break;
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ int ret;
+
+ parse_opts(argc, argv);
+
+ ret = mainloop();
+ if (opts.count_packets)
+ print_stats();
+
+ return ret;
+}
diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh
index aca21dd..5a4938d 100755
--- a/tools/testing/selftests/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/netfilter/nft_concat_range.sh
@@ -13,11 +13,12 @@
KSELFTEST_SKIP=4
# Available test groups:
+# - reported_issues: check for issues that were reported in the past
# - correctness: check that packets match given entries, and only those
# - concurrency: attempt races between insertion, deletion and lookup
# - timeout: check that packets match entries until they expire
# - performance: estimate matching rate, compare with rbtree and hash baselines
-TESTS="correctness concurrency timeout"
+TESTS="reported_issues correctness concurrency timeout"
[ "${quicktest}" != "1" ] && TESTS="${TESTS} performance"
# Set types, defined by TYPE_ variables below
@@ -25,6 +26,9 @@
net_port_net net_mac net_mac_icmp net6_mac_icmp net6_port_net6_port
net_port_mac_proto_net"
+# Reported bugs, also described by TYPE_ variables below
+BUGS="flush_remove_add"
+
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
@@ -327,6 +331,12 @@
perf_duration 0
"
+# Definition of tests for bugs reported in the past:
+# display display text for test report
+TYPE_flush_remove_add="
+display Add two elements, flush, re-add
+"
+
# Set template for all tests, types and rules are filled in depending on test
set_template='
flush ruleset
@@ -440,6 +450,8 @@
# Check that at least one of the needed tools is available
check_tools() {
+ [ -z "${tools}" ] && return 0
+
__tools=
for tool in ${tools}; do
if [ "${tool}" = "nc" ] && [ "${proto}" = "udp6" ] && \
@@ -1025,7 +1037,7 @@
add() {
if ! nft add element inet filter test "${1}"; then
err "Failed to add ${1} given ruleset:"
- err "$(nft list ruleset -a)"
+ err "$(nft -a list ruleset)"
return 1
fi
}
@@ -1045,7 +1057,7 @@
add_perf_norange() {
if ! nft add element netdev perf norange "${1}"; then
err "Failed to add ${1} given ruleset:"
- err "$(nft list ruleset -a)"
+ err "$(nft -a list ruleset)"
return 1
fi
}
@@ -1054,7 +1066,7 @@
add_perf_noconcat() {
if ! nft add element netdev perf noconcat "${1}"; then
err "Failed to add ${1} given ruleset:"
- err "$(nft list ruleset -a)"
+ err "$(nft -a list ruleset)"
return 1
fi
}
@@ -1063,7 +1075,7 @@
del() {
if ! nft delete element inet filter test "${1}"; then
err "Failed to delete ${1} given ruleset:"
- err "$(nft list ruleset -a)"
+ err "$(nft -a list ruleset)"
return 1
fi
}
@@ -1134,7 +1146,7 @@
err " $(for f in ${src}; do
eval format_\$f "${2}"; printf ' '; done)"
err "should have matched ruleset:"
- err "$(nft list ruleset -a)"
+ err "$(nft -a list ruleset)"
return 1
fi
nft reset counter inet filter test >/dev/null
@@ -1160,7 +1172,7 @@
err " $(for f in ${src}; do
eval format_\$f "${2}"; printf ' '; done)"
err "should not have matched ruleset:"
- err "$(nft list ruleset -a)"
+ err "$(nft -a list ruleset)"
return 1
fi
}
@@ -1430,6 +1442,23 @@
kill "${perf_pid}"
}
+test_bug_flush_remove_add() {
+ set_cmd='{ set s { type ipv4_addr . inet_service; flags interval; }; }'
+ elem1='{ 10.0.0.1 . 22-25, 10.0.0.1 . 10-20 }'
+ elem2='{ 10.0.0.1 . 10-20, 10.0.0.1 . 22-25 }'
+ for i in `seq 1 100`; do
+ nft add table t ${set_cmd} || return ${KSELFTEST_SKIP}
+ nft add element t s ${elem1} 2>/dev/null || return 1
+ nft flush set t s 2>/dev/null || return 1
+ nft add element t s ${elem2} 2>/dev/null || return 1
+ done
+ nft flush ruleset
+}
+
+test_reported_issues() {
+ eval test_bug_"${subtest}"
+}
+
# Run everything in a separate network namespace
[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
tmp="$(mktemp)"
@@ -1438,9 +1467,15 @@
# Entry point for test runs
passed=0
for name in ${TESTS}; do
- printf "TEST: %s\n" "${name}"
- for type in ${TYPES}; do
- eval desc=\$TYPE_"${type}"
+ printf "TEST: %s\n" "$(echo ${name} | tr '_' ' ')"
+ if [ "${name}" = "reported_issues" ]; then
+ SUBTESTS="${BUGS}"
+ else
+ SUBTESTS="${TYPES}"
+ fi
+
+ for subtest in ${SUBTESTS}; do
+ eval desc=\$TYPE_"${subtest}"
IFS='
'
for __line in ${desc}; do
diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh
new file mode 100755
index 0000000..6898448
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_queue.sh
@@ -0,0 +1,332 @@
+#!/bin/bash
+#
+# This tests nf_queue:
+# 1. can process packets from all hooks
+# 2. support running nfqueue from more than one base chain
+#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+nsrouter="nsrouter-$sfx"
+
+cleanup()
+{
+ ip netns del ${ns1}
+ ip netns del ${ns2}
+ ip netns del ${nsrouter}
+ rm -f "$TMPFILE0"
+ rm -f "$TMPFILE1"
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip netns add ${nsrouter}
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace"
+ exit $ksft_skip
+fi
+
+TMPFILE0=$(mktemp)
+TMPFILE1=$(mktemp)
+trap cleanup EXIT
+
+ip netns add ${ns1}
+ip netns add ${ns2}
+
+ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2}
+
+ip -net ${nsrouter} link set lo up
+ip -net ${nsrouter} link set veth0 up
+ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0
+ip -net ${nsrouter} addr add dead:1::1/64 dev veth0
+
+ip -net ${nsrouter} link set veth1 up
+ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1
+ip -net ${nsrouter} addr add dead:2::1/64 dev veth1
+
+ip -net ${ns1} link set lo up
+ip -net ${ns1} link set eth0 up
+
+ip -net ${ns2} link set lo up
+ip -net ${ns2} link set eth0 up
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+ip -net ${ns1} route add default via 10.0.1.1
+ip -net ${ns1} route add default via dead:1::1
+
+ip -net ${ns2} addr add 10.0.2.99/24 dev eth0
+ip -net ${ns2} addr add dead:2::99/64 dev eth0
+ip -net ${ns2} route add default via 10.0.2.1
+ip -net ${ns2} route add default via dead:2::1
+
+load_ruleset() {
+ local name=$1
+ local prio=$2
+
+ip netns exec ${nsrouter} nft -f - <<EOF
+table inet $name {
+ chain nfq {
+ ip protocol icmp queue bypass
+ icmpv6 type { "echo-request", "echo-reply" } queue num 1 bypass
+ }
+ chain pre {
+ type filter hook prerouting priority $prio; policy accept;
+ jump nfq
+ }
+ chain input {
+ type filter hook input priority $prio; policy accept;
+ jump nfq
+ }
+ chain forward {
+ type filter hook forward priority $prio; policy accept;
+ tcp dport 12345 queue num 2
+ jump nfq
+ }
+ chain output {
+ type filter hook output priority $prio; policy accept;
+ tcp dport 12345 queue num 3
+ jump nfq
+ }
+ chain post {
+ type filter hook postrouting priority $prio; policy accept;
+ jump nfq
+ }
+}
+EOF
+}
+
+load_counter_ruleset() {
+ local prio=$1
+
+ip netns exec ${nsrouter} nft -f - <<EOF
+table inet countrules {
+ chain pre {
+ type filter hook prerouting priority $prio; policy accept;
+ counter
+ }
+ chain input {
+ type filter hook input priority $prio; policy accept;
+ counter
+ }
+ chain forward {
+ type filter hook forward priority $prio; policy accept;
+ counter
+ }
+ chain output {
+ type filter hook output priority $prio; policy accept;
+ counter
+ }
+ chain post {
+ type filter hook postrouting priority $prio; policy accept;
+ counter
+ }
+}
+EOF
+}
+
+test_ping() {
+ ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null
+ if [ $? -ne 0 ];then
+ return 1
+ fi
+
+ ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null
+ if [ $? -ne 0 ];then
+ return 1
+ fi
+
+ return 0
+}
+
+test_ping_router() {
+ ip netns exec ${ns1} ping -c 1 -q 10.0.2.1 > /dev/null
+ if [ $? -ne 0 ];then
+ return 1
+ fi
+
+ ip netns exec ${ns1} ping -c 1 -q dead:2::1 > /dev/null
+ if [ $? -ne 0 ];then
+ return 1
+ fi
+
+ return 0
+}
+
+test_queue_blackhole() {
+ local proto=$1
+
+ip netns exec ${nsrouter} nft -f - <<EOF
+table $proto blackh {
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ queue num 600
+ }
+}
+EOF
+ if [ $proto = "ip" ] ;then
+ ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null
+ lret=$?
+ elif [ $proto = "ip6" ]; then
+ ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null
+ lret=$?
+ else
+ lret=111
+ fi
+
+ # queue without bypass keyword should drop traffic if no listener exists.
+ if [ $lret -eq 0 ];then
+ echo "FAIL: $proto expected failure, got $lret" 1>&2
+ exit 1
+ fi
+
+ ip netns exec ${nsrouter} nft delete table $proto blackh
+ if [ $? -ne 0 ] ;then
+ echo "FAIL: $proto: Could not delete blackh table"
+ exit 1
+ fi
+
+ echo "PASS: $proto: statement with no listener results in packet drop"
+}
+
+test_queue()
+{
+ local expected=$1
+ local last=""
+
+ # spawn nf-queue listeners
+ ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t 3 > "$TMPFILE0" &
+ ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t 3 > "$TMPFILE1" &
+ sleep 1
+ test_ping
+ ret=$?
+ if [ $ret -ne 0 ];then
+ echo "FAIL: netns routing/connectivity with active listener on queue $queue: $ret" 1>&2
+ exit $ret
+ fi
+
+ test_ping_router
+ ret=$?
+ if [ $ret -ne 0 ];then
+ echo "FAIL: netns router unreachable listener on queue $queue: $ret" 1>&2
+ exit $ret
+ fi
+
+ wait
+ ret=$?
+
+ for file in $TMPFILE0 $TMPFILE1; do
+ last=$(tail -n1 "$file")
+ if [ x"$last" != x"$expected packets total" ]; then
+ echo "FAIL: Expected $expected packets total, but got $last" 1>&2
+ cat "$file" 1>&2
+
+ ip netns exec ${nsrouter} nft list ruleset
+ exit 1
+ fi
+ done
+
+ echo "PASS: Expected and received $last"
+}
+
+test_tcp_forward()
+{
+ ip netns exec ${nsrouter} ./nf-queue -q 2 -t 10 &
+ local nfqpid=$!
+
+ tmpfile=$(mktemp) || exit 1
+ dd conv=sparse status=none if=/dev/zero bs=1M count=100 of=$tmpfile
+ ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
+ local rpid=$!
+
+ sleep 1
+ ip netns exec ${ns1} nc -w 5 10.0.2.99 12345 <"$tmpfile" >/dev/null &
+
+ rm -f "$tmpfile"
+
+ wait $rpid
+ wait $lpid
+ [ $? -eq 0 ] && echo "PASS: tcp and nfqueue in forward chain"
+}
+
+test_tcp_localhost()
+{
+ tc -net "${nsrouter}" qdisc add dev lo root netem loss random 1%
+
+ tmpfile=$(mktemp) || exit 1
+
+ dd conv=sparse status=none if=/dev/zero bs=1M count=900 of=$tmpfile
+ ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
+ local rpid=$!
+
+ ip netns exec ${nsrouter} ./nf-queue -q 3 -t 30 &
+ local nfqpid=$!
+
+ sleep 1
+ ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null
+ rm -f "$tmpfile"
+
+ wait $rpid
+ [ $? -eq 0 ] && echo "PASS: tcp via loopback"
+}
+
+ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+load_ruleset "filter" 0
+
+sleep 3
+
+test_ping
+ret=$?
+if [ $ret -eq 0 ];then
+ # queue bypass works (rules were skipped, no listener)
+ echo "PASS: ${ns1} can reach ${ns2}"
+else
+ echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2
+ exit $ret
+fi
+
+test_queue_blackhole ip
+test_queue_blackhole ip6
+
+# dummy ruleset to add base chains between the
+# queueing rules. We don't want the second reinject
+# to re-execute the old hooks.
+load_counter_ruleset 10
+
+# we are hooking all: prerouting/input/forward/output/postrouting.
+# we ping ${ns2} from ${ns1} via ${nsrouter} using ipv4 and ipv6, so:
+# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply).
+# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply.
+# so we expect that userspace program receives 10 packets.
+test_queue 10
+
+# same. We queue to a second program as well.
+load_ruleset "filter2" 20
+test_queue 20
+
+test_tcp_forward
+test_tcp_localhost
+
+exit $ret
diff --git a/tools/testing/selftests/openat2/helpers.c b/tools/testing/selftests/openat2/helpers.c
index e9a6557..5074681 100644
--- a/tools/testing/selftests/openat2/helpers.c
+++ b/tools/testing/selftests/openat2/helpers.c
@@ -46,7 +46,7 @@ int sys_renameat2(int olddirfd, const char *oldpath,
int touchat(int dfd, const char *path)
{
- int fd = openat(dfd, path, O_CREAT);
+ int fd = openat(dfd, path, O_CREAT, 0700);
if (fd >= 0)
close(fd);
return fd;
diff --git a/tools/testing/selftests/openat2/resolve_test.c b/tools/testing/selftests/openat2/resolve_test.c
index 7a94b1d..bbafad4 100644
--- a/tools/testing/selftests/openat2/resolve_test.c
+++ b/tools/testing/selftests/openat2/resolve_test.c
@@ -230,7 +230,7 @@ void test_openat2_opath_tests(void)
{ .name = "[in_root] garbage link to /root",
.path = "cheeky/garbageself", .how.resolve = RESOLVE_IN_ROOT,
.out.path = "root", .pass = true },
- { .name = "[in_root] chainged garbage links to /root",
+ { .name = "[in_root] chained garbage links to /root",
.path = "abscheeky/garbageself", .how.resolve = RESOLVE_IN_ROOT,
.out.path = "root", .pass = true },
{ .name = "[in_root] relative path to 'root'",
diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore
index 3a779c0..39559d7 100644
--- a/tools/testing/selftests/pidfd/.gitignore
+++ b/tools/testing/selftests/pidfd/.gitignore
@@ -2,4 +2,5 @@
pidfd_poll_test
pidfd_test
pidfd_wait
+pidfd_fdinfo_test
pidfd_getfd_test
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
index d646953..2af9d39 100644
--- a/tools/testing/selftests/rseq/Makefile
+++ b/tools/testing/selftests/rseq/Makefile
@@ -4,7 +4,7 @@
CLANG_FLAGS += -no-integrated-as
endif
-CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ \
+CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L$(OUTPUT) -Wl,-rpath=./ \
$(CLANG_FLAGS)
LDLIBS += -lpthread
@@ -19,6 +19,8 @@
TEST_PROGS = run_param_test.sh
+TEST_FILES := settings
+
include ../lib.mk
$(OUTPUT)/librseq.so: rseq.c rseq.h rseq-*.h
diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile
index de9c856..55198ec 100644
--- a/tools/testing/selftests/rtc/Makefile
+++ b/tools/testing/selftests/rtc/Makefile
@@ -1,9 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -O3 -Wl,-no-as-needed -Wall
-LDFLAGS += -lrt -lpthread -lm
+LDLIBS += -lrt -lpthread -lm
TEST_GEN_PROGS = rtctest
TEST_GEN_PROGS_EXTENDED = setdate
+TEST_FILES := settings
+
include ../lib.mk
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index ee1b727..a9ad3bd8 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -212,6 +212,10 @@ struct seccomp_notif_sizes {
#define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
#endif
+#ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH
+#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
+#endif
+
#ifndef seccomp
int seccomp(unsigned int op, unsigned int flags, void *args)
{
@@ -2187,7 +2191,8 @@ TEST(detect_seccomp_filter_flags)
unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
SECCOMP_FILTER_FLAG_LOG,
SECCOMP_FILTER_FLAG_SPEC_ALLOW,
- SECCOMP_FILTER_FLAG_NEW_LISTENER };
+ SECCOMP_FILTER_FLAG_NEW_LISTENER,
+ SECCOMP_FILTER_FLAG_TSYNC_ESRCH };
unsigned int exclusive[] = {
SECCOMP_FILTER_FLAG_TSYNC,
SECCOMP_FILTER_FLAG_NEW_LISTENER };
@@ -2645,6 +2650,55 @@ TEST_F(TSYNC, two_siblings_with_one_divergence)
EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
}
+TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err)
+{
+ long ret, flags;
+ void *status;
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+ }
+ self->sibling[0].diverge = 1;
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ flags = SECCOMP_FILTER_FLAG_TSYNC | \
+ SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog);
+ ASSERT_EQ(ESRCH, errno) {
+ TH_LOG("Did not return ESRCH for diverged sibling.");
+ }
+ ASSERT_EQ(-1, ret) {
+ TH_LOG("Did not fail on diverged sibling.");
+ }
+
+ /* Wake the threads */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+
+ /* Ensure they are both unkilled. */
+ PTHREAD_JOIN(self->sibling[0].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+ PTHREAD_JOIN(self->sibling[1].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+}
+
TEST_F(TSYNC, two_siblings_not_under_filter)
{
long ret, sib;
@@ -3196,6 +3250,24 @@ TEST(user_notification_basic)
EXPECT_EQ(0, WEXITSTATUS(status));
}
+TEST(user_notification_with_tsync)
+{
+ int ret;
+ unsigned int flags;
+
+ /* these were exclusive */
+ flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
+ SECCOMP_FILTER_FLAG_TSYNC;
+ ASSERT_EQ(-1, user_trap_syscall(__NR_getppid, flags));
+ ASSERT_EQ(EINVAL, errno);
+
+ /* but now they're not */
+ flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
+ ret = user_trap_syscall(__NR_getppid, flags);
+ close(ret);
+ ASSERT_LE(0, ret);
+}
+
TEST(user_notification_kill_in_middle)
{
pid_t pid;
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 477bc61..c03af46 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -57,3 +57,4 @@
CONFIG_NET_IFE_SKBPRIO=m
CONFIG_NET_IFE_SKBTCINDEX=m
CONFIG_NET_SCH_FIFO=y
+CONFIG_NET_SCH_ETS=m
diff --git a/tools/testing/selftests/timens/Makefile b/tools/testing/selftests/timens/Makefile
index e9fb30b..b4fd9a9 100644
--- a/tools/testing/selftests/timens/Makefile
+++ b/tools/testing/selftests/timens/Makefile
@@ -2,6 +2,6 @@
TEST_GEN_PROGS_EXTENDED := gettime_perf
CFLAGS := -Wall -Werror -pthread
-LDFLAGS := -lrt -ldl
+LDLIBS := -lrt -ldl
include ../lib.mk
diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh
index 8155c2e..b630c7b 100755
--- a/tools/testing/selftests/tpm2/test_smoke.sh
+++ b/tools/testing/selftests/tpm2/test_smoke.sh
@@ -1,8 +1,17 @@
#!/bin/bash
# SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+self.flags = flags
-python -m unittest -v tpm2_tests.SmokeTest
-python -m unittest -v tpm2_tests.AsyncTest
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+
+if [ -f /dev/tpm0 ] ; then
+ python -m unittest -v tpm2_tests.SmokeTest
+ python -m unittest -v tpm2_tests.AsyncTest
+else
+ exit $ksft_skip
+fi
CLEAR_CMD=$(which tpm2_clear)
if [ -n $CLEAR_CMD ]; then
diff --git a/tools/testing/selftests/tpm2/test_space.sh b/tools/testing/selftests/tpm2/test_space.sh
index a6f5e34..180b469 100755
--- a/tools/testing/selftests/tpm2/test_space.sh
+++ b/tools/testing/selftests/tpm2/test_space.sh
@@ -1,4 +1,11 @@
#!/bin/bash
# SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
-python -m unittest -v tpm2_tests.SpaceTest
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if [ -f /dev/tpmrm0 ] ; then
+ python -m unittest -v tpm2_tests.SpaceTest
+else
+ exit $ksft_skip
+fi
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index a692ea82..f337148 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -112,6 +112,17 @@
echo " https://github.com/libhugetlbfs/libhugetlbfs.git for"
echo " hugetlb regression testing."
+echo "---------------------------"
+echo "running map_fixed_noreplace"
+echo "---------------------------"
+./map_fixed_noreplace
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
echo "-------------------"
echo "running userfaultfd"
echo "-------------------"
@@ -186,6 +197,17 @@
echo "[PASS]"
fi
+echo "-------------------------"
+echo "running mlock-random-test"
+echo "-------------------------"
+./mlock-random-test
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
echo "--------------------"
echo "running mlock2-tests"
echo "--------------------"
@@ -197,6 +219,17 @@
echo "[PASS]"
fi
+echo "-----------------"
+echo "running thuge-gen"
+echo "-----------------"
+./thuge-gen
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
if [ $VADDR64 -ne 0 ]; then
echo "-----------------------------"
echo "running virtual_address_range"
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index f5ab1cd..936e1ca 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -24,6 +24,7 @@
set -e
exec 3>&1
+export LANG=C
export WG_HIDE_KEYS=never
netns0="wg-test-$$-0"
netns1="wg-test-$$-1"
@@ -297,7 +298,17 @@
n1 ping -W 1 -c 100 -f 192.168.99.7
n1 ping -W 1 -c 100 -f abab::1111
+# Have ns2 NAT into wg0 packets from ns0, but return an icmp error along the right route.
+n2 iptables -t nat -A POSTROUTING -s 10.0.0.0/24 -d 192.168.241.0/24 -j SNAT --to 192.168.241.2
+n0 iptables -t filter -A INPUT \! -s 10.0.0.0/24 -i vethrs -j DROP # Manual rpfilter just to be explicit.
+n2 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward'
+ip0 -4 route add 192.168.241.1 via 10.0.0.100
+n2 wg set wg0 peer "$pub1" remove
+[[ $(! n0 ping -W 1 -c 1 192.168.241.1 || false) == *"From 10.0.0.100 icmp_seq=1 Destination Host Unreachable"* ]]
+
n0 iptables -t nat -F
+n0 iptables -t filter -F
+n2 iptables -t nat -F
ip0 link del vethrc
ip0 link del vethrs
ip1 link del wg0
@@ -516,11 +527,16 @@
n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75
n0 wg set wg0 peer "$pub2" allowed-ips ::/0
n0 wg set wg0 peer "$pub2" remove
-low_order_points=( AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38= )
-n0 wg set wg0 private-key /dev/null ${low_order_points[@]/#/peer }
-[[ -z $(n0 wg show wg0 peers) ]]
-n0 wg set wg0 private-key <(echo "$key1") ${low_order_points[@]/#/peer }
-[[ -z $(n0 wg show wg0 peers) ]]
+for low_order_point in AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38=; do
+ n0 wg set wg0 peer "$low_order_point" persistent-keepalive 1 endpoint 127.0.0.1:1111
+done
+[[ -n $(n0 wg show wg0 peers) ]]
+exec 4< <(n0 ncat -l -u -p 1111)
+ncat_pid=$!
+waitncatudp $netns0 $ncat_pid
+ip0 link set wg0 up
+! read -r -n 1 -t 2 <&4 || false
+kill $ncat_pid
ip0 link del wg0
declare -A objects
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index f10aa35..90598a4 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -38,19 +38,17 @@
define file_download =
$(DISTFILES_PATH)/$(1):
mkdir -p $(DISTFILES_PATH)
- flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp'
- if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi
+ flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
endef
-$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3))
-$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81))
+$(eval $(call tar_download,MUSL,musl,1.2.0,.tar.gz,https://musl.libc.org/releases/,c6de7b191139142d3f9a7b5b702c9cae1b5ee6e7f57e582da9328629408fd4e8))
$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae))
$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
$(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
-$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,aa8af0fdc9872d369d8c890a84dbc2a2466b55795dccd5b47721b2d97644b04f))
+$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64))
KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
@@ -295,21 +293,13 @@
$(MAKE) -C $(IPERF_PATH)
$(STRIP) -s $@
-$(LIBMNL_PATH)/.installed: $(LIBMNL_TAR)
- flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
- touch $@
-
-$(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS)
- cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared
- $(MAKE) -C $(LIBMNL_PATH)
- sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc
-
$(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR)
+ mkdir -p $(BUILD_PATH)
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
touch $@
-$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg
+$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(USERSPACE_DEPS)
+ $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src wg
$(STRIP) -s $@
$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS)
@@ -340,17 +330,17 @@
$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR)
mkdir -p $(BUILD_PATH)
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
- printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk
+ printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS\n' > $(IPROUTE2_PATH)/config.mk
printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
touch $@
-$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip
- $(STRIP) -s $(IPROUTE2_PATH)/ip/ip
+$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS)
+ $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip
+ $(STRIP) -s $@
-$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss
- $(STRIP) -s $(IPROUTE2_PATH)/misc/ss
+$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS)
+ $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss
+ $(STRIP) -s $@
$(IPTABLES_PATH)/.installed: $(IPTABLES_TAR)
mkdir -p $(BUILD_PATH)
@@ -358,8 +348,8 @@
sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure
touch $@
-$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
- cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include
+$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(USERSPACE_DEPS)
+ cd $(IPTABLES_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --disable-connlabel --with-kernel=$(BUILD_PATH)/include
$(MAKE) -C $(IPTABLES_PATH)
$(STRIP) -s $@
diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
index 90bc981..c969812 100644
--- a/tools/testing/selftests/wireguard/qemu/init.c
+++ b/tools/testing/selftests/wireguard/qemu/init.c
@@ -13,7 +13,6 @@
#include <fcntl.h>
#include <sys/wait.h>
#include <sys/mount.h>
-#include <sys/types.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/io.h>
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index af9323a..d531de1 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -56,7 +56,6 @@
CONFIG_NO_HZ_FULL=n
CONFIG_HZ_PERIODIC=n
CONFIG_HIGH_RES_TIMERS=y
-CONFIG_COMPAT_32BIT_TIME=y
CONFIG_ARCH_RANDOM=y
CONFIG_FILE_LOCKING=y
CONFIG_POSIX_TIMERS=y
diff --git a/usr/Kconfig b/usr/Kconfig
index bdf5bbd..96afb03 100644
--- a/usr/Kconfig
+++ b/usr/Kconfig
@@ -124,17 +124,6 @@
If in doubt, select 'None'
-config INITRAMFS_COMPRESSION_NONE
- bool "None"
- help
- Do not compress the built-in initramfs at all. This may sound wasteful
- in space, but, you should be aware that the built-in initramfs will be
- compressed at a later stage anyways along with the rest of the kernel,
- on those architectures that support this. However, not compressing the
- initramfs may lead to slightly higher memory consumption during a
- short time at boot, while both the cpio image and the unpacked
- filesystem image will be present in memory simultaneously
-
config INITRAMFS_COMPRESSION_GZIP
bool "Gzip"
depends on RD_GZIP
@@ -207,4 +196,15 @@
If you choose this, keep in mind that most distros don't provide lz4
by default which could cause a build failure.
+config INITRAMFS_COMPRESSION_NONE
+ bool "None"
+ help
+ Do not compress the built-in initramfs at all. This may sound wasteful
+ in space, but, you should be aware that the built-in initramfs will be
+ compressed at a later stage anyways along with the rest of the kernel,
+ on those architectures that support this. However, not compressing the
+ initramfs may lead to slightly higher memory consumption during a
+ short time at boot, while both the cpio image and the unpacked
+ filesystem image will be present in memory simultaneously
+
endchoice
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index d65a0fa..eda7b62 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -742,9 +742,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
guest_enter_irqoff();
if (has_vhe()) {
- kvm_arm_vhe_guest_enter();
ret = kvm_vcpu_run_vhe(vcpu);
- kvm_arm_vhe_guest_exit();
} else {
ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu);
}
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h
index 204d210..cc94ccc 100644
--- a/virt/kvm/arm/trace.h
+++ b/virt/kvm/arm/trace.h
@@ -4,6 +4,7 @@
#include <kvm/arm_arch_timer.h>
#include <linux/tracepoint.h>
+#include <asm/kvm_arm.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index d656ebd..97fb2a4 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -179,18 +179,6 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
return value;
}
-/*
- * This function will return the VCPU that performed the MMIO access and
- * trapped from within the VM, and will return NULL if this is a userspace
- * access.
- *
- * We can disable preemption locally around accessing the per-CPU variable,
- * and use the resolved vcpu pointer after enabling preemption again, because
- * even if the current thread is migrated to another CPU, reading the per-CPU
- * value later will give us the same value as we update the per-CPU variable
- * in the preempt notifier handlers.
- */
-
/* Must be called with irq->irq_lock held */
static void vgic_hw_irq_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
bool is_uaccess)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 67ae2d5..70f03ce 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4409,12 +4409,22 @@ static void kvm_sched_out(struct preempt_notifier *pn,
/**
* kvm_get_running_vcpu - get the vcpu running on the current CPU.
- * Thanks to preempt notifiers, this can also be called from
- * preemptible context.
+ *
+ * We can disable preemption locally around accessing the per-CPU variable,
+ * and use the resolved vcpu pointer after enabling preemption again,
+ * because even if the current thread is migrated to another CPU, reading
+ * the per-CPU value later will give us the same value as we update the
+ * per-CPU variable in the preempt notifier handlers.
*/
struct kvm_vcpu *kvm_get_running_vcpu(void)
{
- return __this_cpu_read(kvm_running_vcpu);
+ struct kvm_vcpu *vcpu;
+
+ preempt_disable();
+ vcpu = __this_cpu_read(kvm_running_vcpu);
+ preempt_enable();
+
+ return vcpu;
}
/**