Merge tag 'fs.idmapped.v5.16-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux
Pull setattr idmapping fix from Christian Brauner:
"This contains a simple fix for setattr. When determining the validity
of the attributes the ia_{g,u}id fields contain the value that will be
written to inode->i_{g,u}id. When the {g,u}id attribute of the file
isn't altered and the caller's fs{g,u}id matches the current {g,u}id
attribute the attribute change is allowed.
The value in ia_{g,u}id already accounts for idmapped mounts and
will have taken the relevant idmapping into account. So in order to
verify that the {g,u}id attribute isn't changed we simply need to
compare the ia_{g,u}id value against the inode's i_{g,u}id value (a
minimal sketch of this check follows the shortlog below).
This only has meaning for idmapped mounts as the idmapping helpers are
idempotent without them. And for idmapped mounts it really only
matters when circular idmappings are used, i.e. mappings where e.g.
id 1000 is mapped to id 1001 and id 1001 is mapped to id 1000. Such
circular mappings can e.g. be useful when sharing the same home
directory between multiple users at the same time.
Before this patch we could end up denying legitimate attribute changes
and allowing invalid attribute changes when circular mappings are
used. To even get into this situation the caller must've been
privileged both to create that mapping and to create that idmapped
mount.
This hasn't been seen in the wild anywhere but came up when expanding
the fstest suite during work on a series of hardening patches. All
idmapped fstests pass without any regressions and we're adding new
tests to verify the behavior of circular mappings.
The new tests can be found at [1]"
Link: https://lore.kernel.org/linux-fsdevel/20211109145713.1868404-2-brauner@kernel.org [1]
* tag 'fs.idmapped.v5.16-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux:
fs: handle circular mappings correctly
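
To make the idea above concrete, here is a minimal, hypothetical C sketch
of the unchanged-{g,u}id check; it is not the code from the patch below,
and the helper name is made up. The point is that ia_uid has already been
mapped into the filesystem's view, so it must be compared against i_uid
directly rather than being pushed through the idmapping again, which would
flip ids under a circular mapping:

    #include <linux/fs.h>
    #include <linux/uidgid.h>

    /*
     * Hypothetical sketch, not the actual setattr code. With a circular
     * mapping (1000 -> 1001, 1001 -> 1000), mapping an id a second time
     * flips it back, so re-mapping either side of this comparison could
     * wrongly report a change (or wrongly report no change). Because
     * attr->ia_uid is already expressed in the inode's view, a direct
     * comparison is the correct test.
     */
    static bool uid_attribute_unchanged(const struct iattr *attr,
                                        const struct inode *inode)
    {
            return uid_eq(attr->ia_uid, inode->i_uid);
    }
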
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 4261620..0e486f4 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1099,7 +1099,7 @@
===============
Enables/disables task delay accounting (see
-:doc:`accounting/delay-accounting.rst`). Enabling this feature incurs
+Documentation/accounting/delay-accounting.rst. Enabling this feature incurs
a small amount of overhead in the scheduler but is useful for debugging
and performance tuning. It is required by some tools such as iotop.
diff --git a/Documentation/arm/marvell.rst b/Documentation/arm/marvell.rst
index 8323c79..9485a5a 100644
--- a/Documentation/arm/marvell.rst
+++ b/Documentation/arm/marvell.rst
@@ -104,6 +104,8 @@
Not supported by the Linux kernel.
+ Homepage:
+ https://web.archive.org/web/20110924171043/http://www.marvell.com/embedded-processors/discovery-innovation/
Core:
Feroceon 88fr571-vd ARMv5 compatible
@@ -120,6 +122,7 @@
- 88F6707
- 88F6W11
+ - Product infos: https://web.archive.org/web/20141002083258/http://www.marvell.com/embedded-processors/armada-370/
- Product Brief: https://web.archive.org/web/20121115063038/http://www.marvell.com/embedded-processors/armada-300/assets/Marvell_ARMADA_370_SoC.pdf
- Hardware Spec: https://web.archive.org/web/20140617183747/http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-datasheet.pdf
- Functional Spec: https://web.archive.org/web/20140617183701/http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-FunctionalSpec-datasheet.pdf
@@ -127,9 +130,29 @@
Core:
Sheeva ARMv7 compatible PJ4B
+ Armada XP Flavors:
+ - MV78230
+ - MV78260
+ - MV78460
+
+ NOTE:
+ not to be confused with the non-SMP 78xx0 SoCs
+
+ - Product infos: https://web.archive.org/web/20150101215721/http://www.marvell.com/embedded-processors/armada-xp/
+ - Product Brief: https://web.archive.org/web/20121021173528/http://www.marvell.com/embedded-processors/armada-xp/assets/Marvell-ArmadaXP-SoC-product%20brief.pdf
+ - Functional Spec: https://web.archive.org/web/20180829171131/http://www.marvell.com/embedded-processors/armada-xp/assets/ARMADA-XP-Functional-SpecDatasheet.pdf
+ - Hardware Specs:
+ - https://web.archive.org/web/20141127013651/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78230_OS.PDF
+ - https://web.archive.org/web/20141222000224/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78260_OS.PDF
+ - https://web.archive.org/web/20141222000230/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78460_OS.PDF
+
+ Core:
+ Sheeva ARMv7 compatible Dual-core or Quad-core PJ4B-MP
+
Armada 375 Flavors:
- 88F6720
+ - Product infos: https://web.archive.org/web/20140108032402/http://www.marvell.com/embedded-processors/armada-375/
- Product Brief: https://web.archive.org/web/20131216023516/http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA_375_SoC-01_product_brief.pdf
Core:
@@ -162,29 +185,6 @@
Core:
ARM Cortex-A9
- Armada XP Flavors:
- - MV78230
- - MV78260
- - MV78460
-
- NOTE:
- not to be confused with the non-SMP 78xx0 SoCs
-
- Product Brief:
- https://web.archive.org/web/20121021173528/http://www.marvell.com/embedded-processors/armada-xp/assets/Marvell-ArmadaXP-SoC-product%20brief.pdf
-
- Functional Spec:
- https://web.archive.org/web/20180829171131/http://www.marvell.com/embedded-processors/armada-xp/assets/ARMADA-XP-Functional-SpecDatasheet.pdf
-
- - Hardware Specs:
-
- - https://web.archive.org/web/20141127013651/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78230_OS.PDF
- - https://web.archive.org/web/20141222000224/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78260_OS.PDF
- - https://web.archive.org/web/20141222000230/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78460_OS.PDF
-
- Core:
- Sheeva ARMv7 compatible Dual-core or Quad-core PJ4B-MP
-
Linux kernel mach directory:
arch/arm/mach-mvebu
Linux kernel plat directory:
@@ -436,7 +436,7 @@
- Flavors:
- 88DE3010, Armada 1000 (no Linux support)
- Core: Marvell PJ1 (ARMv5TE), Dual-core
- - Product Brief: http://www.marvell.com.cn/digital-entertainment/assets/armada_1000_pb.pdf
+ - Product Brief: https://web.archive.org/web/20131103162620/http://www.marvell.com/digital-entertainment/assets/armada_1000_pb.pdf
- 88DE3005, Armada 1500 Mini
- Design name: BG2CD
- Core: ARM Cortex-A9, PL310 L2CC
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index 37f273a..610450f 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -15,7 +15,7 @@
libbpf
======
-Documentation/bpf/libbpf/libbpf.rst is a userspace library for loading and interacting with bpf programs.
+Documentation/bpf/libbpf/index.rst is a userspace library for loading and interacting with bpf programs.
BPF Type Format (BTF)
=====================
diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst
index ec3e71f..e445cb1 100644
--- a/Documentation/doc-guide/sphinx.rst
+++ b/Documentation/doc-guide/sphinx.rst
@@ -27,7 +27,7 @@
==============
The ReST markups currently used by the Documentation/ files are meant to be
-built with ``Sphinx`` version 1.3 or higher.
+built with ``Sphinx`` version 1.7 or higher.
There's a script that checks for the Sphinx requirements. Please see
:ref:`sphinx-pre-install` for further details.
@@ -43,10 +43,6 @@
.. note::
- #) Sphinx versions below 1.5 don't work properly with Python's
- docutils version 0.13.1 or higher. So, if you're willing to use
- those versions, you should run ``pip install 'docutils==0.12'``.
-
#) It is recommended to use the RTD theme for html output. Depending
on the Sphinx version, it should be installed separately,
with ``pip install sphinx_rtd_theme``.
@@ -55,13 +51,13 @@
those expressions are written using LaTeX notation. It needs texlive
installed with amsfonts and amsmath in order to evaluate them.
-In summary, if you want to install Sphinx version 1.7.9, you should do::
+In summary, if you want to install Sphinx version 2.4.4, you should do::
- $ virtualenv sphinx_1.7.9
- $ . sphinx_1.7.9/bin/activate
- (sphinx_1.7.9) $ pip install -r Documentation/sphinx/requirements.txt
+ $ virtualenv sphinx_2.4.4
+ $ . sphinx_2.4.4/bin/activate
+ (sphinx_2.4.4) $ pip install -r Documentation/sphinx/requirements.txt
-After running ``. sphinx_1.7.9/bin/activate``, the prompt will change,
+After running ``. sphinx_2.4.4/bin/activate``, the prompt will change,
in order to indicate that you're using the new environment. If you
open a new shell, you need to rerun this command to enter again at
the virtual environment before building the documentation.
@@ -81,7 +77,7 @@
PDF and LaTeX builds
--------------------
-Such builds are currently supported only with Sphinx versions 1.4 and higher.
+Such builds are currently supported only with Sphinx versions 2.4 and higher.
For PDF and LaTeX output, you'll also need ``XeLaTeX`` version 3.14159265.
@@ -104,8 +100,8 @@
You should run:
sudo dnf install -y texlive-luatex85
- /usr/bin/virtualenv sphinx_1.7.9
- . sphinx_1.7.9/bin/activate
+ /usr/bin/virtualenv sphinx_2.4.4
+ . sphinx_2.4.4/bin/activate
pip install -r Documentation/sphinx/requirements.txt
Can't build as 1 mandatory dependency is missing at ./scripts/sphinx-pre-install line 468.
diff --git a/Documentation/filesystems/autofs.rst b/Documentation/filesystems/autofs.rst
index 681c6a4..4f49027 100644
--- a/Documentation/filesystems/autofs.rst
+++ b/Documentation/filesystems/autofs.rst
@@ -35,7 +35,7 @@
required with any user-space program. Subsequent text refers to this
as the "automount daemon" or simply "the daemon".
-"autofs" is a Linux kernel module with provides the "autofs"
+"autofs" is a Linux kernel module which provides the "autofs"
filesystem type. Several "autofs" filesystems can be mounted and they
can each be managed separately, or all managed by the same daemon.
diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index e35ab74..b398b85 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -54,7 +54,7 @@
iptables 1.4.2 iptables -V
openssl & libcrypto 1.0.0 openssl version
bc 1.06.95 bc --version
-Sphinx\ [#f1]_ 1.3 sphinx-build --version
+Sphinx\ [#f1]_ 1.7 sphinx-build --version
====================== =============== ========================================
.. [#f1] Sphinx is needed only to build the Kernel documentation
diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
index a0cc969..da085d6 100644
--- a/Documentation/process/submitting-patches.rst
+++ b/Documentation/process/submitting-patches.rst
@@ -22,8 +22,8 @@
easier.
Some subsystems and maintainer trees have additional information about
-their workflow and expectations, see :ref:`Documentation/process/maintainer
-handbooks <maintainer_handbooks_main>`.
+their workflow and expectations, see
+:ref:`Documentation/process/maintainer-handbooks.rst <maintainer_handbooks_main>`.
Obtain a current source tree
----------------------------
diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index 4e5b26f..b3166c4 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -2442,11 +2442,10 @@
#!/bin/bash
tracefs=`sed -ne 's/^tracefs \(.*\) tracefs.*/\1/p' /proc/mounts`
- echo nop > $tracefs/tracing/current_tracer
- echo 0 > $tracefs/tracing/tracing_on
- echo $$ > $tracefs/tracing/set_ftrace_pid
- echo function > $tracefs/tracing/current_tracer
- echo 1 > $tracefs/tracing/tracing_on
+ echo 0 > $tracefs/tracing_on
+ echo $$ > $tracefs/set_ftrace_pid
+ echo function > $tracefs/current_tracer
+ echo 1 > $tracefs/tracing_on
exec "$@"
diff --git a/Documentation/translations/it_IT/doc-guide/sphinx.rst b/Documentation/translations/it_IT/doc-guide/sphinx.rst
index 0046d75..9762452 100644
--- a/Documentation/translations/it_IT/doc-guide/sphinx.rst
+++ b/Documentation/translations/it_IT/doc-guide/sphinx.rst
@@ -35,7 +35,7 @@
====================
I marcatori ReST utilizzati nei file in Documentation/ sono pensati per essere
-processati da ``Sphinx`` nella versione 1.3 o superiore.
+processati da ``Sphinx`` nella versione 1.7 o superiore.
Esiste uno script che verifica i requisiti Sphinx. Per ulteriori dettagli
consultate :ref:`it_sphinx-pre-install`.
@@ -53,11 +53,6 @@
.. note::
- #) Le versioni di Sphinx inferiori alla 1.5 non funzionano bene
- con il pacchetto Python docutils versione 0.13.1 o superiore.
- Se volete usare queste versioni, allora dovere eseguire
- ``pip install 'docutils==0.12'``.
-
#) Viene raccomandato l'uso del tema RTD per la documentazione in HTML.
A seconda della versione di Sphinx, potrebbe essere necessaria
l'installazione tramite il comando ``pip install sphinx_rtd_theme``.
@@ -67,13 +62,13 @@
utilizzando LaTeX. Per una corretta interpretazione, è necessario aver
installato texlive con i pacchetti amdfonts e amsmath.
-Riassumendo, se volete installare la versione 1.7.9 di Sphinx dovete eseguire::
+Riassumendo, se volete installare la versione 2.4.4 di Sphinx dovete eseguire::
- $ virtualenv sphinx_1.7.9
- $ . sphinx_1.7.9/bin/activate
- (sphinx_1.7.9) $ pip install -r Documentation/sphinx/requirements.txt
+ $ virtualenv sphinx_2.4.4
+ $ . sphinx_2.4.4/bin/activate
+ (sphinx_2.4.4) $ pip install -r Documentation/sphinx/requirements.txt
-Dopo aver eseguito ``. sphinx_1.7.9/bin/activate``, il prompt cambierà per
+Dopo aver eseguito ``. sphinx_2.4.4/bin/activate``, il prompt cambierà per
indicare che state usando il nuovo ambiente. Se aprite un nuova sessione,
prima di generare la documentazione, dovrete rieseguire questo comando per
rientrare nell'ambiente virtuale.
@@ -94,7 +89,7 @@
--------------------------
Al momento, la generazione di questi documenti è supportata solo dalle
-versioni di Sphinx superiori alla 1.4.
+versioni di Sphinx superiori alla 2.4.
Per la generazione di PDF e LaTeX, avrete bisogno anche del pacchetto
``XeLaTeX`` nella versione 3.14159265
@@ -119,8 +114,8 @@
You should run:
sudo dnf install -y texlive-luatex85
- /usr/bin/virtualenv sphinx_1.7.9
- . sphinx_1.7.9/bin/activate
+ /usr/bin/virtualenv sphinx_2.4.4
+ . sphinx_2.4.4/bin/activate
pip install -r Documentation/sphinx/requirements.txt
Can't build as 1 mandatory dependency is missing at ./scripts/sphinx-pre-install line 468.
diff --git a/Documentation/translations/it_IT/process/changes.rst b/Documentation/translations/it_IT/process/changes.rst
index 87d0818..dc71933 100644
--- a/Documentation/translations/it_IT/process/changes.rst
+++ b/Documentation/translations/it_IT/process/changes.rst
@@ -57,7 +57,7 @@
iptables 1.4.2 iptables -V
openssl & libcrypto 1.0.0 openssl version
bc 1.06.95 bc --version
-Sphinx\ [#f1]_ 1.3 sphinx-build --version
+Sphinx\ [#f1]_ 1.7 sphinx-build --version
====================== ================= ========================================
.. [#f1] Sphinx è necessario solo per produrre la documentazione del Kernel
diff --git a/Documentation/translations/zh_CN/doc-guide/sphinx.rst b/Documentation/translations/zh_CN/doc-guide/sphinx.rst
index 951595c..23eac67 100644
--- a/Documentation/translations/zh_CN/doc-guide/sphinx.rst
+++ b/Documentation/translations/zh_CN/doc-guide/sphinx.rst
@@ -26,7 +26,7 @@
安装Sphinx
==========
-Documentation/ 下的ReST文件现在使用sphinx1.3或更高版本构建。
+Documentation/ 下的ReST文件现在使用sphinx1.7或更高版本构建。
这有一个脚本可以检查Sphinx的依赖项。更多详细信息见
:ref:`sphinx-pre-install_zh` 。
@@ -40,22 +40,19 @@
.. note::
- #) 低于1.5版本的Sphinx无法与Python的0.13.1或更高版本docutils一起正常工作。
- 如果您想使用这些版本,那么应该运行 ``pip install 'docutils==0.12'`` 。
-
#) html输出建议使用RTD主题。根据Sphinx版本的不同,它应该用
``pip install sphinx_rtd_theme`` 单独安装。
#) 一些ReST页面包含数学表达式。由于Sphinx的工作方式,这些表达式是使用 LaTeX
编写的。它需要安装amsfonts和amsmath宏包,以便显示。
-总之,如您要安装Sphinx 1.7.9版本,应执行::
+总之,如您要安装Sphinx 2.4.4版本,应执行::
- $ virtualenv sphinx_1.7.9
- $ . sphinx_1.7.9/bin/activate
- (sphinx_1.7.9) $ pip install -r Documentation/sphinx/requirements.txt
+ $ virtualenv sphinx_2.4.4
+ $ . sphinx_2.4.4/bin/activate
+ (sphinx_2.4.4) $ pip install -r Documentation/sphinx/requirements.txt
-在运行 ``. sphinx_1.7.9/bin/activate`` 之后,提示符将变化,以指示您正在使用新
+在运行 ``. sphinx_2.4.4/bin/activate`` 之后,提示符将变化,以指示您正在使用新
环境。如果您打开了一个新的shell,那么在构建文档之前,您需要重新运行此命令以再
次进入虚拟环境中。
@@ -71,7 +68,7 @@
PDF和LaTeX构建
--------------
-目前只有Sphinx 1.4及更高版本才支持这种构建。
+目前只有Sphinx 2.4及更高版本才支持这种构建。
对于PDF和LaTeX输出,还需要 ``XeLaTeX`` 3.14159265版本。(译注:此版本号真实
存在)
@@ -93,8 +90,8 @@
You should run:
sudo dnf install -y texlive-luatex85
- /usr/bin/virtualenv sphinx_1.7.9
- . sphinx_1.7.9/bin/activate
+ /usr/bin/virtualenv sphinx_2.4.4
+ . sphinx_2.4.4/bin/activate
pip install -r Documentation/sphinx/requirements.txt
Can't build as 1 mandatory dependency is missing at ./scripts/sphinx-pre-install line 468.
diff --git a/Documentation/translations/zh_CN/process/management-style.rst b/Documentation/translations/zh_CN/process/management-style.rst
index c6a5bb2..8053ae4 100644
--- a/Documentation/translations/zh_CN/process/management-style.rst
+++ b/Documentation/translations/zh_CN/process/management-style.rst
@@ -36,14 +36,14 @@
每个人都认为管理者做决定,而且决策很重要。决定越大越痛苦,管理者就必须越高级。
这很明显,但事实并非如此。
-游戏的名字是 **避免** 做出决定。尤其是,如果有人告诉你“选择(a)或(b),
+最重要的是 **避免** 做出决定。尤其是,如果有人告诉你“选择(a)或(b),
我们真的需要你来做决定”,你就是陷入麻烦的管理者。你管理的人比你更了解细节,
所以如果他们来找你做技术决策,你完蛋了。你显然没有能力为他们做这个决定。
(推论:如果你管理的人不比你更了解细节,你也会被搞砸,尽管原因完全不同。
也就是说,你的工作是错的,他们应该管理你的才智)
-所以游戏的名字是 **避免** 做出决定,至少是那些大而痛苦的决定。做一些小的
+所以最重要的是 **避免** 做出决定,至少是那些大而痛苦的决定。做一些小的
和非结果性的决定是很好的,并且使您看起来好像知道自己在做什么,所以内核管理者
需要做的是将那些大的和痛苦的决定变成那些没有人真正关心的小事情。
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 2f03cbf..e4727dc 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -223,7 +223,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = 1;
break;
case KVM_CAP_NR_VCPUS:
- r = num_online_cpus();
+ /*
+ * ARM64 treats KVM_CAP_NR_CPUS differently from all other
+ * architectures, as it does not always bound it to
+ * KVM_CAP_MAX_VCPUS. It should not matter much because
+ * this is just an advisory value.
+ */
+ r = min_t(unsigned int, num_online_cpus(),
+ kvm_arm_default_max_vcpus());
break;
case KVM_CAP_MAX_VCPUS:
case KVM_CAP_MAX_VCPU_ID:
diff --git a/arch/mips/bcm63xx/clk.c b/arch/mips/bcm63xx/clk.c
index 5a3e325..1c91064 100644
--- a/arch/mips/bcm63xx/clk.c
+++ b/arch/mips/bcm63xx/clk.c
@@ -381,6 +381,12 @@ void clk_disable(struct clk *clk)
EXPORT_SYMBOL(clk_disable);
+struct clk *clk_get_parent(struct clk *clk)
+{
+ return NULL;
+}
+EXPORT_SYMBOL(clk_get_parent);
+
unsigned long clk_get_rate(struct clk *clk)
{
if (!clk)
diff --git a/arch/mips/generic/yamon-dt.c b/arch/mips/generic/yamon-dt.c
index a3aa22c..a07a5ed 100644
--- a/arch/mips/generic/yamon-dt.c
+++ b/arch/mips/generic/yamon-dt.c
@@ -75,7 +75,7 @@ static unsigned int __init gen_fdt_mem_array(
__init int yamon_dt_append_memory(void *fdt,
const struct yamon_mem_region *regions)
{
- unsigned long phys_memsize, memsize;
+ unsigned long phys_memsize = 0, memsize;
__be32 mem_array[2 * MAX_MEM_ARRAY_ENTRIES];
unsigned int mem_entries;
int i, err, mem_off;
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 70e32de..72d02d36 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -387,3 +387,4 @@
446 n32 landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 n32 process_mrelease sys_process_mrelease
+449 n32 futex_waitv sys_futex_waitv
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 1ca7bc3..e2c481f 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -363,3 +363,4 @@
446 n64 landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 n64 process_mrelease sys_process_mrelease
+449 n64 futex_waitv sys_futex_waitv
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index a61c35e..3714c97 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -436,3 +436,4 @@
446 o32 landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 o32 process_mrelease sys_process_mrelease
+449 o32 futex_waitv sys_futex_waitv
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 562aa87..aa20d07 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -1067,7 +1067,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = 1;
break;
case KVM_CAP_NR_VCPUS:
- r = num_online_cpus();
+ r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
break;
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c
index dd819e3..4916ccc 100644
--- a/arch/mips/lantiq/clk.c
+++ b/arch/mips/lantiq/clk.c
@@ -158,6 +158,12 @@ void clk_deactivate(struct clk *clk)
}
EXPORT_SYMBOL(clk_deactivate);
+struct clk *clk_get_parent(struct clk *clk)
+{
+ return NULL;
+}
+EXPORT_SYMBOL(clk_get_parent);
+
static inline u32 get_counter_resolution(void)
{
u32 res;
diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig
index d6fd8fa..53061cb 100644
--- a/arch/parisc/configs/generic-32bit_defconfig
+++ b/arch/parisc/configs/generic-32bit_defconfig
@@ -231,6 +231,7 @@
CONFIG_CRC_CCITT=m
CONFIG_CRC_T10DIF=y
CONFIG_FONTS=y
+CONFIG_PRINTK_TIME=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_MEMORY_INIT=y
diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h
index 7085df0..39e7985 100644
--- a/arch/parisc/include/asm/assembly.h
+++ b/arch/parisc/include/asm/assembly.h
@@ -3,38 +3,19 @@
* Copyright (C) 1999 Hewlett-Packard (Frank Rowand)
* Copyright (C) 1999 Philipp Rumpf <prumpf@tux.org>
* Copyright (C) 1999 SuSE GmbH
+ * Copyright (C) 2021 Helge Deller <deller@gmx.de>
*/
#ifndef _PARISC_ASSEMBLY_H
#define _PARISC_ASSEMBLY_H
-#define CALLEE_FLOAT_FRAME_SIZE 80
-
#ifdef CONFIG_64BIT
-#define LDREG ldd
-#define STREG std
-#define LDREGX ldd,s
-#define LDREGM ldd,mb
-#define STREGM std,ma
-#define SHRREG shrd
-#define SHLREG shld
-#define ANDCM andcm,*
-#define COND(x) * ## x
#define RP_OFFSET 16
#define FRAME_SIZE 128
#define CALLEE_REG_FRAME_SIZE 144
#define REG_SZ 8
#define ASM_ULONG_INSN .dword
#else /* CONFIG_64BIT */
-#define LDREG ldw
-#define STREG stw
-#define LDREGX ldwx,s
-#define LDREGM ldwm
-#define STREGM stwm
-#define SHRREG shr
-#define SHLREG shlw
-#define ANDCM andcm
-#define COND(x) x
#define RP_OFFSET 20
#define FRAME_SIZE 64
#define CALLEE_REG_FRAME_SIZE 128
@@ -45,6 +26,7 @@
/* Frame alignment for 32- and 64-bit */
#define FRAME_ALIGN 64
+#define CALLEE_FLOAT_FRAME_SIZE 80
#define CALLEE_SAVE_FRAME_SIZE (CALLEE_REG_FRAME_SIZE + CALLEE_FLOAT_FRAME_SIZE)
#ifdef CONFIG_PA20
@@ -68,6 +50,28 @@
#ifdef __ASSEMBLY__
#ifdef CONFIG_64BIT
+#define LDREG ldd
+#define STREG std
+#define LDREGX ldd,s
+#define LDREGM ldd,mb
+#define STREGM std,ma
+#define SHRREG shrd
+#define SHLREG shld
+#define ANDCM andcm,*
+#define COND(x) * ## x
+#else /* CONFIG_64BIT */
+#define LDREG ldw
+#define STREG stw
+#define LDREGX ldwx,s
+#define LDREGM ldwm
+#define STREGM stwm
+#define SHRREG shr
+#define SHLREG shlw
+#define ANDCM andcm
+#define COND(x) x
+#endif
+
+#ifdef CONFIG_64BIT
/* the 64-bit pa gnu assembler unfortunately defaults to .level 1.1 or 2.0 so
* work around that for now... */
.level 2.0w
diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h
index 7efb1aa..af2a598 100644
--- a/arch/parisc/include/asm/jump_label.h
+++ b/arch/parisc/include/asm/jump_label.h
@@ -5,6 +5,7 @@
#ifndef __ASSEMBLY__
#include <linux/types.h>
+#include <linux/stringify.h>
#include <asm/assembly.h>
#define JUMP_LABEL_NOP_SIZE 4
diff --git a/arch/parisc/include/asm/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h
index 4b9e3d7..2b3010a 100644
--- a/arch/parisc/include/asm/rt_sigframe.h
+++ b/arch/parisc/include/asm/rt_sigframe.h
@@ -2,7 +2,7 @@
#ifndef _ASM_PARISC_RT_SIGFRAME_H
#define _ASM_PARISC_RT_SIGFRAME_H
-#define SIGRETURN_TRAMP 3
+#define SIGRETURN_TRAMP 4
#define SIGRESTARTBLOCK_TRAMP 5
#define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP)
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index bbfe23c..46b1050 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -288,21 +288,22 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
already in userspace. The first words of tramp are used to
save the previous sigrestartblock trampoline that might be
on the stack. We start the sigreturn trampoline at
- SIGRESTARTBLOCK_TRAMP. */
+ SIGRESTARTBLOCK_TRAMP+X. */
err |= __put_user(in_syscall ? INSN_LDI_R25_1 : INSN_LDI_R25_0,
&frame->tramp[SIGRESTARTBLOCK_TRAMP+0]);
- err |= __put_user(INSN_BLE_SR2_R0,
+ err |= __put_user(INSN_LDI_R20,
&frame->tramp[SIGRESTARTBLOCK_TRAMP+1]);
- err |= __put_user(INSN_LDI_R20,
+ err |= __put_user(INSN_BLE_SR2_R0,
&frame->tramp[SIGRESTARTBLOCK_TRAMP+2]);
+ err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]);
- start = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+0];
- end = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+3];
+ start = (unsigned long) &frame->tramp[0];
+ end = (unsigned long) &frame->tramp[TRAMP_SIZE];
flush_user_dcache_range_asm(start, end);
flush_user_icache_range_asm(start, end);
/* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP
- * TRAMP Words 5-7, Length 3 = SIGRETURN_TRAMP
+ * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP
* So the SIGRETURN_TRAMP is at the end of SIGRESTARTBLOCK_TRAMP
*/
rp = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP];
diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h
index a5bdbb56..f166250 100644
--- a/arch/parisc/kernel/signal32.h
+++ b/arch/parisc/kernel/signal32.h
@@ -36,7 +36,7 @@ struct compat_regfile {
compat_int_t rf_sar;
};
-#define COMPAT_SIGRETURN_TRAMP 3
+#define COMPAT_SIGRETURN_TRAMP 4
#define COMPAT_SIGRESTARTBLOCK_TRAMP 5
#define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + \
COMPAT_SIGRESTARTBLOCK_TRAMP)
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index bf751e0..358c000 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -446,3 +446,4 @@
446 common landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
+449 common futex_waitv sys_futex_waitv
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index f9ea0e5..3fa6d24 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -187,6 +187,12 @@ static void watchdog_smp_panic(int cpu, u64 tb)
if (sysctl_hardlockup_all_cpu_backtrace)
trigger_allbutself_cpu_backtrace();
+ /*
+ * Force flush any remote buffers that might be stuck in IRQ context
+ * and therefore could not run their irq_work.
+ */
+ printk_trigger_flush();
+
if (hardlockup_panic)
nmi_panic(NULL, "Hard LOCKUP");
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 35e9ccc..a72920f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -641,9 +641,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
* implementations just count online CPUs.
*/
if (hv_enabled)
- r = num_present_cpus();
+ r = min_t(unsigned int, num_present_cpus(), KVM_MAX_VCPUS);
else
- r = num_online_cpus();
+ r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
break;
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index e3d3aed..fb84619 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -740,7 +740,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* Ensure we set mode to IN_GUEST_MODE after we disable
* interrupts and before the final VCPU requests check.
* See the comment in kvm_vcpu_exiting_guest_mode() and
- * Documentation/virtual/kvm/vcpu-requests.rst
+ * Documentation/virt/kvm/vcpu-requests.rst
*/
vcpu->mode = IN_GUEST_MODE;
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index eb3c045..3b0e703 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/**
+/*
* Copyright (c) 2019 Western Digital Corporation or its affiliates.
*
* Authors:
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index 26399df..fb18af3 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -74,7 +74,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = 1;
break;
case KVM_CAP_NR_VCPUS:
- r = num_online_cpus();
+ r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
break;
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c6257f6..14a18ba 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -585,6 +585,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_MAX_VCPUS;
else if (sclp.has_esca && sclp.has_64bscao)
r = KVM_S390_ESCA_CPU_SLOTS;
+ if (ext == KVM_CAP_NR_VCPUS)
+ r = min_t(unsigned int, num_online_cpus(), r);
break;
case KVM_CAP_S390_COW:
r = MACHINE_HAS_ESOP;
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 24f4a06..96eb7db 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -177,6 +177,9 @@ void set_hv_tscchange_cb(void (*cb)(void))
return;
}
+ if (!hv_vp_index)
+ return;
+
hv_reenlightenment_cb = cb;
/* Make sure callback is registered before we write to MSRs */
@@ -383,20 +386,13 @@ static void __init hv_get_partition_id(void)
*/
void __init hyperv_init(void)
{
- u64 guest_id, required_msrs;
+ u64 guest_id;
union hv_x64_msr_hypercall_contents hypercall_msr;
int cpuhp;
if (x86_hyper_type != X86_HYPER_MS_HYPERV)
return;
- /* Absolutely required MSRs */
- required_msrs = HV_MSR_HYPERCALL_AVAILABLE |
- HV_MSR_VP_INDEX_AVAILABLE;
-
- if ((ms_hyperv.features & required_msrs) != required_msrs)
- return;
-
if (hv_common_init())
return;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e5d8700..6ac61f8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -363,6 +363,7 @@ union kvm_mmu_extended_role {
unsigned int cr4_smap:1;
unsigned int cr4_smep:1;
unsigned int cr4_la57:1;
+ unsigned int efer_lma:1;
};
};
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 4794b71..ff55df6 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -163,12 +163,22 @@ static uint32_t __init ms_hyperv_platform(void)
cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
&eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]);
- if (eax >= HYPERV_CPUID_MIN &&
- eax <= HYPERV_CPUID_MAX &&
- !memcmp("Microsoft Hv", hyp_signature, 12))
- return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
+ if (eax < HYPERV_CPUID_MIN || eax > HYPERV_CPUID_MAX ||
+ memcmp("Microsoft Hv", hyp_signature, 12))
+ return 0;
- return 0;
+ /* HYPERCALL and VP_INDEX MSRs are mandatory for all features. */
+ eax = cpuid_eax(HYPERV_CPUID_FEATURES);
+ if (!(eax & HV_MSR_HYPERCALL_AVAILABLE)) {
+ pr_warn("x86/hyperv: HYPERCALL MSR not available.\n");
+ return 0;
+ }
+ if (!(eax & HV_MSR_VP_INDEX_AVAILABLE)) {
+ pr_warn("x86/hyperv: VP_INDEX MSR not available.\n");
+ return 0;
+ }
+
+ return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
}
static unsigned char hv_get_nmi_reason(void)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e19dabf..07e9215 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -125,7 +125,7 @@ static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
}
}
-struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
+static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
{
u32 base = vcpu->arch.kvm_cpuid_base;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 4a555f3..5e19e6e 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2022,7 +2022,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
{
bool longmode;
- longmode = is_64_bit_mode(vcpu);
+ longmode = is_64_bit_hypercall(vcpu);
if (longmode)
kvm_rax_write(vcpu, result);
else {
@@ -2171,7 +2171,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
}
#ifdef CONFIG_X86_64
- if (is_64_bit_mode(vcpu)) {
+ if (is_64_bit_hypercall(vcpu)) {
hc.param = kvm_rcx_read(vcpu);
hc.ingpa = kvm_rdx_read(vcpu);
hc.outgpa = kvm_r8_read(vcpu);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 3379437..3be9bee 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4682,6 +4682,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu,
/* PKEY and LA57 are active iff long mode is active. */
ext.cr4_pke = ____is_efer_lma(regs) && ____is_cr4_pke(regs);
ext.cr4_la57 = ____is_efer_lma(regs) && ____is_cr4_la57(regs);
+ ext.efer_lma = ____is_efer_lma(regs);
}
ext.valid = 1;
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 902c52a..21ac0a5 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -237,7 +237,6 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- bool es_active = argp->id == KVM_SEV_ES_INIT;
int asid, ret;
if (kvm->created_vcpus)
@@ -247,7 +246,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (unlikely(sev->active))
return ret;
- sev->es_active = es_active;
+ sev->active = true;
+ sev->es_active = argp->id == KVM_SEV_ES_INIT;
asid = sev_asid_new(sev);
if (asid < 0)
goto e_no_asid;
@@ -257,8 +257,6 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (ret)
goto e_free;
- sev->active = true;
- sev->asid = asid;
INIT_LIST_HEAD(&sev->regions_list);
return 0;
@@ -268,6 +266,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
sev->asid = 0;
e_no_asid:
sev->es_active = false;
+ sev->active = false;
return ret;
}
@@ -1530,7 +1529,7 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
}
-static bool cmd_allowed_from_miror(u32 cmd_id)
+static bool is_cmd_allowed_from_mirror(u32 cmd_id)
{
/*
* Allow mirrors VM to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES
@@ -1757,7 +1756,7 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
/* Only the enc_context_owner handles some memory enc operations. */
if (is_mirroring_enc_context(kvm) &&
- !cmd_allowed_from_miror(sev_cmd.id)) {
+ !is_cmd_allowed_from_mirror(sev_cmd.id)) {
r = -EINVAL;
goto out;
}
@@ -1990,7 +1989,12 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
mutex_unlock(&source_kvm->lock);
mutex_lock(&kvm->lock);
- if (sev_guest(kvm)) {
+ /*
+ * Disallow out-of-band SEV/SEV-ES init if the target is already an
+ * SEV guest, or if vCPUs have been created. KVM relies on vCPUs being
+ * created after SEV/SEV-ES initialization, e.g. to init intercepts.
+ */
+ if (sev_guest(kvm) || kvm->created_vcpus) {
ret = -EINVAL;
goto e_mirror_unlock;
}
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 437e685..5faad3d 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -247,7 +247,7 @@ static __always_inline bool sev_es_guest(struct kvm *kvm)
#ifdef CONFIG_KVM_AMD_SEV
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- return sev_guest(kvm) && sev->es_active;
+ return sev->es_active && !WARN_ON_ONCE(!sev->active);
#else
return false;
#endif
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index b213ca9..1e2f669 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -670,33 +670,39 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
- struct kvm_host_map map;
- struct vmcs12 *shadow;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
vmcs12->vmcs_link_pointer == INVALID_GPA)
return;
- shadow = get_shadow_vmcs12(vcpu);
-
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
+ if (ghc->gpa != vmcs12->vmcs_link_pointer &&
+ kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
+ vmcs12->vmcs_link_pointer, VMCS12_SIZE))
return;
- memcpy(shadow, map.hva, VMCS12_SIZE);
- kvm_vcpu_unmap(vcpu, &map, false);
+ kvm_read_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
+ VMCS12_SIZE);
}
static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
vmcs12->vmcs_link_pointer == INVALID_GPA)
return;
- kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer,
- get_shadow_vmcs12(vcpu), VMCS12_SIZE);
+ if (ghc->gpa != vmcs12->vmcs_link_pointer &&
+ kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
+ vmcs12->vmcs_link_pointer, VMCS12_SIZE))
+ return;
+
+ kvm_write_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
+ VMCS12_SIZE);
}
/*
@@ -2830,6 +2836,17 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
return 0;
}
+static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+#ifdef CONFIG_X86_64
+ if (CC(!!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) !=
+ !!(vcpu->arch.efer & EFER_LMA)))
+ return -EINVAL;
+#endif
+ return 0;
+}
+
static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
@@ -2854,18 +2871,16 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
return -EINVAL;
#ifdef CONFIG_X86_64
- ia32e = !!(vcpu->arch.efer & EFER_LMA);
+ ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
#else
ia32e = false;
#endif
if (ia32e) {
- if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) ||
- CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
+ if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
return -EINVAL;
} else {
- if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) ||
- CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
+ if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
CC((vmcs12->host_rip) >> 32))
return -EINVAL;
@@ -2910,9 +2925,9 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
- int r = 0;
- struct vmcs12 *shadow;
- struct kvm_host_map map;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
+ struct vmcs_hdr hdr;
if (vmcs12->vmcs_link_pointer == INVALID_GPA)
return 0;
@@ -2920,17 +2935,21 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)))
return -EINVAL;
- if (CC(kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)))
+ if (ghc->gpa != vmcs12->vmcs_link_pointer &&
+ CC(kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
+ vmcs12->vmcs_link_pointer, VMCS12_SIZE)))
+ return -EINVAL;
+
+ if (CC(kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr,
+ offsetof(struct vmcs12, hdr),
+ sizeof(hdr))))
return -EINVAL;
- shadow = map.hva;
+ if (CC(hdr.revision_id != VMCS12_REVISION) ||
+ CC(hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
+ return -EINVAL;
- if (CC(shadow->hdr.revision_id != VMCS12_REVISION) ||
- CC(shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
- r = -EINVAL;
-
- kvm_vcpu_unmap(vcpu, &map, false);
- return r;
+ return 0;
}
/*
@@ -3535,6 +3554,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (nested_vmx_check_controls(vcpu, vmcs12))
return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+ if (nested_vmx_check_address_space_size(vcpu, vmcs12))
+ return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
+
if (nested_vmx_check_host_state(vcpu, vmcs12))
return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
@@ -5264,10 +5286,11 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
return 1;
if (vmx->nested.current_vmptr != vmptr) {
- struct kvm_host_map map;
- struct vmcs12 *new_vmcs12;
+ struct gfn_to_hva_cache *ghc = &vmx->nested.vmcs12_cache;
+ struct vmcs_hdr hdr;
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) {
+ if (ghc->gpa != vmptr &&
+ kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) {
/*
* Reads from an unbacked page return all 1s,
* which means that the 32 bits located at the
@@ -5278,12 +5301,16 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
}
- new_vmcs12 = map.hva;
+ if (kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr,
+ offsetof(struct vmcs12, hdr),
+ sizeof(hdr))) {
+ return nested_vmx_fail(vcpu,
+ VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
+ }
- if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
- (new_vmcs12->hdr.shadow_vmcs &&
+ if (hdr.revision_id != VMCS12_REVISION ||
+ (hdr.shadow_vmcs &&
!nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
- kvm_vcpu_unmap(vcpu, &map, false);
return nested_vmx_fail(vcpu,
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
}
@@ -5294,8 +5321,11 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
* Load VMCS12 from guest memory since it is not already
* cached.
*/
- memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
- kvm_vcpu_unmap(vcpu, &map, false);
+ if (kvm_read_guest_cached(vcpu->kvm, ghc, vmx->nested.cached_vmcs12,
+ VMCS12_SIZE)) {
+ return nested_vmx_fail(vcpu,
+ VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
+ }
set_current_vmptr(vmx, vmptr);
}
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index a4ead60..4df2ac2 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -142,6 +142,16 @@ struct nested_vmx {
struct vmcs12 *cached_shadow_vmcs12;
/*
+ * GPA to HVA cache for accessing vmcs12->vmcs_link_pointer
+ */
+ struct gfn_to_hva_cache shadow_vmcs12_cache;
+
+ /*
+ * GPA to HVA cache for VMCS12
+ */
+ struct gfn_to_hva_cache vmcs12_cache;
+
+ /*
* Indicates if the shadow vmcs or enlightened vmcs must be updated
* with the data held by struct vmcs12.
*/
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dc7eb5f..5a403d9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3307,9 +3307,9 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
"xor %1, %1\n"
"2:\n"
_ASM_EXTABLE_UA(1b, 2b)
- : "+r" (st_preempted),
- "+&r" (err)
- : "m" (st->preempted));
+ : "+q" (st_preempted),
+ "+&r" (err),
+ "+m" (st->preempted));
if (err)
goto out;
@@ -4179,7 +4179,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = !static_call(kvm_x86_cpu_has_accelerated_tpr)();
break;
case KVM_CAP_NR_VCPUS:
- r = num_online_cpus();
+ r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
break;
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
@@ -8848,7 +8848,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
trace_kvm_hypercall(nr, a0, a1, a2, a3);
- op_64_bit = is_64_bit_mode(vcpu);
+ op_64_bit = is_64_bit_hypercall(vcpu);
if (!op_64_bit) {
nr &= 0xFFFFFFFF;
a0 &= 0xFFFFFFFF;
@@ -9547,12 +9547,16 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
if (!kvm_apic_hw_enabled(vcpu->arch.apic))
return;
- if (to_hv_vcpu(vcpu))
+ if (to_hv_vcpu(vcpu)) {
bitmap_or((ulong *)eoi_exit_bitmap,
vcpu->arch.ioapic_handled_vectors,
to_hv_synic(vcpu)->vec_bitmap, 256);
+ static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
+ return;
+ }
- static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
+ static_call(kvm_x86_load_eoi_exitmap)(
+ vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors);
}
void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index ea264c4..997669a 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -153,12 +153,24 @@ static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu)
{
int cs_db, cs_l;
+ WARN_ON_ONCE(vcpu->arch.guest_state_protected);
+
if (!is_long_mode(vcpu))
return false;
static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l);
return cs_l;
}
+static inline bool is_64_bit_hypercall(struct kvm_vcpu *vcpu)
+{
+ /*
+ * If running with protected guest state, the CS register is not
+ * accessible. The hypercall register values will have had to been
+ * provided in 64-bit mode, so assume the guest is in 64-bit.
+ */
+ return vcpu->arch.guest_state_protected || is_64_bit_mode(vcpu);
+}
+
static inline bool x86_exception_has_error_code(unsigned int vector)
{
static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) |
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 8f62bae..dff2bdf 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -127,9 +127,9 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
state_entry_time = vx->runstate_entry_time;
state_entry_time |= XEN_RUNSTATE_UPDATE;
- BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) !=
+ BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) !=
sizeof(state_entry_time));
- BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) !=
+ BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
sizeof(state_entry_time));
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
@@ -144,9 +144,9 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
*/
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
offsetof(struct compat_vcpu_runstate_info, state));
- BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+ BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) !=
sizeof(vx->current_runstate));
- BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) !=
+ BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
sizeof(vx->current_runstate));
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
@@ -163,9 +163,9 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
- BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
- sizeof(((struct compat_vcpu_runstate_info *)0)->time));
- BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+ BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
+ sizeof_field(struct compat_vcpu_runstate_info, time));
+ BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
sizeof(vx->runstate_times));
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
@@ -205,9 +205,9 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
offsetof(struct compat_vcpu_info, evtchn_upcall_pending));
BUILD_BUG_ON(sizeof(rc) !=
- sizeof(((struct vcpu_info *)0)->evtchn_upcall_pending));
+ sizeof_field(struct vcpu_info, evtchn_upcall_pending));
BUILD_BUG_ON(sizeof(rc) !=
- sizeof(((struct compat_vcpu_info *)0)->evtchn_upcall_pending));
+ sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));
/*
* For efficiency, this mirrors the checks for using the valid
@@ -299,7 +299,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
break;
case KVM_XEN_ATTR_TYPE_SHARED_INFO:
- data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_gfn);
+ data->u.shared_info.gfn = kvm->arch.xen.shinfo_gfn;
r = 0;
break;
@@ -698,7 +698,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
kvm_hv_hypercall_enabled(vcpu))
return kvm_hv_hypercall(vcpu);
- longmode = is_64_bit_mode(vcpu);
+ longmode = is_64_bit_hypercall(vcpu);
if (!longmode) {
params[0] = (u32)kvm_rbx_read(vcpu);
params[1] = (u32)kvm_rcx_read(vcpu);
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 7f11ea0..ca873a3 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -480,7 +480,7 @@ module_param(pressure_report_delay, uint, (S_IRUGO | S_IWUSR));
MODULE_PARM_DESC(pressure_report_delay, "Delay in secs in reporting pressure");
static atomic_t trans_id = ATOMIC_INIT(0);
-static int dm_ring_size = 20 * 1024;
+static int dm_ring_size = VMBUS_RING_SIZE(16 * 1024);
/*
* Driver specific state.
diff --git a/drivers/video/console/sticon.c b/drivers/video/console/sticon.c
index 1b45116..40496e9 100644
--- a/drivers/video/console/sticon.c
+++ b/drivers/video/console/sticon.c
@@ -332,13 +332,13 @@ static u8 sticon_build_attr(struct vc_data *conp, u8 color,
bool blink, bool underline, bool reverse,
bool italic)
{
- u8 attr = ((color & 0x70) >> 1) | ((color & 7));
+ u8 fg = color & 7;
+ u8 bg = (color & 0x70) >> 4;
- if (reverse) {
- color = ((color >> 3) & 0x7) | ((color & 0x7) << 3);
- }
-
- return attr;
+ if (reverse)
+ return (fg << 3) | bg;
+ else
+ return (bg << 3) | fg;
}
static void sticon_invert_region(struct vc_data *conp, u16 *p, int count)
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 7235d53..d671084 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -940,7 +940,7 @@ static int __gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
else if (height == ip->i_height)
ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
else
- iomap->length = size - pos;
+ iomap->length = size - iomap->offset;
} else if (flags & IOMAP_WRITE) {
u64 alloc_size;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index adafaaf..3e718cf 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -773,8 +773,8 @@ static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i,
size_t *prev_count,
size_t *window_size)
{
- char __user *p = i->iov[0].iov_base + i->iov_offset;
size_t count = iov_iter_count(i);
+ char __user *p;
int pages = 1;
if (likely(!count))
@@ -787,14 +787,14 @@ static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i,
if (*prev_count != count || !*window_size) {
int pages, nr_dirtied;
- pages = min_t(int, BIO_MAX_VECS,
- DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE));
+ pages = min_t(int, BIO_MAX_VECS, DIV_ROUND_UP(count, PAGE_SIZE));
nr_dirtied = max(current->nr_dirtied_pause -
current->nr_dirtied, 1);
pages = min(pages, nr_dirtied);
}
*prev_count = count;
+ p = i->iov[0].iov_base + i->iov_offset;
*window_size = (size_t)PAGE_SIZE * pages - offset_in_page(p);
return true;
}
@@ -1013,6 +1013,7 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_holder *statfs_gh = NULL;
size_t prev_count = 0, window_size = 0;
+ size_t orig_count = iov_iter_count(from);
size_t read = 0;
ssize_t ret;
@@ -1057,6 +1058,7 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
if (inode == sdp->sd_rindex)
gfs2_glock_dq_uninit(statfs_gh);
+ from->count = orig_count - read;
if (should_fault_in_pages(ret, from, &prev_count, &window_size)) {
size_t leftover;
@@ -1064,6 +1066,7 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
leftover = fault_in_iov_iter_readable(from, window_size);
gfs2_holder_disallow_demote(gh);
if (leftover != window_size) {
+ from->count = min(from->count, window_size - leftover);
if (!gfs2_holder_queued(gh)) {
if (read)
goto out_uninit;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 19f38ae..8dbd6fe 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -411,14 +411,14 @@ static void do_error(struct gfs2_glock *gl, const int ret)
static void demote_incompat_holders(struct gfs2_glock *gl,
struct gfs2_holder *new_gh)
{
- struct gfs2_holder *gh;
+ struct gfs2_holder *gh, *tmp;
/*
* Demote incompatible holders before we make ourselves eligible.
* (This holder may or may not allow auto-demoting, but we don't want
* to demote the new holder before it's even granted.)
*/
- list_for_each_entry(gh, &gl->gl_holders, gh_list) {
+ list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
/*
* Since holders are at the front of the list, we stop when we
* find the first non-holder.
@@ -496,7 +496,7 @@ int gfs2_instantiate(struct gfs2_holder *gh)
* Since we unlock the lockref lock, we set a flag to indicate
* instantiate is in progress.
*/
- if (test_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags)) {
+ if (test_and_set_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags)) {
wait_on_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG,
TASK_UNINTERRUPTIBLE);
/*
@@ -509,14 +509,10 @@ int gfs2_instantiate(struct gfs2_holder *gh)
goto again;
}
- set_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags);
-
ret = glops->go_instantiate(gh);
if (!ret)
clear_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags);
- clear_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags);
- smp_mb__after_atomic();
- wake_up_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG);
+ clear_and_wake_up_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags);
return ret;
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 5b12137..0f93e8b 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1402,13 +1402,6 @@ static void gfs2_evict_inode(struct inode *inode)
gfs2_ordered_del_inode(ip);
clear_inode(inode);
gfs2_dir_hash_inval(ip);
- if (ip->i_gl) {
- glock_clear_object(ip->i_gl, ip);
- wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
- gfs2_glock_add_to_lru(ip->i_gl);
- gfs2_glock_put_eventually(ip->i_gl);
- ip->i_gl = NULL;
- }
if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
@@ -1421,6 +1414,13 @@ static void gfs2_evict_inode(struct inode *inode)
gfs2_holder_uninit(&ip->i_iopen_gh);
gfs2_glock_put_eventually(gl);
}
+ if (ip->i_gl) {
+ glock_clear_object(ip->i_gl, ip);
+ wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
+ gfs2_glock_add_to_lru(ip->i_gl);
+ gfs2_glock_put_eventually(ip->i_gl);
+ ip->i_gl = NULL;
+ }
}
static struct inode *gfs2_alloc_inode(struct super_block *sb)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b2a1d96..5a93a5d 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -288,11 +288,8 @@ nfsd4_decode_bitmap4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen)
p = xdr_inline_decode(argp->xdr, count << 2);
if (!p)
return nfserr_bad_xdr;
- i = 0;
- while (i < count)
- bmval[i++] = be32_to_cpup(p++);
- while (i < bmlen)
- bmval[i++] = 0;
+ for (i = 0; i < bmlen; i++)
+ bmval[i] = (i < count) ? be32_to_cpup(p++) : 0;
return nfs_ok;
}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9e0667e..c310648 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -874,7 +874,7 @@ void kvm_release_pfn_dirty(kvm_pfn_t pfn);
void kvm_set_pfn_dirty(kvm_pfn_t pfn);
void kvm_set_pfn_accessed(kvm_pfn_t pfn);
-void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
+void kvm_release_pfn(kvm_pfn_t pfn, bool dirty);
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
int len);
int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
@@ -950,12 +950,8 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn
kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map);
-int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
- struct gfn_to_pfn_cache *cache, bool atomic);
struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
-int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
- struct gfn_to_pfn_cache *cache, bool dirty, bool atomic);
unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 2237abb..234eab0 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -53,13 +53,6 @@ struct gfn_to_hva_cache {
struct kvm_memory_slot *memslot;
};
-struct gfn_to_pfn_cache {
- u64 generation;
- gfn_t gfn;
- kvm_pfn_t pfn;
- bool dirty;
-};
-
#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
/*
* Memory caches are used to preallocate memory ahead of various MMU flows,
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 98a9371..ae4004e 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -6,7 +6,6 @@
#include <linux/preempt.h>
#include <linux/smp.h>
#include <linux/cpumask.h>
-#include <linux/printk.h>
#include <linux/pfn.h>
#include <linux/init.h>
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 85b656f..9497f6b 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -198,6 +198,7 @@ void dump_stack_print_info(const char *log_lvl);
void show_regs_print_info(const char *log_lvl);
extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold;
extern asmlinkage void dump_stack(void) __cold;
+void printk_trigger_flush(void);
#else
static inline __printf(1, 0)
int vprintk(const char *s, va_list args)
@@ -274,6 +275,9 @@ static inline void dump_stack_lvl(const char *log_lvl)
static inline void dump_stack(void)
{
}
+static inline void printk_trigger_flush(void)
+{
+}
#endif
#ifdef CONFIG_SMP
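
printk_trigger_flush() follows the usual pattern for optional kernel facilities: a real declaration when CONFIG_PRINTK is enabled, an empty static inline in the #else branch, so callers never need an #ifdef of their own. The pattern in isolation (CONFIG_FOO and foo_flush() are placeholders, not real kernel symbols):

    #ifdef CONFIG_FOO
    void foo_flush(void);           /* real implementation lives in foo.c */
    #else
    static inline void foo_flush(void)
    {
            /* no-op: the call compiles away when the facility is absent */
    }
    #endif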
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 50453b2..2d167ac 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -673,7 +673,7 @@ struct trace_event_file {
#define PERF_MAX_TRACE_SIZE 8192
-#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */
+#define MAX_FILTER_STR_VAL 256U /* Should handle KSYM_SYMBOL_LEN */
enum event_trigger_type {
ETT_NONE = (0),
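
The only change here is the U suffix, presumably so the constant can sit in the kernel's type-checking min() next to unsigned size fields without a signed/unsigned mismatch. A userspace demonstration with a min() built on the same trick (GNU C; the pointer comparison is the type check):

    #include <stdio.h>

    #define min(a, b) ({                                            \
            typeof(a) _a = (a);                                     \
            typeof(b) _b = (b);                                     \
            (void)(&_a == &_b);     /* warns if the types differ */ \
            _a < _b ? _a : _b; })

    int main(void)
    {
            unsigned int size = 300;

            /* 256U matches 'unsigned int'; a plain 256 is int, and the
             * pointer comparison above would then draw a warning. */
            printf("%u\n", min(size, 256U));
            return 0;
    }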
diff --git a/init/Kconfig b/init/Kconfig
index 036b750..4b7bac1 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -887,7 +887,7 @@
config CC_IMPLICIT_FALLTHROUGH
string
- default "-Wimplicit-fallthrough=5" if CC_IS_GCC
+ default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5)
default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough)
#
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 013bfd6..57b132b 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3253,6 +3253,11 @@ void defer_console_output(void)
preempt_enable();
}
+void printk_trigger_flush(void)
+{
+ defer_console_output();
+}
+
int vprintk_deferred(const char *fmt, va_list args)
{
int r;
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 8a10046..5ea2c9e 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -3026,8 +3026,10 @@ static inline void __update_field_vars(struct tracing_map_elt *elt,
if (val->flags & HIST_FIELD_FL_STRING) {
char *str = elt_data->field_var_str[j++];
char *val_str = (char *)(uintptr_t)var_val;
+ unsigned int size;
- strscpy(str, val_str, val->size);
+ size = min(val->size, STR_VAR_LEN_MAX);
+ strscpy(str, val_str, size);
var_val = (u64)(uintptr_t)str;
}
tracing_map_set_var(elt, var_idx, var_val);
@@ -4914,6 +4916,7 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
if (hist_field->flags & HIST_FIELD_FL_STRING) {
unsigned int str_start, var_str_idx, idx;
char *str, *val_str;
+ unsigned int size;
str_start = hist_data->n_field_var_str +
hist_data->n_save_var_str;
@@ -4922,7 +4925,9 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
str = elt_data->field_var_str[idx];
val_str = (char *)(uintptr_t)hist_val;
- strscpy(str, val_str, hist_field->size);
+
+ size = min(hist_field->size, STR_VAR_LEN_MAX);
+ strscpy(str, val_str, size);
hist_val = (u64)(uintptr_t)str;
}
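
Both hunks clamp the copy length to the destination slot's fixed capacity before strscpy(), since a string field's recorded size can exceed STR_VAR_LEN_MAX. A userspace sketch of the clamp (the capacity value is arbitrary here, and snprintf() stands in for strscpy()):

    #include <stdio.h>

    #define STR_VAR_LEN_MAX 32      /* capacity of each string slot */

    /* Copy at most size-1 bytes and always NUL-terminate, like strscpy(). */
    static void copy_clamped(char *dst, const char *src, unsigned int field_size)
    {
            unsigned int size = field_size < STR_VAR_LEN_MAX ?
                                field_size : STR_VAR_LEN_MAX;

            snprintf(dst, size, "%s", src);
    }

    int main(void)
    {
            char slot[STR_VAR_LEN_MAX];

            /* A 64-byte field no longer overruns the 32-byte slot. */
            copy_clamped(slot, "a-rather-long-symbol-name-from-an-event", 64);
            printf("%s\n", slot);
            return 0;
    }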
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index f9e8900..199ab20 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -75,6 +75,12 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
touch_softlockup_watchdog();
}
+ /*
+ * Force flush any remote buffers that might be stuck in IRQ context
+ * and therefore could not run their irq_work.
+ */
+ printk_trigger_flush();
+
clear_bit_unlock(0, &backtrace_flag);
put_cpu();
}
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index d4a8301..3763105 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -23,6 +23,7 @@
/x86_64/platform_info_test
/x86_64/set_boot_cpu_id
/x86_64/set_sregs_test
+/x86_64/sev_migrate_tests
/x86_64/smm_test
/x86_64/state_test
/x86_64/svm_vmcall_test
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 5d95113..d890903 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -47,7 +47,7 @@
#include "guest_modes.h"
/* Global variable used to synchronize all of the vCPU threads. */
-static int iteration = -1;
+static int iteration;
/* Defines what vCPU threads should do during a given iteration. */
static enum {
@@ -215,12 +215,11 @@ static bool spin_wait_for_next_iteration(int *current_iteration)
return true;
}
-static void *vcpu_thread_main(void *arg)
+static void vcpu_thread_main(struct perf_test_vcpu_args *vcpu_args)
{
- struct perf_test_vcpu_args *vcpu_args = arg;
struct kvm_vm *vm = perf_test_args.vm;
int vcpu_id = vcpu_args->vcpu_id;
- int current_iteration = -1;
+ int current_iteration = 0;
while (spin_wait_for_next_iteration(&current_iteration)) {
switch (READ_ONCE(iteration_work)) {
@@ -235,8 +234,6 @@ static void *vcpu_thread_main(void *arg)
vcpu_last_completed_iteration[vcpu_id] = current_iteration;
}
-
- return NULL;
}
static void spin_wait_for_vcpu(int vcpu_id, int target_iteration)
@@ -277,8 +274,7 @@ static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description)
static void access_memory(struct kvm_vm *vm, int vcpus, enum access_type access,
const char *description)
{
- perf_test_args.wr_fract = (access == ACCESS_READ) ? INT_MAX : 1;
- sync_global_to_guest(vm, perf_test_args);
+ perf_test_set_wr_fract(vm, (access == ACCESS_READ) ? INT_MAX : 1);
iteration_work = ITERATION_ACCESS_MEMORY;
run_iteration(vm, vcpus, description);
}
@@ -296,48 +292,16 @@ static void mark_memory_idle(struct kvm_vm *vm, int vcpus)
run_iteration(vm, vcpus, "Mark memory idle");
}
-static pthread_t *create_vcpu_threads(int vcpus)
-{
- pthread_t *vcpu_threads;
- int i;
-
- vcpu_threads = malloc(vcpus * sizeof(vcpu_threads[0]));
- TEST_ASSERT(vcpu_threads, "Failed to allocate vcpu_threads.");
-
- for (i = 0; i < vcpus; i++) {
- vcpu_last_completed_iteration[i] = iteration;
- pthread_create(&vcpu_threads[i], NULL, vcpu_thread_main,
- &perf_test_args.vcpu_args[i]);
- }
-
- return vcpu_threads;
-}
-
-static void terminate_vcpu_threads(pthread_t *vcpu_threads, int vcpus)
-{
- int i;
-
- /* Set done to signal the vCPU threads to exit */
- done = true;
-
- for (i = 0; i < vcpus; i++)
- pthread_join(vcpu_threads[i], NULL);
-}
-
static void run_test(enum vm_guest_mode mode, void *arg)
{
struct test_params *params = arg;
struct kvm_vm *vm;
- pthread_t *vcpu_threads;
int vcpus = params->vcpus;
vm = perf_test_create_vm(mode, vcpus, params->vcpu_memory_bytes, 1,
- params->backing_src);
+ params->backing_src, !overlap_memory_access);
- perf_test_setup_vcpus(vm, vcpus, params->vcpu_memory_bytes,
- !overlap_memory_access);
-
- vcpu_threads = create_vcpu_threads(vcpus);
+ perf_test_start_vcpu_threads(vcpus, vcpu_thread_main);
pr_info("\n");
access_memory(vm, vcpus, ACCESS_WRITE, "Populating memory");
@@ -352,8 +316,10 @@ static void run_test(enum vm_guest_mode mode, void *arg)
mark_memory_idle(vm, vcpus);
access_memory(vm, vcpus, ACCESS_READ, "Reading from idle memory");
- terminate_vcpu_threads(vcpu_threads, vcpus);
- free(vcpu_threads);
+ /* Set done to signal the vCPU threads to exit */
+ done = true;
+
+ perf_test_join_vcpu_threads(vcpus);
perf_test_destroy_vm(vm);
}
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 1510b21..6a719d0 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -42,10 +42,9 @@ static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
static size_t demand_paging_size;
static char *guest_data_prototype;
-static void *vcpu_worker(void *data)
+static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
{
int ret;
- struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data;
int vcpu_id = vcpu_args->vcpu_id;
struct kvm_vm *vm = perf_test_args.vm;
struct kvm_run *run;
@@ -68,8 +67,6 @@ static void *vcpu_worker(void *data)
ts_diff = timespec_elapsed(start);
PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id,
ts_diff.tv_sec, ts_diff.tv_nsec);
-
- return NULL;
}
static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr)
@@ -282,7 +279,6 @@ struct test_params {
static void run_test(enum vm_guest_mode mode, void *arg)
{
struct test_params *p = arg;
- pthread_t *vcpu_threads;
pthread_t *uffd_handler_threads = NULL;
struct uffd_handler_args *uffd_args = NULL;
struct timespec start;
@@ -293,9 +289,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
int r;
vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
- p->src_type);
-
- perf_test_args.wr_fract = 1;
+ p->src_type, p->partition_vcpu_memory_access);
demand_paging_size = get_backing_src_pagesz(p->src_type);
@@ -304,12 +298,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
"Failed to allocate buffer for guest data pattern");
memset(guest_data_prototype, 0xAB, demand_paging_size);
- vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
- TEST_ASSERT(vcpu_threads, "Memory allocation failed");
-
- perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
- p->partition_vcpu_memory_access);
-
if (p->uffd_mode) {
uffd_handler_threads =
malloc(nr_vcpus * sizeof(*uffd_handler_threads));
@@ -322,26 +310,15 @@ static void run_test(enum vm_guest_mode mode, void *arg)
TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
- vm_paddr_t vcpu_gpa;
+ struct perf_test_vcpu_args *vcpu_args;
void *vcpu_hva;
void *vcpu_alias;
- uint64_t vcpu_mem_size;
-
- if (p->partition_vcpu_memory_access) {
- vcpu_gpa = guest_test_phys_mem +
- (vcpu_id * guest_percpu_mem_size);
- vcpu_mem_size = guest_percpu_mem_size;
- } else {
- vcpu_gpa = guest_test_phys_mem;
- vcpu_mem_size = guest_percpu_mem_size * nr_vcpus;
- }
- PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
- vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size);
+ vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
/* Cache the host addresses of the region */
- vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
- vcpu_alias = addr_gpa2alias(vm, vcpu_gpa);
+ vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa);
+ vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa);
/*
* Set up user fault fd to handle demand paging
@@ -355,32 +332,18 @@ static void run_test(enum vm_guest_mode mode, void *arg)
pipefds[vcpu_id * 2], p->uffd_mode,
p->uffd_delay, &uffd_args[vcpu_id],
vcpu_hva, vcpu_alias,
- vcpu_mem_size);
+ vcpu_args->pages * perf_test_args.guest_page_size);
}
}
- /* Export the shared variables to the guest */
- sync_global_to_guest(vm, perf_test_args);
-
pr_info("Finished creating vCPUs and starting uffd threads\n");
clock_gettime(CLOCK_MONOTONIC, &start);
-
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
- pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
- &perf_test_args.vcpu_args[vcpu_id]);
- }
-
+ perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
pr_info("Started all vCPUs\n");
- /* Wait for the vcpu threads to quit */
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
- pthread_join(vcpu_threads[vcpu_id], NULL);
- PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id);
- }
-
+ perf_test_join_vcpu_threads(nr_vcpus);
ts_diff = timespec_elapsed(start);
-
pr_info("All vCPU threads joined\n");
if (p->uffd_mode) {
@@ -404,7 +367,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
perf_test_destroy_vm(vm);
free(guest_data_prototype);
- free(vcpu_threads);
if (p->uffd_mode) {
free(uffd_handler_threads);
free(uffd_args);
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 7ffab5b..1954b96 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -31,7 +31,7 @@ static bool host_quit;
static int iteration;
static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
-static void *vcpu_worker(void *data)
+static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
{
int ret;
struct kvm_vm *vm = perf_test_args.vm;
@@ -41,7 +41,6 @@ static void *vcpu_worker(void *data)
struct timespec ts_diff;
struct timespec total = (struct timespec){0};
struct timespec avg;
- struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data;
int vcpu_id = vcpu_args->vcpu_id;
run = vcpu_state(vm, vcpu_id);
@@ -83,8 +82,6 @@ static void *vcpu_worker(void *data)
pr_debug("\nvCPU %d dirtied 0x%lx pages over %d iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id],
total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec);
-
- return NULL;
}
struct test_params {
@@ -170,7 +167,6 @@ static void free_bitmaps(unsigned long *bitmaps[], int slots)
static void run_test(enum vm_guest_mode mode, void *arg)
{
struct test_params *p = arg;
- pthread_t *vcpu_threads;
struct kvm_vm *vm;
unsigned long **bitmaps;
uint64_t guest_num_pages;
@@ -186,9 +182,10 @@ static void run_test(enum vm_guest_mode mode, void *arg)
struct timespec clear_dirty_log_total = (struct timespec){0};
vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
- p->slots, p->backing_src);
+ p->slots, p->backing_src,
+ p->partition_vcpu_memory_access);
- perf_test_args.wr_fract = p->wr_fract;
+ perf_test_set_wr_fract(vm, p->wr_fract);
guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
@@ -203,25 +200,15 @@ static void run_test(enum vm_guest_mode mode, void *arg)
vm_enable_cap(vm, &cap);
}
- vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
- TEST_ASSERT(vcpu_threads, "Memory allocation failed");
-
- perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
- p->partition_vcpu_memory_access);
-
- sync_global_to_guest(vm, perf_test_args);
-
/* Start the iterations */
iteration = 0;
host_quit = false;
clock_gettime(CLOCK_MONOTONIC, &start);
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+ for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
vcpu_last_completed_iteration[vcpu_id] = -1;
- pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
- &perf_test_args.vcpu_args[vcpu_id]);
- }
+ perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
/* Allow the vCPUs to populate memory */
pr_debug("Starting iteration %d - Populating\n", iteration);
@@ -290,8 +277,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
/* Tell the vcpu thread to quit */
host_quit = true;
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
- pthread_join(vcpu_threads[vcpu_id], NULL);
+ perf_test_join_vcpu_threads(nr_vcpus);
avg = timespec_div(get_dirty_log_total, p->iterations);
pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
@@ -306,7 +292,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
}
free_bitmaps(bitmaps, p->slots);
- free(vcpu_threads);
perf_test_destroy_vm(vm);
}
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 792c60e1..3fcd89e 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -115,7 +115,7 @@ static void guest_code(void)
addr = guest_test_virt_mem;
addr += (READ_ONCE(random_array[i]) % guest_num_pages)
* guest_page_size;
- addr &= ~(host_page_size - 1);
+ addr = align_down(addr, host_page_size);
*(uint64_t *)addr = READ_ONCE(iteration);
}
@@ -737,14 +737,14 @@ static void run_test(enum vm_guest_mode mode, void *arg)
if (!p->phys_offset) {
guest_test_phys_mem = (vm_get_max_gfn(vm) -
guest_num_pages) * guest_page_size;
- guest_test_phys_mem &= ~(host_page_size - 1);
+ guest_test_phys_mem = align_down(guest_test_phys_mem, host_page_size);
} else {
guest_test_phys_mem = p->phys_offset;
}
#ifdef __s390x__
/* Align to 1M (segment size) */
- guest_test_phys_mem &= ~((1 << 20) - 1);
+ guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20);
#endif
pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index df9f1a3..a86f953 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -8,6 +8,8 @@
#ifndef SELFTEST_KVM_PERF_TEST_UTIL_H
#define SELFTEST_KVM_PERF_TEST_UTIL_H
+#include <pthread.h>
+
#include "kvm_util.h"
/* Default guest test virtual memory offset */
@@ -18,6 +20,7 @@
#define PERF_TEST_MEM_SLOT_INDEX 1
struct perf_test_vcpu_args {
+ uint64_t gpa;
uint64_t gva;
uint64_t pages;
@@ -27,7 +30,7 @@ struct perf_test_vcpu_args {
struct perf_test_args {
struct kvm_vm *vm;
- uint64_t host_page_size;
+ uint64_t gpa;
uint64_t guest_page_size;
int wr_fract;
@@ -36,19 +39,15 @@ struct perf_test_args {
extern struct perf_test_args perf_test_args;
-/*
- * Guest physical memory offset of the testing memory slot.
- * This will be set to the topmost valid physical address minus
- * the test memory size.
- */
-extern uint64_t guest_test_phys_mem;
-
struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
uint64_t vcpu_memory_bytes, int slots,
- enum vm_mem_backing_src_type backing_src);
+ enum vm_mem_backing_src_type backing_src,
+ bool partition_vcpu_memory_access);
void perf_test_destroy_vm(struct kvm_vm *vm);
-void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
- uint64_t vcpu_memory_bytes,
- bool partition_vcpu_memory_access);
+
+void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract);
+
+void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *));
+void perf_test_join_vcpu_threads(int vcpus);
#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index f8fddc8..99e0dcd 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -104,6 +104,7 @@ size_t get_trans_hugepagesz(void);
size_t get_def_hugetlb_pagesz(void);
const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i);
size_t get_backing_src_pagesz(uint32_t i);
+bool is_backing_src_hugetlb(uint32_t i);
void backing_src_help(const char *flag);
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
long get_run_delay(void);
@@ -117,4 +118,29 @@ static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t)
return vm_mem_backing_src_alias(t)->flag & MAP_SHARED;
}
+/* Aligns x up to the next multiple of size. Size must be a power of 2. */
+static inline uint64_t align_up(uint64_t x, uint64_t size)
+{
+ uint64_t mask = size - 1;
+
+ TEST_ASSERT(size != 0 && !(size & (size - 1)),
+ "size not a power of 2: %lu", size);
+ return ((x + mask) & ~mask);
+}
+
+static inline uint64_t align_down(uint64_t x, uint64_t size)
+{
+ uint64_t x_aligned_up = align_up(x, size);
+
+ if (x == x_aligned_up)
+ return x;
+ else
+ return x_aligned_up - size;
+}
+
+static inline void *align_ptr_up(void *x, size_t size)
+{
+ return (void *)align_up((unsigned long)x, size);
+}
+
#endif /* SELFTEST_KVM_TEST_UTIL_H */
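
These helpers centralize the mask arithmetic the callers above used to open-code as x &= ~(size - 1). Their identities, demonstrated standalone with assert() in place of TEST_ASSERT():

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t align_up(uint64_t x, uint64_t size)
    {
            uint64_t mask = size - 1;

            assert(size != 0 && !(size & (size - 1)));  /* power of 2 only */
            return (x + mask) & ~mask;
    }

    static uint64_t align_down(uint64_t x, uint64_t size)
    {
            uint64_t up = align_up(x, size);

            return x == up ? x : up - size;
    }

    int main(void)
    {
            assert(align_up(0x1234, 0x1000) == 0x2000);
            assert(align_down(0x1234, 0x1000) == 0x1000);
            assert(align_down(0x2000, 0x1000) == 0x2000); /* already aligned */
            printf("alignment identities hold\n");
            return 0;
    }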
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index 36407cb..3836322 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -280,7 +280,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
#ifdef __s390x__
alignment = max(0x100000, alignment);
#endif
- guest_test_phys_mem &= ~(alignment - 1);
+ guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
/* Set up the shared data structure test_args */
test_args.vm = vm;
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
index eac44f5..13e8e3d 100644
--- a/tools/testing/selftests/kvm/lib/elf.c
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -157,8 +157,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
"memsize of 0,\n"
" phdr index: %u p_memsz: 0x%" PRIx64,
n1, (uint64_t) phdr.p_memsz);
- vm_vaddr_t seg_vstart = phdr.p_vaddr;
- seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1);
+ vm_vaddr_t seg_vstart = align_down(phdr.p_vaddr, vm->page_size);
vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1;
seg_vend |= vm->page_size - 1;
size_t seg_size = seg_vend - seg_vstart + 1;
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 14bb4d5..8f2e0bb 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -22,15 +22,6 @@
static int vcpu_mmap_sz(void);
-/* Aligns x up to the next multiple of size. Size must be a power of 2. */
-static void *align(void *x, size_t size)
-{
- size_t mask = size - 1;
- TEST_ASSERT(size != 0 && !(size & (size - 1)),
- "size not a power of 2: %lu", size);
- return (void *) (((size_t) x + mask) & ~mask);
-}
-
int open_path_or_exit(const char *path, int flags)
{
int fd;
@@ -191,15 +182,15 @@ const char *vm_guest_mode_string(uint32_t i)
}
const struct vm_guest_mode_params vm_guest_mode_params[] = {
- { 52, 48, 0x1000, 12 },
- { 52, 48, 0x10000, 16 },
- { 48, 48, 0x1000, 12 },
- { 48, 48, 0x10000, 16 },
- { 40, 48, 0x1000, 12 },
- { 40, 48, 0x10000, 16 },
- { 0, 0, 0x1000, 12 },
- { 47, 64, 0x1000, 12 },
- { 44, 64, 0x1000, 12 },
+ [VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 },
+ [VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 },
+ [VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 },
+ [VM_MODE_P48V48_64K] = { 48, 48, 0x10000, 16 },
+ [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 },
+ [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 },
+ [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 },
+ [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 },
+ [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
"Missing new mode params?");
@@ -879,9 +870,17 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
alignment = 1;
#endif
+ /*
+ * When using THP mmap is not guaranteed to return a hugepage aligned
+ * address so we have to pad the mmap. Padding is not needed for HugeTLB
+ * because mmap will always return an address aligned to the HugeTLB
+ * page size.
+ */
if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
alignment = max(backing_src_pagesz, alignment);
+ ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
+
/* Add enough memory to align up if necessary */
if (alignment > 1)
region->mmap_size += alignment;
@@ -914,8 +913,13 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
"test_malloc failed, mmap_start: %p errno: %i",
region->mmap_start, errno);
+ TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
+ region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
+ "mmap_start %p is not aligned to HugeTLB page size 0x%lx",
+ region->mmap_start, backing_src_pagesz);
+
/* Align host address */
- region->host_mem = align(region->mmap_start, alignment);
+ region->host_mem = align_ptr_up(region->mmap_start, alignment);
/* As needed perform madvise */
if ((src_type == VM_MEM_SRC_ANONYMOUS ||
@@ -958,7 +962,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
"mmap of alias failed, errno: %i", errno);
/* Align host alias address */
- region->host_alias = align(region->mmap_alias, alignment);
+ region->host_alias = align_ptr_up(region->mmap_alias, alignment);
}
}
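
The new assertion and comment codify the existing strategy: a THP-backed region is over-allocated by one alignment unit and its start pointer rounded up, whereas a HugeTLB mapping must already come back from mmap() aligned. A userspace sketch of pad-then-align (a 2 MiB huge page size is assumed):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/mman.h>

    #define HPAGE_SIZE (2UL << 20)  /* assumed THP size for this sketch */

    int main(void)
    {
            size_t len = 8UL << 20;
            /* Over-allocate so an aligned run of 'len' bytes must exist. */
            void *raw = mmap(NULL, len + HPAGE_SIZE, PROT_READ | PROT_WRITE,
                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            uintptr_t start;

            assert(raw != MAP_FAILED);
            start = ((uintptr_t)raw + HPAGE_SIZE - 1) & ~(HPAGE_SIZE - 1);
            printf("mmap %p, aligned start %#lx\n", raw, (unsigned long)start);
            return 0;
    }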
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 0ef80db..722df3a 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -10,21 +10,40 @@
struct perf_test_args perf_test_args;
-uint64_t guest_test_phys_mem;
-
/*
* Guest virtual memory offset of the testing memory slot.
* Must not conflict with identity mapped test code.
*/
static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
+struct vcpu_thread {
+ /* The id of the vCPU. */
+ int vcpu_id;
+
+ /* The pthread backing the vCPU. */
+ pthread_t thread;
+
+ /* Set to true once the vCPU thread is up and running. */
+ bool running;
+};
+
+/* The vCPU threads involved in this test. */
+static struct vcpu_thread vcpu_threads[KVM_MAX_VCPUS];
+
+/* The function run by each vCPU thread, as provided by the test. */
+static void (*vcpu_thread_fn)(struct perf_test_vcpu_args *);
+
+/* Set to true once all vCPU threads are up and running. */
+static bool all_vcpu_threads_running;
+
/*
* Continuously write to the first 8 bytes of each page in the
* specified region.
*/
static void guest_code(uint32_t vcpu_id)
{
- struct perf_test_vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
+ struct perf_test_args *pta = &perf_test_args;
+ struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_id];
uint64_t gva;
uint64_t pages;
int i;
@@ -37,9 +56,9 @@ static void guest_code(uint32_t vcpu_id)
while (true) {
for (i = 0; i < pages; i++) {
- uint64_t addr = gva + (i * perf_test_args.guest_page_size);
+ uint64_t addr = gva + (i * pta->guest_page_size);
- if (i % perf_test_args.wr_fract == 0)
+ if (i % pta->wr_fract == 0)
*(uint64_t *)addr = 0x0123456789ABCDEF;
else
READ_ONCE(*(uint64_t *)addr);
@@ -49,35 +68,81 @@ static void guest_code(uint32_t vcpu_id)
}
}
+void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
+ uint64_t vcpu_memory_bytes,
+ bool partition_vcpu_memory_access)
+{
+ struct perf_test_args *pta = &perf_test_args;
+ struct perf_test_vcpu_args *vcpu_args;
+ int vcpu_id;
+
+ for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+ vcpu_args = &pta->vcpu_args[vcpu_id];
+
+ vcpu_args->vcpu_id = vcpu_id;
+ if (partition_vcpu_memory_access) {
+ vcpu_args->gva = guest_test_virt_mem +
+ (vcpu_id * vcpu_memory_bytes);
+ vcpu_args->pages = vcpu_memory_bytes /
+ pta->guest_page_size;
+ vcpu_args->gpa = pta->gpa + (vcpu_id * vcpu_memory_bytes);
+ } else {
+ vcpu_args->gva = guest_test_virt_mem;
+ vcpu_args->pages = (vcpus * vcpu_memory_bytes) /
+ pta->guest_page_size;
+ vcpu_args->gpa = pta->gpa;
+ }
+
+ vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+
+ pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
+ vcpu_id, vcpu_args->gpa, vcpu_args->gpa +
+ (vcpu_args->pages * pta->guest_page_size));
+ }
+}
+
struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
uint64_t vcpu_memory_bytes, int slots,
- enum vm_mem_backing_src_type backing_src)
+ enum vm_mem_backing_src_type backing_src,
+ bool partition_vcpu_memory_access)
{
+ struct perf_test_args *pta = &perf_test_args;
struct kvm_vm *vm;
uint64_t guest_num_pages;
+ uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
int i;
pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
- perf_test_args.host_page_size = getpagesize();
- perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size;
+ /* By default vCPUs will write to memory. */
+ pta->wr_fract = 1;
+
+ /*
+ * Snapshot the non-huge page size. This is used by the guest code to
+ * access/dirty pages at the logging granularity.
+ */
+ pta->guest_page_size = vm_guest_mode_params[mode].page_size;
guest_num_pages = vm_adjust_num_guest_pages(mode,
- (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size);
+ (vcpus * vcpu_memory_bytes) / pta->guest_page_size);
- TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0,
+ TEST_ASSERT(vcpu_memory_bytes % getpagesize() == 0,
"Guest memory size is not host page size aligned.");
- TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
+ TEST_ASSERT(vcpu_memory_bytes % pta->guest_page_size == 0,
"Guest memory size is not guest page size aligned.");
TEST_ASSERT(guest_num_pages % slots == 0,
"Guest memory cannot be evenly divided into %d slots.",
slots);
+ /*
+ * Pass guest_num_pages to populate the page tables for test memory.
+ * The memory is also added to memslot 0, but that's a benign side
+ * effect as KVM allows aliasing HVAs in memslots.
+ */
vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES,
- (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size,
- 0, guest_code, NULL);
+ guest_num_pages, 0, guest_code, NULL);
- perf_test_args.vm = vm;
+ pta->vm = vm;
/*
* If there should be more memory in the guest test region than there
@@ -90,20 +155,18 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
guest_num_pages, vm_get_max_gfn(vm), vcpus,
vcpu_memory_bytes);
- guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
- perf_test_args.guest_page_size;
- guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1);
+ pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size;
+ pta->gpa = align_down(pta->gpa, backing_src_pagesz);
#ifdef __s390x__
/* Align to 1M (segment size) */
- guest_test_phys_mem &= ~((1 << 20) - 1);
+ pta->gpa = align_down(pta->gpa, 1 << 20);
#endif
- pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
+ pr_info("guest physical test memory offset: 0x%lx\n", pta->gpa);
/* Add extra memory slots for testing */
for (i = 0; i < slots; i++) {
uint64_t region_pages = guest_num_pages / slots;
- vm_paddr_t region_start = guest_test_phys_mem +
- region_pages * perf_test_args.guest_page_size * i;
+ vm_paddr_t region_start = pta->gpa + region_pages * pta->guest_page_size * i;
vm_userspace_mem_region_add(vm, backing_src, region_start,
PERF_TEST_MEM_SLOT_INDEX + i,
@@ -111,10 +174,15 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
}
/* Do mapping for the demand paging memory slot */
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
+ virt_map(vm, guest_test_virt_mem, pta->gpa, guest_num_pages);
+
+ perf_test_setup_vcpus(vm, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access);
ucall_init(vm, NULL);
+ /* Export the shared variables to the guest. */
+ sync_global_to_guest(vm, perf_test_args);
+
return vm;
}
@@ -124,36 +192,60 @@ void perf_test_destroy_vm(struct kvm_vm *vm)
kvm_vm_free(vm);
}
-void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
- uint64_t vcpu_memory_bytes,
- bool partition_vcpu_memory_access)
+void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract)
{
- vm_paddr_t vcpu_gpa;
- struct perf_test_vcpu_args *vcpu_args;
+ perf_test_args.wr_fract = wr_fract;
+ sync_global_to_guest(vm, perf_test_args);
+}
+
+static void *vcpu_thread_main(void *data)
+{
+ struct vcpu_thread *vcpu = data;
+
+ WRITE_ONCE(vcpu->running, true);
+
+ /*
+ * Wait for all vCPU threads to be up and running before calling the test-
+ * provided vCPU thread function. This prevents thread creation (which
+ * requires taking the mmap_sem in write mode) from interfering with the
+ * guest faulting in its memory.
+ */
+ while (!READ_ONCE(all_vcpu_threads_running))
+ ;
+
+ vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu->vcpu_id]);
+
+ return NULL;
+}
+
+void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *))
+{
int vcpu_id;
+ vcpu_thread_fn = vcpu_fn;
+ WRITE_ONCE(all_vcpu_threads_running, false);
+
for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
- vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
+ struct vcpu_thread *vcpu = &vcpu_threads[vcpu_id];
- vcpu_args->vcpu_id = vcpu_id;
- if (partition_vcpu_memory_access) {
- vcpu_args->gva = guest_test_virt_mem +
- (vcpu_id * vcpu_memory_bytes);
- vcpu_args->pages = vcpu_memory_bytes /
- perf_test_args.guest_page_size;
- vcpu_gpa = guest_test_phys_mem +
- (vcpu_id * vcpu_memory_bytes);
- } else {
- vcpu_args->gva = guest_test_virt_mem;
- vcpu_args->pages = (vcpus * vcpu_memory_bytes) /
- perf_test_args.guest_page_size;
- vcpu_gpa = guest_test_phys_mem;
- }
+ vcpu->vcpu_id = vcpu_id;
+ WRITE_ONCE(vcpu->running, false);
- vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
-
- pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
- vcpu_id, vcpu_gpa, vcpu_gpa +
- (vcpu_args->pages * perf_test_args.guest_page_size));
+ pthread_create(&vcpu->thread, NULL, vcpu_thread_main, vcpu);
}
+
+ for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+ while (!READ_ONCE(vcpu_threads[vcpu_id].running))
+ ;
+ }
+
+ WRITE_ONCE(all_vcpu_threads_running, true);
+}
+
+void perf_test_join_vcpu_threads(int vcpus)
+{
+ int vcpu_id;
+
+ for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++)
+ pthread_join(vcpu_threads[vcpu_id].thread, NULL);
}
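
The two-phase startup is the point of the new helper: each thread flags itself running, then spins until the main thread has observed every sibling, so no vCPU begins faulting in guest memory while a later pthread_create() still needs mmap_lock in write mode. The handshake reduced to plain pthreads (thread count and names are illustrative):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define NTHREADS 4

    static struct { pthread_t thread; bool running; } workers[NTHREADS];
    static bool all_running;

    static void *worker_main(void *data)
    {
            bool *running = data;

            __atomic_store_n(running, true, __ATOMIC_RELAXED);
            while (!__atomic_load_n(&all_running, __ATOMIC_RELAXED))
                    ;       /* hold off until every sibling exists */
            /* ...the real per-thread work would start here... */
            return NULL;
    }

    int main(void)
    {
            int i;

            for (i = 0; i < NTHREADS; i++)
                    pthread_create(&workers[i].thread, NULL, worker_main,
                                   &workers[i].running);

            for (i = 0; i < NTHREADS; i++)
                    while (!__atomic_load_n(&workers[i].running,
                                            __ATOMIC_RELAXED))
                            ;

            __atomic_store_n(&all_running, true, __ATOMIC_RELAXED);

            for (i = 0; i < NTHREADS; i++)
                    pthread_join(workers[i].thread, NULL);
            printf("all %d threads ran\n", NTHREADS);
            return 0;
    }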
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index b724291..6d23878 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -283,6 +283,11 @@ size_t get_backing_src_pagesz(uint32_t i)
}
}
+bool is_backing_src_hugetlb(uint32_t i)
+{
+ return !!(vm_mem_backing_src_alias(i)->flag & MAP_HUGETLB);
+}
+
static void print_available_backing_src_types(const char *prefix)
{
int i;
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 4cfcafe..1410d0a 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -36,11 +36,9 @@ static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
static bool run_vcpus = true;
-static void *vcpu_worker(void *data)
+static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
{
int ret;
- struct perf_test_vcpu_args *vcpu_args =
- (struct perf_test_vcpu_args *)data;
int vcpu_id = vcpu_args->vcpu_id;
struct kvm_vm *vm = perf_test_args.vm;
struct kvm_run *run;
@@ -59,8 +57,6 @@ static void *vcpu_worker(void *data)
"Invalid guest sync status: exit_reason=%s\n",
exit_reason_str(run->exit_reason));
}
-
- return NULL;
}
struct memslot_antagonist_args {
@@ -80,7 +76,7 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
* Add the dummy memslot just below the perf_test_util memslot, which is
* at the top of the guest physical address space.
*/
- gpa = guest_test_phys_mem - pages * vm_get_page_size(vm);
+ gpa = perf_test_args.gpa - pages * vm_get_page_size(vm);
for (i = 0; i < nr_modifications; i++) {
usleep(delay);
@@ -100,29 +96,15 @@ struct test_params {
static void run_test(enum vm_guest_mode mode, void *arg)
{
struct test_params *p = arg;
- pthread_t *vcpu_threads;
struct kvm_vm *vm;
- int vcpu_id;
vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
- VM_MEM_SRC_ANONYMOUS);
-
- perf_test_args.wr_fract = 1;
-
- vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
- TEST_ASSERT(vcpu_threads, "Memory allocation failed");
-
- perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
- p->partition_vcpu_memory_access);
-
- /* Export the shared variables to the guest */
- sync_global_to_guest(vm, perf_test_args);
+ VM_MEM_SRC_ANONYMOUS,
+ p->partition_vcpu_memory_access);
pr_info("Finished creating vCPUs\n");
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
- pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
- &perf_test_args.vcpu_args[vcpu_id]);
+ perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
pr_info("Started all vCPUs\n");
@@ -131,16 +113,10 @@ static void run_test(enum vm_guest_mode mode, void *arg)
run_vcpus = false;
- /* Wait for the vcpu threads to quit */
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
- pthread_join(vcpu_threads[vcpu_id], NULL);
-
+ perf_test_join_vcpu_threads(nr_vcpus);
pr_info("All vCPU threads joined\n");
- ucall_uninit(vm);
- kvm_vm_free(vm);
-
- free(vcpu_threads);
+ perf_test_destroy_vm(vm);
}
static void help(char *name)
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index eda0d2a..a0699f0 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -24,8 +24,12 @@
#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
+#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)
#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
+#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40)
+
+#define EVTCHN_VECTOR 0x10
static struct kvm_vm *vm;
@@ -56,15 +60,44 @@ struct vcpu_runstate_info {
uint64_t time[4];
};
+struct arch_vcpu_info {
+ unsigned long cr2;
+ unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
+};
+
+struct vcpu_info {
+ uint8_t evtchn_upcall_pending;
+ uint8_t evtchn_upcall_mask;
+ unsigned long evtchn_pending_sel;
+ struct arch_vcpu_info arch;
+ struct pvclock_vcpu_time_info time;
+}; /* 64 bytes (x86) */
+
#define RUNSTATE_running 0
#define RUNSTATE_runnable 1
#define RUNSTATE_blocked 2
#define RUNSTATE_offline 3
+static void evtchn_handler(struct ex_regs *regs)
+{
+ struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
+ vi->evtchn_upcall_pending = 0;
+
+ GUEST_SYNC(0x20);
+}
+
static void guest_code(void)
{
struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
+ __asm__ __volatile__(
+ "sti\n"
+ "nop\n"
+ );
+
+ /* Trigger an interrupt injection */
+ GUEST_SYNC(0);
+
/* Test having the host set runstates manually */
GUEST_SYNC(RUNSTATE_runnable);
GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
@@ -153,7 +186,7 @@ int main(int argc, char *argv[])
struct kvm_xen_vcpu_attr vi = {
.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
- .u.gpa = SHINFO_REGION_GPA + 0x40,
+ .u.gpa = VCPU_INFO_ADDR,
};
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &vi);
@@ -163,6 +196,16 @@ int main(int argc, char *argv[])
};
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock);
+ struct kvm_xen_hvm_attr vec = {
+ .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
+ .u.vector = EVTCHN_VECTOR,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
+
if (do_runstate_tests) {
struct kvm_xen_vcpu_attr st = {
.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
@@ -171,9 +214,14 @@ int main(int argc, char *argv[])
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
}
+ struct vcpu_info *vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
+ vinfo->evtchn_upcall_pending = 0;
+
struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
rs->state = 0x5a;
+ bool evtchn_irq_expected = false;
+
for (;;) {
volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
struct ucall uc;
@@ -193,16 +241,21 @@ int main(int argc, char *argv[])
struct kvm_xen_vcpu_attr rst;
long rundelay;
- /* If no runstate support, bail out early */
- if (!do_runstate_tests)
- goto done;
-
- TEST_ASSERT(rs->state_entry_time == rs->time[0] +
- rs->time[1] + rs->time[2] + rs->time[3],
- "runstate times don't add up");
+ if (do_runstate_tests)
+ TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+ rs->time[1] + rs->time[2] + rs->time[3],
+ "runstate times don't add up");
switch (uc.args[1]) {
- case RUNSTATE_running...RUNSTATE_offline:
+ case 0:
+ evtchn_irq_expected = true;
+ vinfo->evtchn_upcall_pending = 1;
+ break;
+
+ case RUNSTATE_runnable...RUNSTATE_offline:
+ TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
+ if (!do_runstate_tests)
+ goto done;
rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
rst.u.runstate.state = uc.args[1];
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
@@ -236,6 +289,10 @@ int main(int argc, char *argv[])
sched_yield();
} while (get_run_delay() < rundelay);
break;
+ case 0x20:
+ TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
+ evtchn_irq_expected = false;
+ break;
}
break;
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d317245..9646bb9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2548,72 +2548,36 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(gfn_to_page);
-void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache)
+void kvm_release_pfn(kvm_pfn_t pfn, bool dirty)
{
if (pfn == 0)
return;
- if (cache)
- cache->pfn = cache->gfn = 0;
-
if (dirty)
kvm_release_pfn_dirty(pfn);
else
kvm_release_pfn_clean(pfn);
}
-static void kvm_cache_gfn_to_pfn(struct kvm_memory_slot *slot, gfn_t gfn,
- struct gfn_to_pfn_cache *cache, u64 gen)
-{
- kvm_release_pfn(cache->pfn, cache->dirty, cache);
-
- cache->pfn = gfn_to_pfn_memslot(slot, gfn);
- cache->gfn = gfn;
- cache->dirty = false;
- cache->generation = gen;
-}
-
-static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn,
- struct kvm_host_map *map,
- struct gfn_to_pfn_cache *cache,
- bool atomic)
+int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
{
kvm_pfn_t pfn;
void *hva = NULL;
struct page *page = KVM_UNMAPPED_PAGE;
- struct kvm_memory_slot *slot = __gfn_to_memslot(slots, gfn);
- u64 gen = slots->generation;
if (!map)
return -EINVAL;
- if (cache) {
- if (!cache->pfn || cache->gfn != gfn ||
- cache->generation != gen) {
- if (atomic)
- return -EAGAIN;
- kvm_cache_gfn_to_pfn(slot, gfn, cache, gen);
- }
- pfn = cache->pfn;
- } else {
- if (atomic)
- return -EAGAIN;
- pfn = gfn_to_pfn_memslot(slot, gfn);
- }
+ pfn = gfn_to_pfn(vcpu->kvm, gfn);
if (is_error_noslot_pfn(pfn))
return -EINVAL;
if (pfn_valid(pfn)) {
page = pfn_to_page(pfn);
- if (atomic)
- hva = kmap_atomic(page);
- else
- hva = kmap(page);
+ hva = kmap(page);
#ifdef CONFIG_HAS_IOMEM
- } else if (!atomic) {
- hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
} else {
- return -EINVAL;
+ hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
#endif
}
@@ -2627,27 +2591,9 @@ static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn,
return 0;
}
-
-int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
- struct gfn_to_pfn_cache *cache, bool atomic)
-{
- return __kvm_map_gfn(kvm_memslots(vcpu->kvm), gfn, map,
- cache, atomic);
-}
-EXPORT_SYMBOL_GPL(kvm_map_gfn);
-
-int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
-{
- return __kvm_map_gfn(kvm_vcpu_memslots(vcpu), gfn, map,
- NULL, false);
-}
EXPORT_SYMBOL_GPL(kvm_vcpu_map);
-static void __kvm_unmap_gfn(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- struct kvm_host_map *map,
- struct gfn_to_pfn_cache *cache,
- bool dirty, bool atomic)
+void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
{
if (!map)
return;
@@ -2655,45 +2601,21 @@ static void __kvm_unmap_gfn(struct kvm *kvm,
if (!map->hva)
return;
- if (map->page != KVM_UNMAPPED_PAGE) {
- if (atomic)
- kunmap_atomic(map->hva);
- else
- kunmap(map->page);
- }
+ if (map->page != KVM_UNMAPPED_PAGE)
+ kunmap(map->page);
#ifdef CONFIG_HAS_IOMEM
- else if (!atomic)
- memunmap(map->hva);
else
- WARN_ONCE(1, "Unexpected unmapping in atomic context");
+ memunmap(map->hva);
#endif
if (dirty)
- mark_page_dirty_in_slot(kvm, memslot, map->gfn);
+ kvm_vcpu_mark_page_dirty(vcpu, map->gfn);
- if (cache)
- cache->dirty |= dirty;
- else
- kvm_release_pfn(map->pfn, dirty, NULL);
+ kvm_release_pfn(map->pfn, dirty);
map->hva = NULL;
map->page = NULL;
}
-
-int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
- struct gfn_to_pfn_cache *cache, bool dirty, bool atomic)
-{
- __kvm_unmap_gfn(vcpu->kvm, gfn_to_memslot(vcpu->kvm, map->gfn), map,
- cache, dirty, atomic);
- return 0;
-}
-EXPORT_SYMBOL_GPL(kvm_unmap_gfn);
-
-void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
-{
- __kvm_unmap_gfn(vcpu->kvm, kvm_vcpu_gfn_to_memslot(vcpu, map->gfn),
- map, NULL, dirty, false);
-}
EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);
struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)
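
With the cache variants gone, kvm_vcpu_map()/kvm_vcpu_unmap() is the remaining pair: map a guest page, touch it through map.hva, and let the dirty flag on unmap decide whether the page is marked dirty. A sketch of the call shape only (touch_guest_page() is invented, and this is kernel-side code, not compilable standalone):

    static int touch_guest_page(struct kvm_vcpu *vcpu, gpa_t gpa)
    {
            struct kvm_host_map map;
            int ret;

            ret = kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map);
            if (ret)
                    return ret;

            /* map.hva points at the page, kmap'd or memremap'd as needed. */
            *(u8 *)map.hva = 0;

            /* 'true' marks the page dirty for dirty logging on unmap. */
            kvm_vcpu_unmap(vcpu, &map, true);
            return 0;
    }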