Merge tag 's390-5.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull s390 updates from Heiko Carstens:
 "Besides all the small improvements and cleanups the most notable part
  is the fast vector/SIMD implementation of the ChaCha20 stream cipher,
  which is an adaptation of Andy Polyakov's code for the kernel.

  Summary:

   - add fast vector/SIMD implementation of the ChaCha20 stream cipher,
     which mainly adapts Andy Polyakov's code for the kernel

   - add status attribute to AP queue device so users can easily figure
     out its status

   - fix race in page table release code, and add lots of documentation

   - remove uevent suppress from cio device driver, since it turned out
     that it generated more problems than it solved

   - quite a lot of virtual vs physical address confusion fixes (a short
     sketch of the recurring pattern follows the shortlog below)

   - various other small improvements and cleanups all over the place"

* tag 's390-5.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (39 commits)
  s390/dasd: use default_groups in kobj_type
  s390/sclp_sd: use default_groups in kobj_type
  s390/pci: simplify __pciwb_mio() inline asm
  s390: remove unused TASK_SIZE_OF
  s390/crash_dump: fix virtual vs physical address handling
  s390/crypto: fix compile error for ChaCha20 module
  s390/mm: check 2KB-fragment page on release
  s390/mm: better annotate 2KB pagetable fragments handling
  s390/mm: fix 2KB pgtable release race
  s390/sclp: release SCLP early buffer after kernel initialization
  s390/nmi: disable interrupts on extended save area update
  s390/zcrypt: CCA control CPRB sending
  s390/disassembler: update opcode table
  s390/uv: fix memblock virtual vs physical address confusion
  s390/smp: fix memblock_phys_free() vs memblock_free() confusion
  s390/sclp: fix memblock_phys_free() vs memblock_free() confusion
  s390/exit: remove dead reference to do_exit from copy_thread
  s390/ap: add missing virt_to_phys address conversion
  s390/pgalloc: use pointers instead of unsigned long values
  s390/pgalloc: add virt/phys address handling to base asce functions
  ...
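
A short hedged sketch of the recurring virtual-vs-physical pattern fixed
throughout this series (a hypothetical helper, not code taken from the
series itself): hardware-walked table entries hold physical origins, so
reading them back requires an explicit phys-to-virt conversion rather
than a plain cast.

	#include <linux/io.h>

	static unsigned long *entry_to_table(unsigned long entry,
					     unsigned long addr_mask)
	{
		/* before: return (unsigned long *)(entry & addr_mask); */
		return phys_to_virt(entry & addr_mask);
	}
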
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index e45cc27..354e51d 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -770,6 +770,7 @@
 CONFIG_CRYPTO_SHA3_512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_CHACHA_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
 CONFIG_CRYPTO_CRC32_S390=y
 CONFIG_CRYPTO_DEV_VIRTIO=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 1c750bfc..8dee6c3 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -757,6 +757,7 @@
 CONFIG_CRYPTO_SHA3_512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_CHACHA_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
 CONFIG_CRYPTO_CRC32_S390=y
 CONFIG_CRYPTO_DEV_VIRTIO=m
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index 12889d4..c63abfe 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -11,9 +11,11 @@
 obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
 obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
 obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o
+obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o
 obj-$(CONFIG_S390_PRNG) += prng.o
 obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
 obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o
 obj-$(CONFIG_ARCH_RANDOM) += arch_random.o
 
 crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o
+chacha_s390-y := chacha-glue.o chacha-s390.o
diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c
new file mode 100644
index 0000000..ccfff73
--- /dev/null
+++ b/arch/s390/crypto/chacha-glue.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * s390 ChaCha stream cipher.
+ *
+ * Copyright IBM Corp. 2021
+ */
+
+#define KMSG_COMPONENT "chacha_s390"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/algapi.h>
+#include <linux/cpufeature.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sizes.h>
+#include <asm/fpu/api.h>
+#include "chacha-s390.h"
+
+static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src,
+				unsigned int nbytes, const u32 *key,
+				u32 *counter)
+{
+	struct kernel_fpu vxstate;
+
+	kernel_fpu_begin(&vxstate, KERNEL_VXR);
+	chacha20_vx(dst, src, nbytes, key, counter);
+	kernel_fpu_end(&vxstate, KERNEL_VXR);
+
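+	/*
+	 * Advance the block counter; a trailing partial block
+	 * counts as a full block here.
+	 */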
+	*counter += round_up(nbytes, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE;
+}
+
+static int chacha20_s390(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+	u32 state[CHACHA_STATE_WORDS] __aligned(16);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	int rc;
+
+	rc = skcipher_walk_virt(&walk, req, false);
+	chacha_init_generic(state, ctx->key, req->iv);
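+	/* state: constants, key, then counter + nonce taken from the IV */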
+
+	while (walk.nbytes > 0) {
+		nbytes = walk.nbytes;
+		if (nbytes < walk.total)
+			nbytes = round_down(nbytes, walk.stride);
+
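+		/* fall back to the generic code for the short tail */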
+		if (nbytes <= CHACHA_BLOCK_SIZE) {
+			chacha_crypt_generic(state, walk.dst.virt.addr,
+					     walk.src.virt.addr, nbytes,
+					     ctx->nrounds);
+		} else {
+			chacha20_crypt_s390(state, walk.dst.virt.addr,
+					    walk.src.virt.addr, nbytes,
+					    &state[4], &state[12]);
+		}
+		rc = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+	}
+	return rc;
+}
+
+static struct skcipher_alg chacha_algs[] = {
+	{
+		.base.cra_name		= "chacha20",
+		.base.cra_driver_name	= "chacha20-s390",
+		.base.cra_priority	= 900,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= CHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= chacha20_s390,
+		.decrypt		= chacha20_s390,
+	}
+};
+
+static int __init chacha_mod_init(void)
+{
+	return crypto_register_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
+}
+
+static void __exit chacha_mod_fini(void)
+{
+	crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
+}
+
+module_cpu_feature_match(VXRS, chacha_mod_init);
+module_exit(chacha_mod_fini);
+
+MODULE_DESCRIPTION("ChaCha20 stream cipher");
+MODULE_LICENSE("GPL v2");
+
+MODULE_ALIAS_CRYPTO("chacha20");
diff --git a/arch/s390/crypto/chacha-s390.S b/arch/s390/crypto/chacha-s390.S
new file mode 100644
index 0000000..badf5c4
--- /dev/null
+++ b/arch/s390/crypto/chacha-s390.S
@@ -0,0 +1,907 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Original implementation written by Andy Polyakov, @dot-asm.
+ * This is an adaptation of the original code for kernel use.
+ *
+ * Copyright (C) 2006-2019 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved.
+ */
+
+#include <linux/linkage.h>
+#include <asm/nospec-insn.h>
+#include <asm/vx-insn.h>
+
+#define SP	%r15
+#define FRAME	(16 * 8 + 4 * 8)
+
+.data
+.align	32
+
+.Lsigma:
+.long	0x61707865,0x3320646e,0x79622d32,0x6b206574	# endian-neutral
+.long	1,0,0,0
+.long	2,0,0,0
+.long	3,0,0,0
+.long	0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c	# byte swap
+
+.long	0,1,2,3
+.long	0x61707865,0x61707865,0x61707865,0x61707865	# smashed sigma
+.long	0x3320646e,0x3320646e,0x3320646e,0x3320646e
+.long	0x79622d32,0x79622d32,0x79622d32,0x79622d32
+.long	0x6b206574,0x6b206574,0x6b206574,0x6b206574
+
+.previous
+
+	GEN_BR_THUNK %r14
+
+.text
+
+#############################################################################
+# void chacha20_vx_4x(u8 *out, const u8 *inp, size_t len,
+#		      const u32 *key, const u32 *counter)
+
+#define	OUT		%r2
+#define	INP		%r3
+#define	LEN		%r4
+#define	KEY		%r5
+#define	COUNTER		%r6
+
+#define BEPERM		%v31
+#define CTR		%v26
+
+#define K0		%v16
+#define K1		%v17
+#define K2		%v18
+#define K3		%v19
+
+#define XA0		%v0
+#define XA1		%v1
+#define XA2		%v2
+#define XA3		%v3
+
+#define XB0		%v4
+#define XB1		%v5
+#define XB2		%v6
+#define XB3		%v7
+
+#define XC0		%v8
+#define XC1		%v9
+#define XC2		%v10
+#define XC3		%v11
+
+#define XD0		%v12
+#define XD1		%v13
+#define XD2		%v14
+#define XD3		%v15
+
+#define XT0		%v27
+#define XT1		%v28
+#define XT2		%v29
+#define XT3		%v30
+
+ENTRY(chacha20_vx_4x)
+	stmg	%r6,%r7,6*8(SP)
+
+	larl	%r7,.Lsigma
+	lhi	%r0,10
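+	# 10 iterations of the double round loop = 20 ChaCha rounds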
+	lhi	%r1,0
+
+	VL	K0,0,,%r7		# load sigma
+	VL	K1,0,,KEY		# load key
+	VL	K2,16,,KEY
+	VL	K3,0,,COUNTER		# load counter
+
+	VL	BEPERM,0x40,,%r7
+	VL	CTR,0x50,,%r7
+
+	VLM	XA0,XA3,0x60,%r7,4	# load [smashed] sigma
+
+	VREPF	XB0,K1,0		# smash the key
+	VREPF	XB1,K1,1
+	VREPF	XB2,K1,2
+	VREPF	XB3,K1,3
+
+	VREPF	XD0,K3,0
+	VREPF	XD1,K3,1
+	VREPF	XD2,K3,2
+	VREPF	XD3,K3,3
+	VAF	XD0,XD0,CTR
+
+	VREPF	XC0,K2,0
+	VREPF	XC1,K2,1
+	VREPF	XC2,K2,2
+	VREPF	XC3,K2,3
+
+.Loop_4x:
+	VAF	XA0,XA0,XB0
+	VX	XD0,XD0,XA0
+	VERLLF	XD0,XD0,16
+
+	VAF	XA1,XA1,XB1
+	VX	XD1,XD1,XA1
+	VERLLF	XD1,XD1,16
+
+	VAF	XA2,XA2,XB2
+	VX	XD2,XD2,XA2
+	VERLLF	XD2,XD2,16
+
+	VAF	XA3,XA3,XB3
+	VX	XD3,XD3,XA3
+	VERLLF	XD3,XD3,16
+
+	VAF	XC0,XC0,XD0
+	VX	XB0,XB0,XC0
+	VERLLF	XB0,XB0,12
+
+	VAF	XC1,XC1,XD1
+	VX	XB1,XB1,XC1
+	VERLLF	XB1,XB1,12
+
+	VAF	XC2,XC2,XD2
+	VX	XB2,XB2,XC2
+	VERLLF	XB2,XB2,12
+
+	VAF	XC3,XC3,XD3
+	VX	XB3,XB3,XC3
+	VERLLF	XB3,XB3,12
+
+	VAF	XA0,XA0,XB0
+	VX	XD0,XD0,XA0
+	VERLLF	XD0,XD0,8
+
+	VAF	XA1,XA1,XB1
+	VX	XD1,XD1,XA1
+	VERLLF	XD1,XD1,8
+
+	VAF	XA2,XA2,XB2
+	VX	XD2,XD2,XA2
+	VERLLF	XD2,XD2,8
+
+	VAF	XA3,XA3,XB3
+	VX	XD3,XD3,XA3
+	VERLLF	XD3,XD3,8
+
+	VAF	XC0,XC0,XD0
+	VX	XB0,XB0,XC0
+	VERLLF	XB0,XB0,7
+
+	VAF	XC1,XC1,XD1
+	VX	XB1,XB1,XC1
+	VERLLF	XB1,XB1,7
+
+	VAF	XC2,XC2,XD2
+	VX	XB2,XB2,XC2
+	VERLLF	XB2,XB2,7
+
+	VAF	XC3,XC3,XD3
+	VX	XB3,XB3,XC3
+	VERLLF	XB3,XB3,7
+
+	VAF	XA0,XA0,XB1
+	VX	XD3,XD3,XA0
+	VERLLF	XD3,XD3,16
+
+	VAF	XA1,XA1,XB2
+	VX	XD0,XD0,XA1
+	VERLLF	XD0,XD0,16
+
+	VAF	XA2,XA2,XB3
+	VX	XD1,XD1,XA2
+	VERLLF	XD1,XD1,16
+
+	VAF	XA3,XA3,XB0
+	VX	XD2,XD2,XA3
+	VERLLF	XD2,XD2,16
+
+	VAF	XC2,XC2,XD3
+	VX	XB1,XB1,XC2
+	VERLLF	XB1,XB1,12
+
+	VAF	XC3,XC3,XD0
+	VX	XB2,XB2,XC3
+	VERLLF	XB2,XB2,12
+
+	VAF	XC0,XC0,XD1
+	VX	XB3,XB3,XC0
+	VERLLF	XB3,XB3,12
+
+	VAF	XC1,XC1,XD2
+	VX	XB0,XB0,XC1
+	VERLLF	XB0,XB0,12
+
+	VAF	XA0,XA0,XB1
+	VX	XD3,XD3,XA0
+	VERLLF	XD3,XD3,8
+
+	VAF	XA1,XA1,XB2
+	VX	XD0,XD0,XA1
+	VERLLF	XD0,XD0,8
+
+	VAF	XA2,XA2,XB3
+	VX	XD1,XD1,XA2
+	VERLLF	XD1,XD1,8
+
+	VAF	XA3,XA3,XB0
+	VX	XD2,XD2,XA3
+	VERLLF	XD2,XD2,8
+
+	VAF	XC2,XC2,XD3
+	VX	XB1,XB1,XC2
+	VERLLF	XB1,XB1,7
+
+	VAF	XC3,XC3,XD0
+	VX	XB2,XB2,XC3
+	VERLLF	XB2,XB2,7
+
+	VAF	XC0,XC0,XD1
+	VX	XB3,XB3,XC0
+	VERLLF	XB3,XB3,7
+
+	VAF	XC1,XC1,XD2
+	VX	XB0,XB0,XC1
+	VERLLF	XB0,XB0,7
+	brct	%r0,.Loop_4x
+
+	VAF	XD0,XD0,CTR
+
+	VMRHF	XT0,XA0,XA1		# transpose data
+	VMRHF	XT1,XA2,XA3
+	VMRLF	XT2,XA0,XA1
+	VMRLF	XT3,XA2,XA3
+	VPDI	XA0,XT0,XT1,0b0000
+	VPDI	XA1,XT0,XT1,0b0101
+	VPDI	XA2,XT2,XT3,0b0000
+	VPDI	XA3,XT2,XT3,0b0101
+
+	VMRHF	XT0,XB0,XB1
+	VMRHF	XT1,XB2,XB3
+	VMRLF	XT2,XB0,XB1
+	VMRLF	XT3,XB2,XB3
+	VPDI	XB0,XT0,XT1,0b0000
+	VPDI	XB1,XT0,XT1,0b0101
+	VPDI	XB2,XT2,XT3,0b0000
+	VPDI	XB3,XT2,XT3,0b0101
+
+	VMRHF	XT0,XC0,XC1
+	VMRHF	XT1,XC2,XC3
+	VMRLF	XT2,XC0,XC1
+	VMRLF	XT3,XC2,XC3
+	VPDI	XC0,XT0,XT1,0b0000
+	VPDI	XC1,XT0,XT1,0b0101
+	VPDI	XC2,XT2,XT3,0b0000
+	VPDI	XC3,XT2,XT3,0b0101
+
+	VMRHF	XT0,XD0,XD1
+	VMRHF	XT1,XD2,XD3
+	VMRLF	XT2,XD0,XD1
+	VMRLF	XT3,XD2,XD3
+	VPDI	XD0,XT0,XT1,0b0000
+	VPDI	XD1,XT0,XT1,0b0101
+	VPDI	XD2,XT2,XT3,0b0000
+	VPDI	XD3,XT2,XT3,0b0101
+
+	VAF	XA0,XA0,K0
+	VAF	XB0,XB0,K1
+	VAF	XC0,XC0,K2
+	VAF	XD0,XD0,K3
+
+	VPERM	XA0,XA0,XA0,BEPERM
+	VPERM	XB0,XB0,XB0,BEPERM
+	VPERM	XC0,XC0,XC0,BEPERM
+	VPERM	XD0,XD0,XD0,BEPERM
+
+	VLM	XT0,XT3,0,INP,0
+
+	VX	XT0,XT0,XA0
+	VX	XT1,XT1,XB0
+	VX	XT2,XT2,XC0
+	VX	XT3,XT3,XD0
+
+	VSTM	XT0,XT3,0,OUT,0
+
+	la	INP,0x40(INP)
+	la	OUT,0x40(OUT)
+	aghi	LEN,-0x40
+
+	VAF	XA0,XA1,K0
+	VAF	XB0,XB1,K1
+	VAF	XC0,XC1,K2
+	VAF	XD0,XD1,K3
+
+	VPERM	XA0,XA0,XA0,BEPERM
+	VPERM	XB0,XB0,XB0,BEPERM
+	VPERM	XC0,XC0,XC0,BEPERM
+	VPERM	XD0,XD0,XD0,BEPERM
+
+	.insn	rilu,0xc20e00000000,LEN,0x40	# clgfi LEN,0x40
+	jl	.Ltail_4x
+
+	VLM	XT0,XT3,0,INP,0
+
+	VX	XT0,XT0,XA0
+	VX	XT1,XT1,XB0
+	VX	XT2,XT2,XC0
+	VX	XT3,XT3,XD0
+
+	VSTM	XT0,XT3,0,OUT,0
+
+	la	INP,0x40(INP)
+	la	OUT,0x40(OUT)
+	aghi	LEN,-0x40
+	je	.Ldone_4x
+
+	VAF	XA0,XA2,K0
+	VAF	XB0,XB2,K1
+	VAF	XC0,XC2,K2
+	VAF	XD0,XD2,K3
+
+	VPERM	XA0,XA0,XA0,BEPERM
+	VPERM	XB0,XB0,XB0,BEPERM
+	VPERM	XC0,XC0,XC0,BEPERM
+	VPERM	XD0,XD0,XD0,BEPERM
+
+	.insn	rilu,0xc20e00000000,LEN,0x40	# clgfi LEN,0x40
+	jl	.Ltail_4x
+
+	VLM	XT0,XT3,0,INP,0
+
+	VX	XT0,XT0,XA0
+	VX	XT1,XT1,XB0
+	VX	XT2,XT2,XC0
+	VX	XT3,XT3,XD0
+
+	VSTM	XT0,XT3,0,OUT,0
+
+	la	INP,0x40(INP)
+	la	OUT,0x40(OUT)
+	aghi	LEN,-0x40
+	je	.Ldone_4x
+
+	VAF	XA0,XA3,K0
+	VAF	XB0,XB3,K1
+	VAF	XC0,XC3,K2
+	VAF	XD0,XD3,K3
+
+	VPERM	XA0,XA0,XA0,BEPERM
+	VPERM	XB0,XB0,XB0,BEPERM
+	VPERM	XC0,XC0,XC0,BEPERM
+	VPERM	XD0,XD0,XD0,BEPERM
+
+	.insn	rilu,0xc20e00000000,LEN,0x40	# clgfi LEN,0x40
+	jl	.Ltail_4x
+
+	VLM	XT0,XT3,0,INP,0
+
+	VX	XT0,XT0,XA0
+	VX	XT1,XT1,XB0
+	VX	XT2,XT2,XC0
+	VX	XT3,XT3,XD0
+
+	VSTM	XT0,XT3,0,OUT,0
+
+.Ldone_4x:
+	lmg	%r6,%r7,6*8(SP)
+	BR_EX	%r14
+
+.Ltail_4x:
+	VLR	XT0,XC0
+	VLR	XT1,XD0
+
+	VST	XA0,8*8+0x00,,SP
+	VST	XB0,8*8+0x10,,SP
+	VST	XT0,8*8+0x20,,SP
+	VST	XT1,8*8+0x30,,SP
+
+	lghi	%r1,0
+
+.Loop_tail_4x:
+	llgc	%r5,0(%r1,INP)
+	llgc	%r6,8*8(%r1,SP)
+	xr	%r6,%r5
+	stc	%r6,0(%r1,OUT)
+	la	%r1,1(%r1)
+	brct	LEN,.Loop_tail_4x
+
+	lmg	%r6,%r7,6*8(SP)
+	BR_EX	%r14
+ENDPROC(chacha20_vx_4x)
+
+#undef	OUT
+#undef	INP
+#undef	LEN
+#undef	KEY
+#undef	COUNTER
+
+#undef BEPERM
+
+#undef K0
+#undef K1
+#undef K2
+#undef K3
+
+
+#############################################################################
+# void chacha20_vx(u8 *out, const u8 *inp, size_t len,
+#		   const u32 *key, const u32 *counter)
+
+#define	OUT		%r2
+#define	INP		%r3
+#define	LEN		%r4
+#define	KEY		%r5
+#define	COUNTER		%r6
+
+#define BEPERM		%v31
+
+#define K0		%v27
+#define K1		%v24
+#define K2		%v25
+#define K3		%v26
+
+#define A0		%v0
+#define B0		%v1
+#define C0		%v2
+#define D0		%v3
+
+#define A1		%v4
+#define B1		%v5
+#define C1		%v6
+#define D1		%v7
+
+#define A2		%v8
+#define B2		%v9
+#define C2		%v10
+#define D2		%v11
+
+#define A3		%v12
+#define B3		%v13
+#define C3		%v14
+#define D3		%v15
+
+#define A4		%v16
+#define B4		%v17
+#define C4		%v18
+#define D4		%v19
+
+#define A5		%v20
+#define B5		%v21
+#define C5		%v22
+#define D5		%v23
+
+#define T0		%v27
+#define T1		%v28
+#define T2		%v29
+#define T3		%v30
+
+ENTRY(chacha20_vx)
+	.insn	rilu,0xc20e00000000,LEN,256	# clgfi LEN,256
+	jle	chacha20_vx_4x
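+	# requests of up to 256 bytes are handled by the 4-block code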
+	stmg	%r6,%r7,6*8(SP)
+
+	lghi	%r1,-FRAME
+	lgr	%r0,SP
+	la	SP,0(%r1,SP)
+	stg	%r0,0(SP)		# back-chain
+
+	larl	%r7,.Lsigma
+	lhi	%r0,10
+
+	VLM	K1,K2,0,KEY,0		# load key
+	VL	K3,0,,COUNTER		# load counter
+
+	VLM	K0,BEPERM,0,%r7,4	# load sigma, increments, ...
+
+.Loop_outer_vx:
+	VLR	A0,K0
+	VLR	B0,K1
+	VLR	A1,K0
+	VLR	B1,K1
+	VLR	A2,K0
+	VLR	B2,K1
+	VLR	A3,K0
+	VLR	B3,K1
+	VLR	A4,K0
+	VLR	B4,K1
+	VLR	A5,K0
+	VLR	B5,K1
+
+	VLR	D0,K3
+	VAF	D1,K3,T1		# K[3]+1
+	VAF	D2,K3,T2		# K[3]+2
+	VAF	D3,K3,T3		# K[3]+3
+	VAF	D4,D2,T2		# K[3]+4
+	VAF	D5,D2,T3		# K[3]+5
+
+	VLR	C0,K2
+	VLR	C1,K2
+	VLR	C2,K2
+	VLR	C3,K2
+	VLR	C4,K2
+	VLR	C5,K2
+
+	VLR	T1,D1
+	VLR	T2,D2
+	VLR	T3,D3
+
+.Loop_vx:
+	VAF	A0,A0,B0
+	VAF	A1,A1,B1
+	VAF	A2,A2,B2
+	VAF	A3,A3,B3
+	VAF	A4,A4,B4
+	VAF	A5,A5,B5
+	VX	D0,D0,A0
+	VX	D1,D1,A1
+	VX	D2,D2,A2
+	VX	D3,D3,A3
+	VX	D4,D4,A4
+	VX	D5,D5,A5
+	VERLLF	D0,D0,16
+	VERLLF	D1,D1,16
+	VERLLF	D2,D2,16
+	VERLLF	D3,D3,16
+	VERLLF	D4,D4,16
+	VERLLF	D5,D5,16
+
+	VAF	C0,C0,D0
+	VAF	C1,C1,D1
+	VAF	C2,C2,D2
+	VAF	C3,C3,D3
+	VAF	C4,C4,D4
+	VAF	C5,C5,D5
+	VX	B0,B0,C0
+	VX	B1,B1,C1
+	VX	B2,B2,C2
+	VX	B3,B3,C3
+	VX	B4,B4,C4
+	VX	B5,B5,C5
+	VERLLF	B0,B0,12
+	VERLLF	B1,B1,12
+	VERLLF	B2,B2,12
+	VERLLF	B3,B3,12
+	VERLLF	B4,B4,12
+	VERLLF	B5,B5,12
+
+	VAF	A0,A0,B0
+	VAF	A1,A1,B1
+	VAF	A2,A2,B2
+	VAF	A3,A3,B3
+	VAF	A4,A4,B4
+	VAF	A5,A5,B5
+	VX	D0,D0,A0
+	VX	D1,D1,A1
+	VX	D2,D2,A2
+	VX	D3,D3,A3
+	VX	D4,D4,A4
+	VX	D5,D5,A5
+	VERLLF	D0,D0,8
+	VERLLF	D1,D1,8
+	VERLLF	D2,D2,8
+	VERLLF	D3,D3,8
+	VERLLF	D4,D4,8
+	VERLLF	D5,D5,8
+
+	VAF	C0,C0,D0
+	VAF	C1,C1,D1
+	VAF	C2,C2,D2
+	VAF	C3,C3,D3
+	VAF	C4,C4,D4
+	VAF	C5,C5,D5
+	VX	B0,B0,C0
+	VX	B1,B1,C1
+	VX	B2,B2,C2
+	VX	B3,B3,C3
+	VX	B4,B4,C4
+	VX	B5,B5,C5
+	VERLLF	B0,B0,7
+	VERLLF	B1,B1,7
+	VERLLF	B2,B2,7
+	VERLLF	B3,B3,7
+	VERLLF	B4,B4,7
+	VERLLF	B5,B5,7
+
+	VSLDB	C0,C0,C0,8
+	VSLDB	C1,C1,C1,8
+	VSLDB	C2,C2,C2,8
+	VSLDB	C3,C3,C3,8
+	VSLDB	C4,C4,C4,8
+	VSLDB	C5,C5,C5,8
+	VSLDB	B0,B0,B0,4
+	VSLDB	B1,B1,B1,4
+	VSLDB	B2,B2,B2,4
+	VSLDB	B3,B3,B3,4
+	VSLDB	B4,B4,B4,4
+	VSLDB	B5,B5,B5,4
+	VSLDB	D0,D0,D0,12
+	VSLDB	D1,D1,D1,12
+	VSLDB	D2,D2,D2,12
+	VSLDB	D3,D3,D3,12
+	VSLDB	D4,D4,D4,12
+	VSLDB	D5,D5,D5,12
+
+	VAF	A0,A0,B0
+	VAF	A1,A1,B1
+	VAF	A2,A2,B2
+	VAF	A3,A3,B3
+	VAF	A4,A4,B4
+	VAF	A5,A5,B5
+	VX	D0,D0,A0
+	VX	D1,D1,A1
+	VX	D2,D2,A2
+	VX	D3,D3,A3
+	VX	D4,D4,A4
+	VX	D5,D5,A5
+	VERLLF	D0,D0,16
+	VERLLF	D1,D1,16
+	VERLLF	D2,D2,16
+	VERLLF	D3,D3,16
+	VERLLF	D4,D4,16
+	VERLLF	D5,D5,16
+
+	VAF	C0,C0,D0
+	VAF	C1,C1,D1
+	VAF	C2,C2,D2
+	VAF	C3,C3,D3
+	VAF	C4,C4,D4
+	VAF	C5,C5,D5
+	VX	B0,B0,C0
+	VX	B1,B1,C1
+	VX	B2,B2,C2
+	VX	B3,B3,C3
+	VX	B4,B4,C4
+	VX	B5,B5,C5
+	VERLLF	B0,B0,12
+	VERLLF	B1,B1,12
+	VERLLF	B2,B2,12
+	VERLLF	B3,B3,12
+	VERLLF	B4,B4,12
+	VERLLF	B5,B5,12
+
+	VAF	A0,A0,B0
+	VAF	A1,A1,B1
+	VAF	A2,A2,B2
+	VAF	A3,A3,B3
+	VAF	A4,A4,B4
+	VAF	A5,A5,B5
+	VX	D0,D0,A0
+	VX	D1,D1,A1
+	VX	D2,D2,A2
+	VX	D3,D3,A3
+	VX	D4,D4,A4
+	VX	D5,D5,A5
+	VERLLF	D0,D0,8
+	VERLLF	D1,D1,8
+	VERLLF	D2,D2,8
+	VERLLF	D3,D3,8
+	VERLLF	D4,D4,8
+	VERLLF	D5,D5,8
+
+	VAF	C0,C0,D0
+	VAF	C1,C1,D1
+	VAF	C2,C2,D2
+	VAF	C3,C3,D3
+	VAF	C4,C4,D4
+	VAF	C5,C5,D5
+	VX	B0,B0,C0
+	VX	B1,B1,C1
+	VX	B2,B2,C2
+	VX	B3,B3,C3
+	VX	B4,B4,C4
+	VX	B5,B5,C5
+	VERLLF	B0,B0,7
+	VERLLF	B1,B1,7
+	VERLLF	B2,B2,7
+	VERLLF	B3,B3,7
+	VERLLF	B4,B4,7
+	VERLLF	B5,B5,7
+
+	VSLDB	C0,C0,C0,8
+	VSLDB	C1,C1,C1,8
+	VSLDB	C2,C2,C2,8
+	VSLDB	C3,C3,C3,8
+	VSLDB	C4,C4,C4,8
+	VSLDB	C5,C5,C5,8
+	VSLDB	B0,B0,B0,12
+	VSLDB	B1,B1,B1,12
+	VSLDB	B2,B2,B2,12
+	VSLDB	B3,B3,B3,12
+	VSLDB	B4,B4,B4,12
+	VSLDB	B5,B5,B5,12
+	VSLDB	D0,D0,D0,4
+	VSLDB	D1,D1,D1,4
+	VSLDB	D2,D2,D2,4
+	VSLDB	D3,D3,D3,4
+	VSLDB	D4,D4,D4,4
+	VSLDB	D5,D5,D5,4
+	brct	%r0,.Loop_vx
+
+	VAF	A0,A0,K0
+	VAF	B0,B0,K1
+	VAF	C0,C0,K2
+	VAF	D0,D0,K3
+	VAF	A1,A1,K0
+	VAF	D1,D1,T1		# +K[3]+1
+
+	VPERM	A0,A0,A0,BEPERM
+	VPERM	B0,B0,B0,BEPERM
+	VPERM	C0,C0,C0,BEPERM
+	VPERM	D0,D0,D0,BEPERM
+
+	.insn	rilu,0xc20e00000000,LEN,0x40	# clgfi LEN,0x40
+	jl	.Ltail_vx
+
+	VAF	D2,D2,T2		# +K[3]+2
+	VAF	D3,D3,T3		# +K[3]+3
+	VLM	T0,T3,0,INP,0
+
+	VX	A0,A0,T0
+	VX	B0,B0,T1
+	VX	C0,C0,T2
+	VX	D0,D0,T3
+
+	VLM	K0,T3,0,%r7,4		# re-load sigma and increments
+
+	VSTM	A0,D0,0,OUT,0
+
+	la	INP,0x40(INP)
+	la	OUT,0x40(OUT)
+	aghi	LEN,-0x40
+	je	.Ldone_vx
+
+	VAF	B1,B1,K1
+	VAF	C1,C1,K2
+
+	VPERM	A0,A1,A1,BEPERM
+	VPERM	B0,B1,B1,BEPERM
+	VPERM	C0,C1,C1,BEPERM
+	VPERM	D0,D1,D1,BEPERM
+
+	.insn	rilu,0xc20e00000000,LEN,0x40	# clgfi LEN,0x40
+	jl	.Ltail_vx
+
+	VLM	A1,D1,0,INP,0
+
+	VX	A0,A0,A1
+	VX	B0,B0,B1
+	VX	C0,C0,C1
+	VX	D0,D0,D1
+
+	VSTM	A0,D0,0,OUT,0
+
+	la	INP,0x40(INP)
+	la	OUT,0x40(OUT)
+	aghi	LEN,-0x40
+	je	.Ldone_vx
+
+	VAF	A2,A2,K0
+	VAF	B2,B2,K1
+	VAF	C2,C2,K2
+
+	VPERM	A0,A2,A2,BEPERM
+	VPERM	B0,B2,B2,BEPERM
+	VPERM	C0,C2,C2,BEPERM
+	VPERM	D0,D2,D2,BEPERM
+
+	.insn	rilu,0xc20e00000000,LEN,0x40	# clgfi LEN,0x40
+	jl	.Ltail_vx
+
+	VLM	A1,D1,0,INP,0
+
+	VX	A0,A0,A1
+	VX	B0,B0,B1
+	VX	C0,C0,C1
+	VX	D0,D0,D1
+
+	VSTM	A0,D0,0,OUT,0
+
+	la	INP,0x40(INP)
+	la	OUT,0x40(OUT)
+	aghi	LEN,-0x40
+	je	.Ldone_vx
+
+	VAF	A3,A3,K0
+	VAF	B3,B3,K1
+	VAF	C3,C3,K2
+	VAF	D2,K3,T3		# K[3]+3
+
+	VPERM	A0,A3,A3,BEPERM
+	VPERM	B0,B3,B3,BEPERM
+	VPERM	C0,C3,C3,BEPERM
+	VPERM	D0,D3,D3,BEPERM
+
+	.insn	rilu,0xc20e00000000,LEN,0x40	# clgfi LEN,0x40
+	jl	.Ltail_vx
+
+	VAF	D3,D2,T1		# K[3]+4
+	VLM	A1,D1,0,INP,0
+
+	VX	A0,A0,A1
+	VX	B0,B0,B1
+	VX	C0,C0,C1
+	VX	D0,D0,D1
+
+	VSTM	A0,D0,0,OUT,0
+
+	la	INP,0x40(INP)
+	la	OUT,0x40(OUT)
+	aghi	LEN,-0x40
+	je	.Ldone_vx
+
+	VAF	A4,A4,K0
+	VAF	B4,B4,K1
+	VAF	C4,C4,K2
+	VAF	D4,D4,D3		# +K[3]+4
+	VAF	D3,D3,T1		# K[3]+5
+	VAF	K3,D2,T3		# K[3]+=6
+
+	VPERM	A0,A4,A4,BEPERM
+	VPERM	B0,B4,B4,BEPERM
+	VPERM	C0,C4,C4,BEPERM
+	VPERM	D0,D4,D4,BEPERM
+
+	.insn	rilu,0xc20e00000000,LEN,0x40	# clgfi LEN,0x40
+	jl	.Ltail_vx
+
+	VLM	A1,D1,0,INP,0
+
+	VX	A0,A0,A1
+	VX	B0,B0,B1
+	VX	C0,C0,C1
+	VX	D0,D0,D1
+
+	VSTM	A0,D0,0,OUT,0
+
+	la	INP,0x40(INP)
+	la	OUT,0x40(OUT)
+	aghi	LEN,-0x40
+	je	.Ldone_vx
+
+	VAF	A5,A5,K0
+	VAF	B5,B5,K1
+	VAF	C5,C5,K2
+	VAF	D5,D5,D3		# +K[3]+5
+
+	VPERM	A0,A5,A5,BEPERM
+	VPERM	B0,B5,B5,BEPERM
+	VPERM	C0,C5,C5,BEPERM
+	VPERM	D0,D5,D5,BEPERM
+
+	.insn	rilu,0xc20e00000000,LEN,0x40	# clgfi LEN,0x40
+	jl	.Ltail_vx
+
+	VLM	A1,D1,0,INP,0
+
+	VX	A0,A0,A1
+	VX	B0,B0,B1
+	VX	C0,C0,C1
+	VX	D0,D0,D1
+
+	VSTM	A0,D0,0,OUT,0
+
+	la	INP,0x40(INP)
+	la	OUT,0x40(OUT)
+	lhi	%r0,10
+	aghi	LEN,-0x40
+	jne	.Loop_outer_vx
+
+.Ldone_vx:
+	lmg	%r6,%r7,FRAME+6*8(SP)
+	la	SP,FRAME(SP)
+	BR_EX	%r14
+
+.Ltail_vx:
+	VSTM	A0,D0,8*8,SP,3
+	lghi	%r1,0
+
+.Loop_tail_vx:
+	llgc	%r5,0(%r1,INP)
+	llgc	%r6,8*8(%r1,SP)
+	xr	%r6,%r5
+	stc	%r6,0(%r1,OUT)
+	la	%r1,1(%r1)
+	brct	LEN,.Loop_tail_vx
+
+	lmg	%r6,%r7,FRAME+6*8(SP)
+	la	SP,FRAME(SP)
+	BR_EX	%r14
+ENDPROC(chacha20_vx)
+
+.previous
diff --git a/arch/s390/crypto/chacha-s390.h b/arch/s390/crypto/chacha-s390.h
new file mode 100644
index 0000000..733744c
--- /dev/null
+++ b/arch/s390/crypto/chacha-s390.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * s390 ChaCha stream cipher.
+ *
+ * Copyright IBM Corp. 2021
+ */
+
+#ifndef _CHACHA_S390_H
+#define _CHACHA_S390_H
+
+void chacha20_vx(u8 *out, const u8 *inp, size_t len, const u32 *key,
+		 const u32 *counter);
+
+#endif /* _CHACHA_S390_H */
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index 3afbee2..c0c8a1f 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -12,6 +12,8 @@
 #ifndef _ASM_S390_AP_H_
 #define _ASM_S390_AP_H_
 
+#include <linux/io.h>
+
 /**
  * The ap_qid_t identifier of an ap queue.
  * If the AP facilities test (APFT) facility is available,
@@ -238,7 +240,7 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
 		struct ap_qirq_ctrl qirqctrl;
 		struct ap_queue_status status;
 	} reg1;
-	void *reg2 = ind;
+	unsigned long reg2 = virt_to_phys(ind);
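+	/* the AQIC instruction expects a physical address */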
 
 	reg1.qirqctrl = qirqctrl;
 
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index b3a8cb4d..bdcd64f 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -47,8 +47,8 @@ static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn)
 {
 	unsigned long start_addr, end_addr;
 
-	start_addr = start_pfn << PAGE_SHIFT;
-	end_addr = (start_pfn + num_pfn - 1) << PAGE_SHIFT;
+	start_addr = pfn_to_phys(start_pfn);
+	end_addr = pfn_to_phys(start_pfn + num_pfn - 1);
 
 	diag_stat_inc(DIAG_STAT_X010);
 	asm volatile(
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index 2db45d7..55c9051 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -98,9 +98,9 @@ struct mcesa {
 
 struct pt_regs;
 
-void nmi_alloc_boot_cpu(struct lowcore *lc);
-int nmi_alloc_per_cpu(struct lowcore *lc);
-void nmi_free_per_cpu(struct lowcore *lc);
+void nmi_alloc_mcesa_early(u64 *mcesad);
+int nmi_alloc_mcesa(u64 *mcesad);
+void nmi_free_mcesa(u64 *mcesad);
 
 void s390_handle_mcck(void);
 void __s390_handle_mcck(void);
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 3b8e89d..91e6342 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -97,23 +97,23 @@ static inline unsigned int calc_px(dma_addr_t ptr)
 	return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
 }
 
-static inline void set_pt_pfaa(unsigned long *entry, void *pfaa)
+static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
 {
 	*entry &= ZPCI_PTE_FLAG_MASK;
-	*entry |= ((unsigned long) pfaa & ZPCI_PTE_ADDR_MASK);
+	*entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
 }
 
-static inline void set_rt_sto(unsigned long *entry, void *sto)
+static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
 {
 	*entry &= ZPCI_RTE_FLAG_MASK;
-	*entry |= ((unsigned long) sto & ZPCI_RTE_ADDR_MASK);
+	*entry |= (sto & ZPCI_RTE_ADDR_MASK);
 	*entry |= ZPCI_TABLE_TYPE_RTX;
 }
 
-static inline void set_st_pto(unsigned long *entry, void *pto)
+static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
 {
 	*entry &= ZPCI_STE_FLAG_MASK;
-	*entry |= ((unsigned long) pto & ZPCI_STE_ADDR_MASK);
+	*entry |= (pto & ZPCI_STE_ADDR_MASK);
 	*entry |= ZPCI_TABLE_TYPE_SX;
 }
 
@@ -169,16 +169,19 @@ static inline int pt_entry_isvalid(unsigned long entry)
 
 static inline unsigned long *get_rt_sto(unsigned long entry)
 {
-	return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
-		? (unsigned long *) (entry & ZPCI_RTE_ADDR_MASK)
-		: NULL;
+	if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
+		return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
+	else
+		return NULL;
+
 }
 
 static inline unsigned long *get_st_pto(unsigned long entry)
 {
-	return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
-		? (unsigned long *) (entry & ZPCI_STE_ADDR_MASK)
-		: NULL;
+	if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
+		return phys_to_virt(entry & ZPCI_STE_ADDR_MASK);
+	else
+		return NULL;
 }
 
 /* Prototypes */
@@ -186,7 +189,7 @@ void dma_free_seg_table(unsigned long);
 unsigned long *dma_alloc_cpu_table(void);
 void dma_cleanup_tables(unsigned long *);
 unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr);
-void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags);
+void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags);
 
 extern const struct dma_map_ops s390_pci_dma_ops;
 
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index f54c152..5581b64 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -88,11 +88,10 @@ extern void __bpon(void);
  * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
  */
 
-#define TASK_SIZE_OF(tsk)	(test_tsk_thread_flag(tsk, TIF_31BIT) ? \
+#define TASK_SIZE		(test_thread_flag(TIF_31BIT) ? \
 					_REGION3_SIZE : TASK_SIZE_MAX)
 #define TASK_UNMAPPED_BASE	(test_thread_flag(TIF_31BIT) ? \
 					(_REGION3_SIZE >> 1) : (_REGION2_SIZE >> 1))
-#define TASK_SIZE		TASK_SIZE_OF(current)
 #define TASK_SIZE_MAX		(-PAGE_SIZE)
 
 #define STACK_TOP		(test_thread_flag(TIF_31BIT) ? \
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 4b9b14b..54ae2dc 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -18,7 +18,6 @@
 #define QDIO_MAX_BUFFERS_MASK		(QDIO_MAX_BUFFERS_PER_Q - 1)
 #define QDIO_BUFNR(num)			((num) & QDIO_MAX_BUFFERS_MASK)
 #define QDIO_MAX_ELEMENTS_PER_BUFFER	16
-#define QDIO_SBAL_SIZE			256
 
 #define QDIO_QETH_QFMT			0
 #define QDIO_ZFCP_QFMT			1
@@ -92,8 +91,8 @@ struct qdr {
  * @pfmt: implementation dependent parameter format
  * @rflags: QEBSM
  * @ac: adapter characteristics
- * @isliba: absolute address of first input SLIB
- * @osliba: absolute address of first output SLIB
+ * @isliba: logical address of first input SLIB
+ * @osliba: logical address of first output SLIB
  * @ebcnam: adapter identifier in EBCDIC
  * @parm: implementation dependent parameters
  */
@@ -313,7 +312,7 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
  * @qib_rflags: rflags to set
  * @no_input_qs: number of input queues
  * @no_output_qs: number of output queues
- * @input_handler: handler to be called for input queues
+ * @input_handler: handler to be called for input queues, and device-wide errors
  * @output_handler: handler to be called for output queues
  * @irq_poll: Data IRQ polling handler
  * @scan_threshold: # of in-use buffers that triggers scan on output queue
@@ -337,9 +336,6 @@ struct qdio_initialize {
 	struct qdio_buffer ***output_sbal_addr_array;
 };
 
-#define QDIO_FLAG_SYNC_INPUT		0x01
-#define QDIO_FLAG_SYNC_OUTPUT		0x02
-
 int qdio_alloc_buffers(struct qdio_buffer **buf, unsigned int count);
 void qdio_free_buffers(struct qdio_buffer **buf, unsigned int count);
 void qdio_reset_buffers(struct qdio_buffer **buf, unsigned int count);
@@ -349,13 +345,18 @@ extern int qdio_allocate(struct ccw_device *cdev, unsigned int no_input_qs,
 extern int qdio_establish(struct ccw_device *cdev,
 			  struct qdio_initialize *init_data);
 extern int qdio_activate(struct ccw_device *);
-extern int do_QDIO(struct ccw_device *cdev, unsigned int callflags, int q_nr,
-		   unsigned int bufnr, unsigned int count, struct qaob *aob);
 extern int qdio_start_irq(struct ccw_device *cdev);
 extern int qdio_stop_irq(struct ccw_device *cdev);
-extern int qdio_inspect_queue(struct ccw_device *cdev, unsigned int nr,
-			      bool is_input, unsigned int *bufnr,
-			      unsigned int *error);
+extern int qdio_inspect_input_queue(struct ccw_device *cdev, unsigned int nr,
+				    unsigned int *bufnr, unsigned int *error);
+extern int qdio_inspect_output_queue(struct ccw_device *cdev, unsigned int nr,
+				     unsigned int *bufnr, unsigned int *error);
+extern int qdio_add_bufs_to_input_queue(struct ccw_device *cdev,
+					unsigned int q_nr, unsigned int bufnr,
+					unsigned int count);
+extern int qdio_add_bufs_to_output_queue(struct ccw_device *cdev,
+					 unsigned int q_nr, unsigned int bufnr,
+					 unsigned int count, struct qaob *aob);
 extern int qdio_shutdown(struct ccw_device *, int);
 extern int qdio_free(struct ccw_device *);
 extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *);
diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h
index 0c05a67..87e6cc2 100644
--- a/arch/s390/include/asm/vx-insn.h
+++ b/arch/s390/include/asm/vx-insn.h
@@ -372,6 +372,16 @@
 	MRXBOPC	\hint, 0x36, v1, v3
 .endm
 
+/* VECTOR STORE */
+.macro	VST	vr1, disp, index="%r0", base
+	VX_NUM	v1, \vr1
+	GR_NUM	x2, \index
+	GR_NUM	b2, \base	    /* Base register */
+	.word	0xE700 | ((v1&15) << 4) | (x2&15)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	0, 0x0E, v1
+.endm
+
 /* VECTOR STORE MULTIPLE */
 .macro	VSTM	vfrom, vto, disp, base, hint=3
 	VX_NUM	v1, \vfrom
@@ -411,6 +421,81 @@
 	VUPLL	\vr1, \vr2, 2
 .endm
 
+/* VECTOR PERMUTE DOUBLEWORD IMMEDIATE */
+.macro	VPDI	vr1, vr2, vr3, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	\m4, 0x84, v1, v2, v3
+.endm
+
+/* VECTOR REPLICATE */
+.macro	VREP	vr1, vr3, imm2, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v3&15)
+	.word	\imm2
+	MRXBOPC	\m4, 0x4D, v1, v3
+.endm
+.macro	VREPB	vr1, vr3, imm2
+	VREP	\vr1, \vr3, \imm2, 0
+.endm
+.macro	VREPH	vr1, vr3, imm2
+	VREP	\vr1, \vr3, \imm2, 1
+.endm
+.macro	VREPF	vr1, vr3, imm2
+	VREP	\vr1, \vr3, \imm2, 2
+.endm
+.macro	VREPG	vr1, vr3, imm2
+	VREP	\vr1, \vr3, \imm2, 3
+.endm
+
+/* VECTOR MERGE HIGH */
+.macro	VMRH	vr1, vr2, vr3, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	\m4, 0x61, v1, v2, v3
+.endm
+.macro	VMRHB	vr1, vr2, vr3
+	VMRH	\vr1, \vr2, \vr3, 0
+.endm
+.macro	VMRHH	vr1, vr2, vr3
+	VMRH	\vr1, \vr2, \vr3, 1
+.endm
+.macro	VMRHF	vr1, vr2, vr3
+	VMRH	\vr1, \vr2, \vr3, 2
+.endm
+.macro	VMRHG	vr1, vr2, vr3
+	VMRH	\vr1, \vr2, \vr3, 3
+.endm
+
+/* VECTOR MERGE LOW */
+.macro	VMRL	vr1, vr2, vr3, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	\m4, 0x60, v1, v2, v3
+.endm
+.macro	VMRLB	vr1, vr2, vr3
+	VMRL	\vr1, \vr2, \vr3, 0
+.endm
+.macro	VMRLH	vr1, vr2, vr3
+	VMRL	\vr1, \vr2, \vr3, 1
+.endm
+.macro	VMRLF	vr1, vr2, vr3
+	VMRL	\vr1, \vr2, \vr3, 2
+.endm
+.macro	VMRLG	vr1, vr2, vr3
+	VMRL	\vr1, \vr2, \vr3, 3
+.endm
+
 
 /* Vector integer instructions */
 
@@ -557,5 +642,37 @@
 	VESRAV	\vr1, \vr2, \vr3, 3
 .endm
 
+/* VECTOR ELEMENT ROTATE LEFT LOGICAL */
+.macro	VERLL	vr1, vr3, disp, base="%r0", m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v3, \vr3
+	GR_NUM	b2, \base
+	.word	0xE700 | ((v1&15) << 4) | (v3&15)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\m4, 0x33, v1, v3
+.endm
+.macro	VERLLB	vr1, vr3, disp, base="%r0"
+	VERLL	\vr1, \vr3, \disp, \base, 0
+.endm
+.macro	VERLLH	vr1, vr3, disp, base="%r0"
+	VERLL	\vr1, \vr3, \disp, \base, 1
+.endm
+.macro	VERLLF	vr1, vr3, disp, base="%r0"
+	VERLL	\vr1, \vr3, \disp, \base, 2
+.endm
+.macro	VERLLG	vr1, vr3, disp, base="%r0"
+	VERLL	\vr1, \vr3, \disp, \base, 3
+.endm
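+/* e.g. "VERLLF %v1,%v1,16" rotates each word element of %v1 left by 16 bits */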
+
+/* VECTOR SHIFT LEFT DOUBLE BY BYTE */
+.macro	VSLDB	vr1, vr2, vr3, imm4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12) | (\imm4)
+	MRXBOPC	0, 0x77, v1, v2, v3
+.endm
+
 #endif	/* __ASSEMBLY__ */
 #endif	/* __ASM_S390_VX_INSN_H */
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 785d54c..af82021 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -60,7 +60,7 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
 {
 	struct save_area *sa;
 
-	sa = (void *) memblock_phys_alloc(sizeof(*sa), 8);
+	sa = memblock_alloc(sizeof(*sa), 8);
 	if (!sa)
 		panic("Failed to allocate save area\n");
 
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index ec55154..90bbb4e 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -278,6 +278,7 @@ static const unsigned char formats[][6] = {
 	[INSTR_SIL_RDI]	     = { D_20, B_16, I16_32, 0, 0, 0 },
 	[INSTR_SIL_RDU]	     = { D_20, B_16, U16_32, 0, 0, 0 },
 	[INSTR_SIY_IRD]	     = { D20_20, B_16, I8_8, 0, 0, 0 },
+	[INSTR_SIY_RD]	     = { D20_20, B_16, 0, 0, 0, 0 },
 	[INSTR_SIY_URD]	     = { D20_20, B_16, U8_8, 0, 0, 0 },
 	[INSTR_SI_RD]	     = { D_20, B_16, 0, 0, 0, 0 },
 	[INSTR_SI_URD]	     = { D_20, B_16, U8_8, 0, 0, 0 },
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 0505e55..a16467b 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -86,7 +86,7 @@ static noinline void __machine_kdump(void *image)
 			continue;
 	}
 	/* Store status of the boot CPU */
-	mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
+	mcesa = __va(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
 	if (MACHINE_HAS_VX)
 		save_vx_regs((__vector128 *) mcesa->vector_save_area);
 	if (MACHINE_HAS_GS) {
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 20f8e18..1cf1e37 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -58,27 +58,27 @@ static inline unsigned long nmi_get_mcesa_size(void)
 
 /*
  * The initial machine check extended save area for the boot CPU.
- * It will be replaced by nmi_init() with an allocated structure.
- * The structure is required for machine check happening early in
- * the boot process.
+ * It will be replaced on the boot CPU reinit with an allocated
+ * structure. The structure is required for machine checks happening
+ * early in the boot process.
  */
 static struct mcesa boot_mcesa __initdata __aligned(MCESA_MAX_SIZE);
 
-void __init nmi_alloc_boot_cpu(struct lowcore *lc)
+void __init nmi_alloc_mcesa_early(u64 *mcesad)
 {
 	if (!nmi_needs_mcesa())
 		return;
-	lc->mcesad = (unsigned long) &boot_mcesa;
+	*mcesad = __pa(&boot_mcesa);
 	if (MACHINE_HAS_GS)
-		lc->mcesad |= ilog2(MCESA_MAX_SIZE);
+		*mcesad |= ilog2(MCESA_MAX_SIZE);
 }
 
-static int __init nmi_init(void)
+static void __init nmi_alloc_cache(void)
 {
-	unsigned long origin, cr0, size;
+	unsigned long size;
 
 	if (!nmi_needs_mcesa())
-		return 0;
+		return;
 	size = nmi_get_mcesa_size();
 	if (size > MCESA_MIN_SIZE)
 		mcesa_origin_lc = ilog2(size);
@@ -86,40 +86,31 @@ static int __init nmi_init(void)
 	mcesa_cache = kmem_cache_create("nmi_save_areas", size, size, 0, NULL);
 	if (!mcesa_cache)
 		panic("Couldn't create nmi save area cache");
-	origin = (unsigned long) kmem_cache_alloc(mcesa_cache, GFP_KERNEL);
-	if (!origin)
-		panic("Couldn't allocate nmi save area");
-	/* The pointer is stored with mcesa_bits ORed in */
-	kmemleak_not_leak((void *) origin);
-	__ctl_store(cr0, 0, 0);
-	__ctl_clear_bit(0, 28); /* disable lowcore protection */
-	/* Replace boot_mcesa on the boot CPU */
-	S390_lowcore.mcesad = origin | mcesa_origin_lc;
-	__ctl_load(cr0, 0, 0);
-	return 0;
 }
-early_initcall(nmi_init);
 
-int nmi_alloc_per_cpu(struct lowcore *lc)
+int __ref nmi_alloc_mcesa(u64 *mcesad)
 {
 	unsigned long origin;
 
+	*mcesad = 0;
 	if (!nmi_needs_mcesa())
 		return 0;
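+	/* create the mcesa cache lazily on first use */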
+	if (!mcesa_cache)
+		nmi_alloc_cache();
 	origin = (unsigned long) kmem_cache_alloc(mcesa_cache, GFP_KERNEL);
 	if (!origin)
 		return -ENOMEM;
 	/* The pointer is stored with mcesa_bits ORed in */
 	kmemleak_not_leak((void *) origin);
-	lc->mcesad = origin | mcesa_origin_lc;
+	*mcesad = __pa(origin) | mcesa_origin_lc;
 	return 0;
 }
 
-void nmi_free_per_cpu(struct lowcore *lc)
+void nmi_free_mcesa(u64 *mcesad)
 {
 	if (!nmi_needs_mcesa())
 		return;
-	kmem_cache_free(mcesa_cache, (void *)(lc->mcesad & MCESA_ORIGIN_MASK));
+	kmem_cache_free(mcesa_cache, __va(*mcesad & MCESA_ORIGIN_MASK));
 }
 
 static notrace void s390_handle_damage(void)
@@ -246,7 +237,7 @@ static int notrace s390_validate_registers(union mci mci, int umode)
 			: "Q" (S390_lowcore.fpt_creg_save_area));
 	}
 
-	mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
+	mcesa = __va(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
 	if (!MACHINE_HAS_VX) {
 		/* Validate floating point registers */
 		asm volatile(
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index e8858b2..71d86f7 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -139,7 +139,6 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 				(unsigned long)__ret_from_fork;
 		frame->childregs.gprs[9] = new_stackp; /* function */
 		frame->childregs.gprs[10] = arg;
-		frame->childregs.gprs[11] = (unsigned long)do_exit;
 		frame->childregs.orig_gpr2 = -1;
 		frame->childregs.last_break = 1;
 		return 0;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 225ab2d..f2c25d1 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -445,7 +445,7 @@ static void __init setup_lowcore_dat_off(void)
 	lc->lpp = LPP_MAGIC;
 	lc->machine_flags = S390_lowcore.machine_flags;
 	lc->preempt_count = S390_lowcore.preempt_count;
-	nmi_alloc_boot_cpu(lc);
+	nmi_alloc_mcesa_early(&lc->mcesad);
 	lc->sys_enter_timer = S390_lowcore.sys_enter_timer;
 	lc->exit_timer = S390_lowcore.exit_timer;
 	lc->user_timer = S390_lowcore.user_timer;
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 78a8ea6..2bad902 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -212,7 +212,7 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
 	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
 	lc->preempt_count = PREEMPT_DISABLED;
-	if (nmi_alloc_per_cpu(lc))
+	if (nmi_alloc_mcesa(&lc->mcesad))
 		goto out;
 	lowcore_ptr[cpu] = lc;
 	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc);
@@ -239,7 +239,7 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
 	mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET;
 	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
 	lowcore_ptr[cpu] = NULL;
-	nmi_free_per_cpu(lc);
+	nmi_free_mcesa(&lc->mcesad);
 	stack_free(async_stack);
 	stack_free(mcck_stack);
 	free_pages(nodat_stack, THREAD_SIZE_ORDER);
@@ -622,7 +622,7 @@ int smp_store_status(int cpu)
 		return -EIO;
 	if (!MACHINE_HAS_VX && !MACHINE_HAS_GS)
 		return 0;
-	pa = __pa(lc->mcesad & MCESA_ORIGIN_MASK);
+	pa = lc->mcesad & MCESA_ORIGIN_MASK;
 	if (MACHINE_HAS_GS)
 		pa |= lc->mcesad & MCESA_LC_MASK;
 	if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
@@ -658,26 +658,22 @@ int smp_store_status(int cpu)
  *    deactivates the elfcorehdr= kernel parameter
  */
 static __init void smp_save_cpu_vxrs(struct save_area *sa, u16 addr,
-				     bool is_boot_cpu, unsigned long page)
+				     bool is_boot_cpu, __vector128 *vxrs)
 {
-	__vector128 *vxrs = (__vector128 *) page;
-
 	if (is_boot_cpu)
 		vxrs = boot_cpu_vector_save_area;
 	else
-		__pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, page);
+		__pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(vxrs));
 	save_area_add_vxrs(sa, vxrs);
 }
 
 static __init void smp_save_cpu_regs(struct save_area *sa, u16 addr,
-				     bool is_boot_cpu, unsigned long page)
+				     bool is_boot_cpu, void *regs)
 {
-	void *regs = (void *) page;
-
 	if (is_boot_cpu)
 		copy_oldmem_kernel(regs, (void *) __LC_FPREGS_SAVE_AREA, 512);
 	else
-		__pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, page);
+		__pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(regs));
 	save_area_add_regs(sa, regs);
 }
 
@@ -685,14 +681,14 @@ void __init smp_save_dump_cpus(void)
 {
 	int addr, boot_cpu_addr, max_cpu_addr;
 	struct save_area *sa;
-	unsigned long page;
 	bool is_boot_cpu;
+	void *page;
 
 	if (!(oldmem_data.start || is_ipl_type_dump()))
 		/* No previous system present, normal boot. */
 		return;
 	/* Allocate a page as dumping area for the store status sigps */
-	page = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0, 1UL << 31);
+	page = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
 	if (!page)
 		panic("ERROR: Failed to allocate %lx bytes below %lx\n",
 		      PAGE_SIZE, 1UL << 31);
@@ -723,7 +719,7 @@ void __init smp_save_dump_cpus(void)
 			/* Get the CPU registers */
 			smp_save_cpu_regs(sa, addr, is_boot_cpu, page);
 	}
-	memblock_phys_free(page, PAGE_SIZE);
+	memblock_free(page, PAGE_SIZE);
 	diag_amode31_ops.diag308_reset();
 	pcpu_set_smt(0);
 }
@@ -880,7 +876,7 @@ void __init smp_detect_cpus(void)
 
 	/* Add CPUs present at boot */
 	__smp_rescan_cpus(info, true);
-	memblock_phys_free((unsigned long)info, sizeof(*info));
+	memblock_free(info, sizeof(*info));
 }
 
 /*
@@ -1271,14 +1267,15 @@ static int __init smp_reinit_ipl_cpu(void)
 {
 	unsigned long async_stack, nodat_stack, mcck_stack;
 	struct lowcore *lc, *lc_ipl;
-	unsigned long flags;
+	unsigned long flags, cr0;
+	u64 mcesad;
 
 	lc_ipl = lowcore_ptr[0];
 	lc = (struct lowcore *)	__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
 	nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
 	async_stack = stack_alloc();
 	mcck_stack = stack_alloc();
-	if (!lc || !nodat_stack || !async_stack || !mcck_stack)
+	if (!lc || !nodat_stack || !async_stack || !mcck_stack || nmi_alloc_mcesa(&mcesad))
 		panic("Couldn't allocate memory");
 
 	local_irq_save(flags);
@@ -1287,6 +1284,10 @@ static int __init smp_reinit_ipl_cpu(void)
 	S390_lowcore.nodat_stack = nodat_stack + STACK_INIT_OFFSET;
 	S390_lowcore.async_stack = async_stack + STACK_INIT_OFFSET;
 	S390_lowcore.mcck_stack = mcck_stack + STACK_INIT_OFFSET;
+	__ctl_store(cr0, 0, 0);
+	__ctl_clear_bit(0, 28); /* disable lowcore protection */
+	S390_lowcore.mcesad = mcesad;
+	__ctl_load(cr0, 0, 0);
 	lowcore_ptr[0] = lc;
 	local_mcck_enable();
 	local_irq_restore(flags);
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 386d4e4..a542507 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -30,7 +30,7 @@ int __bootdata_preserved(prot_virt_host);
 EXPORT_SYMBOL(prot_virt_host);
 EXPORT_SYMBOL(uv_info);
 
-static int __init uv_init(unsigned long stor_base, unsigned long stor_len)
+static int __init uv_init(phys_addr_t stor_base, unsigned long stor_len)
 {
 	struct uv_cb_init uvcb = {
 		.header.cmd = UVC_CMD_INIT_UV,
@@ -49,12 +49,12 @@ static int __init uv_init(unsigned long stor_base, unsigned long stor_len)
 
 void __init setup_uv(void)
 {
-	unsigned long uv_stor_base;
+	void *uv_stor_base;
 
 	if (!is_prot_virt_host())
 		return;
 
-	uv_stor_base = (unsigned long)memblock_alloc_try_nid(
+	uv_stor_base = memblock_alloc_try_nid(
 		uv_info.uv_base_stor_len, SZ_1M, SZ_2G,
 		MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE);
 	if (!uv_stor_base) {
@@ -63,8 +63,8 @@ void __init setup_uv(void)
 		goto fail;
 	}
 
-	if (uv_init(uv_stor_base, uv_info.uv_base_stor_len)) {
-		memblock_phys_free(uv_stor_base, uv_info.uv_base_stor_len);
+	if (uv_init(__pa(uv_stor_base), uv_info.uv_base_stor_len)) {
+		memblock_free(uv_stor_base, uv_info.uv_base_stor_len);
 		goto fail;
 	}
 
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 2203164..9141ed4 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -90,7 +90,7 @@ static long cmm_alloc_pages(long nr, long *counter,
 			} else
 				free_page((unsigned long) npa);
 		}
-		diag10_range(addr >> PAGE_SHIFT, 1);
+		diag10_range(virt_to_pfn(addr), 1);
 		pa->pages[pa->index++] = addr;
 		(*counter)++;
 		spin_unlock(&cmm_lock);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index d30f598..6ed2886 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -115,7 +115,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
 		pr_cont("R1:%016lx ", *table);
 		if (*table & _REGION_ENTRY_INVALID)
 			goto out;
-		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
 		fallthrough;
 	case _ASCE_TYPE_REGION2:
 		table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
@@ -124,7 +124,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
 		pr_cont("R2:%016lx ", *table);
 		if (*table & _REGION_ENTRY_INVALID)
 			goto out;
-		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
 		fallthrough;
 	case _ASCE_TYPE_REGION3:
 		table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
@@ -133,7 +133,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
 		pr_cont("R3:%016lx ", *table);
 		if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
 			goto out;
-		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
 		fallthrough;
 	case _ASCE_TYPE_SEGMENT:
 		table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
@@ -142,7 +142,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
 		pr_cont("S:%016lx ", *table);
 		if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
 			goto out;
-		table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+		table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
 	}
 	table += (address & _PAGE_INDEX) >> _PAGE_SHIFT;
 	if (bad_address(table))
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 8c6f258..86ffd0d 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -215,6 +215,9 @@ void free_initmem(void)
 	__set_memory((unsigned long)_sinittext,
 		     (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
 		     SET_MEMORY_RW | SET_MEMORY_NX);
+	free_reserved_area(sclp_early_sccb,
+			   sclp_early_sccb + EXT_SCCB_READ_SCP,
+			   POISON_FREE_INITMEM, "unused early sccb");
 	free_initmem_default(POISON_FREE_INITMEM);
 }
 
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 781965f..fd35c1a 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -176,7 +176,75 @@ void page_table_free_pgste(struct page *page)
 #endif /* CONFIG_PGSTE */
 
 /*
- * page table entry allocation/free routines.
+ * A 2KB-pgtable is either upper or lower half of a normal page.
+ * The second half of the page may be unused or used as another
+ * 2KB-pgtable.
+ *
+ * Whenever possible the parent page for a new 2KB-pgtable is picked
+ * from the list of partially allocated pages mm_context_t::pgtable_list.
+ * In case the list is empty a new parent page is allocated and added to
+ * the list.
+ *
+ * When a parent page gets fully allocated it contains 2KB-pgtables in both
+ * upper and lower halves and is removed from mm_context_t::pgtable_list.
+ *
+ * When a 2KB-pgtable is freed from a fully allocated parent page, that
+ * page becomes partially allocated and is added to mm_context_t::pgtable_list.
+ *
+ * If a 2KB-pgtable is freed from a partially allocated parent page, that
+ * page becomes unused and is removed from mm_context_t::pgtable_list.
+ * Furthermore, the unused parent page is released.
+ *
+ * It follows from the above that no unallocated or fully allocated parent
+ * pages are contained in mm_context_t::pgtable_list.
+ *
+ * The upper byte (bits 24-31) of the parent page _refcount is used
+ * for tracking contained 2KB-pgtables and has the following format:
+ *
+ *   PP  AA
+ * 01234567    upper byte (bits 24-31) of struct page::_refcount
+ *   ||  ||
+ *   ||  |+--- upper 2KB-pgtable is allocated
+ *   ||  +---- lower 2KB-pgtable is allocated
+ *   |+------- upper 2KB-pgtable is pending for removal
+ *   +-------- lower 2KB-pgtable is pending for removal
+ *
+ * (See commit 620b4e903179 ("s390: use _refcount for pgtables") on why
+ * using _refcount is possible).
+ *
+ * When a 2KB-pgtable is allocated, the corresponding AA bit is set to 1.
+ * The parent page is either:
+ *   - added to mm_context_t::pgtable_list in case the second half of the
+ *     parent page is still unallocated;
+ *   - removed from mm_context_t::pgtable_list in case both halves of the
+ *     parent page are allocated;
+ * These operations are protected with mm_context_t::lock.
+ *
+ * When a 2KB-pgtable is deallocated, the corresponding AA bit is set to 0
+ * and the corresponding PP bit is set to 1 in a single atomic operation.
+ * Thus, PP and AA bits corresponding to the same 2KB-pgtable are mutually
+ * exclusive and may never be both set to 1!
+ * The parent page is either:
+ *   - added to mm_context_t::pgtable_list in case the second half of the
+ *     parent page is still allocated;
+ *   - removed from mm_context_t::pgtable_list in case the second half of
+ *     the parent page is unallocated;
+ * These operations are protected with mm_context_t::lock.
+ *
+ * It is important to understand that mm_context_t::lock only protects
+ * mm_context_t::pgtable_list and AA bits, but not the parent page itself
+ * and PP bits.
+ *
+ * Releasing the parent page happens whenever the PP bit turns from 1 to 0,
+ * while both AA bits and the second PP bit are already unset. Then the
+ * parent page does not contain any 2KB-pgtable fragment anymore, and it has
+ * also been removed from mm_context_t::pgtable_list. It is therefore safe
+ * to release the page.
+ *
+ * PGSTE memory spaces use full 4KB-pgtables and do not need most of the
+ * logic described above. Both AA bits are set to 1 to denote a 4KB-pgtable
+ * while the PP bits are never used, nor is such a page added to or removed
+ * from mm_context_t::pgtable_list.
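+ *
+ * A worked (hypothetical) sequence for the upper byte of _refcount:
+ * allocating the lower 2KB-pgtable yields 0x01 and allocating the upper
+ * one as well yields 0x03; freeing the lower half XORs 0x11 (0x03 -> 0x12,
+ * fragment pending), and once the fragment is really released the pending
+ * bit is cleared again with XOR 0x10 (0x12 -> 0x02).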
  */
 unsigned long *page_table_alloc(struct mm_struct *mm)
 {
@@ -192,14 +260,23 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 			page = list_first_entry(&mm->context.pgtable_list,
 						struct page, lru);
 			mask = atomic_read(&page->_refcount) >> 24;
-			mask = (mask | (mask >> 4)) & 3;
-			if (mask != 3) {
+			/*
+			 * The pending removal bits must also be checked.
+			 * Failure to do so might lead to an impossible
+			 * value (e.g. 0x13 or 0x23) being written to _refcount.
+			 * Such values violate the assumption that pending and
+			 * allocation bits are mutually exclusive, and the rest
+			 * of the code derails as a result. That could lead to
+			 * a whole bunch of races and corruptions.
+			 */
+			mask = (mask | (mask >> 4)) & 0x03U;
+			if (mask != 0x03U) {
 				table = (unsigned long *) page_to_virt(page);
 				bit = mask & 1;		/* =1 -> second 2K */
 				if (bit)
 					table += PTRS_PER_PTE;
 				atomic_xor_bits(&page->_refcount,
-							1U << (bit + 24));
+							0x01U << (bit + 24));
 				list_del(&page->lru);
 			}
 		}
@@ -220,12 +297,12 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	table = (unsigned long *) page_to_virt(page);
 	if (mm_alloc_pgste(mm)) {
 		/* Return 4K page table with PGSTEs */
-		atomic_xor_bits(&page->_refcount, 3 << 24);
+		atomic_xor_bits(&page->_refcount, 0x03U << 24);
 		memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
 		memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
 	} else {
 		/* Return the first 2K fragment of the page */
-		atomic_xor_bits(&page->_refcount, 1 << 24);
+		atomic_xor_bits(&page->_refcount, 0x01U << 24);
 		memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
 		spin_lock_bh(&mm->context.lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
@@ -234,29 +311,53 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	return table;
 }
 
+static void page_table_release_check(struct page *page, void *table,
+				     unsigned int half, unsigned int mask)
+{
+	char msg[128];
+
+	if (!IS_ENABLED(CONFIG_DEBUG_VM) || !mask)
+		return;
+	snprintf(msg, sizeof(msg),
+		 "Invalid pgtable %p release half 0x%02x mask 0x%02x",
+		 table, half, mask);
+	dump_page(page, msg);
+}
+
 void page_table_free(struct mm_struct *mm, unsigned long *table)
 {
+	unsigned int mask, bit, half;
 	struct page *page;
-	unsigned int bit, mask;
 
 	page = virt_to_page(table);
 	if (!mm_alloc_pgste(mm)) {
 		/* Free 2K page table fragment of a 4K page */
 		bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
 		spin_lock_bh(&mm->context.lock);
-		mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24));
+		/*
+		 * Mark the page for delayed release. The actual release
+		 * will happen outside of the critical section from this
+		 * function or from __tlb_remove_table()
+		 */
+		mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
 		mask >>= 24;
-		if (mask & 3)
+		if (mask & 0x03U)
 			list_add(&page->lru, &mm->context.pgtable_list);
 		else
 			list_del(&page->lru);
 		spin_unlock_bh(&mm->context.lock);
-		if (mask != 0)
+		mask = atomic_xor_bits(&page->_refcount, 0x10U << (bit + 24));
+		mask >>= 24;
+		if (mask != 0x00U)
 			return;
+		half = 0x01U << bit;
 	} else {
-		atomic_xor_bits(&page->_refcount, 3U << 24);
+		half = 0x03U;
+		mask = atomic_xor_bits(&page->_refcount, 0x03U << 24);
+		mask >>= 24;
 	}
 
+	page_table_release_check(page, table, half, mask);
 	pgtable_pte_page_dtor(page);
 	__free_page(page);
 }
@@ -272,47 +373,54 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
 	page = virt_to_page(table);
 	if (mm_alloc_pgste(mm)) {
 		gmap_unlink(mm, table, vmaddr);
-		table = (unsigned long *) ((unsigned long)table | 3);
+		table = (unsigned long *) ((unsigned long)table | 0x03U);
 		tlb_remove_table(tlb, table);
 		return;
 	}
 	bit = ((unsigned long) table & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
 	spin_lock_bh(&mm->context.lock);
+	/*
+	 * Mark the page for delayed release. The actual release will happen
+	 * outside of the critical section from __tlb_remove_table() or from
+	 * page_table_free()
+	 */
 	mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
 	mask >>= 24;
-	if (mask & 3)
+	if (mask & 0x03U)
 		list_add_tail(&page->lru, &mm->context.pgtable_list);
 	else
 		list_del(&page->lru);
 	spin_unlock_bh(&mm->context.lock);
-	table = (unsigned long *) ((unsigned long) table | (1U << bit));
+	table = (unsigned long *) ((unsigned long) table | (0x01U << bit));
 	tlb_remove_table(tlb, table);
 }
 
 void __tlb_remove_table(void *_table)
 {
-	unsigned int mask = (unsigned long) _table & 3;
+	unsigned int mask = (unsigned long) _table & 0x03U, half = mask;
 	void *table = (void *)((unsigned long) _table ^ mask);
 	struct page *page = virt_to_page(table);
 
-	switch (mask) {
-	case 0:		/* pmd, pud, or p4d */
+	switch (half) {
+	case 0x00U:	/* pmd, pud, or p4d */
 		free_pages((unsigned long) table, 2);
-		break;
-	case 1:		/* lower 2K of a 4K page table */
-	case 2:		/* higher 2K of a 4K page table */
+		return;
+	case 0x01U:	/* lower 2K of a 4K page table */
+	case 0x02U:	/* higher 2K of a 4K page table */
 		mask = atomic_xor_bits(&page->_refcount, mask << (4 + 24));
 		mask >>= 24;
-		if (mask != 0)
-			break;
-		fallthrough;
-	case 3:		/* 4K page table with pgstes */
-		if (mask & 3)
-			atomic_xor_bits(&page->_refcount, 3 << 24);
-		pgtable_pte_page_dtor(page);
-		__free_page(page);
+		if (mask != 0x00U)
+			return;
+		break;
+	case 0x03U:	/* 4K page table with pgstes */
+		mask = atomic_xor_bits(&page->_refcount, 0x03U << 24);
+		mask >>= 24;
 		break;
 	}
+
+	page_table_release_check(page, table, half, mask);
+	pgtable_pte_page_dtor(page);
+	__free_page(page);
 }
 
 /*
@@ -322,34 +430,34 @@ void __tlb_remove_table(void *_table)
 
 static struct kmem_cache *base_pgt_cache;
 
-static unsigned long base_pgt_alloc(void)
+static unsigned long *base_pgt_alloc(void)
 {
-	u64 *table;
+	unsigned long *table;
 
 	table = kmem_cache_alloc(base_pgt_cache, GFP_KERNEL);
 	if (table)
-		memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
-	return (unsigned long) table;
-}
-
-static void base_pgt_free(unsigned long table)
-{
-	kmem_cache_free(base_pgt_cache, (void *) table);
-}
-
-static unsigned long base_crst_alloc(unsigned long val)
-{
-	unsigned long table;
-
-	table =	 __get_free_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
-	if (table)
-		crst_table_init((unsigned long *)table, val);
+		memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
 	return table;
 }
 
-static void base_crst_free(unsigned long table)
+static void base_pgt_free(unsigned long *table)
 {
-	free_pages(table, CRST_ALLOC_ORDER);
+	kmem_cache_free(base_pgt_cache, table);
+}
+
+static unsigned long *base_crst_alloc(unsigned long val)
+{
+	unsigned long *table;
+
+	table = (unsigned long *)__get_free_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+	if (table)
+		crst_table_init(table, val);
+	return table;
+}
+
+static void base_crst_free(unsigned long *table)
+{
+	free_pages((unsigned long)table, CRST_ALLOC_ORDER);
 }
 
 #define BASE_ADDR_END_FUNC(NAME, SIZE)					\
@@ -377,14 +485,14 @@ static inline unsigned long base_lra(unsigned long address)
 	return real;
 }
 
-static int base_page_walk(unsigned long origin, unsigned long addr,
+static int base_page_walk(unsigned long *origin, unsigned long addr,
 			  unsigned long end, int alloc)
 {
 	unsigned long *pte, next;
 
 	if (!alloc)
 		return 0;
-	pte = (unsigned long *) origin;
+	pte = origin;
 	pte += (addr & _PAGE_INDEX) >> _PAGE_SHIFT;
 	do {
 		next = base_page_addr_end(addr, end);
@@ -393,13 +501,13 @@ static int base_page_walk(unsigned long origin, unsigned long addr,
 	return 0;
 }
 
-static int base_segment_walk(unsigned long origin, unsigned long addr,
+static int base_segment_walk(unsigned long *origin, unsigned long addr,
 			     unsigned long end, int alloc)
 {
-	unsigned long *ste, next, table;
+	unsigned long *ste, next, *table;
 	int rc;
 
-	ste = (unsigned long *) origin;
+	ste = origin;
 	ste += (addr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
 	do {
 		next = base_segment_addr_end(addr, end);
@@ -409,9 +517,9 @@ static int base_segment_walk(unsigned long origin, unsigned long addr,
 			table = base_pgt_alloc();
 			if (!table)
 				return -ENOMEM;
-			*ste = table | _SEGMENT_ENTRY;
+			*ste = __pa(table) | _SEGMENT_ENTRY;
 		}
-		table = *ste & _SEGMENT_ENTRY_ORIGIN;
+		table = __va(*ste & _SEGMENT_ENTRY_ORIGIN);
 		rc = base_page_walk(table, addr, next, alloc);
 		if (rc)
 			return rc;
@@ -422,13 +530,13 @@ static int base_segment_walk(unsigned long origin, unsigned long addr,
 	return 0;
 }
 
-static int base_region3_walk(unsigned long origin, unsigned long addr,
+static int base_region3_walk(unsigned long *origin, unsigned long addr,
 			     unsigned long end, int alloc)
 {
-	unsigned long *rtte, next, table;
+	unsigned long *rtte, next, *table;
 	int rc;
 
-	rtte = (unsigned long *) origin;
+	rtte = origin;
 	rtte += (addr & _REGION3_INDEX) >> _REGION3_SHIFT;
 	do {
 		next = base_region3_addr_end(addr, end);
@@ -438,9 +546,9 @@ static int base_region3_walk(unsigned long origin, unsigned long addr,
 			table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
 			if (!table)
 				return -ENOMEM;
-			*rtte = table | _REGION3_ENTRY;
+			*rtte = __pa(table) | _REGION3_ENTRY;
 		}
-		table = *rtte & _REGION_ENTRY_ORIGIN;
+		table = __va(*rtte & _REGION_ENTRY_ORIGIN);
 		rc = base_segment_walk(table, addr, next, alloc);
 		if (rc)
 			return rc;
@@ -450,13 +558,13 @@ static int base_region3_walk(unsigned long origin, unsigned long addr,
 	return 0;
 }
 
-static int base_region2_walk(unsigned long origin, unsigned long addr,
+static int base_region2_walk(unsigned long *origin, unsigned long addr,
 			     unsigned long end, int alloc)
 {
-	unsigned long *rste, next, table;
+	unsigned long *rste, next, *table;
 	int rc;
 
-	rste = (unsigned long *) origin;
+	rste = origin;
 	rste += (addr & _REGION2_INDEX) >> _REGION2_SHIFT;
 	do {
 		next = base_region2_addr_end(addr, end);
@@ -466,9 +574,9 @@ static int base_region2_walk(unsigned long origin, unsigned long addr,
 			table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
 			if (!table)
 				return -ENOMEM;
-			*rste = table | _REGION2_ENTRY;
+			*rste = __pa(table) | _REGION2_ENTRY;
 		}
-		table = *rste & _REGION_ENTRY_ORIGIN;
+		table = __va(*rste & _REGION_ENTRY_ORIGIN);
 		rc = base_region3_walk(table, addr, next, alloc);
 		if (rc)
 			return rc;
@@ -478,13 +586,13 @@ static int base_region2_walk(unsigned long origin, unsigned long addr,
 	return 0;
 }
 
-static int base_region1_walk(unsigned long origin, unsigned long addr,
+static int base_region1_walk(unsigned long *origin, unsigned long addr,
 			     unsigned long end, int alloc)
 {
-	unsigned long *rfte, next, table;
+	unsigned long *rfte, next, *table;
 	int rc;
 
-	rfte = (unsigned long *) origin;
+	rfte = origin;
 	rfte += (addr & _REGION1_INDEX) >> _REGION1_SHIFT;
 	do {
 		next = base_region1_addr_end(addr, end);
@@ -494,9 +602,9 @@ static int base_region1_walk(unsigned long origin, unsigned long addr,
 			table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
 			if (!table)
 				return -ENOMEM;
-			*rfte = table | _REGION1_ENTRY;
+			*rfte = __pa(table) | _REGION1_ENTRY;
 		}
-		table = *rfte & _REGION_ENTRY_ORIGIN;
+		table = __va(*rfte & _REGION_ENTRY_ORIGIN);
 		rc = base_region2_walk(table, addr, next, alloc);
 		if (rc)
 			return rc;
@@ -515,7 +623,7 @@ static int base_region1_walk(unsigned long origin, unsigned long addr,
  */
 void base_asce_free(unsigned long asce)
 {
-	unsigned long table = asce & _ASCE_ORIGIN;
+	unsigned long *table = __va(asce & _ASCE_ORIGIN);
 
 	if (!asce)
 		return;
@@ -567,7 +675,7 @@ static int base_pgt_cache_init(void)
  */
 unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages)
 {
-	unsigned long asce, table, end;
+	unsigned long asce, *table, end;
 	int rc;
 
 	if (base_pgt_cache_init())
@@ -578,25 +686,25 @@ unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages)
 		if (!table)
 			return 0;
 		rc = base_segment_walk(table, addr, end, 1);
-		asce = table | _ASCE_TYPE_SEGMENT | _ASCE_TABLE_LENGTH;
+		asce = __pa(table) | _ASCE_TYPE_SEGMENT | _ASCE_TABLE_LENGTH;
 	} else if (end <= _REGION2_SIZE) {
 		table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
 		if (!table)
 			return 0;
 		rc = base_region3_walk(table, addr, end, 1);
-		asce = table | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+		asce = __pa(table) | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
 	} else if (end <= _REGION1_SIZE) {
 		table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
 		if (!table)
 			return 0;
 		rc = base_region2_walk(table, addr, end, 1);
-		asce = table | _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
+		asce = __pa(table) | _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
 	} else {
 		table = base_crst_alloc(_REGION1_ENTRY_EMPTY);
 		if (!table)
 			return 0;
 		rc = base_region1_walk(table, addr, end, 1);
-		asce = table | _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH;
+		asce = __pa(table) | _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH;
 	}
 	if (rc) {
 		base_asce_free(asce);
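
A note on the 2KB-fragment hand-off implemented above in page_table_free() and __tlb_remove_table(): the top byte of the parent page's _refcount carries two bits per 2KB half, 0x01/0x02 for "half is allocated" and 0x10/0x20 for "release pending", and atomic_xor_bits() toggles them while returning the post-XOR value. A minimal stand-alone model of that protocol, with everything kernel-specific mocked (plain C11 atomics, no locking):

	/* Model of the fragment states; bit layout copied from the hunks above. */
	#include <stdatomic.h>
	#include <stdio.h>

	/* same contract as the kernel's atomic_xor_bits(): return the new value */
	static unsigned int xor_bits(atomic_uint *v, unsigned int bits)
	{
		return atomic_fetch_xor(v, bits) ^ bits;
	}

	int main(void)
	{
		atomic_uint refcount = 0x01U << 24;	/* only the lower half is in use */
		unsigned int bit = 0;			/* release the lower half */
		unsigned int mask;

		/* under mm->context.lock: set "pending" (0x10), clear "allocated" (0x01) */
		mask = xor_bits(&refcount, 0x11U << (bit + 24)) >> 24;
		printf("after mark: %#04x, other half busy: %s\n",
		       mask, (mask & 0x03U) ? "yes" : "no");

		/* outside the lock: drop "pending"; the page is freeable at zero */
		mask = xor_bits(&refcount, 0x10U << (bit + 24)) >> 24;
		printf("after release: %#04x, free the page: %s\n",
		       mask, (mask == 0x00U) ? "yes" : "no");
		return 0;
	}
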
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 2f9b78f..792f8e0 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -771,7 +771,7 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
 
 	if (zdev->dma_table)
 		rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
-					(u64)zdev->dma_table);
+					virt_to_phys(zdev->dma_table));
 	else
 		rc = zpci_dma_init_device(zdev);
 	if (rc) {
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 1f4540d6..f46833a 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -74,7 +74,7 @@ static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
 		if (!sto)
 			return NULL;
 
-		set_rt_sto(entry, sto);
+		set_rt_sto(entry, virt_to_phys(sto));
 		validate_rt_entry(entry);
 		entry_clr_protected(entry);
 	}
@@ -91,7 +91,7 @@ static unsigned long *dma_get_page_table_origin(unsigned long *entry)
 		pto = dma_alloc_page_table();
 		if (!pto)
 			return NULL;
-		set_st_pto(entry, pto);
+		set_st_pto(entry, virt_to_phys(pto));
 		validate_st_entry(entry);
 		entry_clr_protected(entry);
 	}
@@ -117,7 +117,7 @@ unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
 	return &pto[px];
 }
 
-void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
+void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags)
 {
 	if (flags & ZPCI_PTE_INVALID) {
 		invalidate_pt_entry(entry);
@@ -132,11 +132,11 @@ void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
 		entry_clr_protected(entry);
 }
 
-static int __dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
+static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
 			      dma_addr_t dma_addr, size_t size, int flags)
 {
 	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	u8 *page_addr = (u8 *) (pa & PAGE_MASK);
+	phys_addr_t page_addr = (pa & PAGE_MASK);
 	unsigned long irq_flags;
 	unsigned long *entry;
 	int i, rc = 0;
@@ -217,7 +217,7 @@ static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
 	return ret;
 }
 
-static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
+static int dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
 			    dma_addr_t dma_addr, size_t size, int flags)
 {
 	int rc;
@@ -400,7 +400,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	struct page *page;
-	unsigned long pa;
+	phys_addr_t pa;
 	dma_addr_t map;
 
 	size = PAGE_ALIGN(size);
@@ -411,18 +411,18 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
 	pa = page_to_phys(page);
 	map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0);
 	if (dma_mapping_error(dev, map)) {
-		free_pages(pa, get_order(size));
+		__free_pages(page, get_order(size));
 		return NULL;
 	}
 
 	atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
 	if (dma_handle)
 		*dma_handle = map;
-	return (void *) pa;
+	return phys_to_virt(pa);
 }
 
 static void s390_dma_free(struct device *dev, size_t size,
-			  void *pa, dma_addr_t dma_handle,
+			  void *vaddr, dma_addr_t dma_handle,
 			  unsigned long attrs)
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
@@ -430,7 +430,7 @@ static void s390_dma_free(struct device *dev, size_t size,
 	size = PAGE_ALIGN(size);
 	atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
 	s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0);
-	free_pages((unsigned long) pa, get_order(size));
+	free_pages((unsigned long)vaddr, get_order(size));
 }
 
 /* Map a segment into a contiguous dma address area */
@@ -443,7 +443,7 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
 	dma_addr_t dma_addr_base, dma_addr;
 	int flags = ZPCI_PTE_VALID;
 	struct scatterlist *s;
-	unsigned long pa = 0;
+	phys_addr_t pa = 0;
 	int ret;
 
 	dma_addr_base = dma_alloc_address(dev, nr_pages);
@@ -598,7 +598,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
 
 	}
 	if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
-			       (u64)zdev->dma_table)) {
+			       virt_to_phys(zdev->dma_table))) {
 		rc = -EIO;
 		goto free_bitmap;
 	}
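
The conversions in pci.c and pci_dma.c all enforce one rule: pointers the CPU dereferences stay virtual, while values programmed into hardware translation tables (the IOAT registration, rt/st entries) are physical, produced with virt_to_phys() and mapped back with phys_to_virt() when the CPU needs to walk them. A stand-alone sketch of that discipline; LINEAR_OFFSET stands in for the kernel's linear mapping and is purely illustrative:

	#include <stdint.h>
	#include <stdio.h>

	#define LINEAR_OFFSET 0x1000UL		/* assumed, not a real value */

	static uintptr_t virt_to_phys_mock(void *va)
	{
		return (uintptr_t)va - LINEAR_OFFSET;
	}

	static void *phys_to_virt_mock(uintptr_t pa)
	{
		return (void *)(pa + LINEAR_OFFSET);
	}

	/* "hardware" consumes physical origins, never CPU pointers */
	static void register_ioat_mock(uintptr_t dma_table_pa)
	{
		printf("programmed origin: %#lx\n", (unsigned long)dma_table_pa);
	}

	int main(void)
	{
		static uint64_t dma_table[512];	/* CPU-visible table */

		register_ioat_mock(virt_to_phys_mock(dma_table));

		/* walking back: physical origin -> virtual pointer */
		uint64_t *va = phys_to_virt_mock(virt_to_phys_mock(dma_table));
		printf("round trip ok: %s\n", va == dma_table ? "yes" : "no");
		return 0;
	}
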
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 28d863a..4dd58b1 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -365,10 +365,7 @@ EXPORT_SYMBOL_GPL(zpci_write_block);
 
 static inline void __pciwb_mio(void)
 {
-	unsigned long unused = 0;
-
-	asm volatile (".insn    rre,0xb9d50000,%[op],%[op]\n"
-		      : [op] "+d" (unused));
+	asm volatile (".insn    rre,0xb9d50000,0,0\n");
 }
 
 void zpci_barrier(void)
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 954bb7a..aefd306 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -45,9 +45,9 @@ static int zpci_set_airq(struct zpci_dev *zdev)
 	fib.fmt0.isc = PCI_ISC;
 	fib.fmt0.sum = 1;	/* enable summary notifications */
 	fib.fmt0.noi = airq_iv_end(zdev->aibv);
-	fib.fmt0.aibv = (unsigned long) zdev->aibv->vector;
+	fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
 	fib.fmt0.aibvo = 0;	/* each zdev has its own interrupt vector */
-	fib.fmt0.aisb = (unsigned long) zpci_sbv->vector + (zdev->aisb/64)*8;
+	fib.fmt0.aisb = virt_to_phys(zpci_sbv->vector) + (zdev->aisb / 64) * 8;
 	fib.fmt0.aisbo = zdev->aisb & 63;
 
 	return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
@@ -422,7 +422,7 @@ static int __init zpci_directed_irq_init(void)
 
 	iib.diib.isc = PCI_ISC;
 	iib.diib.nr_cpus = num_possible_cpus();
-	iib.diib.disb_addr = (u64) zpci_sbv->vector;
+	iib.diib.disb_addr = virt_to_phys(zpci_sbv->vector);
 	__zpci_set_irq_ctrl(SIC_IRQ_MODE_DIRECT, 0, &iib);
 
 	zpci_ibv = kcalloc(num_possible_cpus(), sizeof(*zpci_ibv),
diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt
index 6db9820..5f008e79 100644
--- a/arch/s390/tools/opcodes.txt
+++ b/arch/s390/tools/opcodes.txt
@@ -276,6 +276,7 @@
 b286	qsi	S_RD
 b287	lsctl	S_RD
 b28e	qctri	S_RD
+b28f	qpaci	S_RD
 b299	srnm	S_RD
 b29c	stfpc	S_RD
 b29d	lfpc	S_RD
@@ -1098,7 +1099,7 @@
 eb62	mric	RSY_RDRU
 eb6a	asi	SIY_IRD
 eb6e	alsi	SIY_IRD
-eb71	lpswey	SIY_URD
+eb71	lpswey	SIY_RD
 eb7a	agsi	SIY_IRD
 eb7e	algsi	SIY_IRD
 eb80	icmh	RSY_RURD
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 51690e7..4f70567 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -213,6 +213,18 @@
 	  key sizes and XTS mode is hardware accelerated for 256 and
 	  512 bit keys.
 
+config CRYPTO_CHACHA_S390
+	tristate "ChaCha20 stream cipher"
+	depends on S390
+	select CRYPTO_ALGAPI
+	select CRYPTO_SKCIPHER
+	select CRYPTO_CHACHA20
+	help
+	  This is the s390 SIMD implementation of the ChaCha20 stream
+	  cipher (RFC 7539).
+
+	  It is available as of z13.
+
 config S390_PRNG
 	tristate "Pseudo random number generator device driver"
 	depends on S390
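
For context: the glue module registers an skcipher under the generic "chacha20" name (driver name "chacha20-s390"), so existing users pick up the accelerated code transparently. A hedged in-kernel usage sketch; the helper name and the synchronous-wait style are illustrative, not part of this series:

	#include <crypto/skcipher.h>
	#include <linux/crypto.h>
	#include <linux/scatterlist.h>

	/* Encrypt 'len' bytes in place; key is 32 bytes, iv is 16 bytes. */
	static int chacha20_demo(const u8 *key, u8 *iv, u8 *buf, unsigned int len)
	{
		struct crypto_skcipher *tfm;
		struct skcipher_request *req;
		struct scatterlist sg;
		DECLARE_CRYPTO_WAIT(wait);
		int ret;

		tfm = crypto_alloc_skcipher("chacha20", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		ret = crypto_skcipher_setkey(tfm, key, 32);
		if (ret)
			goto out_tfm;

		req = skcipher_request_alloc(tfm, GFP_KERNEL);
		if (!req) {
			ret = -ENOMEM;
			goto out_tfm;
		}

		sg_init_one(&sg, buf, len);
		skcipher_request_set_callback(req, 0, crypto_req_done, &wait);
		skcipher_request_set_crypt(req, &sg, &sg, len, iv);
		ret = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

		skcipher_request_free(req);
	out_tfm:
		crypto_free_skcipher(tfm);
		return ret;
	}
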
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 83df387..50860eb 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -109,7 +109,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 
 	zdev->dma_table = s390_domain->dma_table;
 	cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
-				(u64) zdev->dma_table);
+				virt_to_phys(zdev->dma_table));
 	if (cc) {
 		rc = -EIO;
 		goto out_restore;
@@ -205,11 +205,11 @@ static void s390_iommu_release_device(struct device *dev)
 }
 
 static int s390_iommu_update_trans(struct s390_domain *s390_domain,
-				   unsigned long pa, dma_addr_t dma_addr,
+				   phys_addr_t pa, dma_addr_t dma_addr,
 				   size_t size, int flags)
 {
 	struct s390_domain_device *domain_device;
-	u8 *page_addr = (u8 *) (pa & PAGE_MASK);
+	phys_addr_t page_addr = pa & PAGE_MASK;
 	dma_addr_t start_dma_addr = dma_addr;
 	unsigned long irq_flags, nr_pages, i;
 	unsigned long *entry;
@@ -274,7 +274,7 @@ static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova,
 	if (!(prot & IOMMU_WRITE))
 		flags |= ZPCI_TABLE_PROTECTED;
 
-	rc = s390_iommu_update_trans(s390_domain, (unsigned long) paddr, iova,
+	rc = s390_iommu_update_trans(s390_domain, paddr, iova,
 				     size, flags);
 
 	return rc;
@@ -324,7 +324,7 @@ static size_t s390_iommu_unmap(struct iommu_domain *domain,
 	if (!paddr)
 		return 0;
 
-	rc = s390_iommu_update_trans(s390_domain, (unsigned long) paddr, iova,
+	rc = s390_iommu_update_trans(s390_domain, paddr, iova,
 				     size, flags);
 	if (rc)
 		return 0;
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index 6043c83..811e79c 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -1824,10 +1824,11 @@ static struct attribute *paths_info_attrs[] = {
 	&path_fcs_attribute.attr,
 	NULL,
 };
+ATTRIBUTE_GROUPS(paths_info);
 
 static struct kobj_type path_attr_type = {
 	.release	= dasd_path_release,
-	.default_attrs	= paths_info_attrs,
+	.default_groups	= paths_info_groups,
 	.sysfs_ops	= &kobj_sysfs_ops,
 };
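
This and the sclp_sd change below are part of the tree-wide move away from kobj_type::default_attrs to default_groups; ATTRIBUTE_GROUPS() generates the group wrappers from the already existing attribute array. Its expansion here is approximately:

	/* ATTRIBUTE_GROUPS(paths_info) from <linux/sysfs.h>, roughly expanded: */
	static const struct attribute_group paths_info_group = {
		.attrs = paths_info_attrs,
	};
	static const struct attribute_group *paths_info_groups[] = {
		&paths_info_group,
		NULL,
	};
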
 
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index b64feab..e9943a8 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -139,7 +139,7 @@ int __init sclp_early_get_core_info(struct sclp_core_info *info)
 	}
 	sclp_fill_core_info(info, sccb);
 out:
-	memblock_phys_free((unsigned long)sccb, length);
+	memblock_free(sccb, length);
 	return rc;
 }
 
diff --git a/drivers/s390/char/sclp_sd.c b/drivers/s390/char/sclp_sd.c
index 25c2d76..f9e164b 100644
--- a/drivers/s390/char/sclp_sd.c
+++ b/drivers/s390/char/sclp_sd.c
@@ -438,11 +438,12 @@ static struct attribute *sclp_sd_file_default_attrs[] = {
 	&reload_attr.attr,
 	NULL,
 };
+ATTRIBUTE_GROUPS(sclp_sd_file_default);
 
 static struct kobj_type sclp_sd_file_ktype = {
 	.sysfs_ops = &kobj_sysfs_ops,
 	.release = sclp_sd_file_release,
-	.default_attrs = sclp_sd_file_default_attrs,
+	.default_groups = sclp_sd_file_default_groups,
 };
 
 /**
diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c
index 9e06628..4cebfaa 100644
--- a/drivers/s390/char/vmcp.c
+++ b/drivers/s390/char/vmcp.c
@@ -72,7 +72,7 @@ static void vmcp_response_alloc(struct vmcp_session *session)
 	if (order > 2)
 		page = cma_alloc(vmcp_cma, nr_pages, 0, false);
 	if (page) {
-		session->response = (char *)page_to_phys(page);
+		session->response = (char *)page_to_virt(page);
 		session->cma_alloc = 1;
 		return;
 	}
@@ -89,7 +89,7 @@ static void vmcp_response_free(struct vmcp_session *session)
 	order = get_order(session->bufsize);
 	nr_pages = ALIGN(session->bufsize, PAGE_SIZE) >> PAGE_SHIFT;
 	if (session->cma_alloc) {
-		page = phys_to_page((unsigned long)session->response);
+		page = virt_to_page((unsigned long)session->response);
 		cma_release(vmcp_cma, page, nr_pages);
 		session->cma_alloc = 0;
 	} else {
diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c
index 684348d..962dfa2 100644
--- a/drivers/s390/cio/chsc_sch.c
+++ b/drivers/s390/cio/chsc_sch.c
@@ -91,11 +91,6 @@ static int chsc_subchannel_probe(struct subchannel *sch)
 			 sch->schid.ssid, sch->schid.sch_no, ret);
 		dev_set_drvdata(&sch->dev, NULL);
 		kfree(private);
-	} else {
-		if (dev_get_uevent_suppress(&sch->dev)) {
-			dev_set_uevent_suppress(&sch->dev, 0);
-			kobject_uevent(&sch->dev.kobj, KOBJ_ADD);
-		}
 	}
 	return ret;
 }
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index ce9e751..fa82933 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -470,16 +470,6 @@ int css_register_subchannel(struct subchannel *sch)
 	if (sch->st == SUBCHANNEL_TYPE_IO)
 		sch->dev.type = &io_subchannel_type;
 
-	/*
-	 * We don't want to generate uevents for I/O subchannels that don't
-	 * have a working ccw device behind them since they will be
-	 * unregistered before they can be used anyway, so we delay the add
-	 * uevent until after device recognition was successful.
-	 * Note that we suppress the uevent for all subchannel types;
-	 * the subchannel driver can decide itself when it wants to inform
-	 * userspace of its existence.
-	 */
-	dev_set_uevent_suppress(&sch->dev, 1);
 	css_update_ssd_info(sch);
 	/* make it known to the system */
 	ret = css_sch_device_register(sch);
@@ -488,15 +478,6 @@ int css_register_subchannel(struct subchannel *sch)
 			      sch->schid.ssid, sch->schid.sch_no, ret);
 		return ret;
 	}
-	if (!sch->driver) {
-		/*
-		 * No driver matched. Generate the uevent now so that
-		 * a fitting driver module may be loaded based on the
-		 * modalias.
-		 */
-		dev_set_uevent_suppress(&sch->dev, 0);
-		kobject_uevent(&sch->dev.kobj, KOBJ_ADD);
-	}
 	return ret;
 }
 
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 07a1761..cd938a2 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -838,14 +838,6 @@ static void io_subchannel_register(struct ccw_device *cdev)
 		adjust_init_count = 0;
 		goto out;
 	}
-	/*
-	 * Now we know this subchannel will stay, we can throw
-	 * our delayed uevent.
-	 */
-	if (dev_get_uevent_suppress(&sch->dev)) {
-		dev_set_uevent_suppress(&sch->dev, 0);
-		kobject_uevent(&sch->dev.kobj, KOBJ_ADD);
-	}
 	/* make it known to the system */
 	ret = device_add(&cdev->dev);
 	if (ret) {
@@ -1036,15 +1028,9 @@ static int io_subchannel_probe(struct subchannel *sch)
 				      "0.%x.%04x (rc=%d)\n",
 				      sch->schid.ssid, sch->schid.sch_no, rc);
 		/*
-		 * The console subchannel already has an associated ccw_device.
-		 * Throw the delayed uevent for the subchannel, register
-		 * the ccw_device and exit.
-		 */
-		if (dev_get_uevent_suppress(&sch->dev)) {
-			/* should always be the case for the console */
-			dev_set_uevent_suppress(&sch->dev, 0);
-			kobject_uevent(&sch->dev.kobj, KOBJ_ADD);
-		}
+		 * The console subchannel already has an associated ccw_device.
+		 * Register it and exit.
+		 */
 		cdev = sch_get_cdev(sch);
 		rc = device_add(&cdev->dev);
 		if (rc) {
diff --git a/drivers/s390/cio/eadm_sch.c b/drivers/s390/cio/eadm_sch.c
index 15bdae5..8b46368 100644
--- a/drivers/s390/cio/eadm_sch.c
+++ b/drivers/s390/cio/eadm_sch.c
@@ -243,11 +243,6 @@ static int eadm_subchannel_probe(struct subchannel *sch)
 	spin_lock_irq(&list_lock);
 	list_add(&private->head, &eadm_list);
 	spin_unlock_irq(&list_lock);
-
-	if (dev_get_uevent_suppress(&sch->dev)) {
-		dev_set_uevent_suppress(&sch->dev, 0);
-		kobject_uevent(&sch->dev.kobj, KOBJ_ADD);
-	}
 out:
 	return ret;
 }
diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h
index 99c2212d..5ea6249 100644
--- a/drivers/s390/cio/qdio.h
+++ b/drivers/s390/cio/qdio.h
@@ -236,12 +236,11 @@ struct qdio_irq {
 	int nr_input_qs;
 	int nr_output_qs;
 
-	struct ccw1 ccw;
-	struct ciw equeue;
-	struct ciw aqueue;
+	struct ccw1 *ccw;
 
 	struct qdio_ssqd_desc ssqd_desc;
 	void (*orig_handler) (struct ccw_device *, unsigned long, struct irb *);
+	qdio_handler_t (*error_handler);
 
 	int perf_stat_enabled;
 
@@ -338,7 +337,7 @@ void qdio_setup_ssqd_info(struct qdio_irq *irq_ptr);
 int qdio_setup_get_ssqd(struct qdio_irq *irq_ptr,
 			struct subchannel_id *schid,
 			struct qdio_ssqd_desc *data);
-int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data);
+void qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data);
 void qdio_shutdown_irq(struct qdio_irq *irq);
 void qdio_print_subchannel_info(struct qdio_irq *irq_ptr);
 void qdio_free_queues(struct qdio_irq *irq_ptr);
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 45e810c..9cde557 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/kmemleak.h>
 #include <linux/delay.h>
 #include <linux/gfp.h>
 #include <linux/io.h>
@@ -169,8 +170,6 @@ static int qdio_do_sqbs(struct qdio_q *q, unsigned char state, int start,
 	int tmp_count = count, tmp_start = start;
 	int nr = q->nr;
 
-	if (!count)
-		return 0;
 	qperf_inc(q, sqbs);
 
 	if (!q->is_input_q)
@@ -499,6 +498,31 @@ static int get_inbound_buffer_frontier(struct qdio_q *q, unsigned int start,
 	}
 }
 
+int qdio_inspect_input_queue(struct ccw_device *cdev, unsigned int nr,
+			     unsigned int *bufnr, unsigned int *error)
+{
+	struct qdio_irq *irq = cdev->private->qdio_data;
+	unsigned int start;
+	struct qdio_q *q;
+	int count;
+
+	if (!irq)
+		return -ENODEV;
+
+	q = irq->input_qs[nr];
+	start = q->first_to_check;
+	*error = 0;
+
+	count = get_inbound_buffer_frontier(q, start, error);
+	if (count == 0)
+		return 0;
+
+	*bufnr = start;
+	q->first_to_check = add_buf(start, count);
+	return count;
+}
+EXPORT_SYMBOL_GPL(qdio_inspect_input_queue);
+
 static inline int qdio_inbound_q_done(struct qdio_q *q, unsigned int start)
 {
 	unsigned char state = 0;
@@ -578,6 +602,31 @@ static int get_outbound_buffer_frontier(struct qdio_q *q, unsigned int start,
 	}
 }
 
+int qdio_inspect_output_queue(struct ccw_device *cdev, unsigned int nr,
+			      unsigned int *bufnr, unsigned int *error)
+{
+	struct qdio_irq *irq = cdev->private->qdio_data;
+	unsigned int start;
+	struct qdio_q *q;
+	int count;
+
+	if (!irq)
+		return -ENODEV;
+
+	q = irq->output_qs[nr];
+	start = q->first_to_check;
+	*error = 0;
+
+	count = get_outbound_buffer_frontier(q, start, error);
+	if (count == 0)
+		return 0;
+
+	*bufnr = start;
+	q->first_to_check = add_buf(start, count);
+	return count;
+}
+EXPORT_SYMBOL_GPL(qdio_inspect_output_queue);
+
 static int qdio_kick_outbound_q(struct qdio_q *q, unsigned int count,
 				unsigned long aob)
 {
@@ -653,24 +702,18 @@ static void qdio_handle_activate_check(struct qdio_irq *irq_ptr,
 				       unsigned long intparm, int cstat,
 				       int dstat)
 {
-	struct qdio_q *q;
+	unsigned int first_to_check = 0;
 
 	DBF_ERROR("%4x ACT CHECK", irq_ptr->schid.sch_no);
 	DBF_ERROR("intp :%lx", intparm);
 	DBF_ERROR("ds: %2x cs:%2x", dstat, cstat);
 
-	if (irq_ptr->nr_input_qs) {
-		q = irq_ptr->input_qs[0];
-	} else if (irq_ptr->nr_output_qs) {
-		q = irq_ptr->output_qs[0];
-	} else {
-		dump_stack();
-		goto no_handler;
-	}
+	/* zfcp wants this: */
+	if (irq_ptr->nr_input_qs)
+		first_to_check = irq_ptr->input_qs[0]->first_to_check;
 
-	q->handler(q->irq_ptr->cdev, QDIO_ERROR_ACTIVATE,
-		   q->nr, q->first_to_check, 0, irq_ptr->int_parm);
-no_handler:
+	irq_ptr->error_handler(irq_ptr->cdev, QDIO_ERROR_ACTIVATE, 0,
+			       first_to_check, 0, irq_ptr->int_parm);
 	qdio_set_state(irq_ptr, QDIO_IRQ_STATE_STOPPED);
 	/*
 	 * In case of z/VM LGR (Live Guest Migration) QDIO recovery will happen.
@@ -874,6 +917,7 @@ int qdio_free(struct ccw_device *cdev)
 	qdio_free_queues(irq_ptr);
 	free_page((unsigned long) irq_ptr->qdr);
 	free_page(irq_ptr->chsc_page);
+	kfree(irq_ptr->ccw);
 	free_page((unsigned long) irq_ptr);
 	return 0;
 }
@@ -899,11 +943,17 @@ int qdio_allocate(struct ccw_device *cdev, unsigned int no_input_qs,
 	    no_output_qs > QDIO_MAX_QUEUES_PER_IRQ)
 		return -EINVAL;
 
-	/* irq_ptr must be in GFP_DMA since it contains ccw1.cda */
-	irq_ptr = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+	irq_ptr = (void *) get_zeroed_page(GFP_KERNEL);
 	if (!irq_ptr)
 		return -ENOMEM;
 
+	irq_ptr->ccw = kmalloc(sizeof(*irq_ptr->ccw), GFP_KERNEL | GFP_DMA);
+	if (!irq_ptr->ccw)
+		goto err_ccw;
+
+	/* kmemleak doesn't scan the page-allocated irq_ptr: */
+	kmemleak_not_leak(irq_ptr->ccw);
+
 	irq_ptr->cdev = cdev;
 	mutex_init(&irq_ptr->setup_mutex);
 	if (qdio_allocate_dbf(irq_ptr))
@@ -941,6 +991,8 @@ int qdio_allocate(struct ccw_device *cdev, unsigned int no_input_qs,
 	free_page(irq_ptr->chsc_page);
 err_chsc:
 err_dbf:
+	kfree(irq_ptr->ccw);
+err_ccw:
 	free_page((unsigned long) irq_ptr);
 	return rc;
 }
@@ -972,6 +1024,7 @@ int qdio_establish(struct ccw_device *cdev,
 {
 	struct qdio_irq *irq_ptr = cdev->private->qdio_data;
 	struct subchannel_id schid;
+	struct ciw *ciw;
 	long timeout;
 	int rc;
 
@@ -985,8 +1038,11 @@ int qdio_establish(struct ccw_device *cdev,
 	    init_data->no_output_qs > irq_ptr->max_output_qs)
 		return -EINVAL;
 
-	if ((init_data->no_input_qs && !init_data->input_handler) ||
-	    (init_data->no_output_qs && !init_data->output_handler))
+	/* Needed as error_handler: */
+	if (!init_data->input_handler)
+		return -EINVAL;
+
+	if (init_data->no_output_qs && !init_data->output_handler)
 		return -EINVAL;
 
 	if (!init_data->input_sbal_addr_array ||
@@ -996,6 +1052,12 @@ int qdio_establish(struct ccw_device *cdev,
 	if (!init_data->irq_poll)
 		return -EINVAL;
 
+	ciw = ccw_device_get_ciw(cdev, CIW_TYPE_EQUEUE);
+	if (!ciw) {
+		DBF_ERROR("%4x NO EQ", schid.sch_no);
+		return -EIO;
+	}
+
 	mutex_lock(&irq_ptr->setup_mutex);
 	qdio_trace_init_data(irq_ptr, init_data);
 	qdio_setup_irq(irq_ptr, init_data);
@@ -1005,15 +1067,15 @@ int qdio_establish(struct ccw_device *cdev,
 		goto err_thinint;
 
 	/* establish q */
-	irq_ptr->ccw.cmd_code = irq_ptr->equeue.cmd;
-	irq_ptr->ccw.flags = CCW_FLAG_SLI;
-	irq_ptr->ccw.count = irq_ptr->equeue.count;
-	irq_ptr->ccw.cda = (u32) virt_to_phys(irq_ptr->qdr);
+	irq_ptr->ccw->cmd_code = ciw->cmd;
+	irq_ptr->ccw->flags = CCW_FLAG_SLI;
+	irq_ptr->ccw->count = ciw->count;
+	irq_ptr->ccw->cda = (u32) virt_to_phys(irq_ptr->qdr);
 
 	spin_lock_irq(get_ccwdev_lock(cdev));
 	ccw_device_set_options_mask(cdev, 0);
 
-	rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ESTABLISH, 0, 0);
+	rc = ccw_device_start(cdev, irq_ptr->ccw, QDIO_DOING_ESTABLISH, 0, 0);
 	spin_unlock_irq(get_ccwdev_lock(cdev));
 	if (rc) {
 		DBF_ERROR("%4x est IO ERR", irq_ptr->schid.sch_no);
@@ -1065,6 +1127,7 @@ int qdio_activate(struct ccw_device *cdev)
 {
 	struct qdio_irq *irq_ptr = cdev->private->qdio_data;
 	struct subchannel_id schid;
+	struct ciw *ciw;
 	int rc;
 
 	ccw_device_get_schid(cdev, &schid);
@@ -1073,21 +1136,27 @@ int qdio_activate(struct ccw_device *cdev)
 	if (!irq_ptr)
 		return -ENODEV;
 
+	ciw = ccw_device_get_ciw(cdev, CIW_TYPE_AQUEUE);
+	if (!ciw) {
+		DBF_ERROR("%4x NO AQ", schid.sch_no);
+		return -EIO;
+	}
+
 	mutex_lock(&irq_ptr->setup_mutex);
 	if (irq_ptr->state == QDIO_IRQ_STATE_INACTIVE) {
 		rc = -EBUSY;
 		goto out;
 	}
 
-	irq_ptr->ccw.cmd_code = irq_ptr->aqueue.cmd;
-	irq_ptr->ccw.flags = CCW_FLAG_SLI;
-	irq_ptr->ccw.count = irq_ptr->aqueue.count;
-	irq_ptr->ccw.cda = 0;
+	irq_ptr->ccw->cmd_code = ciw->cmd;
+	irq_ptr->ccw->flags = CCW_FLAG_SLI;
+	irq_ptr->ccw->count = ciw->count;
+	irq_ptr->ccw->cda = 0;
 
 	spin_lock_irq(get_ccwdev_lock(cdev));
 	ccw_device_set_options(cdev, CCWDEV_REPORT_ALL);
 
-	rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ACTIVATE,
+	rc = ccw_device_start(cdev, irq_ptr->ccw, QDIO_DOING_ACTIVATE,
 			      0, DOIO_DENY_PREFETCH);
 	spin_unlock_irq(get_ccwdev_lock(cdev));
 	if (rc) {
@@ -1144,6 +1213,35 @@ static int handle_inbound(struct qdio_q *q, int bufnr, int count)
 }
 
 /**
+ * qdio_add_bufs_to_input_queue - process buffers on an Input Queue
+ * @cdev: associated ccw_device for the qdio subchannel
+ * @q_nr: queue number
+ * @bufnr: buffer number
+ * @count: how many buffers to process
+ */
+int qdio_add_bufs_to_input_queue(struct ccw_device *cdev, unsigned int q_nr,
+				 unsigned int bufnr, unsigned int count)
+{
+	struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+
+	if (bufnr >= QDIO_MAX_BUFFERS_PER_Q || count > QDIO_MAX_BUFFERS_PER_Q)
+		return -EINVAL;
+
+	if (!irq_ptr)
+		return -ENODEV;
+
+	DBF_DEV_EVENT(DBF_INFO, irq_ptr, "addi b:%02x c:%02x", bufnr, count);
+
+	if (irq_ptr->state != QDIO_IRQ_STATE_ACTIVE)
+		return -EIO;
+	if (!count)
+		return 0;
+
+	return handle_inbound(irq_ptr->input_qs[q_nr], bufnr, count);
+}
+EXPORT_SYMBOL_GPL(qdio_add_bufs_to_input_queue);
+
+/**
  * handle_outbound - process filled outbound buffers
  * @q: queue containing the buffers
  * @bufnr: first buffer to process
@@ -1184,16 +1282,16 @@ static int handle_outbound(struct qdio_q *q, unsigned int bufnr, unsigned int co
 }
 
 /**
- * do_QDIO - process input or output buffers
+ * qdio_add_bufs_to_output_queue - process buffers on an Output Queue
  * @cdev: associated ccw_device for the qdio subchannel
- * @callflags: input or output and special flags from the program
  * @q_nr: queue number
  * @bufnr: buffer number
  * @count: how many buffers to process
- * @aob: asynchronous operation block (outbound only)
+ * @aob: asynchronous operation block
  */
-int do_QDIO(struct ccw_device *cdev, unsigned int callflags,
-	    int q_nr, unsigned int bufnr, unsigned int count, struct qaob *aob)
+int qdio_add_bufs_to_output_queue(struct ccw_device *cdev, unsigned int q_nr,
+				  unsigned int bufnr, unsigned int count,
+				  struct qaob *aob)
 {
 	struct qdio_irq *irq_ptr = cdev->private->qdio_data;
 
@@ -1203,20 +1301,16 @@ int do_QDIO(struct ccw_device *cdev, unsigned int callflags,
 	if (!irq_ptr)
 		return -ENODEV;
 
-	DBF_DEV_EVENT(DBF_INFO, irq_ptr,
-		      "do%02x b:%02x c:%02x", callflags, bufnr, count);
+	DBF_DEV_EVENT(DBF_INFO, irq_ptr, "addo b:%02x c:%02x", bufnr, count);
 
 	if (irq_ptr->state != QDIO_IRQ_STATE_ACTIVE)
 		return -EIO;
 	if (!count)
 		return 0;
-	if (callflags & QDIO_FLAG_SYNC_INPUT)
-		return handle_inbound(irq_ptr->input_qs[q_nr], bufnr, count);
-	else if (callflags & QDIO_FLAG_SYNC_OUTPUT)
-		return handle_outbound(irq_ptr->output_qs[q_nr], bufnr, count, aob);
-	return -EINVAL;
+
+	return handle_outbound(irq_ptr->output_qs[q_nr], bufnr, count, aob);
 }
-EXPORT_SYMBOL_GPL(do_QDIO);
+EXPORT_SYMBOL_GPL(qdio_add_bufs_to_output_queue);
 
 /**
  * qdio_start_irq - enable interrupt processing for the device
@@ -1263,40 +1357,6 @@ int qdio_start_irq(struct ccw_device *cdev)
 }
 EXPORT_SYMBOL(qdio_start_irq);
 
-static int __qdio_inspect_queue(struct qdio_q *q, unsigned int *bufnr,
-				unsigned int *error)
-{
-	unsigned int start = q->first_to_check;
-	int count;
-
-	*error = 0;
-	count = q->is_input_q ? get_inbound_buffer_frontier(q, start, error) :
-				get_outbound_buffer_frontier(q, start, error);
-	if (count == 0)
-		return 0;
-
-	*bufnr = start;
-
-	/* for the next time */
-	q->first_to_check = add_buf(start, count);
-
-	return count;
-}
-
-int qdio_inspect_queue(struct ccw_device *cdev, unsigned int nr, bool is_input,
-		       unsigned int *bufnr, unsigned int *error)
-{
-	struct qdio_irq *irq_ptr = cdev->private->qdio_data;
-	struct qdio_q *q;
-
-	if (!irq_ptr)
-		return -ENODEV;
-	q = is_input ? irq_ptr->input_qs[nr] : irq_ptr->output_qs[nr];
-
-	return __qdio_inspect_queue(q, bufnr, error);
-}
-EXPORT_SYMBOL_GPL(qdio_inspect_queue);
-
 /**
  * qdio_stop_irq - disable interrupt processing for the device
  * @cdev: associated ccw_device for the qdio subchannel
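
With do_QDIO() and qdio_inspect_queue() split up, the queue direction is now encoded in the function name rather than in a flag or bool argument. A hedged sketch of the resulting receive pattern in a qdio driver (queue number, error policy and the function itself are illustrative, not from this series):

	/* Sketch only: poll Input Queue 0 and hand processed buffers back. */
	static void rx_poll_sketch(struct ccw_device *cdev)
	{
		unsigned int bufnr, error;
		int count;

		count = qdio_inspect_input_queue(cdev, 0, &bufnr, &error);
		if (count <= 0)
			return;

		/* ... process 'count' buffers starting at index 'bufnr',
		 * honouring 'error' as the driver sees fit ...
		 */

		/* make the processed buffers available to the device again */
		if (qdio_add_bufs_to_input_queue(cdev, 0, bufnr, count))
			pr_warn("rx_poll_sketch: refill failed\n");
	}
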
diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c
index efbb5e5..714878e 100644
--- a/drivers/s390/cio/qdio_setup.c
+++ b/drivers/s390/cio/qdio_setup.c
@@ -351,19 +351,18 @@ static void setup_qib(struct qdio_irq *irq_ptr,
 		       sizeof(irq_ptr->qib.parm));
 }
 
-int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data)
+void qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data)
 {
 	struct ccw_device *cdev = irq_ptr->cdev;
-	struct ciw *ciw;
 
 	irq_ptr->qdioac1 = 0;
-	memset(&irq_ptr->ccw, 0, sizeof(irq_ptr->ccw));
 	memset(&irq_ptr->ssqd_desc, 0, sizeof(irq_ptr->ssqd_desc));
 	memset(&irq_ptr->perf_stat, 0, sizeof(irq_ptr->perf_stat));
 
 	irq_ptr->debugfs_dev = NULL;
 	irq_ptr->sch_token = irq_ptr->perf_stat_enabled = 0;
 	irq_ptr->state = QDIO_IRQ_STATE_INACTIVE;
+	irq_ptr->error_handler = init_data->input_handler;
 
 	irq_ptr->int_parm = init_data->int_parm;
 	irq_ptr->nr_input_qs = init_data->no_input_qs;
@@ -386,23 +385,6 @@ int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data)
 	irq_ptr->orig_handler = cdev->handler;
 	cdev->handler = qdio_int_handler;
 	spin_unlock_irq(get_ccwdev_lock(cdev));
-
-	/* get qdio commands */
-	ciw = ccw_device_get_ciw(cdev, CIW_TYPE_EQUEUE);
-	if (!ciw) {
-		DBF_ERROR("%4x NO EQ", irq_ptr->schid.sch_no);
-		return -EINVAL;
-	}
-	irq_ptr->equeue = *ciw;
-
-	ciw = ccw_device_get_ciw(cdev, CIW_TYPE_AQUEUE);
-	if (!ciw) {
-		DBF_ERROR("%4x NO AQ", irq_ptr->schid.sch_no);
-		return -EINVAL;
-	}
-	irq_ptr->aqueue = *ciw;
-
-	return 0;
 }
 
 void qdio_shutdown_irq(struct qdio_irq *irq)
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index 04074277..ee182cf 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -244,11 +244,6 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
 	if (ret)
 		goto out_disable;
 
-	if (dev_get_uevent_suppress(&sch->dev)) {
-		dev_set_uevent_suppress(&sch->dev, 0);
-		kobject_uevent(&sch->dev.kobj, KOBJ_ADD);
-	}
-
 	VFIO_CCW_MSG_EVENT(4, "bound to subchannel %x.%x.%04x\n",
 			   sch->schid.cssid, sch->schid.ssid,
 			   sch->schid.sch_no);
diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c
index 03311a4..e043ae2 100644
--- a/drivers/s390/crypto/vfio_ap_drv.c
+++ b/drivers/s390/crypto/vfio_ap_drv.c
@@ -17,6 +17,9 @@
 
 #define VFIO_AP_ROOT_NAME "vfio_ap"
 #define VFIO_AP_DEV_NAME "matrix"
+#define AP_QUEUE_ASSIGNED "assigned"
+#define AP_QUEUE_UNASSIGNED "unassigned"
+#define AP_QUEUE_IN_USE "in use"
 
 MODULE_AUTHOR("IBM Corporation");
 MODULE_DESCRIPTION("VFIO AP device driver, Copyright IBM Corp. 2018");
@@ -41,26 +44,95 @@ static struct ap_device_id ap_queue_ids[] = {
 
 MODULE_DEVICE_TABLE(vfio_ap, ap_queue_ids);
 
+static struct ap_matrix_mdev *vfio_ap_mdev_for_queue(struct vfio_ap_queue *q)
+{
+	struct ap_matrix_mdev *matrix_mdev;
+	unsigned long apid = AP_QID_CARD(q->apqn);
+	unsigned long apqi = AP_QID_QUEUE(q->apqn);
+
+	list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) {
+		if (test_bit_inv(apid, matrix_mdev->matrix.apm) &&
+		    test_bit_inv(apqi, matrix_mdev->matrix.aqm))
+			return matrix_mdev;
+	}
+
+	return NULL;
+}
+
+static ssize_t status_show(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	ssize_t nchars = 0;
+	struct vfio_ap_queue *q;
+	struct ap_matrix_mdev *matrix_mdev;
+	struct ap_device *apdev = to_ap_dev(dev);
+
+	mutex_lock(&matrix_dev->lock);
+	q = dev_get_drvdata(&apdev->device);
+	matrix_mdev = vfio_ap_mdev_for_queue(q);
+
+	if (matrix_mdev) {
+		if (matrix_mdev->kvm)
+			nchars = scnprintf(buf, PAGE_SIZE, "%s\n",
+					   AP_QUEUE_IN_USE);
+		else
+			nchars = scnprintf(buf, PAGE_SIZE, "%s\n",
+					   AP_QUEUE_ASSIGNED);
+	} else {
+		nchars = scnprintf(buf, PAGE_SIZE, "%s\n",
+				   AP_QUEUE_UNASSIGNED);
+	}
+
+	mutex_unlock(&matrix_dev->lock);
+
+	return nchars;
+}
+
+static DEVICE_ATTR_RO(status);
+
+static struct attribute *vfio_queue_attrs[] = {
+	&dev_attr_status.attr,
+	NULL,
+};
+
+static const struct attribute_group vfio_queue_attr_group = {
+	.attrs = vfio_queue_attrs,
+};
+
 /**
  * vfio_ap_queue_dev_probe: Allocate a vfio_ap_queue structure and associate it
  *			    with the device as driver_data.
  *
  * @apdev: the AP device being probed
  *
- * Return: returns 0 if the probe succeeded; otherwise, returns -ENOMEM if
- *	   storage could not be allocated for a vfio_ap_queue object.
+ * Return: returns 0 if the probe succeeded; otherwise, returns an error if
+ *	   storage could not be allocated for a vfio_ap_queue object or the
+ *	   sysfs 'status' attribute could not be created for the queue device.
  */
 static int vfio_ap_queue_dev_probe(struct ap_device *apdev)
 {
+	int ret;
 	struct vfio_ap_queue *q;
 
 	q = kzalloc(sizeof(*q), GFP_KERNEL);
 	if (!q)
 		return -ENOMEM;
+
+	mutex_lock(&matrix_dev->lock);
 	dev_set_drvdata(&apdev->device, q);
 	q->apqn = to_ap_queue(&apdev->device)->qid;
 	q->saved_isc = VFIO_AP_ISC_INVALID;
-	return 0;
+
+	ret = sysfs_create_group(&apdev->device.kobj, &vfio_queue_attr_group);
+	if (ret) {
+		dev_set_drvdata(&apdev->device, NULL);
+		kfree(q);
+	}
+
+	mutex_unlock(&matrix_dev->lock);
+
+	return ret;
 }
 
 /**
@@ -75,6 +147,7 @@ static void vfio_ap_queue_dev_remove(struct ap_device *apdev)
 	struct vfio_ap_queue *q;
 
 	mutex_lock(&matrix_dev->lock);
+	sysfs_remove_group(&apdev->device.kobj, &vfio_queue_attr_group);
 	q = dev_get_drvdata(&apdev->device);
 	vfio_ap_mdev_reset_queue(q, 1);
 	dev_set_drvdata(&apdev->device, NULL);
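
The new attribute lets an administrator check a queue's state directly from sysfs. A small userspace sketch; the APQN in the path (04.0005) is an assumed example:

	#include <stdio.h>

	int main(void)
	{
		char status[32] = "";
		FILE *f = fopen("/sys/bus/ap/devices/04.0005/status", "r");

		if (f) {
			if (fgets(status, sizeof(status), f))
				/* one of "assigned", "unassigned", "in use" */
				printf("queue status: %s", status);
			fclose(f);
		}
		return 0;
	}
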
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index 4c3dcc4..9811ab8 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -878,14 +878,13 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms,
 
 	/*
 	 * If a valid target domain is set and this domain is NOT a usage
-	 * domain but a control only domain, use the default domain as target.
+	 * domain but a control only domain, autoselect target domain.
 	 */
 	tdom = *domain;
 	if (tdom < AP_DOMAINS &&
 	    !ap_test_config_usage_domain(tdom) &&
-	    ap_test_config_ctrl_domain(tdom) &&
-	    ap_domain_index >= 0)
-		tdom = ap_domain_index;
+	    ap_test_config_ctrl_domain(tdom))
+		tdom = AUTOSEL_DOM;
 
 	pref_zc = NULL;
 	pref_zq = NULL;
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 26c55f6..fe2c4c6 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -355,8 +355,8 @@ static int qeth_cq_init(struct qeth_card *card)
 		qdio_reset_buffers(card->qdio.c_q->qdio_bufs,
 				   QDIO_MAX_BUFFERS_PER_Q);
 		card->qdio.c_q->next_buf_to_init = 127;
-		rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 1, 0, 127,
-			     NULL);
+
+		rc = qdio_add_bufs_to_input_queue(CARD_DDEV(card), 1, 0, 127);
 		if (rc) {
 			QETH_CARD_TEXT_(card, 2, "1err%d", rc);
 			goto out;
@@ -2926,8 +2926,7 @@ static int qeth_init_qdio_queues(struct qeth_card *card)
 	}
 
 	card->qdio.in_q->next_buf_to_init = QDIO_BUFNR(rx_bufs);
-	rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, 0, rx_bufs,
-		     NULL);
+	rc = qdio_add_bufs_to_input_queue(CARD_DDEV(card), 0, 0, rx_bufs);
 	if (rc) {
 		QETH_CARD_TEXT_(card, 2, "1err%d", rc);
 		return rc;
@@ -3415,8 +3414,9 @@ static unsigned int qeth_rx_refill_queue(struct qeth_card *card,
 			return 0;
 		}
 
-		rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0,
-			     queue->next_buf_to_init, count, NULL);
+		rc = qdio_add_bufs_to_input_queue(CARD_DDEV(card), 0,
+						  queue->next_buf_to_init,
+						  count);
 		if (rc) {
 			QETH_CARD_TEXT(card, 2, "qinberr");
 		}
@@ -3588,8 +3588,8 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
 	}
 
 	QETH_TXQ_STAT_INC(queue, doorbell);
-	rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_OUTPUT, queue->queue_no,
-		     index, count, aob);
+	rc = qdio_add_bufs_to_output_queue(CARD_DDEV(card), queue->queue_no,
+					   index, count, aob);
 
 	switch (rc) {
 	case 0:
@@ -3739,8 +3739,8 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, unsigned int qdio_err,
 		}
 		qeth_scrub_qdio_buffer(buffer, QDIO_MAX_ELEMENTS_PER_BUFFER);
 	}
-	rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, queue,
-		     cq->next_buf_to_init, count, NULL);
+	rc = qdio_add_bufs_to_input_queue(CARD_DDEV(card), queue,
+					  cq->next_buf_to_init, count);
 	if (rc) {
 		dev_warn(&card->gdev->dev,
 			"QDIO reported an error, rc=%i\n", rc);
@@ -5850,10 +5850,10 @@ static unsigned int qeth_rx_poll(struct qeth_card *card, int budget)
 		/* Fetch completed RX buffers: */
 		if (!card->rx.b_count) {
 			card->rx.qdio_err = 0;
-			card->rx.b_count = qdio_inspect_queue(CARD_DDEV(card),
-							      0, true,
-							      &card->rx.b_index,
-							      &card->rx.qdio_err);
+			card->rx.b_count =
+				qdio_inspect_input_queue(CARD_DDEV(card), 0,
+							 &card->rx.b_index,
+							 &card->rx.qdio_err);
 			if (card->rx.b_count <= 0) {
 				card->rx.b_count = 0;
 				break;
@@ -5900,8 +5900,8 @@ static void qeth_cq_poll(struct qeth_card *card)
 		unsigned int start, error;
 		int completed;
 
-		completed = qdio_inspect_queue(CARD_DDEV(card), 1, true, &start,
-					       &error);
+		completed = qdio_inspect_input_queue(CARD_DDEV(card), 1, &start,
+						     &error);
 		if (completed <= 0)
 			return;
 
@@ -6038,8 +6038,8 @@ static int qeth_tx_poll(struct napi_struct *napi, int budget)
 			return 0;
 		}
 
-		completed = qdio_inspect_queue(CARD_DDEV(card), queue_no, false,
-					       &start, &error);
+		completed = qdio_inspect_output_queue(CARD_DDEV(card), queue_no,
+						      &start, &error);
 		if (completed <= 0) {
 			/* Ensure we see TX completion for pending work: */
 			if (napi_complete_done(napi, 0) &&
diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c
index 6a27201..f54f506 100644
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -79,7 +79,7 @@ static void zfcp_qdio_request_tasklet(struct tasklet_struct *tasklet)
 	unsigned int start, error;
 	int completed;
 
-	completed = qdio_inspect_queue(cdev, 0, false, &start, &error);
+	completed = qdio_inspect_output_queue(cdev, 0, &start, &error);
 	if (completed > 0) {
 		if (error) {
 			zfcp_qdio_handler_error(qdio, "qdreqt1", error);
@@ -154,7 +154,7 @@ static void zfcp_qdio_int_resp(struct ccw_device *cdev, unsigned int qdio_err,
 	/*
 	 * put SBALs back to response queue
 	 */
-	if (do_QDIO(cdev, QDIO_FLAG_SYNC_INPUT, 0, idx, count, NULL))
+	if (qdio_add_bufs_to_input_queue(cdev, 0, idx, count))
 		zfcp_erp_adapter_reopen(qdio->adapter, 0, "qdires2");
 }
 
@@ -169,7 +169,7 @@ static void zfcp_qdio_irq_tasklet(struct tasklet_struct *tasklet)
 		tasklet_schedule(&qdio->request_tasklet);
 
 	/* Check the Response Queue: */
-	completed = qdio_inspect_queue(cdev, 0, true, &start, &error);
+	completed = qdio_inspect_input_queue(cdev, 0, &start, &error);
 	if (completed < 0)
 		return;
 	if (completed > 0)
@@ -326,8 +326,9 @@ int zfcp_qdio_send(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req)
 
 	atomic_sub(sbal_number, &qdio->req_q_free);
 
-	retval = do_QDIO(qdio->adapter->ccw_device, QDIO_FLAG_SYNC_OUTPUT, 0,
-			 q_req->sbal_first, sbal_number, NULL);
+	retval = qdio_add_bufs_to_output_queue(qdio->adapter->ccw_device, 0,
+					       q_req->sbal_first, sbal_number,
+					       NULL);
 
 	if (unlikely(retval)) {
 		/* Failed to submit the IO, roll back our modifications. */
@@ -395,7 +396,10 @@ void zfcp_qdio_close(struct zfcp_qdio *qdio)
 	if (!(atomic_read(&adapter->status) & ZFCP_STATUS_ADAPTER_QDIOUP))
 		return;
 
-	/* clear QDIOUP flag, thus do_QDIO is not called during qdio_shutdown */
+	/*
+	 * Clear QDIOUP flag, thus qdio_add_bufs_to_output_queue() is not called
+	 * during qdio_shutdown().
+	 */
 	spin_lock_irq(&qdio->req_q_lock);
 	atomic_andnot(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status);
 	spin_unlock_irq(&qdio->req_q_lock);
@@ -498,8 +502,7 @@ int zfcp_qdio_open(struct zfcp_qdio *qdio)
 		sbale->addr = 0;
 	}
 
-	if (do_QDIO(cdev, QDIO_FLAG_SYNC_INPUT, 0, 0, QDIO_MAX_BUFFERS_PER_Q,
-		    NULL))
+	if (qdio_add_bufs_to_input_queue(cdev, 0, 0, QDIO_MAX_BUFFERS_PER_Q))
 		goto failed_qdio;
 
 	/* set index of first available SBALS / number of available SBALS */